## 研究成果

### PP-OCRv5 API 測試
- 成功升級到 PaddleOCR 3.3.2 (PP-OCRv5)
- 理解新 API 結構和調用方式
- 驗證基礎檢測功能

### 關鍵發現
❌ PP-OCRv5 **沒有內建手寫分類功能**
- `text_type` 字段是語言類型,不是手寫/印刷分類
- 仍需要 OpenCV Method 3 來分離手寫和印刷文字

### 完整 Pipeline 對比測試
- v4 (2.7.3): 檢測 14 個文字 → 4 個候選區域
- v5 (3.3.2): 檢測 50 個文字 → 7 個候選區域
- 主簽名區域:兩個版本幾乎相同 (1150x511 vs 1144x511)

### 性能分析
優點:
- v5 手寫識別準確率 +13.7% (文檔承諾)
- 可能減少漏檢

缺點:
- 過度檢測(印章小字等)
- API 完全重寫,不兼容
- 仍無法替代 OpenCV Method 3

### 文件
- `PP_OCRV5_RESEARCH_FINDINGS.md`: 完整研究報告
- `signature-comparison/`: v4 vs v5 對比結果
- `test_results/`: v5 測試輸出
- `test_*_pipeline.py`: 完整測試腳本

### 建議
當前方案(v2.7.3 + OpenCV Method 3)已足夠穩定,
除非遇到大量漏檢,否則暫不升級到 v5。

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
182 lines
5.9 KiB
Python
182 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
可視化 PP-OCRv5 的檢測結果
|
|
"""
|
|
|
|
import json
|
|
import cv2
|
|
import numpy as np
|
|
from pathlib import Path
|
|
|
|
def load_results(result_file="/Volumes/NV2/pdf_recognize/test_results/v5_result.json"):
    """Load the PP-OCRv5 detection results from a JSON file.

    Args:
        result_file: Path of the JSON file to read. Defaults to the location
            this script has always used, so existing ``load_results()`` calls
            behave exactly as before.

    Returns:
        The value stored under the top-level ``"res"`` key of the JSON
        document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"res"`` key.
    """
    with open(result_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data['res']
|
|
|
|
def draw_detections(image_path, results, output_path):
    """Draw every detected text box and its index on the image and save it.

    Args:
        image_path: Path of the image to annotate.
        results: Mapping holding the parallel lists ``rec_texts``,
            ``rec_boxes`` and ``rec_scores``; each box is unpacked as
            ``(x_min, y_min, x_max, y_max)``.
        output_path: Where the annotated image is written.

    Returns:
        The annotated image, or ``None`` when the input image cannot be read.
        The annotated image is returned even if saving it fails; the failure
        is reported on stdout.
    """
    # Read the image; cv2.imread signals failure by returning None.
    img = cv2.imread(str(image_path))
    if img is None:
        print(f"❌ 無法讀取圖片: {image_path}")
        return None

    # Draw on a copy so the freshly loaded image stays untouched.
    vis_img = img.copy()

    rec_texts = results.get('rec_texts', [])
    rec_boxes = results.get('rec_boxes', [])
    rec_scores = results.get('rec_scores', [])

    print(f"\n檢測到 {len(rec_texts)} 個文字區域")

    green = (0, 255, 0)
    # Zip all three lists so that, as before, only entries that have a text,
    # a box and a score are drawn; only the box is needed for drawing.
    for i, (_text, box, _score) in enumerate(zip(rec_texts, rec_boxes, rec_scores)):
        # OpenCV drawing functions require integer pixel coordinates.
        x_min, y_min, x_max, y_max = (int(round(v)) for v in box)

        # Bounding rectangle (green).
        cv2.rectangle(vis_img, (x_min, y_min), (x_max, y_max), green, 2)

        # Index label (small font). putText anchors the text at its
        # bottom-left corner, so a box touching the top edge would push the
        # label off the image; draw it just inside the box in that case.
        label_y = y_min - 5
        if label_y < 12:
            label_y = y_min + 15
        cv2.putText(vis_img, f"{i}", (x_min, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 1)

    # cv2.imwrite reports failure through its return value, so check it
    # instead of unconditionally announcing success.
    if cv2.imwrite(str(output_path), vis_img):
        print(f"✅ 可視化結果已保存: {output_path}")
    else:
        print(f"❌ 無法保存可視化結果: {output_path}")

    return vis_img
|
|
|
|
def generate_text_report(
    results,
    report_path="/Volumes/NV2/pdf_recognize/test_results/v5_analysis_report.txt",
):
    """Print a summary of the detections and write the full list to a file.

    The console output contains confidence statistics, the first 20
    detections, and a list of regions flagged by simple size/confidence
    heuristics as possibly handwritten. The file at ``report_path`` receives
    the total count, the mean confidence and one line per detection.

    Args:
        results: Mapping holding the parallel lists ``rec_texts``,
            ``rec_scores`` and ``rec_boxes``; each box is indexed as
            ``(x_min, y_min, x_max, y_max)``.
        report_path: Destination of the detailed text report. Defaults to the
            location this script has always used.

    Returns:
        None.

    Raises:
        OSError: If the report file cannot be written.
    """
    rec_texts = results.get('rec_texts', [])
    rec_scores = results.get('rec_scores', [])
    rec_boxes = results.get('rec_boxes', [])

    # Compute each box size once and pair it with its text and score. Zipping
    # also guards the loops below against lists of unequal length. The int()
    # cast keeps the ``d`` format specs valid if coordinates arrive as floats.
    entries = [
        (i, text, score, int(box[2] - box[0]), int(box[3] - box[1]))
        for i, (text, score, box) in enumerate(zip(rec_texts, rec_scores, rec_boxes))
    ]

    print("\n" + "=" * 80)
    print("PP-OCRv5 檢測結果報告")
    print("=" * 80)

    print(f"\n總共檢測到: {len(rec_texts)} 個文字區域")

    score_count = len(rec_scores)
    if score_count:
        mean_text = f"{np.mean(rec_scores):.4f}"
        print(f"平均置信度: {mean_text}")
        print(f"最高置信度: {np.max(rec_scores):.4f}")
        print(f"最低置信度: {np.min(rec_scores):.4f}")

        # Confidence buckets.
        high_conf = sum(1 for s in rec_scores if s >= 0.95)
        medium_conf = sum(1 for s in rec_scores if 0.8 <= s < 0.95)
        low_conf = sum(1 for s in rec_scores if s < 0.8)

        print(f"\n置信度分布:")
        print(f" 高 (≥0.95): {high_conf} 個 ({high_conf/score_count*100:.1f}%)")
        print(f" 中 (0.8-0.95): {medium_conf} 個 ({medium_conf/score_count*100:.1f}%)")
        print(f" 低 (<0.8): {low_conf} 個 ({low_conf/score_count*100:.1f}%)")
    else:
        # np.max/np.min raise on empty input and the percentages would divide
        # by zero, so skip the statistics when there is nothing to summarise.
        mean_text = "N/A"
        print("平均置信度: N/A")

    # First 20 detections.
    print("\n前 20 個檢測結果:")
    print("-" * 80)
    for i, text, score, width, height in entries[:20]:
        print(f"[{i:2d}] 置信度: {score:.4f} 大小: {width:4d}x{height:3d} 文字: {text}")

    if len(rec_texts) > 20:
        print(f"\n... 還有 {len(rec_texts) - 20} 個結果(省略)")

    # Look for possibly handwritten regions (low confidence or large glyphs).
    print("\n" + "=" * 80)
    print("可能的手寫區域分析")
    print("=" * 80)

    potential_handwriting = []
    for i, text, score, width, height in entries:
        # A region is flagged when any of these holds:
        #   1. the box is tall (> 50px)
        #   2. the confidence is low (< 0.9)
        #   3. the text is short (<= 3 chars) yet the box is fairly tall (> 40px)
        reasons = []
        if height > 50:
            reasons.append('大字')
        if score < 0.9:
            reasons.append('低置信度')
        if len(text) <= 3 and height > 40:
            reasons.append('短文大字')

        if reasons:
            potential_handwriting.append({
                'index': i,
                'text': text,
                'score': score,
                'height': height,
                'width': width,
                'reason': reasons,
            })

    if potential_handwriting:
        print(f"\n找到 {len(potential_handwriting)} 個可能的手寫區域:")
        print("-" * 80)
        for item in potential_handwriting[:15]:  # show only the first 15
            reasons = ', '.join(item['reason'])
            print(f"[{item['index']:2d}] {item['height']:3d}px {item['score']:.4f} ({reasons}) {item['text']}")
    else:
        print("未找到明顯的手寫特徵區域")

    # Write the detailed report to disk.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(f"PP-OCRv5 檢測結果詳細報告\n")
        f.write("=" * 80 + "\n\n")
        f.write(f"總數: {len(rec_texts)}\n")
        f.write(f"平均置信度: {mean_text}\n\n")
        f.write("完整檢測列表:\n")
        f.write("-" * 80 + "\n")
        f.writelines(
            f"[{i:2d}] {score:.4f} {width:4d}x{height:3d} {text}\n"
            for i, text, score, width, height in entries
        )

    print(f"\n詳細報告已保存: {report_path}")
|
|
|
|
def main():
    """Run the analysis: load results, print/write the report, draw boxes."""
    banner = "=" * 80

    # Load the detection results.
    print("加載 PP-OCRv5 檢測結果...")
    results = load_results()

    # Produce the text report.
    generate_text_report(results)

    # Visualization.
    print("\n" + banner)
    print("生成可視化圖片")
    print(banner)

    image_path = "/Volumes/NV2/pdf_recognize/full_page_original.png"
    output_path = "/Volumes/NV2/pdf_recognize/test_results/v5_visualization.png"

    # Only draw when the source page image is actually present.
    if not Path(image_path).exists():
        print(f"⚠️ 原始圖片不存在: {image_path}")
    else:
        draw_detections(image_path, results, output_path)

    print("\n" + banner)
    print("分析完成")
    print(banner)
|
|
|
|
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|