Complete PP-OCRv5 research and v4 vs v5 comparison

## 研究成果

### PP-OCRv5 API 測試
- 成功升級到 PaddleOCR 3.3.2 (PP-OCRv5)
- 理解新 API 結構和調用方式
- 驗證基礎檢測功能

### 關鍵發現
- PP-OCRv5 **沒有內建手寫分類功能**
- text_type 字段是語言類型,不是手寫/印刷分類
- 仍需要 OpenCV Method 3 來分離手寫和印刷文字

### 完整 Pipeline 對比測試
- v4 (2.7.3): 檢測 14 個文字 → 4 個候選區域
- v5 (3.3.2): 檢測 50 個文字 → 7 個候選區域
- 主簽名區域:兩個版本幾乎相同 (1150x511 vs 1144x511)

### 性能分析
優點:
- v5 手寫識別準確率 +13.7% (文檔承諾)
- 可能減少漏檢

缺點:
- 過度檢測(印章小字等)
- API 完全重寫,不兼容
- 仍無法替代 OpenCV Method 3

### 文件
- PP_OCRV5_RESEARCH_FINDINGS.md: 完整研究報告
- signature-comparison/: v4 vs v5 對比結果
- test_results/: v5 測試輸出
- test_*_pipeline.py: 完整測試腳本

### 建議
當前方案(PaddleOCR 2.7.3 / PP-OCRv4 + OpenCV Method 3)已足夠穩定,
除非遇到大量漏檢,否則暫不升級到 v5。

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-27 11:21:55 +08:00
parent 8f231da3bc
commit 21df0ff387
10 changed files with 3726 additions and 0 deletions

181
visualize_v5_results.py Normal file
View File

@@ -0,0 +1,181 @@
#!/usr/bin/env python3
"""
可視化 PP-OCRv5 的檢測結果
"""
import json
import cv2
import numpy as np
from pathlib import Path
def load_results(result_file="/Volumes/NV2/pdf_recognize/test_results/v5_result.json"):
    """Load the saved PP-OCRv5 detection results.

    Args:
        result_file: Path of the JSON file to read. Defaults to the
            project's v5 result dump, so a bare ``load_results()`` call
            reads the same file as before.

    Returns:
        The value stored under the top-level ``'res'`` key of the JSON
        document.

    Raises:
        FileNotFoundError: If ``result_file`` does not exist.
        KeyError: If the JSON object has no ``'res'`` key.
    """
    with open(result_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data['res']
def draw_detections(image_path, results, output_path):
    """Draw every detection box and its index number onto the page image.

    Args:
        image_path: Path of the image to annotate.
        results: Mapping with ``'rec_texts'`` and ``'rec_boxes'`` entries.
            Each box is unpacked as ``(x_min, y_min, x_max, y_max)``.
        output_path: Destination path for the annotated image.

    Returns:
        The annotated copy of the image, or ``None`` when ``image_path``
        cannot be read. The annotated image is still returned when writing
        ``output_path`` fails; the failure is reported on stdout.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 無法讀取圖片: {image_path}")
        return None

    # Draw on a copy so the loaded image stays untouched.
    vis_img = img.copy()

    rec_texts = results.get('rec_texts', [])
    rec_boxes = results.get('rec_boxes', [])

    print(f"\n檢測到 {len(rec_texts)} 個文字區域")

    green = (0, 255, 0)
    # Only the box and its index are drawn, so texts and scores are not
    # needed inside the loop.
    for i, box in enumerate(rec_boxes):
        # OpenCV drawing calls need integer pixel coordinates; coerce in case
        # the saved boxes hold floats.
        x_min, y_min, x_max, y_max = (int(v) for v in box)

        cv2.rectangle(vis_img, (x_min, y_min), (x_max, y_max), green, 2)

        # Small index label just above the top-left corner of the box.
        cv2.putText(vis_img, f"{i}", (x_min, y_min - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 1)

    # imwrite signals failure through its return value, so only claim
    # success when the file was really written.
    if cv2.imwrite(output_path, vis_img):
        print(f"✅ 可視化結果已保存: {output_path}")
    else:
        print(f"❌ 無法保存可視化結果: {output_path}")

    return vis_img
def _box_size(box):
    """Return ``(width, height)`` of an ``(x_min, y_min, x_max, y_max)`` box as ints."""
    # int() keeps the ``d`` format specs used in the report valid even if the
    # coordinates were stored as floats.
    return int(box[2] - box[0]), int(box[3] - box[1])


def _handwriting_reasons(text, score, height):
    """Return the heuristic labels that mark a detection as possible handwriting."""
    reasons = []
    if height > 50:  # tall box
        reasons.append('大字')
    if score < 0.9:  # low recognition confidence
        reasons.append('低置信度')
    if len(text) <= 3 and height > 40:  # short text in a fairly tall box
        reasons.append('短文大字')
    return reasons


def generate_text_report(
    results,
    report_path="/Volumes/NV2/pdf_recognize/test_results/v5_analysis_report.txt",
):
    """Print a summary of the detection results and save the full list to a file.

    Prints confidence statistics, the first 20 detections and up to 15
    detections flagged by the handwriting heuristics, then writes every
    detection to ``report_path``.

    Args:
        results: Mapping with ``'rec_texts'``, ``'rec_scores'`` and
            ``'rec_boxes'`` lists. Each box is indexed as
            ``(x_min, y_min, x_max, y_max)``. Missing keys are treated as
            empty lists.
        report_path: Destination of the detailed text report. Defaults to
            the project's report location, so existing one-argument calls
            write to the same file as before.

    Returns:
        None.
    """
    rec_texts = results.get('rec_texts', [])
    rec_scores = results.get('rec_scores', [])
    rec_boxes = results.get('rec_boxes', [])

    # One (index, text, score, width, height) row per detection. zip() stops
    # at the shortest list, so mismatched lengths cannot raise IndexError.
    rows = [
        (i, text, score) + _box_size(box)
        for i, (text, score, box) in enumerate(zip(rec_texts, rec_scores, rec_boxes))
    ]

    print("\n" + "=" * 80)
    print("PP-OCRv5 檢測結果報告")
    print("=" * 80)

    print(f"\n總共檢測到: {len(rec_texts)} 個文字區域")

    if rec_scores:
        mean_text = f"{np.mean(rec_scores):.4f}"
        print(f"平均置信度: {mean_text}")
        print(f"最高置信度: {np.max(rec_scores):.4f}")
        print(f"最低置信度: {np.min(rec_scores):.4f}")

        # Confidence buckets.
        total = len(rec_scores)
        high_conf = sum(1 for s in rec_scores if s >= 0.95)
        medium_conf = sum(1 for s in rec_scores if 0.8 <= s < 0.95)
        low_conf = sum(1 for s in rec_scores if s < 0.8)

        print("\n置信度分布:")
        print(f" 高 (≥0.95): {high_conf} 個 ({high_conf/total*100:.1f}%)")
        print(f" 中 (0.8-0.95): {medium_conf} 個 ({medium_conf/total*100:.1f}%)")
        print(f" 低 (<0.8): {low_conf} 個 ({low_conf/total*100:.1f}%)")
    else:
        # No scores: np.max/np.min raise on empty input and the percentages
        # would divide by zero, so the statistics are skipped.
        mean_text = "N/A"
        print(f"平均置信度: {mean_text}")

    # First 20 detections.
    print("\n前 20 個檢測結果:")
    print("-" * 80)
    for i, text, score, width, height in rows[:20]:
        print(f"[{i:2d}] 置信度: {score:.4f} 大小: {width:4d}x{height:3d} 文字: {text}")

    if len(rec_texts) > 20:
        print(f"\n... 還有 {len(rec_texts) - 20} 個結果(省略)")

    # Heuristic search for regions that may be handwriting.
    print("\n" + "=" * 80)
    print("可能的手寫區域分析")
    print("=" * 80)

    potential_handwriting = []
    for i, text, score, width, height in rows:
        reasons = _handwriting_reasons(text, score, height)
        if reasons:
            potential_handwriting.append({
                'index': i,
                'text': text,
                'score': score,
                'height': height,
                'width': width,
                'reason': reasons,
            })

    if potential_handwriting:
        print(f"\n找到 {len(potential_handwriting)} 個可能的手寫區域:")
        print("-" * 80)
        for item in potential_handwriting[:15]:  # show at most 15 entries
            reasons = ', '.join(item['reason'])
            print(f"[{item['index']:2d}] {item['height']:3d}px {item['score']:.4f} ({reasons}) {item['text']}")
    else:
        print("未找到明顯的手寫特徵區域")

    # Write the complete detection list to the report file.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("PP-OCRv5 檢測結果詳細報告\n")
        f.write("=" * 80 + "\n\n")
        f.write(f"總數: {len(rec_texts)}\n")
        f.write(f"平均置信度: {mean_text}\n\n")
        f.write("完整檢測列表:\n")
        f.write("-" * 80 + "\n")
        f.writelines(
            f"[{i:2d}] {score:.4f} {width:4d}x{height:3d} {text}\n"
            for i, text, score, width, height in rows
        )

    print(f"\n詳細報告已保存: {report_path}")
def main():
    """Load the saved v5 results, print the text report, then render the overlay image."""
    rule = "=" * 80

    print("加載 PP-OCRv5 檢測結果...")
    ocr_results = load_results()

    # Text report first.
    generate_text_report(ocr_results)

    # Then the visualization.
    print("\n" + rule)
    print("生成可視化圖片")
    print(rule)

    source_image = "/Volumes/NV2/pdf_recognize/full_page_original.png"
    rendered_path = "/Volumes/NV2/pdf_recognize/test_results/v5_visualization.png"

    # Skip drawing when the original page image is not on disk.
    if not Path(source_image).exists():
        print(f"⚠️ 原始圖片不存在: {source_image}")
    else:
        draw_detections(source_image, ocr_results, rendered_path)

    print("\n" + rule)
    print("分析完成")
    print(rule)
# Run the report and visualization steps only when executed as a script.
if __name__ == "__main__":
    main()