Complete PP-OCRv5 research and v4 vs v5 comparison

## 研究成果

### PP-OCRv5 API 測試
- 成功升級到 PaddleOCR 3.3.2 (PP-OCRv5)
- 理解新 API 結構和調用方式
- 驗證基礎檢測功能

### 關鍵發現
❌ PP-OCRv5 **沒有內建手寫分類功能**
- text_type 字段是語言類型，不是手寫/印刷分類
- 仍需要 OpenCV Method 3 來分離手寫和印刷文字

### 完整 Pipeline 對比測試
- v4 (2.7.3): 檢測 14 個文字 → 4 個候選區域
- v5 (3.3.2): 檢測 50 個文字 → 7 個候選區域
- 主簽名區域：兩個版本幾乎相同 (1150x511 vs 1144x511)

### 性能分析
優點：
- v5 手寫識別準確率 +13.7% (文檔承諾)
- 可能減少漏檢

缺點：
- 過度檢測（印章小字等）
- API 完全重寫，不兼容
- 仍無法替代 OpenCV Method 3

### 文件
- PP_OCRV5_RESEARCH_FINDINGS.md: 完整研究報告
- signature-comparison/: v4 vs v5 對比結果
- test_results/: v5 測試輸出
- test_*_pipeline.py: 完整測試腳本

### 建議
當前方案（v2.7.3 + OpenCV Method 3）已足夠穩定，除非遇到大量漏檢，否則暫不升級到 v5。

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 11:21:55 +08:00
parent 8f231da3bc
commit 21df0ff387
10 changed files with 3726 additions and 0 deletions
--- a/visualize_v5_results.py
+++ b/visualize_v5_results.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+"""
+可視化 PP-OCRv5 的檢測結果
+"""
+
+import json
+import cv2
+import numpy as np
+from pathlib import Path
+
+def load_results(result_file="/Volumes/NV2/pdf_recognize/test_results/v5_result.json"):
+    """Load the saved PP-OCRv5 detection result.
+
+    Args:
+        result_file: Path of the JSON file to read. The default is the
+            location this script has always used, so existing
+            zero-argument calls behave exactly as before.
+
+    Returns:
+        The value stored under the top-level ``'res'`` key of the JSON
+        document.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If the file does not contain valid JSON.
+        KeyError: If the document has no ``'res'`` key.
+    """
+    with open(result_file, 'r', encoding='utf-8') as f:
+        return json.load(f)['res']
+
+def draw_detections(image_path, results, output_path):
+    """Draw every detected text box, labelled with its index, onto an image.
+
+    Args:
+        image_path: Path of the image to annotate.
+        results: Mapping holding parallel ``rec_texts``, ``rec_boxes`` and
+            ``rec_scores`` lists; each box is unpacked as
+            ``[x_min, y_min, x_max, y_max]``.
+        output_path: Path the annotated image is written to.
+
+    Returns:
+        The annotated copy of the image, or ``None`` when ``image_path``
+        cannot be read. The annotated image is still returned when saving
+        fails; the failure is reported on stdout.
+    """
+    img = cv2.imread(image_path)
+    if img is None:
+        print(f"❌ 無法讀取圖片: {image_path}")
+        return None
+
+    # Draw on a copy so the loaded image itself stays untouched.
+    vis_img = img.copy()
+
+    rec_texts = results.get('rec_texts', [])
+    rec_boxes = results.get('rec_boxes', [])
+    rec_scores = results.get('rec_scores', [])
+
+    print(f"\n檢測到 {len(rec_texts)} 個文字區域")
+
+    # Only the box is drawn, but all three lists are zipped so that just the
+    # detections having a text, a box and a score are rendered.
+    for i, (_, box, _) in enumerate(zip(rec_texts, rec_boxes, rec_scores)):
+        # Coerce to int: the drawing calls take integer pixel coordinates.
+        x_min, y_min, x_max, y_max = (int(v) for v in box)
+
+        # Green rectangle around the detection.
+        cv2.rectangle(vis_img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
+
+        # Small index label just above the top-left corner.
+        cv2.putText(vis_img, f"{i}", (x_min, y_min - 5),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
+
+    # cv2.imwrite signals failure through its return value rather than an
+    # exception, so check it before claiming the file was saved.
+    if cv2.imwrite(output_path, vis_img):
+        print(f"✅ 可視化結果已保存: {output_path}")
+    else:
+        print(f"❌ 無法保存可視化結果: {output_path}")
+
+    return vis_img
+
+def _box_size(box):
+    """Return ``(width, height)`` of an ``[x_min, y_min, x_max, y_max]`` box.
+
+    Both values are converted to ``int`` so the ``d`` format specs used in the
+    report keep working even if coordinates arrive as floats.
+    """
+    return int(box[2] - box[0]), int(box[3] - box[1])
+
+
+def generate_text_report(
+    results,
+    report_path="/Volumes/NV2/pdf_recognize/test_results/v5_analysis_report.txt",
+):
+    """Print a summary of the detections and write a detailed report file.
+
+    The summary covers confidence statistics, the first 20 detections and a
+    heuristic list of regions that may be handwriting (tall boxes, low
+    confidence, or very short text in a fairly tall box).
+
+    Args:
+        results: Mapping holding parallel ``rec_texts``, ``rec_scores`` and
+            ``rec_boxes`` lists; missing keys are treated as empty lists.
+        report_path: Path the detailed text report is written to. The default
+            is the location this script has always used.
+
+    Returns:
+        None. Output goes to stdout and to ``report_path``.
+
+    Raises:
+        OSError: If ``report_path`` cannot be opened for writing.
+    """
+    rec_texts = results.get('rec_texts', [])
+    rec_scores = results.get('rec_scores', [])
+    rec_boxes = results.get('rec_boxes', [])
+
+    # zip() stops at the shortest list, so a truncated field can never cause
+    # an IndexError in the per-detection loops below.
+    detections = list(zip(rec_texts, rec_scores, rec_boxes))
+    score_count = len(rec_scores)
+
+    print("\n" + "=" * 80)
+    print("PP-OCRv5 檢測結果報告")
+    print("=" * 80)
+
+    print(f"\n總共檢測到: {len(rec_texts)} 個文字區域")
+
+    if score_count:
+        print(f"平均置信度: {np.mean(rec_scores):.4f}")
+        print(f"最高置信度: {np.max(rec_scores):.4f}")
+        print(f"最低置信度: {np.min(rec_scores):.4f}")
+
+        # Confidence buckets.
+        high_conf = sum(1 for s in rec_scores if s >= 0.95)
+        medium_conf = sum(1 for s in rec_scores if 0.8 <= s < 0.95)
+        low_conf = sum(1 for s in rec_scores if s < 0.8)
+
+        print("\n置信度分布:")
+        print(f"  高 (≥0.95): {high_conf} 個 ({high_conf/score_count*100:.1f}%)")
+        print(f"  中 (0.8-0.95): {medium_conf} 個 ({medium_conf/score_count*100:.1f}%)")
+        print(f"  低 (<0.8): {low_conf} 個 ({low_conf/score_count*100:.1f}%)")
+    else:
+        # np.max/np.min raise on an empty sequence and the percentages would
+        # divide by zero, so the statistics are skipped when there are no scores.
+        print("沒有置信度數據，略過統計")
+
+    # First 20 detections.
+    print("\n前 20 個檢測結果:")
+    print("-" * 80)
+    for i, (text, score, box) in enumerate(detections[:20]):
+        width, height = _box_size(box)
+        print(f"[{i:2d}] 置信度: {score:.4f}  大小: {width:4d}x{height:3d}  文字: {text}")
+
+    if len(rec_texts) > 20:
+        print(f"\n... 還有 {len(rec_texts) - 20} 個結果（省略）")
+
+    # Heuristic search for regions that might be handwriting.
+    print("\n" + "=" * 80)
+    print("可能的手寫區域分析")
+    print("=" * 80)
+
+    potential_handwriting = []
+    for i, (text, score, box) in enumerate(detections):
+        width, height = _box_size(box)
+
+        # A region is a candidate when any of these holds:
+        #   1. the box is tall (> 50 px)
+        #   2. the recognition confidence is low (< 0.9)
+        #   3. the text is short (<= 3 chars) in a fairly tall box (> 40 px)
+        reasons = []
+        if height > 50:
+            reasons.append('大字')
+        if score < 0.9:
+            reasons.append('低置信度')
+        if len(text) <= 3 and height > 40:
+            reasons.append('短文大字')
+
+        if reasons:
+            potential_handwriting.append({
+                'index': i,
+                'text': text,
+                'score': score,
+                'height': height,
+                'width': width,
+                'reason': reasons,
+            })
+
+    if potential_handwriting:
+        print(f"\n找到 {len(potential_handwriting)} 個可能的手寫區域:")
+        print("-" * 80)
+        for item in potential_handwriting[:15]:  # show at most the first 15
+            reasons = ', '.join(item['reason'])
+            print(f"[{item['index']:2d}] {item['height']:3d}px  {item['score']:.4f}  ({reasons})  {item['text']}")
+    else:
+        print("未找到明顯的手寫特徵區域")
+
+    # Write the full, untruncated list to the report file.
+    mean_text = f"{np.mean(rec_scores):.4f}" if score_count else "N/A"
+    with open(report_path, 'w', encoding='utf-8') as f:
+        f.write("PP-OCRv5 檢測結果詳細報告\n")
+        f.write("=" * 80 + "\n\n")
+        f.write(f"總數: {len(rec_texts)}\n")
+        f.write(f"平均置信度: {mean_text}\n\n")
+        f.write("完整檢測列表:\n")
+        f.write("-" * 80 + "\n")
+        for i, (text, score, box) in enumerate(detections):
+            width, height = _box_size(box)
+            f.write(f"[{i:2d}] {score:.4f}  {width:4d}x{height:3d}  {text}\n")
+
+    print(f"\n詳細報告已保存: {report_path}")
+
+def main():
+    """Load the saved v5 result, print/write the report, then render the boxes.
+
+    The visualization step is skipped with a warning when the source page
+    image is not present on disk.
+    """
+    divider = "=" * 80
+    image_path = "/Volumes/NV2/pdf_recognize/full_page_original.png"
+    output_path = "/Volumes/NV2/pdf_recognize/test_results/v5_visualization.png"
+
+    # Step 1: read the detection result and produce the text report.
+    print("加載 PP-OCRv5 檢測結果...")
+    results = load_results()
+    generate_text_report(results)
+
+    # Step 2: draw the detections onto the original page image.
+    print("\n" + divider)
+    print("生成可視化圖片")
+    print(divider)
+
+    if not Path(image_path).exists():
+        print(f"⚠️  原始圖片不存在: {image_path}")
+    else:
+        draw_detections(image_path, results, output_path)
+
+    print("\n" + divider)
+    print("分析完成")
+    print(divider)
+
+# Script entry point: run the analysis only when executed directly, not on import.
+if __name__ == "__main__":
+    main()