Complete PP-OCRv5 research and v4 vs v5 comparison

## 研究成果

### PP-OCRv5 API 測試
- 成功升級到 PaddleOCR 3.3.2 (PP-OCRv5)
- 理解新 API 結構和調用方式
- 驗證基礎檢測功能

### 關鍵發現
❌ PP-OCRv5 **沒有內建手寫分類功能**
- text_type 字段是語言類型，不是手寫/印刷分類
- 仍需要 OpenCV Method 3 來分離手寫和印刷文字

### 完整 Pipeline 對比測試
- v4 (2.7.3): 檢測 14 個文字 → 4 個候選區域
- v5 (3.3.2): 檢測 50 個文字 → 7 個候選區域
- 主簽名區域：兩個版本幾乎相同 (1150x511 vs 1144x511)

### 性能分析

優點：
- v5 手寫識別準確率 +13.7% (文檔承諾)
- 可能減少漏檢

缺點：
- 過度檢測（印章小字等）
- API 完全重寫，不兼容
- 仍無法替代 OpenCV Method 3

### 文件
- PP_OCRV5_RESEARCH_FINDINGS.md: 完整研究報告
- signature-comparison/: v4 vs v5 對比結果
- test_results/: v5 測試輸出
- test_*_pipeline.py: 完整測試腳本

### 建議
當前方案（v2.7.3 + OpenCV Method 3）已足夠穩定，除非遇到大量漏檢，否則暫不升級到 v5。

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 11:21:55 +08:00
parent 8f231da3bc
commit 21df0ff387
10 changed files with 3726 additions and 0 deletions
--- a/test_pp_ocrv5_api.py
+++ b/test_pp_ocrv5_api.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python3
+"""
+Exercise the basic functionality of the PP-OCRv5 API.
+
+Goals:
+1. Verify the correct way to call the API.
+2. Inspect the complete structure of the returned data.
+3. Compare the detection results of v4 and v5.
+4. Confirm whether a handwriting classification feature exists.
+"""
+
+import sys
+import json
+import pprint
+from pathlib import Path
+
+# Path of the image every test in this script runs OCR on.
+TEST_IMAGE = "/Volumes/NV2/pdf_recognize/test_images/page_0.png"
+
+
+def test_basic_import():
+    """Check that the ``paddleocr`` package can be imported.
+
+    Returns:
+        True when ``PaddleOCR`` imports cleanly; False when the import raises
+        ImportError (the error is printed).
+    """
+    banner = "=" * 60
+    print(banner)
+    print("測試 1: 基礎導入")
+    print(banner)
+
+    try:
+        # The import itself is the thing under test; the name is not used.
+        from paddleocr import PaddleOCR
+    except ImportError as import_error:
+        print(f"❌ 導入失敗: {import_error}")
+        return False
+    else:
+        print("✅ 成功導入 PaddleOCR")
+        return True
+
+
+def test_model_initialization():
+    """Build a PP-OCRv5 ``PaddleOCR`` pipeline with the optional stages disabled.
+
+    Document orientation classification, document unwarping and text-line
+    orientation are all switched off so only detection and recognition run.
+
+    Returns:
+        The constructed ``PaddleOCR`` instance, or None when construction
+        raises (the error and its traceback are printed).
+    """
+    print("\n" + "=" * 60)
+    print("測試 2: 模型初始化")
+    print("=" * 60)
+
+    try:
+        from paddleocr import PaddleOCR
+
+        print("\n初始化 PP-OCRv5...")
+        # NOTE: ``show_log`` is deliberately not passed. It is a PaddleOCR 2.x
+        # option; the 3.x constructor rejects unknown keyword arguments with
+        # "ValueError: Unknown argument: show_log", which made this step fail
+        # unconditionally on the PP-OCRv5 release this script targets.
+        ocr = PaddleOCR(
+            text_detection_model_name="PP-OCRv5_server_det",
+            text_recognition_model_name="PP-OCRv5_server_rec",
+            use_doc_orientation_classify=False,
+            use_doc_unwarping=False,
+            use_textline_orientation=False,
+        )
+
+        print("✅ 模型初始化成功")
+        return ocr
+
+    except Exception as e:
+        # Diagnostic script: report any failure instead of crashing so the
+        # caller can decide whether to continue.
+        print(f"❌ 初始化失敗: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+
+def test_prediction(ocr):
+    """Run ``ocr.predict`` on the test image and report how many results came back.
+
+    Args:
+        ocr: Object exposing a ``predict(path)`` method.
+
+    Returns:
+        Whatever ``ocr.predict`` returned, or None when the test image is
+        missing or the prediction raises (the failure is printed).
+    """
+    import traceback
+
+    separator = "=" * 60
+    print("\n" + separator)
+    print("測試 3: 預測功能")
+    print(separator)
+
+    image_file = Path(TEST_IMAGE)
+    if not image_file.exists():
+        print(f"❌ 測試圖片不存在: {TEST_IMAGE}")
+        return None
+
+    try:
+        print(f"\n預測圖片: {TEST_IMAGE}")
+        predictions = ocr.predict(TEST_IMAGE)
+        # Kept inside the try block: len() on the return value may itself fail.
+        result_count = len(predictions)
+        print(f"✅ 預測成功，返回 {result_count} 個結果")
+    except Exception as error:
+        print(f"❌ 預測失敗: {error}")
+        traceback.print_exc()
+        return None
+
+    return predictions
+
+
+def analyze_result_structure(result):
+    """Inspect the first prediction result and dump its JSON payload to disk.
+
+    Prints the result's type and attributes, lists the fields of its ``.json``
+    payload, looks for field names that hint at a handwriting/print
+    classification, shows the first five recognized texts, and writes the
+    complete payload to ``pp_ocrv5_result.json``.
+
+    Args:
+        result: Sequence of prediction results as returned by ``ocr.predict``.
+
+    Returns:
+        The ``.json`` payload of the first result, or None when ``result`` is
+        empty or the first result has no ``.json`` attribute.
+    """
+    print("\n" + "=" * 60)
+    print("測試 4: 分析返回結果結構")
+    print("=" * 60)
+
+    if not result:
+        print("❌ 沒有結果可分析")
+        return None
+
+    # Only the first result is inspected.
+    first_result = result[0]
+
+    print("\n結果類型:", type(first_result))
+    print("結果屬性:", dir(first_result))
+
+    if not hasattr(first_result, 'json'):
+        print("❌ 沒有找到 .json 屬性")
+        print("\n直接打印結果:")
+        pprint.pprint(first_result)
+        return None
+
+    print("\n✅ 找到 .json 屬性")
+    json_data = first_result.json
+
+    # PaddleOCR 3.x nests the OCR fields (rec_texts, rec_scores, ...) under a
+    # top-level "res" key. Inspect that inner dict when it is present so the
+    # lookups below actually see the fields; otherwise use the payload as is.
+    fields = json_data
+    if isinstance(json_data, dict) and isinstance(json_data.get('res'), dict):
+        fields = json_data['res']
+
+    print("\nJSON 數據鍵值:")
+    for key, value in fields.items():
+        print(f"  - {key}: {type(value)}")
+
+    # Look for field names that could carry a handwriting classification.
+    print("\n查找手寫分類字段...")
+    keywords = ('handwriting', 'handwritten', 'type', 'class', 'category')
+    handwriting_related_keys = [
+        k for k in fields
+        if any(word in k.lower() for word in keywords)
+    ]
+
+    if handwriting_related_keys:
+        print(f"✅ 找到可能相關的字段: {handwriting_related_keys}")
+        for key in handwriting_related_keys:
+            print(f"  {key}: {fields[key]}")
+    else:
+        print("❌ 未找到手寫分類相關字段")
+
+    # Show a sample of the recognized texts.
+    texts = fields.get('rec_texts')
+    if texts:
+        boxes = fields.get('rec_boxes')
+        scores = fields.get('rec_scores')
+        print("\n檢測到的文字 (前 5 個):")
+        for i, text in enumerate(texts[:5]):
+            # Guard the index: the parallel lists are not guaranteed here to
+            # be as long as rec_texts.
+            box = boxes[i] if boxes is not None and i < len(boxes) else None
+            score = scores[i] if scores is not None and i < len(scores) else None
+            print(f"  [{i}] 文字: {text}")
+            print(f"      分數: {score}")
+            print(f"      位置: {box}")
+
+    # Save the complete (unmodified) payload; parents=True so a missing
+    # intermediate directory does not abort the dump.
+    output_path = "/Volumes/NV2/pdf_recognize/test_results/pp_ocrv5_result.json"
+    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+
+    with open(output_path, 'w', encoding='utf-8') as f:
+        # default=str stringifies any value json cannot serialize natively.
+        json.dump(json_data, f, ensure_ascii=False, indent=2, default=str)
+
+    print(f"\n✅ 完整結果已保存到: {output_path}")
+
+    return json_data
+
+
+def compare_with_v4():
+    """Run PP-OCRv4 and PP-OCRv5 on the test image and save a side-by-side summary.
+
+    For each version the summary holds the number of recognized texts plus
+    the first ten texts and scores. It is printed and written to
+    ``v4_vs_v5_comparison.json``. Any exception is reported (message and
+    traceback) rather than propagated.
+    """
+    print("\n" + "=" * 60)
+    print("測試 5: 對比 v4 和 v5")
+    print("=" * 60)
+
+    def summarize(fields):
+        # Count plus the first ten texts/scores of one version's OCR fields.
+        texts = fields.get('rec_texts', [])
+        return {
+            "count": len(texts),
+            "texts": texts[:10],
+            "scores": fields.get('rec_scores', [])[:10],
+        }
+
+    try:
+        from paddleocr import PaddleOCR
+
+        # NOTE: ``show_log`` is not passed to either constructor. It is a
+        # PaddleOCR 2.x option that the 3.x constructor rejects with
+        # "ValueError: Unknown argument: show_log".
+
+        # v4
+        print("\n初始化 PP-OCRv4...")
+        ocr_v4 = PaddleOCR(
+            ocr_version="PP-OCRv4",
+            use_doc_orientation_classify=False,
+        )
+
+        print("預測 v4...")
+        fields_v4 = _extract_ocr_fields(ocr_v4.predict(TEST_IMAGE))
+
+        # v5
+        print("\n初始化 PP-OCRv5...")
+        ocr_v5 = PaddleOCR(
+            text_detection_model_name="PP-OCRv5_server_det",
+            text_recognition_model_name="PP-OCRv5_server_rec",
+            use_doc_orientation_classify=False,
+        )
+
+        print("預測 v5...")
+        fields_v5 = _extract_ocr_fields(ocr_v5.predict(TEST_IMAGE))
+
+        # Compare only when both versions produced a usable payload.
+        if fields_v4 and fields_v5:
+            comparison = {
+                "v4": summarize(fields_v4),
+                "v5": summarize(fields_v5),
+            }
+
+            print("\n對比結果:")
+            print(f"  v4 檢測到 {comparison['v4']['count']} 個文字區域")
+            print(f"  v5 檢測到 {comparison['v5']['count']} 個文字區域")
+
+            output_path = "/Volumes/NV2/pdf_recognize/test_results/v4_vs_v5_comparison.json"
+            # Create the output directory here so this step does not depend
+            # on an earlier step having created it.
+            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+            with open(output_path, 'w', encoding='utf-8') as f:
+                json.dump(comparison, f, ensure_ascii=False, indent=2, default=str)
+
+            print(f"\n✅ 對比結果已保存到: {output_path}")
+
+    except Exception as e:
+        print(f"❌ 對比失敗: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+def _extract_ocr_fields(predictions):
+    """Return the OCR field dict of the first prediction, or None without ``.json``.
+
+    PaddleOCR 3.x nests the fields under a top-level "res" key; that inner
+    dict is returned when present, otherwise the ``.json`` payload itself.
+    """
+    first = predictions[0]
+    if not hasattr(first, 'json'):
+        return None
+    payload = first.json
+    inner = payload.get('res') if isinstance(payload, dict) else None
+    return inner if isinstance(inner, dict) else payload
+
+
+def main():
+    """Run the five checks in order, stopping at the first failed prerequisite.
+
+    Import, initialization and prediction each gate the steps after them;
+    the structure analysis and the v4/v5 comparison run unconditionally once
+    a prediction is available.
+    """
+    print("開始測試 PP-OCRv5 API\n")
+
+    # Step 1: the package must be importable.
+    import_ok = test_basic_import()
+    if not import_ok:
+        print("\n❌ 導入失敗，無法繼續測試")
+        return
+
+    # Step 2: build the OCR pipeline.
+    engine = test_model_initialization()
+    if not engine:
+        print("\n❌ 初始化失敗，無法繼續測試")
+        return
+
+    # Step 3: run a prediction on the test image.
+    predictions = test_prediction(engine)
+    if not predictions:
+        print("\n❌ 預測失敗，無法繼續測試")
+        return
+
+    # Step 4: inspect the structure of what came back.
+    analyze_result_structure(predictions)
+
+    # Step 5: compare v4 against v5.
+    compare_with_v4()
+
+    rule = "=" * 60
+    print("\n" + rule)
+    print("測試完成")
+    print(rule)
+
+
+# Run the test sequence only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()