## 研究成果

### PP-OCRv5 API 測試
- 成功升級到 PaddleOCR 3.3.2 (PP-OCRv5)
- 理解新 API 結構和調用方式
- 驗證基礎檢測功能

### 關鍵發現
❌ PP-OCRv5 **沒有內建手寫分類功能**
- text_type 字段是語言類型,不是手寫/印刷分類
- 仍需要 OpenCV Method 3 來分離手寫和印刷文字

### 完整 Pipeline 對比測試
- v4 (2.7.3): 檢測 14 個文字 → 4 個候選區域
- v5 (3.3.2): 檢測 50 個文字 → 7 個候選區域
- 主簽名區域:兩個版本幾乎相同 (1150x511 vs 1144x511)

### 性能分析
優點:
- v5 手寫識別準確率 +13.7% (文檔承諾)
- 可能減少漏檢

缺點:
- 過度檢測(印章小字等)
- API 完全重寫,不兼容
- 仍無法替代 OpenCV Method 3

### 文件
- PP_OCRV5_RESEARCH_FINDINGS.md: 完整研究報告
- signature-comparison/: v4 vs v5 對比結果
- test_results/: v5 測試輸出
- test_*_pipeline.py: 完整測試腳本

### 建議
當前方案(v2.7.3 + OpenCV Method 3)已足夠穩定,
除非遇到大量漏檢,否則暫不升級到 v5。

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
255 lines
7.3 KiB
Python
255 lines
7.3 KiB
Python
#!/usr/bin/env python3
"""
Smoke-test the basic functionality of the PP-OCRv5 API.

Goals:
1. Verify the correct way to call the API.
2. Inspect the complete structure of the returned data.
3. Compare the detection results of v4 and v5.
4. Confirm whether a handwriting-classification feature exists.
"""

import sys
import json
import pprint
from pathlib import Path

# Path of the test image.
TEST_IMAGE = "/Volumes/NV2/pdf_recognize/test_images/page_0.png"
def test_basic_import():
    """Check that ``PaddleOCR`` can be imported from ``paddleocr``.

    Prints a section banner followed by a success or failure line.

    Returns:
        ``True`` when the import succeeds, ``False`` when it raises
        ``ImportError``.
    """
    banner = "=" * 60
    print(banner)
    print("測試 1: 基礎導入")
    print(banner)

    try:
        # Imported only to probe availability; the name itself is unused.
        from paddleocr import PaddleOCR  # noqa: F401
    except ImportError as exc:
        print(f"❌ 導入失敗: {exc}")
        return False

    print("✅ 成功導入 PaddleOCR")
    return True
def test_model_initialization():
    """Construct a PP-OCRv5 pipeline and return it.

    The pipeline uses the ``PP-OCRv5_server_det`` and ``PP-OCRv5_server_rec``
    models, with document-orientation classification, document unwarping and
    text-line orientation all switched off.

    Returns:
        The ``PaddleOCR`` instance on success, or ``None`` when the import or
        the construction raises (the error and its traceback are printed).
    """
    print("\n" + "=" * 60)
    print("測試 2: 模型初始化")
    print("=" * 60)

    try:
        from paddleocr import PaddleOCR

        print("\n初始化 PP-OCRv5...")
        # `show_log` is deliberately not passed: it was a PaddleOCR 2.x option
        # and the 3.x constructor rejects it as an unknown argument, which
        # made this step fail before any model was loaded.
        ocr = PaddleOCR(
            text_detection_model_name="PP-OCRv5_server_det",
            text_recognition_model_name="PP-OCRv5_server_rec",
            use_doc_orientation_classify=False,
            use_doc_unwarping=False,
            use_textline_orientation=False,
        )

        print("✅ 模型初始化成功")
        return ocr

    except Exception as e:
        # Diagnostic script: report any failure and let the caller stop.
        print(f"❌ 初始化失敗: {e}")
        import traceback
        traceback.print_exc()
        return None
def test_prediction(ocr):
    """Run ``ocr.predict`` on ``TEST_IMAGE`` and return what it yields.

    Args:
        ocr: Object exposing a ``predict(path)`` method.

    Returns:
        The value returned by ``ocr.predict``, or ``None`` when the test
        image does not exist or any exception is raised while predicting
        (the error and its traceback are printed).
    """
    separator = "=" * 60
    print("\n" + separator)
    print("測試 3: 預測功能")
    print(separator)

    # Guard clause: nothing to predict without the input image.
    image_exists = Path(TEST_IMAGE).exists()
    if not image_exists:
        print(f"❌ 測試圖片不存在: {TEST_IMAGE}")
        return None

    try:
        print(f"\n預測圖片: {TEST_IMAGE}")
        prediction = ocr.predict(TEST_IMAGE)

        # len() stays inside the try so a result without a length is
        # reported as a failed prediction.
        print(f"✅ 預測成功,返回 {len(prediction)} 個結果")
        return prediction

    except Exception as exc:
        print(f"❌ 預測失敗: {exc}")
        import traceback
        traceback.print_exc()
        return None
def analyze_result_structure(
    result,
    output_path="/Volumes/NV2/pdf_recognize/test_results/pp_ocrv5_result.json",
):
    """Inspect the first prediction result and dump its JSON payload.

    Prints the type and attributes of ``result[0]``. When that object has a
    ``json`` attribute, the function lists the payload fields, searches the
    field names for hints of handwriting classification, shows the first five
    recognised texts, and writes the full payload to ``output_path``.

    Args:
        result: Sequence of prediction results; only the first is inspected.
        output_path: Destination of the JSON dump. Missing parent directories
            are created. Defaults to the original hard-coded location.

    Returns:
        The value of ``result[0].json``, or ``None`` when ``result`` is empty
        or its first element has no ``json`` attribute.
    """
    print("\n" + "=" * 60)
    print("測試 4: 分析返回結果結構")
    print("=" * 60)

    if not result:
        print("❌ 沒有結果可分析")
        return None

    # Only the first result is analysed.
    first_result = result[0]

    print("\n結果類型:", type(first_result))
    print("結果屬性:", dir(first_result))

    if not hasattr(first_result, 'json'):
        print("❌ 沒有找到 .json 屬性")
        print("\n直接打印結果:")
        pprint.pprint(first_result)
        return None

    print("\n✅ 找到 .json 屬性")
    json_data = first_result.json

    # NOTE(review): PaddleOCR 3.x appears to nest the actual fields under a
    # top-level "res" key — confirm against the installed version. Unwrapping
    # only when that key holds a dict keeps flat payloads working unchanged.
    fields = json_data
    if isinstance(json_data, dict) and isinstance(json_data.get('res'), dict):
        fields = json_data['res']

    print("\nJSON 數據鍵值:")
    for key, value in fields.items():
        print(f" - {key}: {type(value)}")

    # Look for field names that could carry a handwriting/print classification.
    print("\n查找手寫分類字段...")
    keywords = ('handwriting', 'handwritten', 'type', 'class', 'category')
    handwriting_related_keys = [
        k for k in fields
        if any(word in str(k).lower() for word in keywords)
    ]

    if handwriting_related_keys:
        print(f"✅ 找到可能相關的字段: {handwriting_related_keys}")
        for key in handwriting_related_keys:
            print(f" {key}: {fields[key]}")
    else:
        print("❌ 未找到手寫分類相關字段")

    # Show a sample of the recognised texts.
    texts = fields.get('rec_texts')
    if texts:
        boxes = fields.get('rec_boxes')
        scores = fields.get('rec_scores')
        print("\n檢測到的文字 (前 5 個):")
        for i, text in enumerate(texts[:5]):
            # The parallel lists may be absent or shorter than rec_texts;
            # fall back to None instead of raising IndexError.
            box = boxes[i] if boxes is not None and i < len(boxes) else None
            score = scores[i] if scores is not None and i < len(scores) else None
            print(f" [{i}] 文字: {text}")
            print(f" 分數: {score}")
            print(f" 位置: {box}")

    # Persist the complete payload (including any "res" wrapper).
    try:
        target = Path(output_path)
        # parents=True: the results directory may not exist at any level yet.
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open('w', encoding='utf-8') as f:
            json.dump(json_data, f, ensure_ascii=False, indent=2, default=str)
    except OSError as e:
        # A failed dump must not discard the analysis already performed.
        print(f"\n❌ 保存結果失敗: {e}")
    else:
        print(f"\n✅ 完整結果已保存到: {output_path}")

    return json_data
def compare_with_v4():
    """Run PP-OCRv4 and PP-OCRv5 on ``TEST_IMAGE`` and save a comparison.

    Builds one pipeline per version, predicts on the same image, prints how
    many texts each recognised, and writes the counts plus the first ten
    texts and scores of each version to ``v4_vs_v5_comparison.json``.

    Any exception (including a missing ``paddleocr`` package) is caught and
    reported together with its traceback.

    Returns:
        None.
    """
    print("\n" + "=" * 60)
    print("測試 5: 對比 v4 和 v5")
    print("=" * 60)

    def _payload(results):
        """Return the JSON dict of the first result, or None if unavailable."""
        if not results:
            return None
        data = getattr(results[0], 'json', None)
        # NOTE(review): PaddleOCR 3.x appears to nest the fields under "res" —
        # confirm against the installed version; flat payloads pass through.
        if isinstance(data, dict) and isinstance(data.get('res'), dict):
            return data['res']
        return data

    def _summary(payload):
        """Condense one payload to its text count and first ten texts/scores."""
        texts = payload.get('rec_texts') or []
        scores = payload.get('rec_scores') or []
        return {
            "count": len(texts),
            "texts": texts[:10],  # first 10 only
            "scores": scores[:10],
        }

    try:
        from paddleocr import PaddleOCR

        # `show_log` is deliberately not passed to either constructor: the
        # PaddleOCR 3.x constructor rejects it as an unknown argument.

        # v4
        print("\n初始化 PP-OCRv4...")
        ocr_v4 = PaddleOCR(
            ocr_version="PP-OCRv4",
            use_doc_orientation_classify=False,
        )

        print("預測 v4...")
        json_v4 = _payload(ocr_v4.predict(TEST_IMAGE))

        # v5
        print("\n初始化 PP-OCRv5...")
        ocr_v5 = PaddleOCR(
            text_detection_model_name="PP-OCRv5_server_det",
            text_recognition_model_name="PP-OCRv5_server_rec",
            use_doc_orientation_classify=False,
        )

        print("預測 v5...")
        json_v5 = _payload(ocr_v5.predict(TEST_IMAGE))

        if not (json_v4 and json_v5):
            # Previously this case was skipped without any message.
            print("\n❌ 無法取得 v4 或 v5 的 JSON 結果,略過對比")
            return

        comparison = {
            "v4": _summary(json_v4),
            "v5": _summary(json_v5),
        }

        print("\n對比結果:")
        print(f" v4 檢測到 {comparison['v4']['count']} 個文字區域")
        print(f" v5 檢測到 {comparison['v5']['count']} 個文字區域")

        # Save the comparison; create the results directory if it is missing.
        output_path = "/Volumes/NV2/pdf_recognize/test_results/v4_vs_v5_comparison.json"
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(comparison, f, ensure_ascii=False, indent=2, default=str)

        print(f"\n✅ 對比結果已保存到: {output_path}")

    except Exception as e:
        # Diagnostic script: report the failure instead of aborting the run.
        print(f"❌ 對比失敗: {e}")
        import traceback
        traceback.print_exc()
def main():
    """Run the five checks in order, stopping at the first failed prerequisite.

    The import check, model initialisation and prediction each gate the
    following steps; structure analysis and the v4/v5 comparison run only
    when all three succeed.

    Returns:
        None.
    """
    print("開始測試 PP-OCRv5 API\n")

    # Test 1: import
    if not test_basic_import():
        print("\n❌ 導入失敗,無法繼續測試")
        return

    # Test 2: initialisation (returns None on failure)
    ocr = test_model_initialization()
    if ocr is None:
        print("\n❌ 初始化失敗,無法繼續測試")
        return

    # Test 3: prediction (None or an empty result both count as failure)
    result = test_prediction(ocr)
    if not result:
        print("\n❌ 預測失敗,無法繼續測試")
        return

    # Test 4: structure analysis; its return value is not needed here.
    analyze_result_structure(result)

    # Test 5: compare v4 and v5
    compare_with_v4()

    print("\n" + "=" * 60)
    print("測試完成")
    print("=" * 60)
if __name__ == "__main__":
    # Run the diagnostics only when this file is executed as a script.
    main()