Complete PP-OCRv5 research and v4 vs v5 comparison
## 研究成果

### PP-OCRv5 API 測試
- 成功升級到 PaddleOCR 3.3.2 (PP-OCRv5)
- 理解新 API 結構和調用方式
- 驗證基礎檢測功能

### 關鍵發現
❌ PP-OCRv5 **沒有內建手寫分類功能**
- text_type 字段是語言類型,不是手寫/印刷分類
- 仍需要 OpenCV Method 3 來分離手寫和印刷文字

### 完整 Pipeline 對比測試
- v4 (2.7.3): 檢測 14 個文字 → 4 個候選區域
- v5 (3.3.2): 檢測 50 個文字 → 7 個候選區域
- 主簽名區域:兩個版本幾乎相同 (1150x511 vs 1144x511)

### 性能分析
優點:
- v5 手寫識別準確率 +13.7% (文檔承諾)
- 可能減少漏檢

缺點:
- 過度檢測(印章小字等)
- API 完全重寫,不兼容
- 仍無法替代 OpenCV Method 3

### 文件
- PP_OCRV5_RESEARCH_FINDINGS.md: 完整研究報告
- signature-comparison/: v4 vs v5 對比結果
- test_results/: v5 測試輸出
- test_*_pipeline.py: 完整測試腳本

### 建議
當前方案(PaddleOCR 2.7.3 / PP-OCRv4 + OpenCV Method 3)已足夠穩定,
除非遇到大量漏檢,否則暫不升級到 v5。

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
254
test_pp_ocrv5_api.py
Normal file
254
test_pp_ocrv5_api.py
Normal file
@@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
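Exercise the basic functionality of the PP-OCRv5 API.

Goals:
1. Verify the correct way to call the API.
2. Inspect the complete structure of the returned data.
3. Compare the v4 and v5 detection results.
4. Confirm whether a handwriting-classification feature exists.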
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import pprint
|
||||
from pathlib import Path
|
||||
|
||||
# Path of the image that the tests in this script run OCR on.
TEST_IMAGE = "/Volumes/NV2/pdf_recognize/test_images/page_0.png"
|
||||
|
||||
|
||||
def test_basic_import():
    """Check that ``PaddleOCR`` can be imported from the ``paddleocr`` package.

    Returns:
        bool: ``True`` when the import succeeds, ``False`` when it raises
        ``ImportError`` (the error is printed).
    """
    banner = "=" * 60
    print(banner)
    print("測試 1: 基礎導入")
    print(banner)

    try:
        # Imported purely to probe availability; the name itself is not used.
        from paddleocr import PaddleOCR  # noqa: F401
    except ImportError as e:
        print(f"❌ 導入失敗: {e}")
        return False

    print("✅ 成功導入 PaddleOCR")
    return True
|
||||
|
||||
|
||||
def test_model_initialization():
    """Build a ``PaddleOCR`` pipeline that uses the PP-OCRv5 server models.

    Document-orientation classification, document unwarping and text-line
    orientation are all switched off, so only text detection and text
    recognition are requested.

    Returns:
        The constructed ``PaddleOCR`` instance, or ``None`` when the import
        or the constructor raised. In that case the error and its traceback
        are printed instead of propagating.
    """
    import traceback

    print("\n" + "=" * 60)
    print("測試 2: 模型初始化")
    print("=" * 60)

    try:
        from paddleocr import PaddleOCR

        print("\n初始化 PP-OCRv5...")
        # ``show_log`` is deliberately NOT passed: it is a PaddleOCR 2.x
        # option, and the 3.x constructor (the one that understands the
        # ``text_*_model_name`` arguments used here) rejects it with
        # "ValueError: Unknown argument: show_log", which made this step
        # fail unconditionally.
        ocr = PaddleOCR(
            text_detection_model_name="PP-OCRv5_server_det",
            text_recognition_model_name="PP-OCRv5_server_rec",
            use_doc_orientation_classify=False,
            use_doc_unwarping=False,
            use_textline_orientation=False,
        )

        print("✅ 模型初始化成功")
        return ocr

    except Exception as e:
        # Diagnostic script: report every failure and let the caller decide
        # whether to continue.
        print(f"❌ 初始化失敗: {e}")
        traceback.print_exc()
        return None
|
||||
|
||||
|
||||
def test_prediction(ocr):
    """Run ``ocr.predict`` on ``TEST_IMAGE`` and report the outcome.

    Args:
        ocr: Object exposing a ``predict(path)`` method.

    Returns:
        The value returned by ``ocr.predict``, or ``None`` when the test
        image does not exist or the prediction raised (the error and its
        traceback are printed).
    """
    import traceback

    separator = "=" * 60
    print("\n" + separator)
    print("測試 3: 預測功能")
    print(separator)

    image_is_present = Path(TEST_IMAGE).exists()
    if not image_is_present:
        print(f"❌ 測試圖片不存在: {TEST_IMAGE}")
        return None

    try:
        print(f"\n預測圖片: {TEST_IMAGE}")
        result = ocr.predict(TEST_IMAGE)
        # ``len`` stays inside the ``try`` so a result without a length is
        # reported as a failed prediction, exactly as before.
        print(f"✅ 預測成功,返回 {len(result)} 個結果")
    except Exception as e:
        print(f"❌ 預測失敗: {e}")
        traceback.print_exc()
        return None

    return result
|
||||
|
||||
|
||||
def analyze_result_structure(
    result,
    output_path="/Volumes/NV2/pdf_recognize/test_results/pp_ocrv5_result.json",
):
    """Print the structure of the first prediction and dump it to a JSON file.

    Only ``result[0]`` is inspected. When it has a ``.json`` attribute, the
    function lists the field names and types, looks for field names that
    might relate to handwriting classification, previews up to five
    recognised texts, and writes the complete ``.json`` value to
    ``output_path``.

    Args:
        result: Sequence of prediction objects (as returned by
            ``ocr.predict``). An empty or ``None`` value is reported and
            skipped.
        output_path: Destination of the JSON dump. Missing parent
            directories are created. Defaults to the location the script
            has always used.

    Returns:
        The value of ``result[0].json`` (unmodified), or ``None`` when there
        is nothing to analyse or the first result has no ``.json`` attribute.
    """
    print("\n" + "=" * 60)
    print("測試 4: 分析返回結果結構")
    print("=" * 60)

    if not result:
        print("❌ 沒有結果可分析")
        return None

    # Only the first result is inspected.
    first_result = result[0]

    print("\n結果類型:", type(first_result))
    print("結果屬性:", dir(first_result))

    if not hasattr(first_result, 'json'):
        print("❌ 沒有找到 .json 屬性")
        print("\n直接打印結果:")
        pprint.pprint(first_result)
        return None

    print("\n✅ 找到 .json 屬性")
    json_data = first_result.json

    # PaddleOCR 3.x wraps the actual fields in a single top-level "res" key
    # ({"res": {"rec_texts": [...], ...}}). Inspect the inner mapping when
    # that wrapper is present so the checks below see the real field names;
    # a flat mapping is used as-is.
    fields = json_data
    if isinstance(json_data, dict) and isinstance(json_data.get('res'), dict):
        fields = json_data['res']

    print("\nJSON 數據鍵值:")
    for key, value in fields.items():
        print(f" - {key}: {type(value)}")

    # Look for field names that could carry a handwriting/print classification.
    print("\n查找手寫分類字段...")
    keywords = ('handwriting', 'handwritten', 'type', 'class', 'category')
    handwriting_related_keys = [
        k for k in fields
        if any(word in k.lower() for word in keywords)
    ]

    if handwriting_related_keys:
        print(f"✅ 找到可能相關的字段: {handwriting_related_keys}")
        for key in handwriting_related_keys:
            print(f" {key}: {fields[key]}")
    else:
        print("❌ 未找到手寫分類相關字段")

    # Preview the first few recognised texts with their score and box.
    texts = fields.get('rec_texts')
    if texts:
        print("\n檢測到的文字 (前 5 個):")
        boxes = fields.get('rec_boxes')
        scores = fields.get('rec_scores')
        for i, text in enumerate(texts[:5]):
            # The parallel lists may be absent or shorter than ``rec_texts``;
            # fall back to None instead of raising IndexError/TypeError.
            box = boxes[i] if boxes is not None and i < len(boxes) else None
            score = scores[i] if scores is not None and i < len(scores) else None
            print(f" [{i}] 文字: {text}")
            print(f" 分數: {score}")
            print(f" 位置: {box}")

    # Save the complete (still wrapped) JSON so nothing is lost on disk.
    # ``parents=True`` lets this work on a machine where the results
    # directory tree does not exist yet.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        # default=str stringifies anything json cannot encode natively.
        json.dump(json_data, f, ensure_ascii=False, indent=2, default=str)

    print(f"\n✅ 完整結果已保存到: {output_path}")

    return json_data
|
||||
|
||||
|
||||
def _ocr_payload(prediction):
    """Return the recognition fields of one prediction, or None without ``.json``."""
    data = getattr(prediction, 'json', None)
    # PaddleOCR 3.x nests the fields under a top-level "res" key; without
    # unwrapping, every ``rec_texts`` lookup would come back empty.
    if isinstance(data, dict) and isinstance(data.get('res'), dict):
        return data['res']
    return data


def _summarize_recognition(payload, limit=10):
    """Build the count / first-``limit`` texts / first-``limit`` scores summary."""
    texts = payload.get('rec_texts', [])
    scores = payload.get('rec_scores', [])
    return {
        "count": len(texts),
        "texts": texts[:limit],
        "scores": scores[:limit],
    }


def compare_with_v4(
    image_path=None,
    output_path="/Volumes/NV2/pdf_recognize/test_results/v4_vs_v5_comparison.json",
):
    """Run PP-OCRv4 and PP-OCRv5 on the same image and save a comparison.

    Both pipelines are created with document-orientation classification
    disabled. For each version the number of recognised texts is printed,
    and the count plus the first ten texts and scores are written to
    ``output_path`` as JSON. Nothing is written when either prediction
    yields no usable ``.json`` data.

    Args:
        image_path: Image to run both models on. ``None`` (the default)
            means the module-level ``TEST_IMAGE``.
        output_path: Destination of the comparison JSON. Missing parent
            directories are created. Defaults to the location the script
            has always used.

    Returns:
        None. Any exception is caught and printed together with its
        traceback.
    """
    import traceback

    print("\n" + "=" * 60)
    print("測試 5: 對比 v4 和 v5")
    print("=" * 60)

    try:
        from paddleocr import PaddleOCR

        if image_path is None:
            image_path = TEST_IMAGE

        # ``show_log`` is deliberately not passed to either constructor: it
        # is a PaddleOCR 2.x option that the 3.x constructor rejects with
        # "ValueError: Unknown argument: show_log".

        # v4
        print("\n初始化 PP-OCRv4...")
        ocr_v4 = PaddleOCR(
            ocr_version="PP-OCRv4",
            use_doc_orientation_classify=False,
        )

        print("預測 v4...")
        result_v4 = ocr_v4.predict(image_path)
        json_v4 = _ocr_payload(result_v4[0])

        # v5
        print("\n初始化 PP-OCRv5...")
        ocr_v5 = PaddleOCR(
            text_detection_model_name="PP-OCRv5_server_det",
            text_recognition_model_name="PP-OCRv5_server_rec",
            use_doc_orientation_classify=False,
        )

        print("預測 v5...")
        result_v5 = ocr_v5.predict(image_path)
        json_v5 = _ocr_payload(result_v5[0])

        # Compare only when both versions produced data.
        if json_v4 and json_v5:
            comparison = {
                "v4": _summarize_recognition(json_v4),
                "v5": _summarize_recognition(json_v5),
            }

            print("\n對比結果:")
            print(f" v4 檢測到 {comparison['v4']['count']} 個文字區域")
            print(f" v5 檢測到 {comparison['v5']['count']} 個文字區域")

            # Create the results directory here as well, so this function
            # does not depend on another step having created it first.
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                # default=str stringifies anything json cannot encode natively.
                json.dump(comparison, f, ensure_ascii=False, indent=2, default=str)

            print(f"\n✅ 對比結果已保存到: {output_path}")

    except Exception as e:
        # Diagnostic script: report the failure instead of aborting the run.
        print(f"❌ 對比失敗: {e}")
        traceback.print_exc()
|
||||
|
||||
|
||||
def main():
    """Run the five checks in order, stopping at the first blocking failure.

    Import, model initialisation and prediction each gate the steps after
    them; the structure analysis and the v4/v5 comparison run only once a
    prediction result is available.
    """
    print("開始測試 PP-OCRv5 API\n")

    # Test 1: import
    import_ok = test_basic_import()
    if not import_ok:
        print("\n❌ 導入失敗,無法繼續測試")
        return

    # Test 2: initialisation
    pipeline = test_model_initialization()
    if not pipeline:
        print("\n❌ 初始化失敗,無法繼續測試")
        return

    # Test 3: prediction
    predictions = test_prediction(pipeline)
    if not predictions:
        print("\n❌ 預測失敗,無法繼續測試")
        return

    # Test 4: structure analysis (its return value is not needed here)
    analyze_result_structure(predictions)

    # Test 5: compare v4 and v5
    compare_with_v4()

    closing_rule = "=" * 60
    print("\n" + closing_rule)
    print("測試完成")
    print(closing_rule)
|
||||
|
||||
|
||||
# Run the test sequence only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user