Complete PP-OCRv5 research and v4 vs v5 comparison

## 研究成果 ### PP-OCRv5 API 測試 - 成功升級到 PaddleOCR 3.3.2 (PP-OCRv5) - 理解新 API 結構和調用方式 - 驗證基礎檢測功能 ### 關鍵發現 ❌ PP-OCRv5 **沒有內建手寫分類功能** - text_type 字段是語言類型，不是手寫/印刷分類 - 仍需要 OpenCV Method 3 來分離手寫和印刷文字 ### 完整 Pipeline 對比測試 - v4 (2.7.3): 檢測 14 個文字 → 4 個候選區域 - v5 (3.3.2): 檢測 50 個文字 → 7 個候選區域 - 主簽名區域：兩個版本幾乎相同 (1150x511 vs 1144x511) ### 性能分析優點： - v5 手寫識別準確率 +13.7% (文檔承諾) - 可能減少漏檢缺點： - 過度檢測（印章小字等） - API 完全重寫，不兼容 - 仍無法替代 OpenCV Method 3 ### 文件 - PP_OCRV5_RESEARCH_FINDINGS.md: 完整研究報告 - signature-comparison/: v4 vs v5 對比結果 - test_results/: v5 測試輸出 - test_*_pipeline.py: 完整測試腳本 ### 建議當前方案（v2.7.3 + OpenCV Method 3）已足夠穩定，除非遇到大量漏檢，否則暫不升級到 v5。 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 11:21:55 +08:00
parent 8f231da3bc
commit 21df0ff387
10 changed files with 3726 additions and 0 deletions
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""
+使用 PaddleOCR v2.7.3 (v4) 跑完整的簽名提取 pipeline
+與 v5 對比
+"""
+
+import sys
+import json
+import cv2
+import numpy as np
+import requests
+from pathlib import Path
+
+# 配置
+OCR_SERVER = "http://192.168.30.36:5555"
+OUTPUT_DIR = Path("/Volumes/NV2/pdf_recognize/signature-comparison/v4-current")
+MASKING_PADDING = 0
+
+
+def setup_output_dir():
+    """創建輸出目錄"""
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+    print(f"輸出目錄: {OUTPUT_DIR}")
+
+
+def get_page_image():
+    """獲取測試頁面圖片"""
+    test_image = "/Volumes/NV2/pdf_recognize/full_page_original.png"
+    if Path(test_image).exists():
+        return cv2.imread(test_image)
+    else:
+        print(f"❌ 測試圖片不存在: {test_image}")
+        return None
+
+
+def call_ocr_server(image):
+    """調用服務器端的 PaddleOCR v2.7.3"""
+    print("\n調用 PaddleOCR v2.7.3 服務器...")
+
+    try:
+        import base64
+        _, buffer = cv2.imencode('.png', image)
+        img_base64 = base64.b64encode(buffer).decode('utf-8')
+
+        response = requests.post(
+            f"{OCR_SERVER}/ocr",
+            json={'image': img_base64},
+            timeout=30
+        )
+
+        if response.status_code == 200:
+            result = response.json()
+            print(f"✅ OCR 完成，檢測到 {len(result.get('results', []))} 個文字區域")
+            return result.get('results', [])
+        else:
+            print(f"❌ 服務器錯誤: {response.status_code}")
+            return None
+
+    except Exception as e:
+        print(f"❌ OCR 調用失敗: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+
+def mask_printed_text(image, ocr_results):
+    """遮罩印刷文字"""
+    print("\n遮罩印刷文字...")
+
+    masked_image = image.copy()
+
+    for i, result in enumerate(ocr_results):
+        box = result.get('box')
+        if box is None:
+            continue
+
+        # v2.7.3 返回多邊形格式: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
+        # 轉換為矩形
+        box_points = np.array(box)
+        x_min = int(box_points[:, 0].min())
+        y_min = int(box_points[:, 1].min())
+        x_max = int(box_points[:, 0].max())
+        y_max = int(box_points[:, 1].max())
+
+        cv2.rectangle(
+            masked_image,
+            (x_min - MASKING_PADDING, y_min - MASKING_PADDING),
+            (x_max + MASKING_PADDING, y_max + MASKING_PADDING),
+            (0, 0, 0),
+            -1
+        )
+
+    masked_path = OUTPUT_DIR / "01_masked.png"
+    cv2.imwrite(str(masked_path), masked_image)
+    print(f"✅ 遮罩完成: {masked_path}")
+
+    return masked_image
+
+
+def detect_regions(masked_image):
+    """檢測候選區域"""
+    print("\n檢測候選區域...")
+
+    gray = cv2.cvtColor(masked_image, cv2.COLOR_BGR2GRAY)
+    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)
+
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+    morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)
+
+    cv2.imwrite(str(OUTPUT_DIR / "02_binary.png"), binary)
+    cv2.imwrite(str(OUTPUT_DIR / "03_morphed.png"), morphed)
+
+    contours, _ = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    MIN_AREA = 3000
+    MAX_AREA = 300000
+
+    candidate_regions = []
+    for contour in contours:
+        area = cv2.contourArea(contour)
+        if MIN_AREA <= area <= MAX_AREA:
+            x, y, w, h = cv2.boundingRect(contour)
+            aspect_ratio = w / h if h > 0 else 0
+
+            candidate_regions.append({
+                'box': (x, y, w, h),
+                'area': area,
+                'aspect_ratio': aspect_ratio
+            })
+
+    candidate_regions.sort(key=lambda r: r['area'], reverse=True)
+
+    print(f"✅ 找到 {len(candidate_regions)} 個候選區域")
+
+    return candidate_regions
+
+
+def merge_nearby_regions(regions, h_distance=100, v_distance=50):
+    """合併鄰近區域"""
+    print("\n合併鄰近區域...")
+
+    if not regions:
+        return []
+
+    merged = []
+    used = set()
+
+    for i, r1 in enumerate(regions):
+        if i in used:
+            continue
+
+        x1, y1, w1, h1 = r1['box']
+        merged_box = [x1, y1, x1 + w1, y1 + h1]
+        group = [i]
+
+        for j, r2 in enumerate(regions):
+            if j <= i or j in used:
+                continue
+
+            x2, y2, w2, h2 = r2['box']
+
+            h_dist = min(abs(x1 - (x2 + w2)), abs((x1 + w1) - x2))
+            v_dist = min(abs(y1 - (y2 + h2)), abs((y1 + h1) - y2))
+
+            x_overlap = not (x1 + w1 < x2 or x2 + w2 < x1)
+            y_overlap = not (y1 + h1 < y2 or y2 + h2 < y1)
+
+            if (x_overlap and v_dist <= v_distance) or (y_overlap and h_dist <= h_distance):
+                merged_box[0] = min(merged_box[0], x2)
+                merged_box[1] = min(merged_box[1], y2)
+                merged_box[2] = max(merged_box[2], x2 + w2)
+                merged_box[3] = max(merged_box[3], y2 + h2)
+                group.append(j)
+                used.add(j)
+
+        used.add(i)
+
+        x, y = merged_box[0], merged_box[1]
+        w, h = merged_box[2] - merged_box[0], merged_box[3] - merged_box[1]
+
+        merged.append({
+            'box': (x, y, w, h),
+            'area': w * h,
+            'merged_count': len(group)
+        })
+
+    print(f"✅ 合併後剩餘 {len(merged)} 個區域")
+
+    return merged
+
+
+def extract_signatures(image, regions):
+    """提取簽名區域"""
+    print("\n提取簽名區域...")
+
+    vis_image = image.copy()
+
+    for i, region in enumerate(regions):
+        x, y, w, h = region['box']
+
+        cv2.rectangle(vis_image, (x, y), (x + w, y + h), (0, 255, 0), 3)
+        cv2.putText(vis_image, f"Region {i+1}", (x, y - 10),
+                   cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+        signature = image[y:y+h, x:x+w]
+        sig_path = OUTPUT_DIR / f"signature_{i+1}.png"
+        cv2.imwrite(str(sig_path), signature)
+        print(f"  Region {i+1}: {w}x{h} 像素, 面積={region['area']}")
+
+    vis_path = OUTPUT_DIR / "04_detected_regions.png"
+    cv2.imwrite(str(vis_path), vis_image)
+    print(f"\n✅ 標註圖已保存: {vis_path}")
+
+    return vis_image
+
+
+def generate_summary(ocr_count, regions):
+    """生成摘要報告"""
+    summary = f"""
+PaddleOCR v2.7.3 (v4) 完整 Pipeline 測試結果
+{'=' * 60}
+
+1. OCR 檢測: {ocr_count} 個文字區域
+2. 遮罩印刷文字: 完成
+3. 檢測候選區域: {len(regions)} 個
+4. 提取簽名: {len(regions)} 個
+
+候選區域詳情:
+{'-' * 60}
+"""
+
+    for i, region in enumerate(regions):
+        x, y, w, h = region['box']
+        area = region['area']
+        summary += f"Region {i+1}: 位置({x}, {y}), 大小{w}x{h}, 面積={area}\n"
+
+    summary += f"\n所有結果保存在: {OUTPUT_DIR}\n"
+
+    return summary
+
+
+def main():
+    print("=" * 60)
+    print("PaddleOCR v2.7.3 (v4) 完整 Pipeline 測試")
+    print("=" * 60)
+
+    setup_output_dir()
+
+    print("\n1. 讀取測試圖片...")
+    image = get_page_image()
+    if image is None:
+        return
+    print(f"   圖片大小: {image.shape}")
+
+    cv2.imwrite(str(OUTPUT_DIR / "00_original.png"), image)
+
+    print("\n2. PaddleOCR v2.7.3 檢測文字...")
+    ocr_results = call_ocr_server(image)
+    if ocr_results is None:
+        print("❌ OCR 失敗，終止測試")
+        return
+
+    print("\n3. 遮罩印刷文字...")
+    masked_image = mask_printed_text(image, ocr_results)
+
+    print("\n4. 檢測候選區域...")
+    regions = detect_regions(masked_image)
+
+    print("\n5. 合併鄰近區域...")
+    merged_regions = merge_nearby_regions(regions)
+
+    print("\n6. 提取簽名...")
+    vis_image = extract_signatures(image, merged_regions)
+
+    print("\n7. 生成摘要報告...")
+    summary = generate_summary(len(ocr_results), merged_regions)
+    print(summary)
+
+    summary_path = OUTPUT_DIR / "SUMMARY.txt"
+    with open(summary_path, 'w', encoding='utf-8') as f:
+        f.write(summary)
+
+    print("=" * 60)
+    print("✅ v4 測試完成！")
+    print(f"結果目錄: {OUTPUT_DIR}")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()