pdf_signature_extraction/test_v5_full_pipeline.py

#!/usr/bin/env python3
"""
使用 PP-OCRv5 跑完整的簽名提取 pipeline

流程：
1. 使用服務器上的 PP-OCRv5 檢測文字
2. 遮罩印刷文字
3. 檢測候選區域
4. 提取簽名
"""

import sys
import json
import cv2
import numpy as np
import requests
from pathlib import Path

# Configuration
# Base URL of the OCR HTTP service; call_ocr_server() posts to "<OCR_SERVER>/ocr".
OCR_SERVER = "http://192.168.30.36:5555"
# NOTE(review): not referenced by any function visible in this file.
PDF_PATH = "/Volumes/NV2/pdf_recognize/test.pdf"
# Directory that receives every intermediate image, signature crop and the summary.
OUTPUT_DIR = Path("/Volumes/NV2/pdf_recognize/test_results/v5_pipeline")
# Extra pixels added on each side of an OCR box when it is blacked out.
MASKING_PADDING = 0


def setup_output_dir():
    """Ensure OUTPUT_DIR exists (creating missing parents) and print its path."""
    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    print(f"輸出目錄: {OUTPUT_DIR}")


def get_page_image():
    """Load the pre-rendered test page from its fixed location.

    Returns:
        Whatever cv2.imread yields for the test image, or None (after
        printing an error) when the file is not present on disk.
    """
    # Reuse an already-rendered page image rather than rasterising the PDF.
    test_image = "/Volumes/NV2/pdf_recognize/full_page_original.png"

    if not Path(test_image).exists():
        print(f"❌ 測試圖片不存在: {test_image}")
        return None

    return cv2.imread(test_image)


def call_ocr_server(image):
    """Submit a page image to the remote PP-OCRv5 service.

    The image is PNG-encoded, base64-wrapped and POSTed as JSON
    ({'image': <base64 string>}) to "<OCR_SERVER>/ocr" with a 30 second
    timeout.

    Args:
        image: image array accepted by cv2.imencode.

    Returns:
        The value stored under 'results' in the JSON reply ([] when the key
        is absent), or None when the server answers with a non-200 status or
        any exception is raised along the way.
    """
    print("\n調用 PP-OCRv5 服務器...")

    try:
        # Serialise the image so it can travel inside a JSON body.
        import base64
        _, png_bytes = cv2.imencode('.png', image)
        payload = {'image': base64.b64encode(png_bytes).decode('utf-8')}

        # Send the request.
        response = requests.post(f"{OCR_SERVER}/ocr", json=payload, timeout=30)

        if response.status_code != 200:
            print(f"❌ 服務器錯誤: {response.status_code}")
            return None

        detections = response.json().get('results', [])
        print(f"✅ OCR 完成，檢測到 {len(detections)} 個文字區域")
        return detections

    except Exception as e:
        # Best-effort boundary: report the failure and let the caller abort.
        print(f"❌ OCR 調用失敗: {e}")
        import traceback
        traceback.print_exc()
        return None


def mask_printed_text(image, ocr_results):
    """Black out every OCR-detected text box on a copy of the page.

    Args:
        image: page image array; it is copied, not modified.
        ocr_results: iterable of dicts. Entries whose 'box' is missing or
            None are skipped. The box is unpacked as [x, y, w, h], as the
            original code documents — TODO confirm against the OCR server.

    Returns:
        The masked copy, which is also written to OUTPUT_DIR / "01_masked.png".
    """
    print("\n遮罩印刷文字...")

    masked_image = image.copy()

    for result in ocr_results:
        box = result.get('box')
        if box is None:
            continue

        # Coordinates decoded from JSON may be floats, but cv2.rectangle only
        # accepts integer points; rounding leaves integer input unchanged.
        x, y, w, h = (int(round(value)) for value in box)

        # Filled (thickness=-1) black rectangle, grown by MASKING_PADDING.
        cv2.rectangle(
            masked_image,
            (x - MASKING_PADDING, y - MASKING_PADDING),
            (x + w + MASKING_PADDING, y + h + MASKING_PADDING),
            (0, 0, 0),
            -1
        )

    # Persist the masked page for inspection.
    masked_path = OUTPUT_DIR / "01_masked.png"
    cv2.imwrite(str(masked_path), masked_image)
    print(f"✅ 遮罩完成: {masked_path}")

    return masked_image


def detect_regions(masked_image):
    """Locate candidate blobs on the masked page.

    The image is converted to grayscale, inverse-thresholded at 200,
    morphologically closed (5x5 rectangular kernel, two iterations) and its
    external contours are collected. The binary and closed images are saved
    to OUTPUT_DIR as "02_binary.png" and "03_morphed.png".

    Args:
        masked_image: BGR image array (it is passed through COLOR_BGR2GRAY).

    Returns:
        List of dicts with 'box' (x, y, w, h), 'area' (contour area) and
        'aspect_ratio' (w / h, or 0 when h is 0), limited to contours whose
        area lies in [3000, 300000] and ordered by area, largest first.
    """
    print("\n檢測候選區域...")

    # Inclusive contour-area bounds for keeping a blob.
    MIN_AREA = 3000
    MAX_AREA = 300000

    grayscale = cv2.cvtColor(masked_image, cv2.COLOR_BGR2GRAY)

    # Inverse threshold: pixels at or below 200 become foreground (255).
    _, binary = cv2.threshold(grayscale, 200, 255, cv2.THRESH_BINARY_INV)

    # Closing bridges small gaps so nearby strokes form one blob.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel, iterations=2)

    # Keep the intermediate images for debugging.
    cv2.imwrite(str(OUTPUT_DIR / "02_binary.png"), binary)
    cv2.imwrite(str(OUTPUT_DIR / "03_morphed.png"), closed)

    outlines, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    found = []
    for outline in outlines:
        blob_area = cv2.contourArea(outline)
        if not (MIN_AREA <= blob_area <= MAX_AREA):
            continue

        left, top, width, height = cv2.boundingRect(outline)
        found.append({
            'box': (left, top, width, height),
            'area': blob_area,
            'aspect_ratio': width / height if height > 0 else 0
        })

    # Largest blobs first.
    candidate_regions = sorted(found, key=lambda entry: entry['area'], reverse=True)

    print(f"✅ 找到 {len(candidate_regions)} 個候選區域")

    return candidate_regions


def merge_nearby_regions(regions, h_distance=100, v_distance=50):
    """Greedily merge candidate boxes that intersect or lie close together.

    Regions are visited in input order. Each region that has not been
    absorbed yet becomes a seed, and every later unabsorbed region that is
    near the seed's own box (not the growing union) is folded into the
    seed's bounding box. Two boxes are near when either

    * their x-ranges overlap or touch and the vertical gap is at most
      ``v_distance``, or
    * their y-ranges overlap or touch and the horizontal gap is at most
      ``h_distance``.

    The gap on an axis is 0 when the ranges overlap, so intersecting or
    nested boxes always merge.

    Args:
        regions: list of dicts, each with a 'box' entry of (x, y, w, h).
        h_distance: largest horizontal gap that still merges two boxes.
        v_distance: largest vertical gap that still merges two boxes.

    Returns:
        List of dicts with 'box' (x, y, w, h) of the merged bounding box,
        'area' (w * h of that box) and 'merged_count' (number of input
        regions folded into it). An empty input yields an empty list.
    """
    print("\n合併鄰近區域...")

    if not regions:
        return []

    merged = []
    used = set()

    for i, r1 in enumerate(regions):
        if i in used:
            continue
        used.add(i)

        x1, y1, w1, h1 = r1['box']
        # Running union of the group as corner coordinates.
        x_min, y_min, x_max, y_max = x1, y1, x1 + w1, y1 + h1
        merged_count = 1

        for j, r2 in enumerate(regions):
            if j <= i or j in used:
                continue

            x2, y2, w2, h2 = r2['box']

            # True interval gaps: 0 when the ranges overlap or touch,
            # otherwise the empty space between the nearest edges.
            h_gap = max(0, max(x1, x2) - min(x1 + w1, x2 + w2))
            v_gap = max(0, max(y1, y2) - min(y1 + h1, y2 + h2))

            stacked = h_gap == 0 and v_gap <= v_distance
            side_by_side = v_gap == 0 and h_gap <= h_distance
            if not (stacked or side_by_side):
                continue

            # Grow the union to include the absorbed box.
            x_min = min(x_min, x2)
            y_min = min(y_min, y2)
            x_max = max(x_max, x2 + w2)
            y_max = max(y_max, y2 + h2)
            merged_count += 1
            used.add(j)

        # Convert the union back to (x, y, w, h).
        w, h = x_max - x_min, y_max - y_min
        merged.append({
            'box': (x_min, y_min, w, h),
            'area': w * h,
            'merged_count': merged_count
        })

    print(f"✅ 合併後剩餘 {len(merged)} 個區域")

    return merged


def extract_signatures(image, regions):
    """Crop each region out of the page and draw an annotated overview.

    For every region (numbered from 1) the crop image[y:y+h, x:x+w] is
    written to OUTPUT_DIR / "signature_<n>.png", and a green box plus a
    "Region <n>" label is drawn on a copy of the page, which is saved as
    OUTPUT_DIR / "04_detected_regions.png".

    Args:
        image: page image array; crops are taken from it unmodified.
        regions: list of dicts with 'box' (x, y, w, h) and 'area'.

    Returns:
        The annotated copy of the page.
    """
    print("\n提取簽名區域...")

    green = (0, 255, 0)
    # Draw on a copy so the crops come from the untouched page.
    vis_image = image.copy()

    for number, region in enumerate(regions, start=1):
        x, y, w, h = region['box']

        # Outline and label the region on the overview image.
        cv2.rectangle(vis_image, (x, y), (x + w, y + h), green, 3)
        cv2.putText(vis_image, f"Region {number}", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, green, 2)

        # Crop from the original page and save it.
        crop = image[y:y+h, x:x+w]
        cv2.imwrite(str(OUTPUT_DIR / f"signature_{number}.png"), crop)
        print(f"  Region {number}: {w}x{h} 像素, 面積={region['area']}")

    # Save the annotated overview.
    vis_path = OUTPUT_DIR / "04_detected_regions.png"
    cv2.imwrite(str(vis_path), vis_image)
    print(f"\n✅ 標註圖已保存: {vis_path}")

    return vis_image


def generate_summary(ocr_count, masked_path, regions):
    """Compose the plain-text report for one pipeline run.

    Args:
        ocr_count: number of text boxes reported by OCR.
        masked_path: path of the masked page image, shown verbatim.
        regions: list of dicts with 'box' (x, y, w, h) and 'area'; the list
            length is reported as both the candidate and the extracted count.

    Returns:
        The report as a single string: a header, one line per region, and a
        footer naming OUTPUT_DIR.
    """
    region_total = len(regions)
    heavy_rule = '=' * 60
    light_rule = '-' * 60

    header = (
        "\n"
        "PP-OCRv5 完整 Pipeline 測試結果\n"
        f"{heavy_rule}\n"
        "\n"
        f"1. OCR 檢測: {ocr_count} 個文字區域\n"
        f"2. 遮罩印刷文字: {masked_path}\n"
        f"3. 檢測候選區域: {region_total} 個\n"
        f"4. 提取簽名: {region_total} 個\n"
        "\n"
        "候選區域詳情:\n"
        f"{light_rule}\n"
    )

    detail_lines = []
    for number, region in enumerate(regions, start=1):
        x, y, w, h = region['box']
        area = region['area']
        detail_lines.append(
            f"Region {number}: 位置({x}, {y}), 大小{w}x{h}, 面積={area}\n"
        )

    footer = f"\n所有結果保存在: {OUTPUT_DIR}\n"

    return header + "".join(detail_lines) + footer


def main():
    """Run the whole pipeline once: load, OCR, mask, detect, merge, extract, report.

    Stops early (returning None) when the test image cannot be loaded or the
    OCR call fails. All artefacts are written under OUTPUT_DIR.
    """
    banner = "=" * 60

    print(banner)
    print("PP-OCRv5 完整 Pipeline 測試")
    print(banner)

    # Preparation.
    setup_output_dir()

    # 1. Load the page image.
    print("\n1. 讀取測試圖片...")
    page = get_page_image()
    if page is None:
        return
    print(f"   圖片大小: {page.shape}")

    # Keep a copy of the input next to the results.
    cv2.imwrite(str(OUTPUT_DIR / "00_original.png"), page)

    # 2. OCR text detection.
    print("\n2. PP-OCRv5 檢測文字...")
    text_boxes = call_ocr_server(page)
    if text_boxes is None:
        print("❌ OCR 失敗，終止測試")
        return

    # 3. Mask printed text.
    print("\n3. 遮罩印刷文字...")
    masked_page = mask_printed_text(page, text_boxes)

    # 4. Detect candidate regions.
    print("\n4. 檢測候選區域...")
    candidates = detect_regions(masked_page)

    # 5. Merge neighbouring regions.
    print("\n5. 合併鄰近區域...")
    final_regions = merge_nearby_regions(candidates)

    # 6. Extract signatures (the annotated image it returns is not needed here).
    print("\n6. 提取簽名...")
    extract_signatures(page, final_regions)

    # 7. Build and show the summary report.
    print("\n7. 生成摘要報告...")
    report = generate_summary(len(text_boxes), OUTPUT_DIR / "01_masked.png", final_regions)
    print(report)

    # Persist the report.
    report_path = OUTPUT_DIR / "SUMMARY.txt"
    report_path.write_text(report, encoding='utf-8')

    print(banner)
    print("✅ 測試完成！")
    print(f"結果目錄: {OUTPUT_DIR}")
    print(banner)


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()