#!/usr/bin/env python3
"""
Run the complete signature-extraction pipeline with PaddleOCR v2.7.3 (v4)
so the results can be compared against v5.

Pipeline stages (see ``main``): load the test page, send it to the OCR
server, black out the detected text boxes, find candidate contours in what
remains, merge nearby candidates, crop each merged region to disk and write
a text summary.
"""

import base64
import json
import sys
import traceback
from pathlib import Path

import cv2
import numpy as np
import requests

# Configuration
OCR_SERVER = "http://192.168.30.36:5555"
OUTPUT_DIR = Path("/Volumes/NV2/pdf_recognize/signature-comparison/v4-current")
TEST_IMAGE = "/Volumes/NV2/pdf_recognize/full_page_original.png"
MASKING_PADDING = 0


def setup_output_dir():
    """Create the output directory (including parents) if it is missing."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print(f"輸出目錄: {OUTPUT_DIR}")


def get_page_image(test_image=TEST_IMAGE):
    """Load the test page image.

    Args:
        test_image: Path of the image to load; defaults to ``TEST_IMAGE``.

    Returns:
        The image as returned by ``cv2.imread``, or ``None`` when the file
        does not exist or cannot be decoded. A message is printed in both
        failure cases.
    """
    if not Path(test_image).exists():
        print(f"❌ 測試圖片不存在: {test_image}")
        return None

    image = cv2.imread(str(test_image))
    if image is None:
        # cv2.imread signals an unreadable/undecodable file by returning
        # None instead of raising, so report it here rather than letting
        # the caller stop silently.
        print(f"❌ 無法讀取測試圖片: {test_image}")
    return image


def call_ocr_server(image, timeout=30):
    """Send the image to the PaddleOCR v2.7.3 server and return its results.

    The image is PNG-encoded, base64-encoded and POSTed as JSON
    (``{'image': ...}``) to ``{OCR_SERVER}/ocr``.

    Args:
        image: Image array accepted by ``cv2.imencode``.
        timeout: Request timeout in seconds passed to ``requests.post``.

    Returns:
        The value of the ``'results'`` key of the JSON response (an empty
        list when the key is absent), or ``None`` on any failure: encoding
        error, non-200 status, or an exception during the request.
    """
    print("\n調用 PaddleOCR v2.7.3 服務器...")

    try:
        encoded_ok, buffer = cv2.imencode('.png', image)
        if not encoded_ok:
            # imencode reports failure through its first return value.
            print("❌ 圖片編碼失敗")
            return None
        img_base64 = base64.b64encode(buffer).decode('utf-8')

        response = requests.post(
            f"{OCR_SERVER}/ocr",
            json={'image': img_base64},
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f"❌ 服務器錯誤: {response.status_code}")
            return None

        results = response.json().get('results', [])
        print(f"✅ OCR 完成,檢測到 {len(results)} 個文字區域")
        return results
    except Exception as e:
        # Script-level boundary: report the failure with a traceback and let
        # the caller decide how to stop.
        print(f"❌ OCR 調用失敗: {e}")
        traceback.print_exc()
        return None


def mask_printed_text(image, ocr_results):
    """Black out every OCR-detected text box on a copy of the image.

    Each result's ``'box'`` is reduced to its axis-aligned bounding
    rectangle, expanded by ``MASKING_PADDING`` pixels on every side and
    filled with black. Results without a ``'box'`` are skipped. The masked
    image is also written to ``OUTPUT_DIR / "01_masked.png"``.

    Args:
        image: Source image; it is not modified.
        ocr_results: Iterable of dicts as returned by ``call_ocr_server``.

    Returns:
        The masked copy of ``image``.
    """
    print("\n遮罩印刷文字...")

    masked_image = image.copy()

    for result in ocr_results:
        box = result.get('box')
        if box is None:
            continue

        # v2.7.3 returns a polygon: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]].
        # Convert it to an axis-aligned rectangle.
        box_points = np.array(box)
        x_min = int(box_points[:, 0].min())
        y_min = int(box_points[:, 1].min())
        x_max = int(box_points[:, 0].max())
        y_max = int(box_points[:, 1].max())

        cv2.rectangle(
            masked_image,
            (x_min - MASKING_PADDING, y_min - MASKING_PADDING),
            (x_max + MASKING_PADDING, y_max + MASKING_PADDING),
            (0, 0, 0),
            -1,  # negative thickness => filled rectangle
        )

    masked_path = OUTPUT_DIR / "01_masked.png"
    cv2.imwrite(str(masked_path), masked_image)
    print(f"✅ 遮罩完成: {masked_path}")

    return masked_image


def detect_regions(masked_image, min_area=3000, max_area=300000):
    """Find candidate regions in the masked image.

    The image is converted to grayscale, inverse-thresholded at 200, and
    closed twice with a 5x5 rectangular kernel; external contours whose
    area lies in ``[min_area, max_area]`` become candidates. The binary and
    morphed intermediates are written to ``OUTPUT_DIR`` as
    ``02_binary.png`` and ``03_morphed.png``.

    Args:
        masked_image: BGR image (it is converted with ``COLOR_BGR2GRAY``).
        min_area: Smallest contour area (inclusive) to keep.
        max_area: Largest contour area (inclusive) to keep.

    Returns:
        List of dicts with keys ``'box'`` (``(x, y, w, h)`` bounding rect),
        ``'area'`` (contour area) and ``'aspect_ratio'`` (``w / h``, or 0
        when ``h`` is 0), sorted by area in descending order.
    """
    print("\n檢測候選區域...")

    gray = cv2.cvtColor(masked_image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)

    cv2.imwrite(str(OUTPUT_DIR / "02_binary.png"), binary)
    cv2.imwrite(str(OUTPUT_DIR / "03_morphed.png"), morphed)

    contours, _ = cv2.findContours(
        morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    candidate_regions = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if not (min_area <= area <= max_area):
            continue
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h if h > 0 else 0
        candidate_regions.append({
            'box': (x, y, w, h),
            'area': area,
            'aspect_ratio': aspect_ratio,
        })

    candidate_regions.sort(key=lambda r: r['area'], reverse=True)
    print(f"✅ 找到 {len(candidate_regions)} 個候選區域")

    return candidate_regions


def merge_nearby_regions(regions, h_distance=100, v_distance=50):
    """Merge regions that lie close to each other into bounding boxes.

    For each not-yet-used region ``i`` (in input order), every later unused
    region ``j`` is merged into ``i``'s box when either
      * their x-ranges overlap and the vertical edge distance is at most
        ``v_distance``, or
      * their y-ranges overlap and the horizontal edge distance is at most
        ``h_distance``.

    NOTE(review): two properties of the current logic, kept unchanged so
    results remain comparable with earlier runs:
      * distances are measured against region ``i``'s ORIGINAL box, not the
        growing merged box, so merging is not transitive;
      * the "distance" is the smaller of the two opposite-edge offsets, so
        for boxes that overlap on an axis it is not zero and may exceed the
        threshold.

    Args:
        regions: List of dicts with a ``'box'`` key holding ``(x, y, w, h)``.
        h_distance: Maximum horizontal edge distance for a merge.
        v_distance: Maximum vertical edge distance for a merge.

    Returns:
        List of dicts with keys ``'box'`` (``(x, y, w, h)``), ``'area'``
        (``w * h`` of the merged box) and ``'merged_count'`` (number of
        input regions folded into it). Empty list for empty input.
    """
    print("\n合併鄰近區域...")

    if not regions:
        return []

    merged = []
    used = set()

    for i, r1 in enumerate(regions):
        if i in used:
            continue

        x1, y1, w1, h1 = r1['box']
        # merged_box is kept as [left, top, right, bottom] while growing.
        merged_box = [x1, y1, x1 + w1, y1 + h1]
        group = [i]

        for j, r2 in enumerate(regions):
            if j <= i or j in used:
                continue

            x2, y2, w2, h2 = r2['box']

            h_dist = min(abs(x1 - (x2 + w2)), abs((x1 + w1) - x2))
            v_dist = min(abs(y1 - (y2 + h2)), abs((y1 + h1) - y2))

            x_overlap = not (x1 + w1 < x2 or x2 + w2 < x1)
            y_overlap = not (y1 + h1 < y2 or y2 + h2 < y1)

            if (x_overlap and v_dist <= v_distance) or \
                    (y_overlap and h_dist <= h_distance):
                merged_box[0] = min(merged_box[0], x2)
                merged_box[1] = min(merged_box[1], y2)
                merged_box[2] = max(merged_box[2], x2 + w2)
                merged_box[3] = max(merged_box[3], y2 + h2)
                group.append(j)
                used.add(j)

        used.add(i)

        x, y = merged_box[0], merged_box[1]
        w, h = merged_box[2] - merged_box[0], merged_box[3] - merged_box[1]

        merged.append({
            'box': (x, y, w, h),
            'area': w * h,
            'merged_count': len(group),
        })

    print(f"✅ 合併後剩餘 {len(merged)} 個區域")
    return merged


def extract_signatures(image, regions):
    """Crop each region to its own file and save an annotated overview.

    Every region is cropped from the unmodified ``image`` and written to
    ``OUTPUT_DIR / "signature_<n>.png"`` (1-based). A copy of the image with
    green rectangles and ``Region <n>`` labels is written to
    ``OUTPUT_DIR / "04_detected_regions.png"``.

    Args:
        image: Source image; it is not modified.
        regions: List of dicts with ``'box'`` (``(x, y, w, h)``) and
            ``'area'`` keys.

    Returns:
        The annotated copy of ``image``.
    """
    print("\n提取簽名區域...")

    vis_image = image.copy()

    for i, region in enumerate(regions):
        x, y, w, h = region['box']

        cv2.rectangle(vis_image, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(vis_image, f"Region {i+1}", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Crop from the original image so the drawn annotations are not
        # included in the saved signature.
        signature = image[y:y+h, x:x+w]
        sig_path = OUTPUT_DIR / f"signature_{i+1}.png"
        cv2.imwrite(str(sig_path), signature)

        print(f" Region {i+1}: {w}x{h} 像素, 面積={region['area']}")

    vis_path = OUTPUT_DIR / "04_detected_regions.png"
    cv2.imwrite(str(vis_path), vis_image)
    print(f"\n✅ 標註圖已保存: {vis_path}")

    return vis_image


def generate_summary(ocr_count, regions):
    """Build the plain-text summary report.

    Args:
        ocr_count: Number of text regions reported by OCR.
        regions: List of dicts with ``'box'`` (``(x, y, w, h)``) and
            ``'area'`` keys; one detail line is emitted per region.

    Returns:
        The report as a single string.
    """
    summary = f"""
PaddleOCR v2.7.3 (v4) 完整 Pipeline 測試結果
{'=' * 60}

1. OCR 檢測: {ocr_count} 個文字區域
2. 遮罩印刷文字: 完成
3. 檢測候選區域: {len(regions)} 個
4. 提取簽名: {len(regions)} 個

候選區域詳情:
{'-' * 60}
"""

    detail_lines = []
    for i, region in enumerate(regions):
        x, y, w, h = region['box']
        area = region['area']
        detail_lines.append(
            f"Region {i+1}: 位置({x}, {y}), 大小{w}x{h}, 面積={area}\n"
        )
    summary += "".join(detail_lines)

    summary += f"\n所有結果保存在: {OUTPUT_DIR}\n"

    return summary


def main():
    """Run the whole pipeline and write all artifacts to ``OUTPUT_DIR``.

    Returns:
        Process exit code: 0 on success, 1 when the test image cannot be
        loaded or the OCR call fails.
    """
    print("=" * 60)
    print("PaddleOCR v2.7.3 (v4) 完整 Pipeline 測試")
    print("=" * 60)

    setup_output_dir()

    print("\n1. 讀取測試圖片...")
    image = get_page_image()
    if image is None:
        return 1
    print(f" 圖片大小: {image.shape}")
    cv2.imwrite(str(OUTPUT_DIR / "00_original.png"), image)

    print("\n2. PaddleOCR v2.7.3 檢測文字...")
    ocr_results = call_ocr_server(image)
    if ocr_results is None:
        print("❌ OCR 失敗,終止測試")
        return 1

    print("\n3. 遮罩印刷文字...")
    masked_image = mask_printed_text(image, ocr_results)

    print("\n4. 檢測候選區域...")
    regions = detect_regions(masked_image)

    print("\n5. 合併鄰近區域...")
    merged_regions = merge_nearby_regions(regions)

    print("\n6. 提取簽名...")
    extract_signatures(image, merged_regions)

    print("\n7. 生成摘要報告...")
    summary = generate_summary(len(ocr_results), merged_regions)
    print(summary)

    summary_path = OUTPUT_DIR / "SUMMARY.txt"
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write(summary)

    print("=" * 60)
    print("✅ v4 測試完成!")
    print(f"結果目錄: {OUTPUT_DIR}")
    print("=" * 60)
    return 0


if __name__ == "__main__":
    sys.exit(main())