#!/usr/bin/env python3
"""
Run the complete signature-extraction pipeline using PP-OCRv5.

Steps:
1. Detect text with the PP-OCRv5 instance running on the OCR server
2. Mask the printed text
3. Detect candidate regions
4. Extract the signatures
"""

import base64
import json
import sys
import traceback
from pathlib import Path

import cv2
import numpy as np
import requests

# Configuration
OCR_SERVER = "http://192.168.30.36:5555"
OCR_TIMEOUT = 30  # seconds allowed for one OCR request
PDF_PATH = "/Volumes/NV2/pdf_recognize/test.pdf"
TEST_IMAGE_PATH = "/Volumes/NV2/pdf_recognize/full_page_original.png"
OUTPUT_DIR = Path("/Volumes/NV2/pdf_recognize/test_results/v5_pipeline")
MASKING_PADDING = 0


def setup_output_dir():
    """Create the output directory (including parents) if it is missing."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    print(f"輸出目錄: {OUTPUT_DIR}")


def get_page_image(image_path=TEST_IMAGE_PATH):
    """Load the test page image.

    Args:
        image_path: Path of the image to load; defaults to the pre-rendered
            test page.

    Returns:
        The image as returned by ``cv2.imread``, or ``None`` when the file
        does not exist or cannot be decoded.
    """
    if not Path(image_path).exists():
        print(f"❌ 測試圖片不存在: {image_path}")
        return None

    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread signals an unreadable/undecodable file by returning
        # None instead of raising, so report it explicitly.
        print(f"❌ 無法讀取測試圖片: {image_path}")
    return image


def call_ocr_server(image):
    """Send the image to the PP-OCRv5 server and return its detections.

    The image is PNG-encoded, base64-encoded and POSTed as JSON to
    ``{OCR_SERVER}/ocr``.

    Args:
        image: Image array accepted by ``cv2.imencode``.

    Returns:
        The value of the ``results`` key of the JSON response (an empty list
        when the key is absent), or ``None`` on any failure.
    """
    print("\n調用 PP-OCRv5 服務器...")

    try:
        # Encode the image
        encoded_ok, buffer = cv2.imencode('.png', image)
        if not encoded_ok:
            print("❌ 圖片編碼失敗")
            return None
        img_base64 = base64.b64encode(buffer).decode('utf-8')

        # Send the request
        response = requests.post(
            f"{OCR_SERVER}/ocr",
            json={'image': img_base64},
            timeout=OCR_TIMEOUT
        )

        if response.status_code != 200:
            print(f"❌ 服務器錯誤: {response.status_code}")
            return None

        results = response.json().get('results', [])
        print(f"✅ OCR 完成,檢測到 {len(results)} 個文字區域")
        return results

    except Exception as e:
        # Deliberately broad: this is a best-effort call in a test script;
        # any failure is reported and turned into a None result.
        print(f"❌ OCR 調用失敗: {e}")
        traceback.print_exc()
        return None


def mask_printed_text(image, ocr_results):
    """Paint a filled black rectangle over every OCR-detected text box.

    The masked image is also written to ``OUTPUT_DIR / "01_masked.png"``.

    Args:
        image: Source image; it is copied, not modified.
        ocr_results: Iterable of dicts. Entries without a ``box`` value are
            skipped. Assumes ``box`` is ``[x, y, w, h]`` -- TODO confirm
            against the OCR server's response format.

    Returns:
        The masked copy of the image.
    """
    print("\n遮罩印刷文字...")

    masked_image = image.copy()

    for result in ocr_results:
        box = result.get('box')
        if box is None:
            continue

        # JSON numbers may arrive as floats; cv2.rectangle needs int points.
        x, y, w, h = (int(v) for v in box)

        # NOTE(review): the mask is black, while detect_regions() treats
        # every pixel <= 200 as foreground (THRESH_BINARY_INV), so masked
        # boxes may be detected as candidate regions -- confirm intent.
        cv2.rectangle(
            masked_image,
            (x - MASKING_PADDING, y - MASKING_PADDING),
            (x + w + MASKING_PADDING, y + h + MASKING_PADDING),
            (0, 0, 0),
            -1
        )

    # Save the masked image
    masked_path = OUTPUT_DIR / "01_masked.png"
    cv2.imwrite(str(masked_path), masked_image)
    print(f"✅ 遮罩完成: {masked_path}")

    return masked_image


def detect_regions(masked_image, min_area=3000, max_area=300000):
    """Find candidate regions in the masked image.

    The image is converted to grayscale, inverse-thresholded at 200, closed
    with a 5x5 rectangular kernel (2 iterations), and its external contours
    are filtered by contour area. The binary and morphed intermediates are
    written to ``OUTPUT_DIR``.

    Args:
        masked_image: BGR image (it is converted with ``COLOR_BGR2GRAY``).
        min_area: Smallest contour area (inclusive) to keep.
        max_area: Largest contour area (inclusive) to keep.

    Returns:
        List of dicts with ``box`` (x, y, w, h), ``area`` (contour area) and
        ``aspect_ratio`` (w / h), sorted by area, largest first.
    """
    print("\n檢測候選區域...")

    # Grayscale
    gray = cv2.cvtColor(masked_image, cv2.COLOR_BGR2GRAY)

    # Binarize: pixels <= 200 become 255 (foreground)
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

    # Morphological closing
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Save intermediate results
    cv2.imwrite(str(OUTPUT_DIR / "02_binary.png"), binary)
    cv2.imwrite(str(OUTPUT_DIR / "03_morphed.png"), morphed)

    # Find contours
    contours, _ = cv2.findContours(
        morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Filter candidate regions by area
    candidate_regions = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if not min_area <= area <= max_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        candidate_regions.append({
            'box': (x, y, w, h),
            'area': area,
            'aspect_ratio': w / h if h > 0 else 0
        })

    # Sort by area, largest first
    candidate_regions.sort(key=lambda r: r['area'], reverse=True)

    print(f"✅ 找到 {len(candidate_regions)} 個候選區域")

    return candidate_regions


def _axis_gap(start_a, end_a, start_b, end_b):
    """Return the empty distance between two 1-D intervals (0 if they touch or overlap)."""
    return max(0, max(start_a, start_b) - min(end_a, end_b))


def merge_nearby_regions(regions, h_distance=100, v_distance=50):
    """Merge regions that overlap or lie close to each other.

    Each not-yet-used region acts as a seed; every later unused region is
    merged into it when the two overlap (or touch) on one axis and the gap
    between them on the other axis is within the given distance. Regions
    that overlap on both axes have a gap of 0 and are therefore always
    merged. Comparisons are made against the seed's own box, not against
    the growing merged box.

    Args:
        regions: List of dicts with a ``box`` entry of (x, y, w, h).
        h_distance: Maximum horizontal gap for regions overlapping in y.
        v_distance: Maximum vertical gap for regions overlapping in x.

    Returns:
        List of dicts with ``box`` (x, y, w, h of the merged bounding box),
        ``area`` (w * h of that box) and ``merged_count`` (number of input
        regions in the group). Empty list for empty input.
    """
    print("\n合併鄰近區域...")

    if not regions:
        return []

    merged = []
    used = set()

    for i, seed in enumerate(regions):
        if i in used:
            continue

        x1, y1, w1, h1 = seed['box']
        x_min, y_min, x_max, y_max = x1, y1, x1 + w1, y1 + h1
        merged_count = 1

        for j in range(i + 1, len(regions)):
            if j in used:
                continue

            x2, y2, w2, h2 = regions[j]['box']

            # True edge-to-edge gaps; 0 means the boxes overlap or touch on
            # that axis. (Using min() of absolute edge differences instead
            # yields a large value for nested boxes and blocks their merge.)
            h_gap = _axis_gap(x1, x1 + w1, x2, x2 + w2)
            v_gap = _axis_gap(y1, y1 + h1, y2, y2 + h2)

            x_overlap = h_gap == 0
            y_overlap = v_gap == 0

            if (x_overlap and v_gap <= v_distance) or \
                    (y_overlap and h_gap <= h_distance):
                # Grow the merged bounding box
                x_min = min(x_min, x2)
                y_min = min(y_min, y2)
                x_max = max(x_max, x2 + w2)
                y_max = max(y_max, y2 + h2)
                merged_count += 1
                used.add(j)

        used.add(i)

        # Convert back to (x, y, w, h)
        w, h = x_max - x_min, y_max - y_min
        merged.append({
            'box': (x_min, y_min, w, h),
            'area': w * h,
            'merged_count': merged_count
        })

    print(f"✅ 合併後剩餘 {len(merged)} 個區域")

    return merged


def extract_signatures(image, regions):
    """Crop every region out of the image and save an annotated overview.

    Each crop is written to ``OUTPUT_DIR / "signature_<n>.png"`` and the
    annotated image to ``OUTPUT_DIR / "04_detected_regions.png"``.

    Args:
        image: Source image; crops are taken from it, annotations are drawn
            on a copy.
        regions: List of dicts with ``box`` (x, y, w, h) and ``area``.

    Returns:
        The annotated copy of the image.
    """
    print("\n提取簽名區域...")

    # Annotate all regions on a copy of the image
    vis_image = image.copy()

    for number, region in enumerate(regions, start=1):
        x, y, w, h = region['box']

        # Draw the box and its label
        cv2.rectangle(vis_image, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(vis_image, f"Region {number}", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Crop and save
        signature = image[y:y+h, x:x+w]
        sig_path = OUTPUT_DIR / f"signature_{number}.png"
        cv2.imwrite(str(sig_path), signature)

        print(f" Region {number}: {w}x{h} 像素, 面積={region['area']}")

    # Save the annotated image
    vis_path = OUTPUT_DIR / "04_detected_regions.png"
    cv2.imwrite(str(vis_path), vis_image)
    print(f"\n✅ 標註圖已保存: {vis_path}")

    return vis_image


def generate_summary(ocr_count, masked_path, regions):
    """Build the plain-text summary report.

    Args:
        ocr_count: Number of text regions reported by OCR.
        masked_path: Path of the saved masked image.
        regions: List of dicts with ``box`` (x, y, w, h) and ``area``.

    Returns:
        The report as a single string.
    """
    header = f"""
PP-OCRv5 完整 Pipeline 測試結果
{'=' * 60}

1. OCR 檢測: {ocr_count} 個文字區域
2. 遮罩印刷文字: {masked_path}
3. 檢測候選區域: {len(regions)} 個
4. 提取簽名: {len(regions)} 個

候選區域詳情:
{'-' * 60}
"""

    details = []
    for number, region in enumerate(regions, start=1):
        x, y, w, h = region['box']
        area = region['area']
        details.append(
            f"Region {number}: 位置({x}, {y}), 大小{w}x{h}, 面積={area}\n"
        )

    footer = f"\n所有結果保存在: {OUTPUT_DIR}\n"

    return header + "".join(details) + footer


def main():
    """Run the whole pipeline on the test image and write all outputs."""
    print("=" * 60)
    print("PP-OCRv5 完整 Pipeline 測試")
    print("=" * 60)

    # Preparation
    setup_output_dir()

    # 1. Load the image
    print("\n1. 讀取測試圖片...")
    image = get_page_image()
    if image is None:
        return

    print(f" 圖片大小: {image.shape}")

    # Save the original image
    cv2.imwrite(str(OUTPUT_DIR / "00_original.png"), image)

    # 2. OCR detection
    print("\n2. PP-OCRv5 檢測文字...")
    ocr_results = call_ocr_server(image)
    if ocr_results is None:
        print("❌ OCR 失敗,終止測試")
        return

    # 3. Mask printed text
    print("\n3. 遮罩印刷文字...")
    masked_image = mask_printed_text(image, ocr_results)

    # 4. Detect candidate regions
    print("\n4. 檢測候選區域...")
    regions = detect_regions(masked_image)

    # 5. Merge nearby regions
    print("\n5. 合併鄰近區域...")
    merged_regions = merge_nearby_regions(regions)

    # 6. Extract signatures
    print("\n6. 提取簽名...")
    extract_signatures(image, merged_regions)

    # 7. Generate the summary
    print("\n7. 生成摘要報告...")
    summary = generate_summary(len(ocr_results), OUTPUT_DIR / "01_masked.png", merged_regions)
    print(summary)

    # Save the summary
    summary_path = OUTPUT_DIR / "SUMMARY.txt"
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write(summary)

    print("=" * 60)
    print("✅ 測試完成!")
    print(f"結果目錄: {OUTPUT_DIR}")
    print("=" * 60)


if __name__ == "__main__":
    main()