## 研究成果 ### PP-OCRv5 API 測試 - 成功升級到 PaddleOCR 3.3.2 (PP-OCRv5) - 理解新 API 結構和調用方式 - 驗證基礎檢測功能 ### 關鍵發現 ❌ PP-OCRv5 **沒有內建手寫分類功能** - text_type 字段是語言類型,不是手寫/印刷分類 - 仍需要 OpenCV Method 3 來分離手寫和印刷文字 ### 完整 Pipeline 對比測試 - v4 (2.7.3): 檢測 14 個文字 → 4 個候選區域 - v5 (3.3.2): 檢測 50 個文字 → 7 個候選區域 - 主簽名區域:兩個版本幾乎相同 (1150x511 vs 1144x511) ### 性能分析 優點: - v5 手寫識別準確率 +13.7% (文檔承諾) - 可能減少漏檢 缺點: - 過度檢測(印章小字等) - API 完全重寫,不兼容 - 仍無法替代 OpenCV Method 3 ### 文件 - PP_OCRV5_RESEARCH_FINDINGS.md: 完整研究報告 - signature-comparison/: v4 vs v5 對比結果 - test_results/: v5 測試輸出 - test_*_pipeline.py: 完整測試腳本 ### 建議 當前方案(v2.7.3 + OpenCV Method 3)已足夠穩定, 除非遇到大量漏檢,否則暫不升級到 v5。 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
323 lines
8.5 KiB
Python
323 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
使用 PP-OCRv5 跑完整的簽名提取 pipeline
|
|
|
|
流程:
|
|
1. 使用服務器上的 PP-OCRv5 檢測文字
|
|
2. 遮罩印刷文字
|
|
3. 檢測候選區域
|
|
4. 提取簽名
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import cv2
|
|
import numpy as np
|
|
import requests
|
|
from pathlib import Path
|
|
|
|
# Configuration
# Base URL of the OCR HTTP service; call_ocr_server() posts to "<OCR_SERVER>/ocr".
OCR_SERVER = "http://192.168.30.36:5555"
# Source PDF path. NOTE(review): not referenced by any function visible in this file.
PDF_PATH = "/Volumes/NV2/pdf_recognize/test.pdf"
# Directory that receives every intermediate image, signature crop and SUMMARY.txt.
OUTPUT_DIR = Path("/Volumes/NV2/pdf_recognize/test_results/v5_pipeline")
# Extra pixels added on every side of an OCR box when it is blacked out.
MASKING_PADDING = 0
|
|
|
|
|
|
def setup_output_dir():
    """Create OUTPUT_DIR (including missing parents) and print its location.

    An already existing directory is not an error.
    """
    target = OUTPUT_DIR
    target.mkdir(exist_ok=True, parents=True)
    print(f"輸出目錄: {target}")
|
|
|
|
|
|
def get_page_image(image_path="/Volumes/NV2/pdf_recognize/full_page_original.png"):
    """Load the test page image from disk.

    Args:
        image_path: Path of the image to load. Defaults to the pre-rendered
            test page this script has always used, so existing zero-argument
            calls behave as before.

    Returns:
        The image as returned by ``cv2.imread``, or ``None`` when the file is
        missing or cannot be decoded. A message is printed in both failure
        cases so the caller's silent early exit is explained.
    """
    if not Path(image_path).exists():
        print(f"❌ 測試圖片不存在: {image_path}")
        return None

    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread reports an unreadable/corrupt file by returning None
        # instead of raising, so surface that case explicitly.
        print(f"❌ 無法讀取測試圖片: {image_path}")
    return image
|
|
|
|
|
|
def call_ocr_server(image):
    """Send an image to the PP-OCRv5 server and return its detections.

    The image is PNG-encoded, base64-wrapped and POSTed as JSON
    (``{"image": <base64 string>}``) to ``{OCR_SERVER}/ocr`` with a
    30 second timeout.

    Args:
        image: Image array accepted by ``cv2.imencode``.

    Returns:
        The value stored under the ``results`` key of the JSON response (an
        empty list when the key is absent), or ``None`` when PNG encoding
        fails, the server answers with a non-200 status, or any exception is
        raised along the way.
    """
    print("\n調用 PP-OCRv5 服務器...")

    try:
        # Encode the image
        import base64

        encoded_ok, buffer = cv2.imencode('.png', image)
        if not encoded_ok:
            # imencode reports failure through its first return value; do not
            # ship a meaningless buffer to the server.
            print("❌ OCR 調用失敗: 圖片編碼失敗")
            return None
        img_base64 = base64.b64encode(buffer).decode('utf-8')

        # Send the request
        response = requests.post(
            f"{OCR_SERVER}/ocr",
            json={'image': img_base64},
            timeout=30
        )

        if response.status_code != 200:
            print(f"❌ 服務器錯誤: {response.status_code}")
            return None

        # Look the detections up once and reuse them for the log line
        results = response.json().get('results', [])
        print(f"✅ OCR 完成,檢測到 {len(results)} 個文字區域")
        return results

    except Exception as e:
        # Deliberately broad: this is the boundary to an external service and
        # the caller treats None as "abort the run"; the traceback is printed
        # so the failure is not hidden.
        print(f"❌ OCR 調用失敗: {e}")
        import traceback
        traceback.print_exc()
        return None
|
|
|
|
|
|
def mask_printed_text(image, ocr_results):
    """Black out every OCR-detected text box on a copy of the image.

    Args:
        image: Source image; it is copied, never modified in place.
        ocr_results: Iterable of dicts. Entries whose ``'box'`` value is
            missing or ``None`` are skipped; otherwise ``'box'`` is unpacked
            as ``[x, y, w, h]``.

    Returns:
        The masked copy, which is also written to ``OUTPUT_DIR/01_masked.png``.

    NOTE(review): the fill colour is black. If a later stage treats dark
    pixels as ink, the filled rectangles will register as foreground —
    confirm this is intended.
    """
    print("\n遮罩印刷文字...")

    masked_image = image.copy()

    for result in ocr_results:
        box = result.get('box')
        if box is None:
            continue

        # box format: [x, y, w, h]. Values decoded from a JSON response may be
        # floats, which cv2.rectangle rejects as point coordinates, so coerce
        # them to ints (a no-op for integer input).
        x, y, w, h = (int(round(value)) for value in box)

        # Filled (thickness -1) black rectangle, grown by MASKING_PADDING
        cv2.rectangle(
            masked_image,
            (x - MASKING_PADDING, y - MASKING_PADDING),
            (x + w + MASKING_PADDING, y + h + MASKING_PADDING),
            (0, 0, 0),
            -1
        )

    # Save the masked image
    masked_path = OUTPUT_DIR / "01_masked.png"
    cv2.imwrite(str(masked_path), masked_image)
    print(f"✅ 遮罩完成: {masked_path}")

    return masked_image
|
|
|
|
|
|
def detect_regions(masked_image):
    """Find candidate regions in the masked page via thresholding and contours.

    Steps: BGR -> grayscale, inverse binary threshold at 200, two iterations
    of morphological closing with a 5x5 rectangular kernel, then external
    contour detection. The binary and closed images are written to
    ``OUTPUT_DIR`` as ``02_binary.png`` and ``03_morphed.png``.

    Args:
        masked_image: BGR image (it is converted with ``COLOR_BGR2GRAY``).

    Returns:
        List of dicts with keys ``'box'`` (``(x, y, w, h)`` bounding rect),
        ``'area'`` (contour area) and ``'aspect_ratio'`` (``w / h``, or 0 when
        ``h`` is 0), limited to contour areas in [3000, 300000] and sorted by
        area, largest first.

    NOTE(review): with THRESH_BINARY_INV every pixel at or below 200 becomes
    foreground, so pure-black areas of the input count as foreground too.
    """
    print("\n檢測候選區域...")

    # Accepted contour-area range for a candidate
    MIN_AREA = 3000
    MAX_AREA = 300000

    # Grayscale, then inverse threshold
    grayscale = cv2.cvtColor(masked_image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(grayscale, 200, 255, cv2.THRESH_BINARY_INV)

    # Morphological closing
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel, iterations=2)

    # Save the intermediate stages
    for filename, stage in (("02_binary.png", binary), ("03_morphed.png", morphed)):
        cv2.imwrite(str(OUTPUT_DIR / filename), stage)

    # Outer contours only
    contours, _ = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    found = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if not (MIN_AREA <= area <= MAX_AREA):
            continue

        x, y, w, h = cv2.boundingRect(contour)
        if h > 0:
            aspect_ratio = w / h
        else:
            aspect_ratio = 0

        found.append({
            'box': (x, y, w, h),
            'area': area,
            'aspect_ratio': aspect_ratio
        })

    # Largest area first
    candidate_regions = sorted(found, key=lambda region: region['area'], reverse=True)

    print(f"✅ 找到 {len(candidate_regions)} 個候選區域")

    return candidate_regions
|
|
|
|
|
|
def merge_nearby_regions(regions, h_distance=100, v_distance=50):
    """Group regions that lie close to each other into bounding boxes.

    Regions are visited in order. Each region not yet absorbed becomes a seed
    and absorbs every later, not-yet-absorbed region that either

    * overlaps the seed horizontally and is within ``v_distance`` vertically, or
    * overlaps the seed vertically and is within ``h_distance`` horizontally.

    Proximity is always measured against the seed's own box, not against the
    growing merged box, so merging is not transitive.

    Args:
        regions: Sequence of dicts whose ``'box'`` is ``(x, y, w, h)``.
        h_distance: Maximum horizontal edge gap for a merge.
        v_distance: Maximum vertical edge gap for a merge.

    Returns:
        List of dicts with ``'box'`` (merged ``(x, y, w, h)``), ``'area'``
        (``w * h`` of the merged box) and ``'merged_count'`` (number of input
        regions in the group). An empty input yields ``[]``.
    """
    print("\n合併鄰近區域...")

    if not regions:
        return []

    absorbed = set()
    result = []

    for seed_idx, seed in enumerate(regions):
        if seed_idx in absorbed:
            continue
        absorbed.add(seed_idx)

        sx, sy, sw, sh = seed['box']
        # Running bounding box of the group, as edges rather than size
        left, top, right, bottom = sx, sy, sx + sw, sy + sh
        member_count = 1

        for other_idx, other in enumerate(regions):
            if other_idx <= seed_idx or other_idx in absorbed:
                continue

            ox, oy, ow, oh = other['box']

            # Smallest gap between opposing edges on each axis
            h_gap = min(abs(sx - (ox + ow)), abs((sx + sw) - ox))
            v_gap = min(abs(sy - (oy + oh)), abs((sy + sh) - oy))

            # Projection overlap on each axis
            overlaps_x = not (sx + sw < ox or ox + ow < sx)
            overlaps_y = not (sy + sh < oy or oy + oh < sy)

            stacked = overlaps_x and v_gap <= v_distance
            side_by_side = overlaps_y and h_gap <= h_distance
            if not (stacked or side_by_side):
                continue

            # Grow the group box to include the absorbed region
            left = min(left, ox)
            top = min(top, oy)
            right = max(right, ox + ow)
            bottom = max(bottom, oy + oh)
            member_count += 1
            absorbed.add(other_idx)

        # Back to (x, y, w, h)
        width = right - left
        height = bottom - top
        result.append({
            'box': (left, top, width, height),
            'area': width * height,
            'merged_count': member_count
        })

    print(f"✅ 合併後剩餘 {len(result)} 個區域")

    return result
|
|
|
|
|
|
def extract_signatures(image, regions):
    """Crop each region out of the image and draw an annotated overview.

    For every region (numbered from 1) the crop ``image[y:y+h, x:x+w]`` is
    written to ``OUTPUT_DIR/signature_<n>.png``. A copy of the image with a
    green box and a ``Region <n>`` label per region is written to
    ``OUTPUT_DIR/04_detected_regions.png``.

    Args:
        image: Source image; crops are taken from it, drawing happens on a copy.
        regions: Iterable of dicts with ``'box'`` as ``(x, y, w, h)`` and an
            ``'area'`` entry (used only for the log line).

    Returns:
        The annotated copy of the image.
    """
    print("\n提取簽名區域...")

    # All annotations go on a copy so the crops stay clean
    vis_image = image.copy()
    green = (0, 255, 0)

    for number, region in enumerate(regions, start=1):
        x, y, w, h = region['box']

        # Outline and label
        cv2.rectangle(vis_image, (x, y), (x + w, y + h), green, 3)
        cv2.putText(vis_image, f"Region {number}", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, green, 2)

        # Crop from the untouched image and save
        crop = image[y:y+h, x:x+w]
        crop_path = OUTPUT_DIR / f"signature_{number}.png"
        cv2.imwrite(str(crop_path), crop)
        print(f" Region {number}: {w}x{h} 像素, 面積={region['area']}")

    # Save the annotated overview
    vis_path = OUTPUT_DIR / "04_detected_regions.png"
    cv2.imwrite(str(vis_path), vis_image)
    print(f"\n✅ 標註圖已保存: {vis_path}")

    return vis_image
|
|
|
|
|
|
def generate_summary(ocr_count, masked_path, regions):
    """Build the plain-text summary of a pipeline run.

    Args:
        ocr_count: Number of text regions reported by OCR.
        masked_path: Path of the masked image, interpolated as-is.
        regions: Sequence of dicts with ``'box'`` as ``(x, y, w, h)`` and
            ``'area'``; its length is reported for both the candidate count
            and the extracted-signature count.

    Returns:
        The report as one string: a header, one ``Region <n>`` line per
        region (numbered from 1), and a footer naming ``OUTPUT_DIR``.
    """
    header = f"""
PP-OCRv5 完整 Pipeline 測試結果
{'=' * 60}

1. OCR 檢測: {ocr_count} 個文字區域
2. 遮罩印刷文字: {masked_path}
3. 檢測候選區域: {len(regions)} 個
4. 提取簽名: {len(regions)} 個

候選區域詳情:
{'-' * 60}
"""

    detail_lines = []
    for number, region in enumerate(regions, start=1):
        x, y, w, h = region['box']
        area = region['area']
        detail_lines.append(
            f"Region {number}: 位置({x}, {y}), 大小{w}x{h}, 面積={area}\n"
        )

    footer = f"\n所有結果保存在: {OUTPUT_DIR}\n"

    return header + "".join(detail_lines) + footer
|
|
|
|
|
|
def main():
    """Run the full pipeline once and write all artefacts to OUTPUT_DIR.

    Order: create the output directory, load the test image, run OCR, mask
    the detected text, detect candidate regions, merge neighbours, extract
    the signature crops, then print the summary and save it as
    ``SUMMARY.txt``. Returns early (``None``) when the image cannot be loaded
    or the OCR call fails.
    """
    banner = "=" * 60
    print(banner)
    print("PP-OCRv5 完整 Pipeline 測試")
    print(banner)

    # Preparation
    setup_output_dir()

    # 1. Load the image
    print("\n1. 讀取測試圖片...")
    image = get_page_image()
    if image is None:
        return
    print(f" 圖片大小: {image.shape}")

    # Keep a copy of the input next to the results
    cv2.imwrite(str(OUTPUT_DIR / "00_original.png"), image)

    # 2. OCR detection
    print("\n2. PP-OCRv5 檢測文字...")
    ocr_results = call_ocr_server(image)
    if ocr_results is None:
        print("❌ OCR 失敗,終止測試")
        return

    # 3. Mask printed text
    print("\n3. 遮罩印刷文字...")
    masked_image = mask_printed_text(image, ocr_results)

    # 4. Detect candidate regions
    print("\n4. 檢測候選區域...")
    regions = detect_regions(masked_image)

    # 5. Merge neighbouring regions
    print("\n5. 合併鄰近區域...")
    merged_regions = merge_nearby_regions(regions)

    # 6. Extract signatures (the returned annotated image is not needed here)
    print("\n6. 提取簽名...")
    extract_signatures(image, merged_regions)

    # 7. Summary report
    print("\n7. 生成摘要報告...")
    masked_path = OUTPUT_DIR / "01_masked.png"
    summary = generate_summary(len(ocr_results), masked_path, merged_regions)
    print(summary)

    # Persist the summary
    summary_path = OUTPUT_DIR / "SUMMARY.txt"
    summary_path.write_text(summary, encoding='utf-8')

    print(banner)
    print("✅ 測試完成!")
    print(f"結果目錄: {OUTPUT_DIR}")
    print(banner)
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|