- Created PaddleOCR client for remote server communication
- Implemented text masking + region detection pipeline
- Test results: 100% recall on sample PDF (found both signatures)
- Identified issues: split regions, printed text not fully masked
- Documented 5 solution options in PADDLEOCR_STATUS.md
- Next: Implement region merging and two-stage cleaning
103 lines
3.0 KiB
Python
103 lines
3.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Test PaddleOCR on a sample PDF page."""
|
|
|
|
import fitz # PyMuPDF
|
|
from paddleocr import PaddleOCR
|
|
import numpy as np
|
|
from PIL import Image
|
|
import cv2
|
|
from pathlib import Path
|
|
|
|
# Configuration: the sample PDF to analyze and the resolution used to
# rasterize it.
TEST_PDF = "/Volumes/NV2/PDF-Processing/signature-image-output/201301_1324_AI1_page3.pdf"
DPI = 300

# Opening banner for the test run.
print("=" * 80, "Testing PaddleOCR on macOS Apple Silicon", "=" * 80, sep="\n")
|
|
|
|
# Step 1: Render PDF to image
# Rasterizes the first page of TEST_PDF at DPI into the numpy array `image`
# (an RGBA pixmap is converted to RGB). Any failure is reported and aborts
# the script with exit status 1.
print("\n1. Rendering PDF to image...")
try:
    # The context manager closes the document even when rendering raises;
    # a trailing doc.close() would be skipped on any exception.
    with fitz.open(TEST_PDF) as doc:
        page = doc[0]
        # Scale from the PDF's 72-units-per-inch space to the target DPI.
        mat = fitz.Matrix(DPI / 72, DPI / 72)
        pix = page.get_pixmap(matrix=mat)
        image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
            pix.height, pix.width, pix.n
        )

        if pix.n == 4:  # RGBA
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

        print(f" ✅ Rendered: {image.shape[1]}x{image.shape[0]} pixels")
except Exception as e:
    print(f" ❌ Error: {e}")
    # SystemExit instead of exit(): the `exit` builtin is injected by the
    # `site` module and is not guaranteed to exist in every interpreter.
    raise SystemExit(1)
|
|
|
|
# Step 2: Initialize PaddleOCR
print("\n2. Initializing PaddleOCR...")
print(" (First run will download models, may take a few minutes...)")

# Constructor options, using the syntax from the official docs.
ocr_options = {
    "use_doc_orientation_classify": False,
    "use_doc_unwarping": False,
    "use_textline_orientation": False,
    "lang": "ch",  # Chinese language
}

try:
    ocr = PaddleOCR(**ocr_options)
    print(" ✅ PaddleOCR initialized successfully")
except Exception as init_error:
    # Report the failure with a full traceback plus a platform hint, then
    # stop: the later steps cannot run without an OCR engine.
    print(f" ❌ Error: {init_error}")
    import traceback

    traceback.print_exc()
    for hint in (
        "\n Note: PaddleOCR requires PaddlePaddle backend.",
        " If this is a module import error, PaddlePaddle may not support this platform.",
    ):
        print(hint)
    exit(1)
|
|
|
|
# Step 3: Run OCR
# Runs OCR on `image` and stores the detections in `result` using the legacy
# PaddleOCR layout, one list per page:
#     [[ [box, (text, confidence)], ... ]]
# Any failure is reported with a traceback and aborts the script.
print("\n3. Running OCR to detect printed text...")


def _to_legacy_result(raw):
    """Return OCR output in the legacy ``[[ [box, (text, score)], ... ]]`` layout.

    PaddleOCR 3.x ``predict()`` yields one dict-like result per page with
    parallel ``rec_polys`` / ``rec_texts`` / ``rec_scores`` entries, whereas
    the 2.x ``ocr()`` call already returns nested lists. Only the first page
    is converted because this script feeds a single image. Empty output,
    ``[None]`` and output that is already list-shaped pass through as a list.
    """
    pages = list(raw) if raw is not None else []
    if not pages:
        return pages

    first = pages[0]
    if first is None or not hasattr(first, "get"):
        # Legacy layout (or "nothing detected"): nothing to convert.
        return pages

    polys = first.get("rec_polys")
    if polys is None:
        polys = first.get("dt_polys")
    if polys is None:
        polys = []
    texts = first.get("rec_texts")
    if texts is None:
        texts = []
    scores = first.get("rec_scores")
    if scores is None:
        scores = []

    return [[
        [np.asarray(poly).tolist(), (text, float(score))]
        for poly, text, score in zip(polys, texts, scores)
    ]]


try:
    # The constructor above uses PaddleOCR 3.x options. In 3.x, ocr() is a
    # deprecated alias of predict() and rejects the old `cls` keyword, so
    # prefer predict() when it exists and fall back to the 2.x call otherwise.
    run_predict = getattr(ocr, "predict", None)
    if callable(run_predict):
        result = _to_legacy_result(run_predict(image))
    else:
        result = _to_legacy_result(ocr.ocr(image, cls=False))

    if result and result[0]:
        print(f" ✅ Detected {len(result[0])} text regions")

        # Show first few detections
        print("\n Sample detections:")
        for i, item in enumerate(result[0][:5]):
            box = item[0]  # Bounding box coordinates
            text = item[1][0]  # Detected text
            confidence = item[1][1]  # Confidence score
            print(f" {i+1}. Text: '{text}' (confidence: {confidence:.2f})")
            print(f" Box: {box}")
    else:
        print(" ⚠️ No text detected")

except Exception as e:
    print(f" ❌ Error during OCR: {e}")
    import traceback

    traceback.print_exc()
    # SystemExit instead of exit(): the `exit` builtin is injected by the
    # `site` module and is not guaranteed to exist in every interpreter.
    raise SystemExit(1)
|
|
|
|
# Step 4: Visualize detection
# Draws each detected text polygon in green on a copy of `image` and writes
# the overlay to disk. Failures are reported but do not abort the script.
print("\n4. Creating visualization...")
try:
    vis_image = image.copy()

    if result and result[0]:
        for item in result[0]:
            box = np.array(item[0], dtype=np.int32)
            cv2.polylines(vis_image, [box], True, (0, 255, 0), 2)

    # Save visualization
    output_path = "/Volumes/NV2/PDF-Processing/signature-image-output/paddleocr_test_detection.png"
    # cv2.imwrite expects BGR channel order and reports most failures
    # (missing directory, unwritable volume) by returning False instead of
    # raising, so check the return value before claiming success.
    if not cv2.imwrite(output_path, cv2.cvtColor(vis_image, cv2.COLOR_RGB2BGR)):
        raise OSError(f"cv2.imwrite could not write {output_path}")
    print(f" ✅ Saved visualization: {output_path}")

except Exception as e:
    print(f" ❌ Error during visualization: {e}")
|
|
|
|
# Closing banner: a blank line, then the completion message between two rules.
print("\n" + "=" * 80, "PaddleOCR test completed!", "=" * 80, sep="\n")
|