Add PaddleOCR masking and region detection pipeline
- Created PaddleOCR client for remote server communication
- Implemented text masking + region detection pipeline
- Test results: 100% recall on sample PDF (found both signatures)
- Identified issues: split regions, printed text not fully masked
- Documented 5 solution options in PADDLEOCR_STATUS.md
- Next: Implement region merging (sketched below) and two-stage cleaning
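The split-region issue comes from adjacent OCR detections that should be treated as one block, so the planned merging step boils down to unioning boxes that sit within a small gap of each other. The following is a minimal sketch only, assuming the (x, y, w, h) box format returned by get_text_boxes(); the function name and max_gap value are illustrative, not part of the current code:

# Hypothetical sketch: merge_nearby_boxes and max_gap are not in the
# current pipeline; boxes are assumed to be (x, y, w, h) tuples as
# returned by get_text_boxes().
def merge_nearby_boxes(boxes, max_gap=20):
    """Repeatedly merge boxes whose extents come within max_gap pixels."""
    merged = [list(map(int, b)) for b in boxes]
    changed = True
    while changed:
        changed = False
        out = []
        while merged:
            x, y, w, h = merged.pop(0)
            i = 0
            while i < len(merged):
                x2, y2, w2, h2 = merged[i]
                # Overlap test with a max_gap tolerance on every side.
                if (x - max_gap < x2 + w2 and x2 - max_gap < x + w and
                        y - max_gap < y2 + h2 and y2 - max_gap < y + h):
                    nx, ny = min(x, x2), min(y, y2)
                    w, h = max(x + w, x2 + w2) - nx, max(y + h, y2 + h2) - ny
                    x, y = nx, ny
                    merged.pop(i)
                    changed = True
                else:
                    i += 1
            out.append([x, y, w, h])
        merged = out
    return [tuple(b) for b in merged]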
test_paddleocr_client.py (new file, 81 lines)
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""Test PaddleOCR client with a real PDF page."""

import fitz  # PyMuPDF
import numpy as np
import cv2
from paddleocr_client import create_ocr_client

# Test PDF
TEST_PDF = "/Volumes/NV2/PDF-Processing/signature-image-output/201301_1324_AI1_page3.pdf"
DPI = 300

print("="*80)
print("Testing PaddleOCR Client with Real PDF")
print("="*80)

# Step 1: Connect to server
print("\n1. Connecting to PaddleOCR server...")
try:
    client = create_ocr_client()
    print(f" ✅ Connected: {client.server_url}")
except Exception as e:
    print(f" ❌ Connection failed: {e}")
    exit(1)

# Step 2: Render PDF
print("\n2. Rendering PDF to image...")
try:
    doc = fitz.open(TEST_PDF)
    page = doc[0]
    mat = fitz.Matrix(DPI/72, DPI/72)
    pix = page.get_pixmap(matrix=mat)
    image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)

    if pix.n == 4:  # RGBA
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

    print(f" ✅ Rendered: {image.shape[1]}x{image.shape[0]} pixels")
    doc.close()
except Exception as e:
    print(f" ❌ Error: {e}")
    exit(1)

# Step 3: Run OCR
print("\n3. Running OCR on image...")
try:
    results = client.ocr(image)
    print(f" ✅ OCR successful!")
    print(f" Found {len(results)} text regions")

    # Show first few results
    if results:
        print("\n Sample detections:")
        for i, result in enumerate(results[:5]):
            text = result['text']
            confidence = result['confidence']
            print(f" {i+1}. '{text}' (confidence: {confidence:.2f})")

except Exception as e:
    print(f" ❌ OCR failed: {e}")
    import traceback
    traceback.print_exc()
    exit(1)

# Step 4: Get bounding boxes
print("\n4. Getting text bounding boxes...")
try:
    boxes = client.get_text_boxes(image)
    print(f" ✅ Got {len(boxes)} bounding boxes")

    if boxes:
        print(" Sample boxes (x, y, w, h):")
        for i, box in enumerate(boxes[:3]):
            print(f" {i+1}. {box}")

except Exception as e:
    print(f" ❌ Error: {e}")

print("\n" + "="*80)
print("Test completed successfully!")
print("="*80)
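For context on the masking half of the pipeline (the part flagged above as "printed text not fully masked"), the step roughly amounts to painting the OCR boxes out of the page image before region detection. The following is a hypothetical sketch, not the actual pipeline code: it assumes the same (x, y, w, h) box format and a white page background, and mask_text_regions/pad are illustrative names.

# Hypothetical sketch only: mask_text_regions and pad are illustrative,
# not names from the actual pipeline code.
import cv2
import numpy as np

def mask_text_regions(image: np.ndarray, boxes, pad: int = 5) -> np.ndarray:
    """Return a copy of the page image with OCR text boxes painted white."""
    masked = image.copy()
    img_h, img_w = masked.shape[:2]
    for box in boxes:
        x, y, w, h = map(int, box)
        x0, y0 = max(0, x - pad), max(0, y - pad)
        x1, y1 = min(img_w, x + w + pad), min(img_h, y + h + pad)
        # thickness=-1 fills the rectangle, erasing the printed text.
        cv2.rectangle(masked, (x0, y0), (x1, y1), (255, 255, 255), thickness=-1)
    return masked

In the test above this would be called as masked = mask_text_regions(image, boxes) before handing the page to the signature/region detector.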