# pdf_signature_extraction/test_paddleocr_client.py

#!/usr/bin/env python3
"""Test PaddleOCR client with a real PDF page."""

import fitz  # PyMuPDF
import numpy as np
import cv2
from paddleocr_client import create_ocr_client

# Sample PDF fed through the pipeline, and the resolution it is rendered at.
TEST_PDF = "/Volumes/NV2/PDF-Processing/signature-image-output/201301_1324_AI1_page3.pdf"
DPI = 300

# Opening banner: the title framed by two 80-character rules.
print(
    "=" * 80,
    "Testing PaddleOCR Client with Real PDF",
    "=" * 80,
    sep="\n",
)

# Step 1: Connect to server
print("\n1. Connecting to PaddleOCR server...")
try:
    client = create_ocr_client()
    # Reading server_url stays inside the try so that a client object without
    # that attribute is also reported as a connection failure.
    print(f"   ✅ Connected: {client.server_url}")
except Exception as e:
    print(f"   ❌ Connection failed: {e}")
    # Every later step needs `client`, so abort with a non-zero status.
    # SystemExit is raised directly because the `exit` helper is injected by
    # the `site` module and is unavailable under `python -S` or frozen builds.
    raise SystemExit(1) from e

# Step 2: Render PDF
print("\n2. Rendering PDF to image...")
try:
    # The context manager closes the document even when page lookup or
    # rendering raises, so the file handle is not leaked on failure.
    with fitz.open(TEST_PDF) as doc:
        page = doc[0]
        # Scale both axes by DPI/72 (PyMuPDF's base resolution is 72 dpi).
        mat = fitz.Matrix(DPI/72, DPI/72)
        pix = page.get_pixmap(matrix=mat)
        # View the raw sample bytes as a (height, width, channels) uint8 array.
        image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)

        # Four channels are treated as RGBA and the alpha channel is dropped.
        # NOTE(review): assumes an RGB colorspace with alpha — confirm.
        if pix.n == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

        print(f"   ✅ Rendered: {image.shape[1]}x{image.shape[0]} pixels")
except Exception as e:
    print(f"   ❌ Error: {e}")
    # Later steps need `image`, so abort with a non-zero status. SystemExit is
    # raised directly rather than relying on the site-provided `exit` helper.
    raise SystemExit(1) from e

# Step 3: Run OCR
print("\n3. Running OCR on image...")
try:
    results = client.ocr(image)
    print("   ✅ OCR successful!")
    print(f"   Found {len(results)} text regions")

    # Show at most the first five detections, numbered from 1.
    if results:
        print("\n   Sample detections:")
        for i, result in enumerate(results[:5], start=1):
            # Each result is indexed by the 'text' and 'confidence' keys.
            text = result['text']
            confidence = result['confidence']
            print(f"      {i}. '{text}' (confidence: {confidence:.2f})")

except Exception as e:
    print(f"   ❌ OCR failed: {e}")
    # Print the full traceback: the one-line message alone rarely shows
    # where the failure happened.
    import traceback
    traceback.print_exc()
    # Abort with a non-zero status. SystemExit is raised directly rather than
    # relying on the site-provided `exit` helper.
    raise SystemExit(1) from e

# Step 4: Get bounding boxes
print("\n4. Getting text bounding boxes...")
try:
    boxes = client.get_text_boxes(image)
    print(f"   ✅ Got {len(boxes)} bounding boxes")

    # Show at most the first three boxes, numbered from 1.
    if boxes:
        print("   Sample boxes (x, y, w, h):")
        for i, box in enumerate(boxes[:3], start=1):
            print(f"      {i}. {box}")

except Exception as e:
    print(f"   ❌ Error: {e}")
    # Abort here, as the earlier steps do. Without this the script would fall
    # through and print the success banner with exit status 0 after a failure.
    raise SystemExit(1) from e

# Reached only when every step above succeeded.
print("\n" + "="*80)
print("Test completed successfully!")
print("="*80)