pdf_signature_extraction/test_mask_and_detect.py
gbanyan 479d4e0019 Add PaddleOCR masking and region detection pipeline
- Created PaddleOCR client for remote server communication
- Implemented text masking + region detection pipeline
- Test results: 100% recall on sample PDF (found both signatures)
- Identified issues: split regions, printed text not fully masked
- Documented 5 solution options in PADDLEOCR_STATUS.md
- Next: Implement region merging and two-stage cleaning
2025-10-28 22:28:18 +08:00


#!/usr/bin/env python3
"""
Test PaddleOCR Masking + Region Detection Pipeline
This script demonstrates:
1. PaddleOCR detects printed text bounding boxes
2. Mask out all printed text areas (fill with black)
3. Detect remaining non-white regions (potential handwriting)
4. Visualize the results
"""
import fitz # PyMuPDF
import numpy as np
import cv2
from pathlib import Path
from paddleocr_client import create_ocr_client
# Configuration
TEST_PDF = "/Volumes/NV2/PDF-Processing/signature-image-output/201301_1324_AI1_page3.pdf"
OUTPUT_DIR = "/Volumes/NV2/PDF-Processing/signature-image-output/mask_test"
DPI = 300
# Region detection parameters
MIN_REGION_AREA = 3000 # Minimum pixels for a region
MAX_REGION_AREA = 300000 # Maximum pixels for a region
MIN_ASPECT_RATIO = 0.3 # Minimum width/height ratio
MAX_ASPECT_RATIO = 15.0 # Maximum width/height ratio
print("="*80)
print("PaddleOCR Masking + Region Detection Test")
print("="*80)
# Create output directory
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
# Step 1: Connect to PaddleOCR server
print("\n1. Connecting to PaddleOCR server...")
try:
    ocr_client = create_ocr_client()
    print(f" ✅ Connected: {ocr_client.server_url}")
except Exception as e:
    print(f" ❌ Error: {e}")
    exit(1)
# Step 2: Render PDF to image
print("\n2. Rendering PDF to image...")
try:
    doc = fitz.open(TEST_PDF)
    page = doc[0]
    mat = fitz.Matrix(DPI/72, DPI/72)
    pix = page.get_pixmap(matrix=mat)
    original_image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
    if pix.n == 4:  # RGBA
        original_image = cv2.cvtColor(original_image, cv2.COLOR_RGBA2RGB)
    print(f" ✅ Rendered: {original_image.shape[1]}x{original_image.shape[0]} pixels")
    doc.close()
except Exception as e:
    print(f" ❌ Error: {e}")
    exit(1)
# Step 3: Detect printed text with PaddleOCR
print("\n3. Detecting printed text with PaddleOCR...")
try:
    text_boxes = ocr_client.get_text_boxes(original_image)
    print(f" ✅ Detected {len(text_boxes)} text regions")
    # Show some sample boxes
    if text_boxes:
        print(" Sample text boxes (x, y, w, h):")
        for i, box in enumerate(text_boxes[:3]):
            print(f" {i+1}. {box}")
except Exception as e:
    print(f" ❌ Error: {e}")
    exit(1)
# Step 4: Mask out printed text areas
print("\n4. Masking printed text areas...")
try:
    masked_image = original_image.copy()
    # Fill each text box with black
    for (x, y, w, h) in text_boxes:
        cv2.rectangle(masked_image, (x, y), (x + w, y + h), (0, 0, 0), -1)
    print(f" ✅ Masked {len(text_boxes)} text regions")
    # Save masked image
    masked_path = Path(OUTPUT_DIR) / "01_masked_image.png"
    cv2.imwrite(str(masked_path), cv2.cvtColor(masked_image, cv2.COLOR_RGB2BGR))
    print(f" 📁 Saved: {masked_path}")
except Exception as e:
    print(f" ❌ Error: {e}")
    exit(1)
# Step 5: Detect remaining non-white regions
print("\n5. Detecting remaining non-white regions...")
try:
    # Convert to grayscale
    gray = cv2.cvtColor(masked_image, cv2.COLOR_RGB2GRAY)
    # Threshold to find non-white areas
    # Anything darker than 250 is considered "content"
    _, binary = cv2.threshold(gray, 250, 255, cv2.THRESH_BINARY_INV)
    # Apply morphological operations to connect nearby regions
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)
    # Find contours
    contours, _ = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f" ✅ Found {len(contours)} contours")
    # Filter contours by size and aspect ratio
    potential_regions = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = w * h
        aspect_ratio = w / h if h > 0 else 0
        # Check constraints
        if (MIN_REGION_AREA <= area <= MAX_REGION_AREA and
                MIN_ASPECT_RATIO <= aspect_ratio <= MAX_ASPECT_RATIO):
            potential_regions.append({
                'box': (x, y, w, h),
                'area': area,
                'aspect_ratio': aspect_ratio
            })
    print(f" ✅ Filtered to {len(potential_regions)} potential handwriting regions")
    # Show region details
    if potential_regions:
        print("\n Detected regions:")
        for i, region in enumerate(potential_regions[:5]):
            x, y, w, h = region['box']
            print(f" {i+1}. Box: ({x}, {y}, {w}, {h}), "
                  f"Area: {region['area']}, "
                  f"Aspect: {region['aspect_ratio']:.2f}")
except Exception as e:
    print(f" ❌ Error: {e}")
    import traceback
    traceback.print_exc()
    exit(1)
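# --- Hedged sketch: region merging (not part of the tested pipeline above) ---
# The commit notes "split regions" as an open issue and "region merging" as a
# next step. One plausible approach, assuming the axis-aligned (x, y, w, h)
# boxes produced above: greedily merge any two boxes whose gap is below a pixel
# threshold, repeating until no further merges occur. The function name and the
# 40 px default are illustrative assumptions, not the project's confirmed design.
def merge_nearby_boxes(boxes, max_gap=40):
    """Greedily merge axis-aligned boxes that lie within max_gap pixels of each other."""
    merged = [list(b) for b in boxes]
    changed = True
    while changed:
        changed = False
        for i in range(len(merged)):
            for j in range(i + 1, len(merged)):
                x1, y1, w1, h1 = merged[i]
                x2, y2, w2, h2 = merged[j]
                # Gap between the boxes along each axis (0 when they overlap)
                gap_x = max(0, max(x1, x2) - min(x1 + w1, x2 + w2))
                gap_y = max(0, max(y1, y2) - min(y1 + h1, y2 + h2))
                if gap_x <= max_gap and gap_y <= max_gap:
                    # Replace box i with the union of the two, drop box j
                    nx, ny = min(x1, x2), min(y1, y2)
                    nw = max(x1 + w1, x2 + w2) - nx
                    nh = max(y1 + h1, y2 + h2) - ny
                    merged[i] = [nx, ny, nw, nh]
                    del merged[j]
                    changed = True
                    break
            if changed:
                break
    return [tuple(b) for b in merged]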
# Step 6: Visualize results
print("\n6. Creating visualizations...")
try:
    # Visualization 1: Original with text boxes
    vis_original = original_image.copy()
    for (x, y, w, h) in text_boxes:
        cv2.rectangle(vis_original, (x, y), (x + w, y + h), (0, 255, 0), 3)
    vis_original_path = Path(OUTPUT_DIR) / "02_original_with_text_boxes.png"
    cv2.imwrite(str(vis_original_path), cv2.cvtColor(vis_original, cv2.COLOR_RGB2BGR))
    print(f" 📁 Original + text boxes: {vis_original_path}")
    # Visualization 2: Masked image with detected regions
    vis_masked = masked_image.copy()
    for region in potential_regions:
        x, y, w, h = region['box']
        cv2.rectangle(vis_masked, (x, y), (x + w, y + h), (255, 0, 0), 3)
    vis_masked_path = Path(OUTPUT_DIR) / "03_masked_with_regions.png"
    cv2.imwrite(str(vis_masked_path), cv2.cvtColor(vis_masked, cv2.COLOR_RGB2BGR))
    print(f" 📁 Masked + regions: {vis_masked_path}")
    # Visualization 3: Binary threshold result
    binary_path = Path(OUTPUT_DIR) / "04_binary_threshold.png"
    cv2.imwrite(str(binary_path), binary)
    print(f" 📁 Binary threshold: {binary_path}")
    # Visualization 4: Morphed result
    morphed_path = Path(OUTPUT_DIR) / "05_morphed.png"
    cv2.imwrite(str(morphed_path), morphed)
    print(f" 📁 Morphed: {morphed_path}")
    # Extract and save each detected region
    print("\n7. Extracting detected regions...")
    for i, region in enumerate(potential_regions):
        x, y, w, h = region['box']
        # Add padding
        padding = 10
        x_pad = max(0, x - padding)
        y_pad = max(0, y - padding)
        w_pad = min(original_image.shape[1] - x_pad, w + 2*padding)
        h_pad = min(original_image.shape[0] - y_pad, h + 2*padding)
        # Extract region from original image
        region_img = original_image[y_pad:y_pad+h_pad, x_pad:x_pad+w_pad]
        # Save region
        region_path = Path(OUTPUT_DIR) / f"region_{i+1:02d}.png"
        cv2.imwrite(str(region_path), cv2.cvtColor(region_img, cv2.COLOR_RGB2BGR))
        print(f" 📁 Region {i+1}: {region_path}")
except Exception as e:
    print(f" ❌ Error: {e}")
    import traceback
    traceback.print_exc()
print("\n" + "="*80)
print("Test completed!")
print(f"Results saved to: {OUTPUT_DIR}")
print("="*80)
print("\nSummary:")
print(f" - Printed text regions detected: {len(text_boxes)}")
print(f" - Potential handwriting regions: {len(potential_regions)}")
print(f" - Expected signatures: 2 (楊智惠, 張志銘)")
print("="*80)