#!/usr/bin/env python3
"""
Test PaddleOCR Masking + Region Detection Pipeline

This script demonstrates:
1. PaddleOCR detects printed text bounding boxes
2. Mask out all printed text areas (fill with black)
3. Detect remaining non-white regions (potential handwriting), ignoring
   the areas that were just masked
4. Visualize the results and save a padded crop of every detected region

The script runs top to bottom when executed. Steps 1-5 abort with exit
status 1 on failure; a failure while writing the visualizations/crops is
reported with a traceback but the final summary is still printed.
"""

import sys
import traceback
from pathlib import Path

import cv2
import fitz  # PyMuPDF
import numpy as np

from paddleocr_client import create_ocr_client

# Configuration
TEST_PDF = "/Volumes/NV2/PDF-Processing/signature-image-output/201301_1324_AI1_page3.pdf"
OUTPUT_DIR = "/Volumes/NV2/PDF-Processing/signature-image-output/mask_test"
DPI = 300

# Region detection parameters
MIN_REGION_AREA = 3000    # Minimum bounding-box area (pixels) for a region
MAX_REGION_AREA = 300000  # Maximum bounding-box area (pixels) for a region
MIN_ASPECT_RATIO = 0.3    # Minimum width/height ratio
MAX_ASPECT_RATIO = 15.0   # Maximum width/height ratio

# Margin (pixels) added on every side of a region before it is cropped
REGION_PADDING = 10


def save_image(path, image):
    """Write *image* to *path* with OpenCV.

    Three-dimensional arrays are treated as RGB and converted to BGR, the
    channel order ``cv2.imwrite`` expects; two-dimensional (single-channel)
    arrays are written unchanged.

    Raises:
        OSError: if ``cv2.imwrite`` reports that the file was not written.
    """
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(str(path), image):
        raise OSError(f"Failed to write image: {path}")


print("="*80)
print("PaddleOCR Masking + Region Detection Test")
print("="*80)

# Create output directory
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

# Step 1: Connect to PaddleOCR server
print("\n1. Connecting to PaddleOCR server...")
try:
    ocr_client = create_ocr_client()
    print(f" ✅ Connected: {ocr_client.server_url}")
except Exception as e:
    print(f" ❌ Error: {e}")
    sys.exit(1)

# Step 2: Render PDF to image
print("\n2. Rendering PDF to image...")
try:
    # The context manager closes the document even if rendering fails.
    with fitz.open(TEST_PDF) as doc:
        page = doc[0]
        mat = fitz.Matrix(DPI / 72, DPI / 72)  # PDF user space is 72 units/inch
        pix = page.get_pixmap(matrix=mat)
        original_image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
            pix.height, pix.width, pix.n
        )

    if pix.n == 4:  # RGBA
        original_image = cv2.cvtColor(original_image, cv2.COLOR_RGBA2RGB)

    print(f" ✅ Rendered: {original_image.shape[1]}x{original_image.shape[0]} pixels")
except Exception as e:
    print(f" ❌ Error: {e}")
    sys.exit(1)

# Step 3: Detect printed text with PaddleOCR
print("\n3. Detecting printed text with PaddleOCR...")
try:
    # Boxes are unpacked below as (x, y, w, h) tuples.
    text_boxes = ocr_client.get_text_boxes(original_image)
    print(f" ✅ Detected {len(text_boxes)} text regions")

    # Show some sample boxes
    if text_boxes:
        print(" Sample text boxes (x, y, w, h):")
        for i, box in enumerate(text_boxes[:3]):
            print(f" {i+1}. {box}")
except Exception as e:
    print(f" ❌ Error: {e}")
    sys.exit(1)

# Step 4: Mask out printed text areas
print("\n4. Masking printed text areas...")
try:
    masked_image = original_image.copy()

    # Fill each text box with black
    for (x, y, w, h) in text_boxes:
        cv2.rectangle(masked_image, (x, y), (x + w, y + h), (0, 0, 0), -1)

    print(f" ✅ Masked {len(text_boxes)} text regions")

    # Save masked image
    masked_path = Path(OUTPUT_DIR) / "01_masked_image.png"
    save_image(masked_path, masked_image)
    print(f" 📁 Saved: {masked_path}")
except Exception as e:
    print(f" ❌ Error: {e}")
    sys.exit(1)

# Step 5: Detect remaining non-white regions
print("\n5. Detecting remaining non-white regions...")
try:
    # Convert to grayscale
    gray = cv2.cvtColor(masked_image, cv2.COLOR_RGB2GRAY)

    # Threshold to find non-white areas
    # Anything darker than 250 is considered "content"
    _, binary = cv2.threshold(gray, 250, 255, cv2.THRESH_BINARY_INV)

    # The masks painted in step 4 are black, so the threshold above marks
    # them as content. Clear exactly the same rectangles so that masked
    # printed text is not reported back as a handwriting candidate.
    for (x, y, w, h) in text_boxes:
        cv2.rectangle(binary, (x, y), (x + w, y + h), 0, -1)

    # Apply morphological operations to connect nearby regions
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Find contours
    contours, _ = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f" ✅ Found {len(contours)} contours")

    # Filter contours by bounding-box size and aspect ratio
    potential_regions = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        area = w * h
        aspect_ratio = w / h if h > 0 else 0

        if not MIN_REGION_AREA <= area <= MAX_REGION_AREA:
            continue
        if not MIN_ASPECT_RATIO <= aspect_ratio <= MAX_ASPECT_RATIO:
            continue

        potential_regions.append({
            'box': (x, y, w, h),
            'area': area,
            'aspect_ratio': aspect_ratio,
        })

    print(f" ✅ Filtered to {len(potential_regions)} potential handwriting regions")

    # Show region details
    if potential_regions:
        print("\n Detected regions:")
        for i, region in enumerate(potential_regions[:5]):
            x, y, w, h = region['box']
            print(f" {i+1}. Box: ({x}, {y}, {w}, {h}), "
                  f"Area: {region['area']}, "
                  f"Aspect: {region['aspect_ratio']:.2f}")
except Exception as e:
    print(f" ❌ Error: {e}")
    traceback.print_exc()
    sys.exit(1)

# Step 6: Visualize results
print("\n6. Creating visualizations...")
try:
    # Visualization 1: Original with text boxes
    vis_original = original_image.copy()
    for (x, y, w, h) in text_boxes:
        cv2.rectangle(vis_original, (x, y), (x + w, y + h), (0, 255, 0), 3)

    vis_original_path = Path(OUTPUT_DIR) / "02_original_with_text_boxes.png"
    save_image(vis_original_path, vis_original)
    print(f" 📁 Original + text boxes: {vis_original_path}")

    # Visualization 2: Masked image with detected regions
    vis_masked = masked_image.copy()
    for region in potential_regions:
        x, y, w, h = region['box']
        cv2.rectangle(vis_masked, (x, y), (x + w, y + h), (255, 0, 0), 3)

    vis_masked_path = Path(OUTPUT_DIR) / "03_masked_with_regions.png"
    save_image(vis_masked_path, vis_masked)
    print(f" 📁 Masked + regions: {vis_masked_path}")

    # Visualization 3: Binary threshold result
    binary_path = Path(OUTPUT_DIR) / "04_binary_threshold.png"
    save_image(binary_path, binary)
    print(f" 📁 Binary threshold: {binary_path}")

    # Visualization 4: Morphed result
    morphed_path = Path(OUTPUT_DIR) / "05_morphed.png"
    save_image(morphed_path, morphed)
    print(f" 📁 Morphed: {morphed_path}")

    # Extract and save each detected region
    print("\n7. Extracting detected regions...")
    img_h, img_w = original_image.shape[:2]
    for i, region in enumerate(potential_regions, start=1):
        x, y, w, h = region['box']

        # Pad every side by REGION_PADDING, clipping each edge to the image
        # independently so a box near the border never gains extra margin
        # on its opposite side.
        x0 = max(0, x - REGION_PADDING)
        y0 = max(0, y - REGION_PADDING)
        x1 = min(img_w, x + w + REGION_PADDING)
        y1 = min(img_h, y + h + REGION_PADDING)

        # Extract region from original (unmasked) image
        region_img = original_image[y0:y1, x0:x1]

        # Save region
        region_path = Path(OUTPUT_DIR) / f"region_{i:02d}.png"
        save_image(region_path, region_img)
        print(f" 📁 Region {i}: {region_path}")
except Exception as e:
    # Best effort: report the failure but still print the summary below.
    print(f" ❌ Error: {e}")
    traceback.print_exc()

print("\n" + "="*80)
print("Test completed!")
print(f"Results saved to: {OUTPUT_DIR}")
print("="*80)

print("\nSummary:")
print(f" - Printed text regions detected: {len(text_boxes)}")
print(f" - Potential handwriting regions: {len(potential_regions)}")
print(" - Expected signatures: 2 (楊智惠, 張志銘)")
print("="*80)