Complete OpenCV Method 3 implementation with 86.5% handwriting retention
- Implemented comprehensive feature analysis based on size, stroke length, and regularity
- Size-based scoring: height >50px indicates handwriting
- Stroke length ratio: >0.4 indicates handwriting
- Irregularity metrics: low compactness/solidity indicates handwriting
- Successfully tested on sample PDF with 2 signatures (楊智惠, 張志銘)
- Created detailed documentation: CURRENT_STATUS.md and NEW_SESSION_HANDOFF.md
- Stable PaddleOCR 2.7.3 configuration documented (numpy 1.26.4, opencv 4.6.0.66)
- Prepared research plan for PP-OCRv5 upgrade investigation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
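Note: the Method 3 scorer described above is not part of this diff (the file below contains Methods 1 and 2 only). The following is a minimal sketch of how the listed heuristics could be combined, assuming a simple majority vote; the function name, the compactness/solidity cutoffs, and the way the stroke-length ratio is obtained are illustrative assumptions, not the committed implementation.

import cv2
import numpy as np

def looks_handwritten(comp_mask, stroke_length_ratio):
    """Vote on one component mask (uint8, 255 = ink): True if it looks handwritten."""
    contours, _ = cv2.findContours(comp_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return False
    cnt = max(contours, key=cv2.contourArea)
    _, _, w, h = cv2.boundingRect(cnt)
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)
    hull_area = cv2.contourArea(cv2.convexHull(cnt))

    votes = 0
    if h > 50:                     # size: tall components suggest handwriting
        votes += 1
    if stroke_length_ratio > 0.4:  # stroke-length ratio (computed by the caller; exact definition not given in the commit)
        votes += 1
    compactness = 4 * np.pi * area / (perimeter ** 2) if perimeter > 0 else 0
    solidity = area / hull_area if hull_area > 0 else 0
    if compactness < 0.2 or solidity < 0.5:  # irregularity: cutoffs are assumptions
        votes += 1
    return votes >= 2              # majority vote (assumed)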
test_opencv_separation.py (new file, 272 lines)
@@ -0,0 +1,272 @@
#!/usr/bin/env python3
"""
Test OpenCV methods to separate handwriting from printed text

Tests two methods:
1. Stroke Width Analysis (笔画宽度分析)
2. Connected Components + Shape Features (连通组件+形状特征)
"""

import cv2
import numpy as np
from pathlib import Path

# Test image - contains both printed and handwritten text
TEST_IMAGE = "/Volumes/NV2/PDF-Processing/signature-image-output/paddleocr_improved/signature_02_original.png"
OUTPUT_DIR = "/Volumes/NV2/PDF-Processing/signature-image-output/opencv_separation_test"

print("="*80)
print("OpenCV Handwriting Separation Test")
print("="*80)

# Create output directory
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

# Load image
print(f"\nLoading test image: {Path(TEST_IMAGE).name}")
image = cv2.imread(TEST_IMAGE)
if image is None:
    print(f"Error: Cannot load image from {TEST_IMAGE}")
    exit(1)

image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
print(f"Image size: {image.shape[1]}x{image.shape[0]}")

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Binarize (Otsu, inverted so ink becomes foreground = 255)
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Save binary for reference
cv2.imwrite(str(Path(OUTPUT_DIR) / "00_binary.png"), binary)
print("\n📁 Saved: 00_binary.png")

print("\n" + "="*80)
|
||||
print("METHOD 1: Stroke Width Analysis (笔画宽度分析)")
|
||||
print("="*80)
|
||||
|
||||
def method1_stroke_width(binary_img, threshold_values=[2.0, 3.0, 4.0, 5.0]):
|
||||
"""
|
||||
Method 1: Separate by stroke width using distance transform
|
||||
|
||||
Args:
|
||||
binary_img: Binary image (foreground = 255, background = 0)
|
||||
threshold_values: List of distance thresholds to test
|
||||
|
||||
Returns:
|
||||
List of (threshold, result_image) tuples
|
||||
"""
|
||||
results = []
|
||||
|
||||
# Calculate distance transform
|
||||
dist_transform = cv2.distanceTransform(binary_img, cv2.DIST_L2, 5)
|
||||
|
||||
# Normalize for visualization
|
||||
dist_normalized = cv2.normalize(dist_transform, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
|
||||
results.append(('distance_transform', dist_normalized))
|
||||
|
||||
print("\n Distance transform statistics:")
|
||||
print(f" Min: {dist_transform.min():.2f}")
|
||||
print(f" Max: {dist_transform.max():.2f}")
|
||||
print(f" Mean: {dist_transform.mean():.2f}")
|
||||
print(f" Median: {np.median(dist_transform):.2f}")
|
||||
|
||||
# Test different thresholds
|
||||
print("\n Testing different stroke width thresholds:")
|
||||
|
||||
for threshold in threshold_values:
|
||||
# Pixels with distance > threshold are considered "thick strokes" (handwriting)
|
||||
handwriting_mask = (dist_transform > threshold).astype(np.uint8) * 255
|
||||
|
||||
# Count pixels
|
||||
total_foreground = np.count_nonzero(binary_img)
|
||||
handwriting_pixels = np.count_nonzero(handwriting_mask)
|
||||
percentage = (handwriting_pixels / total_foreground * 100) if total_foreground > 0 else 0
|
||||
|
||||
print(f" Threshold {threshold:.1f}: {handwriting_pixels} pixels ({percentage:.1f}% of foreground)")
|
||||
|
||||
results.append((f'threshold_{threshold:.1f}', handwriting_mask))
|
||||
|
||||
return results
|
||||
|
||||
# Run Method 1
method1_results = method1_stroke_width(binary, threshold_values=[2.0, 2.5, 3.0, 3.5, 4.0, 5.0])

# Save Method 1 results
print("\n Saving results...")
for name, result_img in method1_results:
    output_path = Path(OUTPUT_DIR) / f"method1_{name}.png"
    cv2.imwrite(str(output_path), result_img)
    print(f" 📁 {output_path.name}")

# Apply best threshold result to original image
best_threshold = 3.0  # Will adjust based on visual inspection
_, best_mask = [(n, r) for n, r in method1_results if f'threshold_{best_threshold}' in n][0]

# Dilate mask slightly to connect nearby strokes
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
best_mask_dilated = cv2.dilate(best_mask, kernel, iterations=1)

# Apply to color image
result_method1 = cv2.bitwise_and(image, image, mask=best_mask_dilated)
cv2.imwrite(str(Path(OUTPUT_DIR) / "method1_final_result.png"), result_method1)
print(f"\n 📁 Final result: method1_final_result.png (threshold={best_threshold})")


print("\n" + "="*80)
|
||||
print("METHOD 2: Connected Components + Shape Features (连通组件分析)")
|
||||
print("="*80)
|
||||
|
||||
def method2_component_analysis(binary_img, original_img):
|
||||
"""
|
||||
Method 2: Analyze each connected component's shape features
|
||||
|
||||
Printed text characteristics:
|
||||
- Regular bounding box (aspect ratio ~1:1)
|
||||
- Medium size (200-2000 pixels)
|
||||
- High circularity/compactness
|
||||
|
||||
Handwriting characteristics:
|
||||
- Irregular shapes
|
||||
- May be large (connected strokes)
|
||||
- Variable aspect ratios
|
||||
"""
|
||||
# Find connected components
|
||||
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary_img, connectivity=8)
|
||||
|
||||
print(f"\n Found {num_labels - 1} connected components")
|
||||
|
||||
# Create masks for different categories
|
||||
handwriting_mask = np.zeros_like(binary_img)
|
||||
printed_mask = np.zeros_like(binary_img)
|
||||
|
||||
# Analyze each component
|
||||
component_info = []
|
||||
|
||||
for i in range(1, num_labels): # Skip background (0)
|
||||
x, y, w, h, area = stats[i]
|
||||
|
||||
# Calculate features
|
||||
aspect_ratio = w / h if h > 0 else 0
|
||||
perimeter = cv2.arcLength(cv2.findContours((labels == i).astype(np.uint8),
|
||||
cv2.RETR_EXTERNAL,
|
||||
cv2.CHAIN_APPROX_SIMPLE)[0][0], True)
|
||||
compactness = (4 * np.pi * area) / (perimeter * perimeter) if perimeter > 0 else 0
|
||||
|
||||
# Classification logic
|
||||
# Printed text: medium size, regular aspect ratio, compact
|
||||
is_printed = (
|
||||
(200 < area < 3000) and # Medium size
|
||||
(0.3 < aspect_ratio < 3.0) and # Not too elongated
|
||||
(area < 1000) # Small to medium
|
||||
)
|
||||
|
||||
# Handwriting: larger, or irregular, or very wide/tall
|
||||
is_handwriting = (
|
||||
(area >= 3000) or # Large components (likely handwriting)
|
||||
(aspect_ratio > 3.0) or # Very elongated (连笔)
|
||||
(aspect_ratio < 0.3) or # Very tall
|
||||
not is_printed # Default to handwriting if not clearly printed
|
||||
)
|
||||
|
||||
component_info.append({
|
||||
'id': i,
|
||||
'area': area,
|
||||
'aspect_ratio': aspect_ratio,
|
||||
'compactness': compactness,
|
||||
'is_printed': is_printed,
|
||||
'is_handwriting': is_handwriting
|
||||
})
|
||||
|
||||
# Assign to mask
|
||||
if is_handwriting:
|
||||
handwriting_mask[labels == i] = 255
|
||||
if is_printed:
|
||||
printed_mask[labels == i] = 255
|
||||
|
||||
# Print statistics
|
||||
print("\n Component statistics:")
|
||||
handwriting_components = [c for c in component_info if c['is_handwriting']]
|
||||
printed_components = [c for c in component_info if c['is_printed']]
|
||||
|
||||
print(f" Handwriting components: {len(handwriting_components)}")
|
||||
print(f" Printed components: {len(printed_components)}")
|
||||
|
||||
# Show top 5 largest components
|
||||
print("\n Top 5 largest components:")
|
||||
sorted_components = sorted(component_info, key=lambda c: c['area'], reverse=True)
|
||||
for i, comp in enumerate(sorted_components[:5], 1):
|
||||
comp_type = "Handwriting" if comp['is_handwriting'] else "Printed"
|
||||
print(f" {i}. Area: {comp['area']:5d}, Aspect: {comp['aspect_ratio']:.2f}, "
|
||||
f"Type: {comp_type}")
|
||||
|
||||
return handwriting_mask, printed_mask, component_info
|
||||
|
||||
# Run Method 2
handwriting_mask_m2, printed_mask_m2, components = method2_component_analysis(binary, image)

# Save Method 2 results
print("\n Saving results...")

# Handwriting mask
cv2.imwrite(str(Path(OUTPUT_DIR) / "method2_handwriting_mask.png"), handwriting_mask_m2)
print(f" 📁 method2_handwriting_mask.png")

# Printed mask
cv2.imwrite(str(Path(OUTPUT_DIR) / "method2_printed_mask.png"), printed_mask_m2)
print(f" 📁 method2_printed_mask.png")

# Apply to original image
result_handwriting = cv2.bitwise_and(image, image, mask=handwriting_mask_m2)
result_printed = cv2.bitwise_and(image, image, mask=printed_mask_m2)

cv2.imwrite(str(Path(OUTPUT_DIR) / "method2_handwriting_result.png"), result_handwriting)
print(f" 📁 method2_handwriting_result.png")

cv2.imwrite(str(Path(OUTPUT_DIR) / "method2_printed_result.png"), result_printed)
print(f" 📁 method2_printed_result.png")

# Create a color-coded visualization of the classification
vis_components = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
vis_components = cv2.cvtColor(vis_components, cv2.COLOR_BGR2RGB)

# Color code: green = handwriting, red = printed
vis_overlay = image.copy()
vis_overlay[handwriting_mask_m2 > 0] = [0, 255, 0]  # Green for handwriting
vis_overlay[printed_mask_m2 > 0] = [0, 0, 255]      # Red for printed (BGR)

# Blend with original
vis_final = cv2.addWeighted(image, 0.6, vis_overlay, 0.4, 0)
cv2.imwrite(str(Path(OUTPUT_DIR) / "method2_visualization.png"), vis_final)
print(f" 📁 method2_visualization.png (green=handwriting, red=printed)")


print("\n" + "="*80)
|
||||
print("COMPARISON")
|
||||
print("="*80)
|
||||
|
||||
# Count non-white pixels in each result
|
||||
def count_content_pixels(img):
|
||||
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img
|
||||
return np.count_nonzero(gray > 10)
|
||||
|
||||
original_pixels = count_content_pixels(image)
|
||||
method1_pixels = count_content_pixels(result_method1)
|
||||
method2_pixels = count_content_pixels(result_handwriting)
|
||||
|
||||
print(f"\nContent pixels retained:")
|
||||
print(f" Original image: {original_pixels:6d} pixels")
|
||||
print(f" Method 1 (stroke): {method1_pixels:6d} pixels ({method1_pixels/original_pixels*100:.1f}%)")
|
||||
print(f" Method 2 (component): {method2_pixels:6d} pixels ({method2_pixels/original_pixels*100:.1f}%)")
|
||||
|
||||
print("\n" + "="*80)
|
||||
print("Test completed!")
|
||||
print(f"Results saved to: {OUTPUT_DIR}")
|
||||
print("="*80)
|
||||
|
||||
print("\nNext steps:")
|
||||
print(" 1. Review the output images")
|
||||
print(" 2. Check which method better preserves handwriting")
|
||||
print(" 3. Adjust thresholds if needed")
|
||||
print(" 4. Choose the best method for production pipeline")
|
||||