Complete OpenCV Method 3 implementation with 86.5% handwriting retention

- Implemented comprehensive feature analysis based on size, stroke length, and regularity
- Size-based scoring: height >50px indicates handwriting
- Stroke length ratio: >0.4 indicates handwriting
- Irregularity metrics: low compactness/solidity indicates handwriting
- Successfully tested on sample PDF with 2 signatures (楊智惠, 張志銘)
- Created detailed documentation: CURRENT_STATUS.md and NEW_SESSION_HANDOFF.md
- Stable PaddleOCR 2.7.3 configuration documented (numpy 1.26.4, opencv 4.6.0.66)
- Prepared research plan for PP-OCRv5 upgrade investigation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-27 10:35:46 +08:00
parent 479d4e0019
commit 8f231da3bc
6 changed files with 1718 additions and 0 deletions

272
test_opencv_separation.py Normal file
View File

@@ -0,0 +1,272 @@
#!/usr/bin/env python3
"""
Test OpenCV methods to separate handwriting from printed text.

Tests two methods:
1. Stroke Width Analysis (via the distance transform)
2. Connected Components + Shape Features (per-component heuristics)
"""
import cv2
import numpy as np
from pathlib import Path
# Test image - contains both printed and handwritten
TEST_IMAGE = "/Volumes/NV2/PDF-Processing/signature-image-output/paddleocr_improved/signature_02_original.png"
# All intermediate and final images are written here.
OUTPUT_DIR = "/Volumes/NV2/PDF-Processing/signature-image-output/opencv_separation_test"
print("="*80)
print("OpenCV Handwriting Separation Test")
print("="*80)
# Create output directory
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
# Load image
print(f"\nLoading test image: {Path(TEST_IMAGE).name}")
image = cv2.imread(TEST_IMAGE)
# cv2.imread returns None (no exception) when the path is missing/unreadable.
if image is None:
    print(f"Error: Cannot load image from {TEST_IMAGE}")
    exit(1)
# NOTE(review): image_rgb is never used anywhere below — dead assignment.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
print(f"Image size: {image.shape[1]}x{image.shape[0]}")
# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Binarize: Otsu picks the threshold; INV makes ink = 255, paper = 0,
# which is what the distance transform / connected components expect.
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Save binary for reference
cv2.imwrite(str(Path(OUTPUT_DIR) / "00_binary.png"), binary)
print("\n📁 Saved: 00_binary.png")
print("\n" + "="*80)
print("METHOD 1: Stroke Width Analysis (笔画宽度分析)")
print("="*80)
def method1_stroke_width(binary_img, threshold_values=(2.0, 3.0, 4.0, 5.0)):
    """
    Method 1: Separate handwriting from print by stroke width.

    Uses the L2 distance transform: each foreground pixel's value is its
    distance to the nearest background pixel, so thick strokes (assumed
    handwriting) yield larger distances than thin printed glyphs.

    Args:
        binary_img: Binary image (foreground = 255, background = 0).
        threshold_values: Iterable of distance thresholds to test.
            (Default changed from a mutable list to a tuple to avoid the
            shared-mutable-default pitfall; callers are unaffected.)

    Returns:
        List of (name, result_image) tuples: the normalized distance
        transform first, then one thresholded mask per threshold.
    """
    results = []
    # Distance transform: foreground pixel -> L2 distance to background.
    dist_transform = cv2.distanceTransform(binary_img, cv2.DIST_L2, 5)
    # Normalize to 0-255 purely for visual inspection of the transform.
    dist_normalized = cv2.normalize(dist_transform, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
    results.append(('distance_transform', dist_normalized))
    print("\n Distance transform statistics:")
    print(f" Min: {dist_transform.min():.2f}")
    print(f" Max: {dist_transform.max():.2f}")
    print(f" Mean: {dist_transform.mean():.2f}")
    print(f" Median: {np.median(dist_transform):.2f}")
    # Loop-invariant: total foreground count does not change per threshold,
    # so compute it once instead of on every iteration.
    total_foreground = np.count_nonzero(binary_img)
    # Test different thresholds
    print("\n Testing different stroke width thresholds:")
    for threshold in threshold_values:
        # Pixels with distance > threshold are considered "thick strokes" (handwriting)
        handwriting_mask = (dist_transform > threshold).astype(np.uint8) * 255
        handwriting_pixels = np.count_nonzero(handwriting_mask)
        percentage = (handwriting_pixels / total_foreground * 100) if total_foreground > 0 else 0
        print(f" Threshold {threshold:.1f}: {handwriting_pixels} pixels ({percentage:.1f}% of foreground)")
        results.append((f'threshold_{threshold:.1f}', handwriting_mask))
    return results
# ---- Run Method 1 with a denser threshold sweep and save every result ----
method1_results = method1_stroke_width(binary, threshold_values=[2.0, 2.5, 3.0, 3.5, 4.0, 5.0])
print("\n Saving results...")
out_dir = Path(OUTPUT_DIR)
for label, mask_img in method1_results:
    target = out_dir / f"method1_{label}.png"
    cv2.imwrite(str(target), mask_img)
    print(f" 📁 {target.name}")
# Select the threshold that looked best and apply its mask to the colour image
best_threshold = 3.0  # chosen by eye; revisit after inspecting the sweep above
matches = [m for label, m in method1_results if f'threshold_{best_threshold}' in label]
best_mask = matches[0]
# A small elliptical dilation reconnects strokes the threshold split apart
ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grown_mask = cv2.dilate(best_mask, ellipse, iterations=1)
# Keep only the masked (handwriting) pixels of the original colour image
result_method1 = cv2.bitwise_and(image, image, mask=grown_mask)
cv2.imwrite(str(out_dir / "method1_final_result.png"), result_method1)
print(f"\n 📁 Final result: method1_final_result.png (threshold={best_threshold})")
# Banner for Method 2
print("\n" + "="*80)
print("METHOD 2: Connected Components + Shape Features (连通组件分析)")
print("="*80)
def method2_component_analysis(binary_img, original_img):
    """
    Method 2: Classify each connected component as printed or handwriting.

    Args:
        binary_img: Binary image (foreground = 255, background = 0).
        original_img: Unused in this function; the colour masks are applied
            by the caller. Kept for interface stability.

    Returns:
        (handwriting_mask, printed_mask, component_info) — two uint8 masks
        (255 = selected) plus a list of per-component feature dicts.

    Heuristics:
        Printed text: medium size, regular aspect ratio, compact.
        Handwriting: large, very elongated/tall, or anything not
        confidently printed (the default bucket).

    NOTE(review): `is_printed` combines `200 < area < 3000` with
    `area < 1000`, so the effective printed range is 200..999 and the 3000
    bound is dead. Likewise `is_handwriting` reduces to `not is_printed`
    (its extra OR terms are all implied). Left as-is pending threshold
    tuning; confirm intended ranges before productionizing.
    """
    # Find connected components (8-connectivity merges diagonal stroke pixels)
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary_img, connectivity=8)
    print(f"\n Found {num_labels - 1} connected components")
    # Accumulator masks for the two categories
    handwriting_mask = np.zeros_like(binary_img)
    printed_mask = np.zeros_like(binary_img)
    # Analyze each component
    component_info = []
    for i in range(1, num_labels): # Skip background (0)
        x, y, w, h, area = stats[i]
        # Calculate features
        aspect_ratio = w / h if h > 0 else 0
        # Outer-contour perimeter of this component. OpenCV 4.x
        # findContours returns (contours, hierarchy); every labelled
        # component yields at least one external contour, so [0][0] is safe.
        perimeter = cv2.arcLength(cv2.findContours((labels == i).astype(np.uint8),
                                   cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)[0][0], True)
        # Compactness (isoperimetric ratio); recorded for analysis only —
        # it is NOT used in the classification below.
        compactness = (4 * np.pi * area) / (perimeter * perimeter) if perimeter > 0 else 0
        # Classification logic
        # Printed text: medium size, regular aspect ratio, compact
        is_printed = (
            (200 < area < 3000) and # Medium size
            (0.3 < aspect_ratio < 3.0) and # Not too elongated
            (area < 1000) # Small to medium (tightens the range above)
        )
        # Handwriting: larger, or irregular, or very wide/tall
        is_handwriting = (
            (area >= 3000) or # Large components (likely handwriting)
            (aspect_ratio > 3.0) or # Very elongated (run-on cursive strokes)
            (aspect_ratio < 0.3) or # Very tall
            not is_printed # Default to handwriting if not clearly printed
        )
        component_info.append({
            'id': i,
            'area': area,
            'aspect_ratio': aspect_ratio,
            'compactness': compactness,
            'is_printed': is_printed,
            'is_handwriting': is_handwriting
        })
        # Assign the component's pixels to its category mask
        if is_handwriting:
            handwriting_mask[labels == i] = 255
        if is_printed:
            printed_mask[labels == i] = 255
    # Print statistics
    print("\n Component statistics:")
    handwriting_components = [c for c in component_info if c['is_handwriting']]
    printed_components = [c for c in component_info if c['is_printed']]
    print(f" Handwriting components: {len(handwriting_components)}")
    print(f" Printed components: {len(printed_components)}")
    # Show top 5 largest components
    print("\n Top 5 largest components:")
    sorted_components = sorted(component_info, key=lambda c: c['area'], reverse=True)
    for i, comp in enumerate(sorted_components[:5], 1):
        comp_type = "Handwriting" if comp['is_handwriting'] else "Printed"
        print(f" {i}. Area: {comp['area']:5d}, Aspect: {comp['aspect_ratio']:.2f}, "
              f"Type: {comp_type}")
    return handwriting_mask, printed_mask, component_info
# Run Method 2 on the binarized image
handwriting_mask_m2, printed_mask_m2, components = method2_component_analysis(binary, image)
# Save Method 2 results
print("\n Saving results...")
out_dir = Path(OUTPUT_DIR)
# Handwriting mask
cv2.imwrite(str(out_dir / "method2_handwriting_mask.png"), handwriting_mask_m2)
print(" 📁 method2_handwriting_mask.png")
# Printed mask
cv2.imwrite(str(out_dir / "method2_printed_mask.png"), printed_mask_m2)
print(" 📁 method2_printed_mask.png")
# Apply each mask to the original colour image
result_handwriting = cv2.bitwise_and(image, image, mask=handwriting_mask_m2)
result_printed = cv2.bitwise_and(image, image, mask=printed_mask_m2)
cv2.imwrite(str(out_dir / "method2_handwriting_result.png"), result_handwriting)
print(" 📁 method2_handwriting_result.png")
cv2.imwrite(str(out_dir / "method2_printed_result.png"), result_printed)
print(" 📁 method2_printed_result.png")
# Overlay visualization. Colours are BGR: green = handwriting, red = printed.
# (Removed dead code: an unused `vis_components` grayscale->BGR->RGB round
# trip that was never written out or referenced anywhere.)
vis_overlay = image.copy()
vis_overlay[handwriting_mask_m2 > 0] = [0, 255, 0] # Green for handwriting
vis_overlay[printed_mask_m2 > 0] = [0, 0, 255] # Red for printed
# Blend the colour coding with the original at 60/40 for readability
vis_final = cv2.addWeighted(image, 0.6, vis_overlay, 0.4, 0)
cv2.imwrite(str(out_dir / "method2_visualization.png"), vis_final)
print(" 📁 method2_visualization.png (green=handwriting, red=printed)")
print("\n" + "="*80)
print("COMPARISON")
print("="*80)
# Helper: count "content" pixels — intensity above 10, i.e. brighter than
# the (near-black) background the masked results sit on.
def count_content_pixels(img):
    """Return the number of pixels brighter than 10 in *img* (colour or grayscale)."""
    if len(img.shape) == 3:
        channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        channel = img
    return np.count_nonzero(channel > 10)
# ---- Compare how much content each method retained ----
original_pixels = count_content_pixels(image)          # all content in the source image
method1_pixels = count_content_pixels(result_method1)  # retained by stroke-width mask
method2_pixels = count_content_pixels(result_handwriting)  # retained by component mask

def _pct(part, whole):
    """Percentage of `whole` that `part` represents; 0.0 when `whole` is 0.

    Guards against ZeroDivisionError for a blank source image (no content
    pixels), which the original inline divisions would crash on.
    """
    return (part / whole * 100) if whole > 0 else 0.0

print("\nContent pixels retained:")
print(f" Original image: {original_pixels:6d} pixels")
print(f" Method 1 (stroke): {method1_pixels:6d} pixels ({_pct(method1_pixels, original_pixels):.1f}%)")
print(f" Method 2 (component): {method2_pixels:6d} pixels ({_pct(method2_pixels, original_pixels):.1f}%)")
print("\n" + "="*80)
print("Test completed!")
print(f"Results saved to: {OUTPUT_DIR}")
print("="*80)
print("\nNext steps:")
print(" 1. Review the output images")
print(" 2. Check which method better preserves handwriting")
print(" 3. Adjust thresholds if needed")
print(" 4. Choose the best method for production pipeline")