Add PaddleOCR masking and region detection pipeline
- Created PaddleOCR client for remote server communication
- Implemented text masking + region detection pipeline
- Test results: 100% recall on sample PDF (found both signatures)
- Identified issues: split regions, printed text not fully masked
- Documented 5 solution options in PADDLEOCR_STATUS.md
- Next: Implement region merging and two-stage cleaning
This commit is contained in:
75
check_rejected_for_missing.py
Normal file
75
check_rejected_for_missing.py
Normal file
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Check if rejected regions contain the missing signatures."""
|
||||
|
||||
import base64
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
# Base URL of the Ollama server; "/api/generate" is appended when querying.
OLLAMA_URL = "http://192.168.30.36:11434"
# Model tag sent in the "model" field of every generate request.
OLLAMA_MODEL = "qwen2.5vl:32b"
# Directory searched for "<pdf_stem>_region_*.png" files.
REJECTED_PATH = "/Volumes/NV2/PDF-Processing/signature-image-output/signatures/rejected"

# Missing signatures based on test results
# Keys are the file-name stems used to find region images; values are the
# names that the VLM is asked to look for in those images.
MISSING = {
    "201301_2061_AI1_page5": "林姿妤",
    "201301_2458_AI1_page4": "魏興海",
    "201301_2923_AI1_page3": "陈丽琦",
}
|
||||
|
||||
def encode_image_to_base64(image_path):
    """Encode image file to base64.

    Args:
        image_path: Location of the file to read; handed directly to
            ``open`` in binary mode.

    Returns:
        The file's bytes, base64-encoded and returned as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, 'rb') as f:
        return base64.b64encode(f.read()).decode('utf-8')
|
||||
|
||||
def ask_vlm_about_signature(image_base64, expected_name):
    """Ask VLM if the image contains the expected signature.

    Posts one non-streaming request to ``{OLLAMA_URL}/api/generate`` with the
    image attached and a prompt that asks for a yes/no answer.

    Args:
        image_base64: Base64-encoded image, sent as the only entry of the
            request's "images" list.
        expected_name: Name interpolated into the prompt text.

    Returns:
        The text of the reply's "response" field, stripped and lower-cased.
        If the request fails, or the reply has no usable "response" text, a
        string starting with ``"error: "`` is returned instead of raising,
        so a caller looping over many images can carry on.
    """
    prompt = f"""Does this image contain a handwritten signature with the Chinese name: "{expected_name}"?

Look carefully for handwritten Chinese characters matching this name.

Answer only 'yes' or 'no'."""

    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "images": [image_base64],
        "stream": False
    }

    # Only the network round-trip and JSON decoding are guarded; anything
    # else that goes wrong is a programming error and should surface.
    try:
        response = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=60)
        response.raise_for_status()
        body = response.json()
    except (requests.RequestException, ValueError) as e:
        return f"error: {e}"

    # Validate the payload shape explicitly rather than relying on a
    # KeyError/AttributeError being swallowed.
    reply = body.get('response') if isinstance(body, dict) else None
    if not isinstance(reply, str):
        return "error: malformed reply (no 'response' text)"
    return reply.strip().lower()
|
||||
|
||||
def _first_verdict(answer):
    """Return 'yes' or 'no', whichever standalone word comes first, else None.

    Error strings (those starting with "error:") never yield a verdict, so a
    failed request cannot be mistaken for a positive or negative answer.
    """
    if answer.startswith('error:'):
        return None
    for word in answer.split():
        # Drop surrounding punctuation/markdown so "yes." or "**no**" count.
        token = word.strip(".,!?:;*'\"()[]")
        if token in ('yes', 'no'):
            return token
    return None


# Check each missing signature
for pdf_stem, missing_name in MISSING.items():
    print(f"\n{'='*80}")
    print(f"Checking rejected regions from: {pdf_stem}")
    print(f"Looking for missing signature: {missing_name}")
    print('='*80)

    # Find all rejected regions from this PDF
    rejected_regions = sorted(Path(REJECTED_PATH).glob(f"{pdf_stem}_region_*.png"))

    print(f"Found {len(rejected_regions)} rejected regions to check")

    for region_path in rejected_regions:
        region_name = region_path.name
        print(f"\nChecking: {region_name}...", end='', flush=True)

        # Encode and ask VLM
        image_base64 = encode_image_to_base64(region_path)
        answer = ask_vlm_about_signature(image_base64, missing_name)

        # Decide on the first explicit yes/no word instead of a substring
        # test, so "no ... yes ..." replies and error text are not counted
        # as a match.
        if _first_verdict(answer) == 'yes':
            print(f" ✅ FOUND! This region contains {missing_name}")
            print(f" → The signature was detected by CV but rejected by verification!")
        elif answer.startswith('error:'):
            # A failed request is inconclusive, not a negative result.
            print(f" ⚠️ Could not check (VLM says: {answer})")
        else:
            print(f" ❌ No (VLM says: {answer})")

print(f"\n{'='*80}")
print("Analysis complete!")
print('='*80)
|
||||
Reference in New Issue
Block a user