- Created PaddleOCR client for remote server communication
- Implemented text masking + region detection pipeline
- Test results: 100% recall on sample PDF (found both signatures)
- Identified issues: split regions, printed text not fully masked
- Documented 5 solution options in PADDLEOCR_STATUS.md
- Next: Implement region merging and two-stage cleaning
76 lines
2.4 KiB
Python
76 lines
2.4 KiB
Python
#!/usr/bin/env python3
|
|
"""Check if rejected regions contain the missing signatures."""
|
|
|
|
import base64
|
|
import requests
|
|
from pathlib import Path
|
|
|
|
# Ollama server base URL and the vision-language model queried through it.
OLLAMA_URL = "http://192.168.30.36:11434"
OLLAMA_MODEL = "qwen2.5vl:32b"

# Directory that is searched for rejected region images (PNG files).
REJECTED_PATH = "/Volumes/NV2/PDF-Processing/signature-image-output/signatures/rejected"

# Missing signatures based on test results.
# Key: file-name prefix of the region images belonging to one PDF page.
# Value: the signer name to look for in those regions.
MISSING = {
    "201301_2061_AI1_page5": "林姿妤",
    "201301_2458_AI1_page4": "魏興海",
    "201301_2923_AI1_page3": "陈丽琦",
}
|
|
|
|
def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, read in full, base64-encoded, and the
    resulting bytes are decoded to a ``str``.
    """
    with open(image_path, mode='rb') as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
|
|
|
|
def ask_vlm_about_signature(image_base64, expected_name):
    """Ask the Ollama vision model whether an image shows a named signature.

    Args:
        image_base64: Base64-encoded image, sent as the single entry of the
            request's ``images`` list.
        expected_name: Name interpolated into the yes/no question.

    Returns:
        The ``response`` field of the server's JSON reply, stripped of
        surrounding whitespace and lower-cased. If the request or the reply
        handling raises, the string ``"error: <message>"`` is returned
        instead of propagating the exception.
    """
    question = f"""Does this image contain a handwritten signature with the Chinese name: "{expected_name}"?

Look carefully for handwritten Chinese characters matching this name.

Answer only 'yes' or 'no'."""

    # stream=False: the reply is read below as one JSON document.
    request_body = dict(
        model=OLLAMA_MODEL,
        prompt=question,
        images=[image_base64],
        stream=False,
    )

    try:
        reply = requests.post(
            f"{OLLAMA_URL}/api/generate",
            json=request_body,
            timeout=60,
        )
        reply.raise_for_status()
        model_text = reply.json()['response']
        return model_text.strip().lower()
    except Exception as exc:
        # Best-effort: failures are reported to the caller as text.
        return f"error: {str(exc)}"
|
|
|
|
# Check each missing signature.
#
# For every entry in MISSING, collect the rejected region images whose file
# name starts with that entry's key and ask the VLM whether each one shows
# the expected name. Results are printed only.
for pdf_stem, missing_name in MISSING.items():
    print(f"\n{'='*80}")
    print(f"Checking rejected regions from: {pdf_stem}")
    print(f"Looking for missing signature: {missing_name}")
    print('='*80)

    # Find all rejected regions from this PDF (sorted for stable output order)
    rejected_regions = sorted(Path(REJECTED_PATH).glob(f"{pdf_stem}_region_*.png"))

    print(f"Found {len(rejected_regions)} rejected regions to check")

    for region_path in rejected_regions:
        region_name = region_path.name
        # No newline here: the verdict is appended to the same output line.
        print(f"\nChecking: {region_name}...", end='', flush=True)

        # Encode and ask VLM
        image_base64 = encode_image_to_base64(region_path)
        answer = ask_vlm_about_signature(image_base64, missing_name)

        if answer.startswith("error:"):
            # ask_vlm_about_signature reports failures as "error: <message>".
            # Handle that first so a failure message that happens to contain
            # "yes" is not reported as a match, and so failures are not
            # presented as a "No" verdict from the model.
            print(f" ⚠️ VLM request failed ({answer})")
        elif 'yes' in answer:
            print(f" ✅ FOUND! This region contains {missing_name}")
            print(" → The signature was detected by CV but rejected by verification!")
        else:
            print(f" ❌ No (VLM says: {answer})")

print(f"\n{'='*80}")
print("Analysis complete!")
print('='*80)
|