# pdf_signature_extraction/check_rejected_for_missing.py

#!/usr/bin/env python3
"""Check if rejected regions contain the missing signatures."""

import base64
import requests
from pathlib import Path

# Base URL of the Ollama server; "/api/generate" is appended to it per request.
OLLAMA_URL = "http://192.168.30.36:11434"
# Model tag sent in the "model" field of every generate request.
OLLAMA_MODEL = "qwen2.5vl:32b"
# Directory that is globbed for "<stem>_region_*.png" rejected-region images.
REJECTED_PATH = "/Volumes/NV2/PDF-Processing/signature-image-output/signatures/rejected"

# Signatures reported missing by the test results.
# Key: file-name stem used as the prefix when globbing rejected regions.
# Value: the name the vision model is asked to look for in those regions.
MISSING = {
    "201301_2061_AI1_page5": "林姿妤",
    "201301_2458_AI1_page4": "魏興海",
    "201301_2923_AI1_page3": "陈丽琦",
}

def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in full; the base64 output is
    decoded as UTF-8 so the caller receives a ``str`` rather than ``bytes``.
    """
    with open(image_path, 'rb') as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

def ask_vlm_about_signature(image_base64, expected_name):
    """Query the vision-language model about one image and return its reply.

    Sends a single non-streaming POST to ``OLLAMA_URL`` + ``/api/generate``
    carrying a yes/no question about ``expected_name`` together with
    ``image_base64``, using a 60-second timeout.

    Returns the ``response`` field of the JSON reply, stripped of surrounding
    whitespace and lower-cased. Any exception raised while sending the request
    or reading the reply is caught and returned as the string
    ``"error: <message>"`` instead of being raised.
    """
    question = (
        f'Does this image contain a handwritten signature with the Chinese name: "{expected_name}"?\n'
        "\n"
        "Look carefully for handwritten Chinese characters matching this name.\n"
        "\n"
        "Answer only 'yes' or 'no'."
    )

    request_body = dict(
        model=OLLAMA_MODEL,
        prompt=question,
        images=[image_base64],
        stream=False,
    )

    try:
        reply = requests.post(f"{OLLAMA_URL}/api/generate", json=request_body, timeout=60)
        reply.raise_for_status()
        return reply.json()['response'].strip().lower()
    except Exception as exc:
        # Failures are reported in-band so the caller can keep iterating.
        return "error: " + str(exc)

# Check each missing signature: for every entry in MISSING, send each rejected
# region image of that PDF page to the VLM and report whether it recognises
# the expected name.

# The directory is the same for every entry, so build the Path once.
rejected_dir = Path(REJECTED_PATH)

for pdf_stem, missing_name in MISSING.items():
    print(f"\n{'='*80}")
    print(f"Checking rejected regions from: {pdf_stem}")
    print(f"Looking for missing signature: {missing_name}")
    print('='*80)

    # Find all rejected regions from this PDF (sorted for a stable order).
    rejected_regions = sorted(rejected_dir.glob(f"{pdf_stem}_region_*.png"))

    print(f"Found {len(rejected_regions)} rejected regions to check")

    for region_path in rejected_regions:
        region_name = region_path.name
        print(f"\nChecking: {region_name}...", end='', flush=True)

        # Encode and ask VLM
        image_base64 = encode_image_to_base64(region_path)
        answer = ask_vlm_about_signature(image_base64, missing_name)

        # ask_vlm_about_signature reports failures in-band as "error: <msg>".
        # Handle those first: otherwise a failed request is printed as a
        # genuine "No", and an error message that happens to contain "yes"
        # would be reported as a match.
        if answer.startswith("error:"):
            print(f" ⚠️ Could not check this region ({answer})")
        elif 'yes' in answer:
            print(f" ✅ FOUND! This region contains {missing_name}")
            print("   → The signature was detected by CV but rejected by verification!")
        else:
            print(f" ❌ No (VLM says: {answer})")

print(f"\n{'='*80}")
print("Analysis complete!")
print('='*80)