pdf_signature_extraction/visualize_v5_results.py

#!/usr/bin/env python3
"""
可視化 PP-OCRv5 的檢測結果
"""

import json
import cv2
import numpy as np
from pathlib import Path

def load_results(result_file="/Volumes/NV2/pdf_recognize/test_results/v5_result.json"):
    """Load the PP-OCRv5 detection results from a JSON file.

    Args:
        result_file: Path of the JSON result file. Defaults to the location
            this script has always read from, so existing callers are
            unaffected.

    Returns:
        The value stored under the top-level ``'res'`` key of the JSON
        document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``'res'`` key.
    """
    with open(result_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data['res']

def draw_detections(image_path, results, output_path):
    """Draw every detected text box and its index on the image and save it.

    Args:
        image_path: Path of the source image, handed to ``cv2.imread``.
        results: Mapping holding the ``'rec_texts'`` and ``'rec_boxes'``
            lists. Each box is unpacked as ``(x_min, y_min, x_max, y_max)``.
        output_path: Destination path handed to ``cv2.imwrite``.

    Returns:
        The annotated image, or ``None`` when the source image could not be
        read or the annotated image could not be written.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 無法讀取圖片: {image_path}")
        return None

    # Draw on a copy so the freshly loaded image stays untouched.
    vis_img = img.copy()

    rec_texts = results.get('rec_texts', [])
    rec_boxes = results.get('rec_boxes', [])

    print(f"\n檢測到 {len(rec_texts)} 個文字區域")

    for i, box in enumerate(rec_boxes):
        # OpenCV drawing functions require integer pixel coordinates; the
        # JSON may carry floats, so normalise them here.
        x_min, y_min, x_max, y_max = (int(round(v)) for v in box)

        # Green rectangle around the detected region.
        cv2.rectangle(vis_img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Index label just above the box. putText's origin is the text
        # baseline, so clamp it to keep the label inside the image for boxes
        # that touch the top edge.
        label_y = max(y_min - 5, 12)
        cv2.putText(vis_img, f"{i}", (x_min, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    # imwrite reports failure through its return value rather than raising,
    # so check it before claiming success.
    if not cv2.imwrite(output_path, vis_img):
        print(f"❌ 無法保存可視化結果: {output_path}")
        return None
    print(f"✅ 可視化結果已保存: {output_path}")

    return vis_img

def _box_size(box):
    """Return ``(width, height)`` of an ``[x_min, y_min, x_max, y_max]`` box.

    The values are cast to ``int`` so the ``d`` format specs used in the
    report also work when the coordinates arrive as floats.
    """
    return int(box[2] - box[0]), int(box[3] - box[1])


def _find_potential_handwriting(rows):
    """Return the entries of *rows* that match the handwriting heuristics.

    *rows* is a sequence of ``(text, score, box)`` tuples. An entry is
    flagged when its box is taller than 50 px, its score is below 0.9, or its
    text has at most 3 characters in a box taller than 40 px.
    """
    flagged = []
    for i, (text, score, box) in enumerate(rows):
        width, height = _box_size(box)

        is_large = height > 50
        is_low_conf = score < 0.9
        is_short_text = len(text) <= 3 and height > 40

        reasons = []
        if is_large:
            reasons.append('大字')
        if is_low_conf:
            reasons.append('低置信度')
        if is_short_text:
            reasons.append('短文大字')

        if reasons:
            flagged.append({
                'index': i,
                'text': text,
                'score': score,
                'height': height,
                'width': width,
                'reason': reasons,
            })
    return flagged


def generate_text_report(results, report_path="/Volumes/NV2/pdf_recognize/test_results/v5_analysis_report.txt"):
    """Print a summary of the OCR results and write the full list to a file.

    Args:
        results: Mapping holding the ``'rec_texts'``, ``'rec_scores'`` and
            ``'rec_boxes'`` lists; missing keys are treated as empty lists.
        report_path: Where the detailed report is written. Defaults to the
            location this script has always used.

    The confidence statistics are skipped when there are no scores, so an
    empty result set no longer raises from ``np.max`` or divides by zero.
    """
    rec_texts = results.get('rec_texts', [])
    rec_scores = results.get('rec_scores', [])
    rec_boxes = results.get('rec_boxes', [])

    # Pair the three lists once; zip stops at the shortest, so a length
    # mismatch cannot raise IndexError further down.
    rows = list(zip(rec_texts, rec_scores, rec_boxes))
    total = len(rec_texts)
    n_scores = len(rec_scores)

    print("\n" + "=" * 80)
    print("PP-OCRv5 檢測結果報告")
    print("=" * 80)

    print(f"\n總共檢測到: {total} 個文字區域")
    if n_scores:
        print(f"平均置信度: {np.mean(rec_scores):.4f}")
        print(f"最高置信度: {np.max(rec_scores):.4f}")
        print(f"最低置信度: {np.min(rec_scores):.4f}")

        # Bucket the scores into high / medium / low confidence.
        high_conf = sum(1 for s in rec_scores if s >= 0.95)
        medium_conf = sum(1 for s in rec_scores if 0.8 <= s < 0.95)
        low_conf = sum(1 for s in rec_scores if s < 0.8)

        print("\n置信度分布:")
        print(f"  高 (≥0.95): {high_conf} 個 ({high_conf/n_scores*100:.1f}%)")
        print(f"  中 (0.8-0.95): {medium_conf} 個 ({medium_conf/n_scores*100:.1f}%)")
        print(f"  低 (<0.8): {low_conf} 個 ({low_conf/n_scores*100:.1f}%)")
    else:
        print("（無置信度資料，略過統計）")

    # Show only the first 20 detections on the console.
    print("\n前 20 個檢測結果:")
    print("-" * 80)
    for i, (text, score, box) in enumerate(rows[:20]):
        width, height = _box_size(box)
        print(f"[{i:2d}] 置信度: {score:.4f}  大小: {width:4d}x{height:3d}  文字: {text}")

    if total > 20:
        print(f"\n... 還有 {total - 20} 個結果（省略）")

    # Heuristic search for regions that may be handwriting.
    print("\n" + "=" * 80)
    print("可能的手寫區域分析")
    print("=" * 80)

    potential_handwriting = _find_potential_handwriting(rows)

    if potential_handwriting:
        print(f"\n找到 {len(potential_handwriting)} 個可能的手寫區域:")
        print("-" * 80)
        for item in potential_handwriting[:15]:  # console shows the first 15 only
            reasons = ', '.join(item['reason'])
            print(f"[{item['index']:2d}] {item['height']:3d}px  {item['score']:.4f}  ({reasons})  {item['text']}")
    else:
        print("未找到明顯的手寫特徵區域")

    # The file report lists every detection, not just the first 20.
    mean_text = f"{np.mean(rec_scores):.4f}" if n_scores else "N/A"
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("PP-OCRv5 檢測結果詳細報告\n")
        f.write("=" * 80 + "\n\n")
        f.write(f"總數: {total}\n")
        f.write(f"平均置信度: {mean_text}\n\n")
        f.write("完整檢測列表:\n")
        f.write("-" * 80 + "\n")
        for i, (text, score, box) in enumerate(rows):
            width, height = _box_size(box)
            f.write(f"[{i:2d}] {score:.4f}  {width:4d}x{height:3d}  {text}\n")

    print(f"\n詳細報告已保存: {report_path}")

def main():
    """Load the OCR results, produce the text report, then render the overlay.

    The overlay step is skipped with a warning when the original page image
    is not present on disk.
    """
    rule = "=" * 80

    # Step 1: read the detection results.
    print("加載 PP-OCRv5 檢測結果...")
    results = load_results()

    # Step 2: console summary plus the detailed report file.
    generate_text_report(results)

    # Step 3: draw the detections onto the page image.
    print("\n" + rule)
    print("生成可視化圖片")
    print(rule)

    image_path = "/Volumes/NV2/pdf_recognize/full_page_original.png"
    output_path = "/Volumes/NV2/pdf_recognize/test_results/v5_visualization.png"

    if not Path(image_path).exists():
        print(f"⚠️  原始圖片不存在: {image_path}")
    else:
        draw_detections(image_path, results, output_path)

    print("\n" + rule)
    print("分析完成")
    print(rule)

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()