#!/usr/bin/env python3
"""Visualize and summarize PP-OCRv5 detection results.

The script loads a saved PP-OCRv5 result JSON, prints a text report
(confidence statistics, the first detections, and a heuristic list of
regions that may be handwriting), writes the full detection list to a
report file, and draws the detection boxes on the source image.
"""
import json
from pathlib import Path

import numpy as np

try:
    import cv2
except ImportError:
    # OpenCV is only needed by draw_detections(); the text report relies on
    # numpy alone, so keep the module importable without it.
    cv2 = None

# Default locations used when the functions are called without explicit paths.
DEFAULT_RESULT_FILE = "/Volumes/NV2/pdf_recognize/test_results/v5_result.json"
DEFAULT_REPORT_PATH = "/Volumes/NV2/pdf_recognize/test_results/v5_analysis_report.txt"

_SEPARATOR = "=" * 80
_RULE = "-" * 80

# Number of detections listed in the console preview / handwriting summary.
_PREVIEW_LIMIT = 20
_HANDWRITING_LIMIT = 15

# Lowest baseline (in pixels) at which an index label is drawn, so labels of
# boxes touching the top edge of the image are not clipped away.
_MIN_LABEL_BASELINE = 12


def load_results(result_file=DEFAULT_RESULT_FILE):
    """Load the saved v5 detection result.

    Args:
        result_file: Path of the result JSON file.

    Returns:
        The value stored under the top-level ``'res'`` key.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the JSON document has no ``'res'`` key.
    """
    with open(result_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data['res']


def _box_to_ints(box):
    """Return ``(x_min, y_min, x_max, y_max)`` of *box* as plain ints.

    Coordinates loaded from JSON may be floats; both the OpenCV drawing calls
    and the ``d`` format specs used in the report need integers.
    """
    x_min, y_min, x_max, y_max = (int(round(float(v))) for v in box)
    return x_min, y_min, x_max, y_max


def _box_size(box):
    """Return ``(width, height)`` of *box* in whole pixels."""
    x_min, y_min, x_max, y_max = _box_to_ints(box)
    return x_max - x_min, y_max - y_min


def draw_detections(image_path, results, output_path):
    """Draw detection boxes and their indices on the image and save it.

    Args:
        image_path: Path of the image to annotate.
        results: Mapping with ``'rec_texts'``, ``'rec_boxes'`` and
            ``'rec_scores'`` lists; each box is unpacked as
            ``x_min, y_min, x_max, y_max``.
        output_path: Where the annotated image is written.

    Returns:
        The annotated image, or ``None`` when OpenCV is unavailable, the
        image cannot be read, or the output cannot be written.
    """
    if cv2 is None:
        print("❌ 未安裝 OpenCV (cv2),無法生成可視化圖片")
        return None

    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 無法讀取圖片: {image_path}")
        return None

    # Draw on a copy so the loaded image stays untouched.
    vis_img = img.copy()

    rec_texts = results.get('rec_texts', [])
    rec_boxes = results.get('rec_boxes', [])
    rec_scores = results.get('rec_scores', [])

    print(f"\n檢測到 {len(rec_texts)} 個文字區域")

    # zip() keeps the original behaviour of stopping at the shortest list.
    for i, (_text, box, _score) in enumerate(zip(rec_texts, rec_boxes, rec_scores)):
        x_min, y_min, x_max, y_max = _box_to_ints(box)

        # Green rectangle around the detected region.
        cv2.rectangle(vis_img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Small index label just above the box, clamped so it stays visible
        # for boxes at the very top of the image.
        label_y = max(y_min - 5, _MIN_LABEL_BASELINE)
        cv2.putText(vis_img, f"{i}", (x_min, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    # imwrite() signals failure through its return value, not an exception.
    if not cv2.imwrite(output_path, vis_img):
        print(f"❌ 無法保存圖片: {output_path}")
        return None

    print(f"✅ 可視化結果已保存: {output_path}")
    return vis_img


def _print_score_statistics(rec_scores):
    """Print mean/max/min confidence and the high/medium/low distribution."""
    total = len(rec_scores)
    if not total:
        # np.max/np.min raise on empty input and the percentages would
        # divide by zero, so there is nothing meaningful to print.
        print("沒有可用的置信度數據,略過統計")
        return

    print(f"平均置信度: {np.mean(rec_scores):.4f}")
    print(f"最高置信度: {np.max(rec_scores):.4f}")
    print(f"最低置信度: {np.min(rec_scores):.4f}")

    high_conf = sum(1 for s in rec_scores if s >= 0.95)
    medium_conf = sum(1 for s in rec_scores if 0.8 <= s < 0.95)
    low_conf = sum(1 for s in rec_scores if s < 0.8)

    print("\n置信度分布:")
    print(f" 高 (≥0.95): {high_conf} 個 ({high_conf/total*100:.1f}%)")
    print(f" 中 (0.8-0.95): {medium_conf} 個 ({medium_conf/total*100:.1f}%)")
    print(f" 低 (<0.8): {low_conf} 個 ({low_conf/total*100:.1f}%)")


def _find_potential_handwriting(entries):
    """Return the entries flagged by the handwriting heuristics.

    An entry ``(text, score, width, height)`` is flagged when any of these
    holds: height above 50 px, confidence below 0.9, or at most three
    characters with a height above 40 px.
    """
    flagged = []
    for i, (text, score, width, height) in enumerate(entries):
        checks = (
            (height > 50, '大字'),
            (score < 0.9, '低置信度'),
            (len(text) <= 3 and height > 40, '短文大字'),
        )
        reasons = [label for hit, label in checks if hit]
        if reasons:
            flagged.append({
                'index': i,
                'text': text,
                'score': score,
                'height': height,
                'width': width,
                'reason': reasons,
            })
    return flagged


def generate_text_report(results, report_path=DEFAULT_REPORT_PATH):
    """Print a text report and write the full detection list to a file.

    Args:
        results: Mapping with ``'rec_texts'``, ``'rec_scores'`` and
            ``'rec_boxes'`` lists; each box is read as
            ``x_min, y_min, x_max, y_max``.
        report_path: Destination of the detailed report file.

    Raises:
        OSError: If the report file cannot be written.
    """
    rec_texts = results.get('rec_texts', [])
    rec_scores = results.get('rec_scores', [])
    rec_boxes = results.get('rec_boxes', [])

    # One aligned sequence for every listing below, so a length mismatch
    # between the three lists cannot raise IndexError.
    entries = [
        (text, score, *_box_size(box))
        for text, score, box in zip(rec_texts, rec_scores, rec_boxes)
    ]

    print("\n" + _SEPARATOR)
    print("PP-OCRv5 檢測結果報告")
    print(_SEPARATOR)

    print(f"\n總共檢測到: {len(rec_texts)} 個文字區域")
    _print_score_statistics(rec_scores)

    # Console preview of the first detections.
    print("\n前 20 個檢測結果:")
    print(_RULE)
    for i, (text, score, width, height) in enumerate(entries[:_PREVIEW_LIMIT]):
        print(f"[{i:2d}] 置信度: {score:.4f} 大小: {width:4d}x{height:3d} 文字: {text}")

    if len(entries) > _PREVIEW_LIMIT:
        print(f"\n... 還有 {len(entries) - _PREVIEW_LIMIT} 個結果(省略)")

    # Heuristic search for handwriting (low confidence or large glyphs).
    print("\n" + _SEPARATOR)
    print("可能的手寫區域分析")
    print(_SEPARATOR)

    potential_handwriting = _find_potential_handwriting(entries)
    if potential_handwriting:
        print(f"\n找到 {len(potential_handwriting)} 個可能的手寫區域:")
        print(_RULE)
        for item in potential_handwriting[:_HANDWRITING_LIMIT]:
            reasons = ', '.join(item['reason'])
            print(f"[{item['index']:2d}] {item['height']:3d}px {item['score']:.4f} ({reasons}) {item['text']}")
    else:
        print("未找到明顯的手寫特徵區域")

    # Detailed report file; the mean is undefined when there are no scores.
    mean_text = f"{np.mean(rec_scores):.4f}" if len(rec_scores) else "N/A"
    lines = [
        "PP-OCRv5 檢測結果詳細報告\n",
        _SEPARATOR + "\n\n",
        f"總數: {len(rec_texts)}\n",
        f"平均置信度: {mean_text}\n\n",
        "完整檢測列表:\n",
        _RULE + "\n",
    ]
    lines.extend(
        f"[{i:2d}] {score:.4f} {width:4d}x{height:3d} {text}\n"
        for i, (text, score, width, height) in enumerate(entries)
    )
    with open(report_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"\n詳細報告已保存: {report_path}")


def main():
    """Load the saved results, print the report and render the visualization."""
    print("加載 PP-OCRv5 檢測結果...")
    results = load_results()

    generate_text_report(results)

    print("\n" + _SEPARATOR)
    print("生成可視化圖片")
    print(_SEPARATOR)

    image_path = "/Volumes/NV2/pdf_recognize/full_page_original.png"
    output_path = "/Volumes/NV2/pdf_recognize/test_results/v5_visualization.png"

    if Path(image_path).exists():
        draw_detections(image_path, results, output_path)
    else:
        print(f"⚠️ 原始圖片不存在: {image_path}")

    print("\n" + _SEPARATOR)
    print("分析完成")
    print(_SEPARATOR)


if __name__ == "__main__":
    main()