#!/usr/bin/env python3
"""Firm x year descriptor trends (B-gate diagnostic).

Plots per-firm yearly mean cosine, mean dHash, and HC-box hit share to test
whether Firms B/C/D show a 2020 structural break converging toward Firm A.
Read-only against the production DB.
"""
import sqlite3

import matplotlib.pyplot as plt

DB = '/Volumes/NV2/PDF-Processing/signature-analysis/signature_analysis.db'

# Each entry: (value matched against accountants.firm, legend label, line colour).
FIRMS = [
    ('勤業眾信聯合', 'Firm A (Deloitte)', '#d62728'),
    ('安侯建業聯合', 'Firm B (KPMG)', '#1f77b4'),
    ('資誠聯合', 'Firm C (PwC)', '#2ca02c'),
    ('安永聯合', 'Firm D (EY)', '#ff7f0e'),
]

# The URI form with mode=ro opens the database read-only, so this diagnostic
# cannot modify the production file.
conn = sqlite3.connect(f'file:{DB}?mode=ro', uri=True)
cur = conn.cursor()


def series(firm_zh):
    """Return yearly descriptor aggregates for one firm.

    Args:
        firm_zh: Firm name exactly as stored in ``accountants.firm``.

    Returns:
        A list of ``(year, mean_cosine, mean_dhash, hc_box_share, n)`` tuples
        ordered by year, where

        * ``year`` is the first four characters of ``year_month`` cast to int,
        * ``mean_cosine`` is ``AVG(max_similarity_to_same_accountant)``,
        * ``mean_dhash`` is ``AVG(min_dhash_independent)``,
        * ``hc_box_share`` is the fraction of rows with cosine > 0.95 and
          dHash <= 5,
        * ``n`` is the number of rows aggregated.

        Rows with a NULL ``year_month`` or a NULL descriptor are excluded.
        The list is empty when no row matches the firm.
    """
    cur.execute(
        """
        SELECT CAST(substr(s.year_month,1,4) AS INT) AS yr,
               AVG(s.max_similarity_to_same_accountant),
               AVG(s.min_dhash_independent),
               AVG(CASE WHEN s.max_similarity_to_same_accountant>0.95
                         AND s.min_dhash_independent<=5
                        THEN 1.0 ELSE 0.0 END),
               COUNT(*)
        FROM signatures s
        JOIN accountants a ON s.assigned_accountant=a.name
        WHERE a.firm=?
          AND s.year_month IS NOT NULL
          AND s.max_similarity_to_same_accountant IS NOT NULL
          AND s.min_dhash_independent IS NOT NULL
        GROUP BY yr
        ORDER BY yr
        """,
        (firm_zh,),
    )
    return cur.fetchall()


# try/finally guarantees the connection is released even if a query or the
# plotting/saving step raises.
try:
    fig, axes = plt.subplots(1, 3, figsize=(16, 4.8))

    # One line per firm on each of the three panels:
    # [0] mean cosine, [1] mean dHash, [2] HC-box share.
    for firm_zh, label, color in FIRMS:
        rows = series(firm_zh)
        yrs = [r[0] for r in rows]
        for ax, col in zip(axes, (1, 2, 3)):
            ax.plot(yrs, [r[col] for r in rows], 'o-', color=color, label=label)

    for ax in axes:
        ax.axvline(2020, ls='--', color='grey', alpha=0.6)
        # Anchor the label with the x-axis blended transform (x in data
        # coordinates, y in axes fraction) so it stays at the bottom of the
        # panel even if reference lines added later widen the y-limits.
        ax.text(2020.05, 0.0, ' 2020', transform=ax.get_xaxis_transform(),
                color='grey', fontsize=8, va='bottom')
        ax.set_xlabel('Fiscal year')
        ax.grid(alpha=0.3)

    # Dotted reference lines mark the HC-box thresholds used in the query.
    axes[0].set_title('Mean best-match cosine')
    axes[0].axhline(0.95, ls=':', color='k', alpha=0.4)
    axes[1].set_title('Mean independent-min dHash')
    axes[1].axhline(5, ls=':', color='k', alpha=0.4)
    axes[2].set_title('HC-box share (cos>0.95 & dHash$\\leq$5)')
    axes[0].legend(fontsize=8, loc='lower right')

    fig.suptitle('Big-4 descriptor trends 2013–2023 (2023 = partial, to Apr) — no 2020 break, no convergence to A', fontsize=11)
    fig.tight_layout()

    out = '/Volumes/NV2/pdf_recognize/signature_analysis/firm_year_trends.png'
    fig.savefig(out, dpi=130, bbox_inches='tight')
    print('saved', out)
finally:
    conn.close()