"""Figure 6: two-measure sensitivity surface over the (cosine cut x dHash cut) plane.

Panel A: clean-group (B/C/D) flag rate -- how permissive the operating point is.
Panel B: Firm A minus B/C/D flag-rate contrast (pp) -- discrimination across
the plane.

Shows the chosen HC point (0.95, dHash<=5) is not a cherry-picked threshold and
exposes the weaker MC band (dHash<=15).

Reproduces from signature_analysis.db (DB columns only).
"""
import sqlite3
from contextlib import closing

import numpy as np

DB = "/Volumes/NV2/PDF-Processing/signature-analysis/signature_analysis.db"
OUT = '/Volumes/NV2/pdf_recognize/paper/v13_build/figures/fig6.png'
BIG4 = ('勤業眾信聯合', '資誠聯合', '安侯建業聯合', '安永聯合')
FIRM_A = BIG4[0]  # the firm contrasted against the other three (B/C/D)

# Threshold plane scanned by the figure.
COS_MIN, COS_MAX, COS_STEP = 0.85, 0.99, 0.0025
DH_MAX = 20

# Operating points drawn on both panels.
HC_COS, HC_DHASH = 0.95, 5
MC_DHASH = 15


def load_signatures(db_path=DB):
    """Read the Big-4 signature rows needed for the figure.

    Only rows with ``is_valid=1`` and non-NULL values in both measure columns
    are selected.

    Returns:
        ``(is_firm_a, cosine, dhash)`` -- three equal-length 1-D arrays: a
        boolean Firm A mask, ``max_similarity_to_same_accountant`` and
        ``min_dhash_independent``.
    """
    placeholders = ','.join('?' * len(BIG4))
    query = f"""SELECT CASE WHEN excel_firm=? THEN 1 ELSE 0 END isA,
                      max_similarity_to_same_accountant c,
                      min_dhash_independent d
               FROM signatures
               WHERE is_valid=1 AND excel_firm IN ({placeholders})
                 AND max_similarity_to_same_accountant IS NOT NULL
                 AND min_dhash_independent IS NOT NULL"""
    # closing() guarantees the connection is released even if the query
    # raises; sqlite3's own context manager only manages transactions.
    with closing(sqlite3.connect(db_path)) as con:
        rows = con.execute(query, (FIRM_A, *BIG4)).fetchall()
    is_firm_a = np.array([r[0] for r in rows], bool)
    cosine = np.array([r[1] for r in rows])
    dhash = np.array([r[2] for r in rows])
    return is_firm_a, cosine, dhash


def cosine_cut_grid(lo=COS_MIN, hi=COS_MAX, step=COS_STEP):
    """Return the cosine cuts ``lo, lo+step, ..., hi`` as a 1-D float array.

    ``np.arange`` with a float step accumulates rounding error, so the grid
    would not be guaranteed to contain exactly 0.95. Building it with
    ``linspace`` and rounding to 4 decimals (enough for multiples of 0.0025)
    makes every cut equal to its decimal literal, so the strict ``>`` test at
    the HC point uses exactly the deployed threshold.
    """
    n_cuts = int(round((hi - lo) / step)) + 1
    return np.round(np.linspace(lo, hi, n_cuts), 4)


def flag_rate_surface(cosine, dhash, cos_cuts, dh_cuts, label='group'):
    """Percentage of signatures flagged at every (dHash cut, cosine cut) pair.

    A signature is flagged when ``cosine > cos_cut`` and ``dhash <= dh_cut``.

    Args:
        cosine: per-signature cosine similarities.
        dhash: per-signature dHash distances, same length as ``cosine``.
        cos_cuts: cosine thresholds (columns of the result).
        dh_cuts: dHash thresholds (rows of the result).
        label: group name used in error messages.

    Returns:
        Array of shape ``(len(dh_cuts), len(cos_cuts))`` with values in 0..100.

    Raises:
        ValueError: if the group is empty (the rate would be NaN) or the two
            measure arrays differ in shape.
    """
    cosine = np.asarray(cosine)
    dhash = np.asarray(dhash)
    if cosine.shape != dhash.shape:
        raise ValueError(f"{label}: cosine and dHash arrays differ in shape")
    if cosine.size == 0:
        raise ValueError(f"{label}: no signatures, flag rate is undefined")

    surface = np.empty((len(dh_cuts), len(cos_cuts)))
    for j, cos_cut in enumerate(cos_cuts):
        above = cosine > cos_cut  # invariant over the dHash cuts
        for i, dh_cut in enumerate(dh_cuts):
            surface[i, j] = 100.0 * np.mean(above & (dhash <= dh_cut))
    return surface


def cell_centred_extent(x_cuts, y_cuts):
    """Return an ``imshow`` extent that centres each cell on its cut value.

    ``imshow`` treats ``extent`` as the outer edges of the image, so passing
    the first/last cut directly shifts cells by up to half a cell relative to
    the axis. Padding by half a grid step on every side puts the centre of
    cell ``(i, j)`` exactly at ``(x_cuts[j], y_cuts[i])``.
    """
    def padded(cuts):
        first, last = float(cuts[0]), float(cuts[-1])
        # A single cut has no step to measure; fall back to a unit-wide cell.
        half = (last - first) / (len(cuts) - 1) / 2 if len(cuts) > 1 else 0.5
        return first - half, last + half

    x_lo, x_hi = padded(x_cuts)
    y_lo, y_hi = padded(y_cuts)
    return [x_lo, x_hi, y_lo, y_hi]


def render_figure(clean_rate, contrast, cos_cuts, dh_cuts, n_signatures,
                  out_path=OUT):
    """Draw the two heat-map panels and save the figure to ``out_path``."""
    # Imported here so the numeric helpers above stay importable without a
    # plotting backend; the Agg backend must be selected before pyplot loads.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    extent = cell_centred_extent(cos_cuts, dh_cuts)
    panels = [
        (clean_rate, '(a) Clean group (B/C/D) flag rate', 'viridis',
         'flag rate (%)'),
        (contrast, '(b) Firm A − B/C/D contrast', 'magma', 'contrast (pp)'),
    ]

    fig, axes = plt.subplots(1, 2, figsize=(10.5, 4.3))
    for ax, (surface, title, cmap, cbar_label) in zip(axes, panels):
        im = ax.imshow(surface, origin='lower', aspect='auto', extent=extent,
                       cmap=cmap)
        cb = fig.colorbar(im, ax=ax, pad=0.02)
        cb.set_label(cbar_label, fontsize=8)
        cb.ax.tick_params(labelsize=7)
        # operating points
        ax.scatter([HC_COS], [HC_DHASH], marker='*', s=180, color='white',
                   edgecolor='black', zorder=5,
                   label=f'HC operating point ({HC_COS}, dHash≤{HC_DHASH})')
        ax.axhline(MC_DHASH, color='white', ls=':', lw=1.0)
        ax.text(0.853, MC_DHASH + 0.4, f'MC upper bound (dHash≤{MC_DHASH})',
                color='white', fontsize=6.5, va='bottom')
        ax.set_xlabel('cosine cut', fontsize=9)
        ax.set_ylabel('dHash cut (≤)', fontsize=9)
        ax.set_title(title, fontsize=9)
        ax.tick_params(labelsize=7.5)
        ax.legend(loc='lower left', fontsize=6.5, framealpha=0.85)

    fig.suptitle('Figure 6. Sensitivity surface of the deployed rule over the '
                 'two-measure threshold plane (Big-4, n=%d).' % n_signatures,
                 fontsize=9, y=1.02)
    fig.tight_layout()
    fig.savefig(out_path, dpi=200, bbox_inches='tight')
    plt.close(fig)


def main():
    """Load the data, compute both surfaces, write the figure, print a summary."""
    is_firm_a, cosine, dhash = load_signatures(DB)
    cos_cuts = cosine_cut_grid()
    dh_cuts = np.arange(0, DH_MAX + 1)

    firm_a_rate = flag_rate_surface(cosine[is_firm_a], dhash[is_firm_a],
                                    cos_cuts, dh_cuts, label='Firm A')
    clean_rate = flag_rate_surface(cosine[~is_firm_a], dhash[~is_firm_a],
                                   cos_cuts, dh_cuts, label='B/C/D')
    contrast = firm_a_rate - clean_rate

    render_figure(clean_rate, contrast, cos_cuts, dh_cuts, len(cosine), OUT)

    # Look the HC cell up from the grids rather than hard-coding its indices.
    hc_row = int(np.flatnonzero(dh_cuts == HC_DHASH)[0])
    hc_col = int(np.argmin(np.abs(cos_cuts - HC_COS)))
    print(f"fig6 OK n={len(cosine)}; HC({HC_COS},{HC_DHASH}) "
          f"contrast={contrast[hc_row, hc_col]:.1f}pp; written {OUT}")


if __name__ == "__main__":
    main()