Paper A v4.1: BCD-baseline reframe + screening positioning + trim
- Re-anchor inter-CPA coincidence-rate (ICCR) calibration on a normative non-Firm-A baseline (Firms B/C/D); Firm A held out as an out-of-sample target. Locked canonical numbers (codex-audited; Scripts 46/52/53): per-comparison HC 0.00014->0.000018, per-signature HC 0.0116, per-document HC+MC 0.34->0.1905; KDE crossover 0.837 retained corpus-wide.
- Reposition as an operator-tunable, semi-automated screening/triage framework (title -> "Automated Screening..."): HC = high-specificity operating point; MC band demoted to low-specificity advisory; Firm A = demonstration that the screening surfaces a templated end, audit-quality implications deferred.
- Apply codex prose-review fixes: triage-neutral five-way labels, soften mechanism/specificity wording, supersede MC claim-strength, update stale Appendix script references (40b/43/45 -> 46/52/53).
- Trim pass: compress Sec. V discussion + Sec. III echoes (27.7k -> 26.8k words); no substantive content removed.
- Add analysis scripts 45-53 (firm-year trends; BCD-only ICCR recompute; canonical-sampler locked numbers; Firm-A out-of-sample; BCD regression + cross-firm hit matrix).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Firm x year descriptor trends (B-gate diagnostic).
|
||||
|
||||
Plots per-firm yearly mean cosine, mean dHash, and HC-box hit share to test
|
||||
whether Firms B/C/D show a 2020 structural break converging toward Firm A.
|
||||
Read-only against the production DB.
|
||||
"""
|
||||
import sqlite3
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Production SQLite database that the queries below read from.
DB = '/Volumes/NV2/PDF-Processing/signature-analysis/signature_analysis.db'

# One entry per firm: (value matched against accountants.firm,
#                      legend label, line colour).
FIRMS = [
    ('勤業眾信聯合', 'Firm A (Deloitte)', '#d62728'),
    ('安侯建業聯合', 'Firm B (KPMG)', '#1f77b4'),
    ('資誠聯合', 'Firm C (PwC)', '#2ca02c'),
    ('安永聯合', 'Firm D (EY)', '#ff7f0e'),
]

# Connect through a URI carrying mode=ro so the database is opened read-only,
# as promised by the module docstring.
conn = sqlite3.connect('file:' + DB + '?mode=ro', uri=True)
cur = conn.cursor()
|
||||
|
||||
|
||||
def series(firm_zh):
    """Return per-year descriptor aggregates for one firm.

    ``firm_zh`` is compared against ``accountants.firm``; signatures are
    linked to accountants through ``assigned_accountant = name``.  Rows with
    a NULL ``year_month`` or a NULL value in either descriptor column are
    left out.

    The result is a list of tuples, ordered by year:
    ``(year, AVG(max_similarity_to_same_accountant),
    AVG(min_dhash_independent), share of rows with similarity > 0.95 and
    dHash <= 5, row count)``.  The year is the first four characters of
    ``year_month`` cast to an integer.

    Uses the module-level cursor ``cur``.
    """
    query = """
        SELECT CAST(substr(s.year_month,1,4) AS INT) AS yr,
               AVG(s.max_similarity_to_same_accountant),
               AVG(s.min_dhash_independent),
               AVG(CASE WHEN s.max_similarity_to_same_accountant>0.95
                         AND s.min_dhash_independent<=5 THEN 1.0 ELSE 0.0 END),
               COUNT(*)
        FROM signatures s JOIN accountants a ON s.assigned_accountant=a.name
        WHERE a.firm=? AND s.year_month IS NOT NULL
          AND s.max_similarity_to_same_accountant IS NOT NULL
          AND s.min_dhash_independent IS NOT NULL
        GROUP BY yr ORDER BY yr"""
    # Cursor.execute() returns the cursor itself, so the fetch can be chained.
    return cur.execute(query, (firm_zh,)).fetchall()
|
||||
|
||||
|
||||
# Run every query before any plotting so the read-only connection is always
# released, even if a query, the plotting, or the file write fails later on.
try:
    firm_series = [(label, color, series(firm_zh))
                   for firm_zh, label, color in FIRMS]
finally:
    conn.close()

fig, axes = plt.subplots(1, 3, figsize=(16, 4.8))

# Each row from series() is (year, mean cosine, mean dHash, HC share, count);
# columns 1..3 map onto the three panels left to right.
for label, color, rows in firm_series:
    yrs = [r[0] for r in rows]
    for ax, col in zip(axes, (1, 2, 3)):
        ax.plot(yrs, [r[col] for r in rows], 'o-', color=color, label=label)

for ax in axes:
    ax.axvline(2020, ls='--', color='grey', alpha=0.6)
    # Anchor the label with x in data coordinates and y in axes fraction, so
    # it stays on the bottom edge even when the reference lines added below
    # rescale the y-axis.
    ax.text(2020.05, 0.0, ' 2020', color='grey', fontsize=8, va='bottom',
            transform=ax.get_xaxis_transform())
    ax.set_xlabel('Fiscal year')
    ax.grid(alpha=0.3)

# Dotted reference lines mark the HC-box thresholds used in the query.
axes[0].set_title('Mean best-match cosine')
axes[0].axhline(0.95, ls=':', color='k', alpha=0.4)
axes[1].set_title('Mean independent-min dHash')
axes[1].axhline(5, ls=':', color='k', alpha=0.4)
axes[2].set_title('HC-box share (cos>0.95 & dHash$\\leq$5)')
axes[0].legend(fontsize=8, loc='lower right')

# NOTE(review): the title states a conclusion and a year range as literal
# text; neither is derived from the data plotted here.
fig.suptitle('Big-4 descriptor trends 2013–2023 (2023 = partial, to Apr) — no 2020 break, no convergence to A',
             fontsize=11)
fig.tight_layout()

out = '/Volumes/NV2/pdf_recognize/signature_analysis/firm_year_trends.png'
fig.savefig(out, dpi=130, bbox_inches='tight')
print('saved', out)
|
||||
Reference in New Issue
Block a user