Paper A v13: filled submission draft (rev7) + reproducible build bundle
Fill all 18 placeholders in the condensed v13 submission draft with data verified against the analysis DB and LOCKED canonical scripts; close 12/13 co-author review items (only #8b protocol first-run open).

Key changes (need co-author sign-off; see handoff doc):
- Firm A out-of-sample HC 0.01% -> 0.42% (buggy 0.0001 from Script 49 same-pair bug, propagated v4.2->v13; never reuse 0.0001)
- §III-D empty cell ~=0 -> 7,681 honest reframe (not degenerate crops)
- low cosine cut 0.837 -> 0.8547 primary (BCD 2013-2019 closed-world, held-out discipline; 0.8489 confirmed = BCD all-period); HC/MC/HSC unchanged, UN/LH move <=0.4pp

Adds Figures 1-5 (real-data plots + schematics), full references, Appendix A/B, UN/HSC ICCR, n-reconciliation, #13 MOPS-metadata survival verification, "參" set-level feasibility probe (negative).

Two codex (gpt-5.5) adversarial rounds applied; no fabrication found.

Bundle: paper/v13_build/ (markdown source, harvest/figure scripts, figures) for reproducibility. Handoff note for co-author included.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
import sqlite3
from collections import defaultdict
from contextlib import closing

import numpy as np

DB = '/Volumes/NV2/PDF-Processing/signature-analysis/signature_analysis.db'
BCD = ('安侯建業聯合', '資誠聯合', '安永聯合')

# The firm filter is generated from BCD (one "?" per entry) and the names are
# passed as bound parameters, so the constant above is the single source of
# truth for which firms are included.
firm_slots = ','.join('?' * len(BCD))
query = f"""SELECT s.assigned_accountant, s.max_similarity_to_same_accountant, s.min_dhash_independent
FROM signatures s JOIN accountants a ON s.assigned_accountant=a.name
WHERE a.firm IN ({firm_slots})
AND CAST(substr(s.year_month,1,4) AS INT) BETWEEN 2013 AND 2019
AND s.max_similarity_to_same_accountant IS NOT NULL AND s.min_dhash_independent IS NOT NULL"""

# The database is opened read-only through a URI. closing() is used because a
# sqlite3 connection used directly as a context manager only manages the
# transaction; it does not close the connection.
with closing(sqlite3.connect(f'file:{DB}?mode=ro', uri=True)) as conn:
    rows = conn.execute(query, BCD).fetchall()

# Group the (similarity, dhash) pairs by assigned accountant.
by = defaultdict(list)
for a, cos, dh in rows:
    by[a].append((cos, dh))

# Keep only accountants with at least 15 rows; each value becomes an (n, 2)
# array with the similarity in column 0 and the dhash value in column 1.
accs = {a: np.array(v) for a, v in by.items() if len(v) >= 15}
print(f"BCD 2013-2019: {len(accs)} accountants with >=15 signatures (of {len(by)} total)")
|
||||
|
||||
def _profile(sig):
    """Summarise one accountant's (n, 2) array of (cosine, dhash) rows.

    Returns a 4-tuple:
      * share of rows with cosine > 0.95 and dhash <= 5,
      * share of rows with cosine > 0.95,
      * median cosine of the rows with cosine <= 0.95 (NaN when fewer than
        three such rows exist),
      * a label derived from the first share: 'pure-hand' below 0.10,
        'pure-stamp' above 0.90, otherwise 'mixed'.
    """
    cosine = sig[:, 0]
    dhash = sig[:, 1]

    rep_frac = ((cosine > 0.95) & (dhash <= 5)).mean()
    tight_frac = (cosine > 0.95).mean()

    remainder = cosine[cosine <= 0.95]
    if len(remainder) >= 3:
        remainder_median = np.median(remainder)
    else:
        remainder_median = np.nan

    if rep_frac < 0.10:
        label = 'pure-hand'
    elif rep_frac > 0.90:
        label = 'pure-stamp'
    else:
        label = 'mixed'
    return rep_frac, tight_frac, remainder_median, label


# One profile per accountant, in the iteration order of accs, then split into
# parallel arrays.
profiles = [_profile(sig) for sig in accs.values()]
rep = np.array([p[0] for p in profiles])
tight = np.array([p[1] for p in profiles])
rem_med = np.array([p[2] for p in profiles])
klass = np.array([p[3] for p in profiles])
|
||||
|
||||
import collections

print("\n=== Per-accountant replication-fraction (HC share) distribution ===")
# Bins are half-open [lo, hi). The last upper edge is 1.01 rather than 1.0 so
# that accountants with rep == 1.0 are counted in the final bin; with one
# decimal place the label still renders that edge as "1.0".
for lo, hi in [(0, 0.1), (0.1, 0.3), (0.3, 0.5), (0.5, 0.7), (0.7, 0.9), (0.9, 1.01)]:
    n = ((rep >= lo) & (rep < hi)).sum()
    print(f" rep_frac [{lo:.1f},{hi:.1f}): {n:3d} accountants")

# Count plain Python strings rather than NumPy string scalars: with NumPy 2.x
# the scalar repr is "np.str_('mixed')", which would leak into the printed
# dict. tolist() keeps the output as {'mixed': ...} on every NumPy version.
print(" class counts:", dict(collections.Counter(klass.tolist())))
|
||||
|
||||
# Boolean mask selecting the accountants labelled 'mixed'.
mixed = klass == 'mixed'
print(f"\n=== MIXED accountants (n={mixed.sum()}): is the non-tight remainder dispersed (separable)? ===")

# Drop NaN remainder medians, so the statistics below may cover fewer
# accountants than the n printed in the header.
rm_mixed = rem_med[mixed & ~np.isnan(rem_med)]

if rm_mixed.size:
    q25, q75 = np.percentile(rm_mixed, [25, 75])
    print(f" remainder (cos<=0.95) median cosine across mixed accountants: median={np.median(rm_mixed):.3f}, IQR[{q25:.3f},{q75:.3f}]")
    print(f" fraction of mixed accountants whose remainder median < 0.90 (clearly dispersed): {(rm_mixed<0.90).mean():.2f}")
    print(f" fraction with remainder median < 0.85 (very dispersed): {(rm_mixed<0.85).mean():.2f}")
else:
    # Median/percentile/mean of an empty array only produce NaN plus runtime
    # warnings (or an error on older NumPy); report the situation explicitly.
    print(" no mixed accountant has a usable remainder median; remainder statistics skipped")
|
||||
# Gap between the tight group (cos > 0.95) and the remainder (cos <= 0.95).
# This is computed for every accountant in accs, not only the 'mixed' class;
# the effective filter is having at least 3 signatures on each side.
gaps = []
for v in accs.values():
    cos = v[:, 0]
    t = cos[cos > 0.95]
    r = cos[cos <= 0.95]
    if len(t) >= 3 and len(r) >= 3:
        gaps.append(np.median(t) - np.median(r))
gaps = np.array(gaps)

print(f"\n=== Tight-vs-remainder cosine gap (all accountants with both parts, n={len(gaps)}) ===")
if gaps.size:
    print(f" median gap = {np.median(gaps):.3f} (large gap => two-component structure is real & separable)")
    print(f" fraction with gap > 0.10: {(gaps>0.10).mean():.2f}")
else:
    # Avoid NaN output and empty-slice warnings when nobody qualifies.
    print(" no accountant has >=3 signatures on both sides of 0.95; gap statistics skipped")
|
||||
Reference in New Issue
Block a user