66c9194fcf
Fill all 18 placeholders in the condensed v13 submission draft with data verified against the analysis DB and LOCKED canonical scripts; close 12/13 co-author review items (only #8b protocol first-run open). Key changes (need co-author sign-off; see handoff doc): - Firm A out-of-sample HC 0.01% -> 0.42% (buggy 0.0001 from Script 49 same-pair bug, propagated v4.2->v13; never reuse 0.0001) - §III-D empty cell ~=0 -> 7,681 honest reframe (not degenerate crops) - low cosine cut 0.837 -> 0.8547 primary (BCD 2013-2019 closed-world, held-out discipline; 0.8489 confirmed = BCD all-period); HC/MC/HSC unchanged, UN/LH move <=0.4pp Adds Figures 1-5 (real-data plots + schematics), full references, Appendix A/B, UN/HSC ICCR, n-reconciliation, #13 MOPS-metadata survival verification, "參" set-level feasibility probe (negative). Two codex (gpt-5.5) adversarial rounds applied; no fabrication found. Bundle: paper/v13_build/ (markdown source, harvest/figure scripts, figures) for reproducibility. Handoff note for co-author included. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
76 lines
4.2 KiB
Python
76 lines
4.2 KiB
Python
import matplotlib
|
||
matplotlib.use('Agg')
|
||
import matplotlib.pyplot as plt
|
||
from matplotlib.patches import FancyBboxPatch, FancyArrowPatch, Rectangle
|
||
import numpy as np
|
||
|
||
# ============ Figure 1: data split grid ============
# Grid layout: one row per firm, one column per reporting period.
firms = [f'Firm {letter}' for letter in 'ABCD']  # 'Firm A' .. 'Firm D'
periods = ['2013–2019', '2020–2023']
fig, ax = plt.subplots(figsize=(7, 3.2))
|
||
# role per (row firm, col period)
|
||
def role(f, p):
    """Return the ``(label, colour)`` pair for one cell of the data-split grid.

    ``f`` is the firm name and ``p`` the period label.  Firm A gets the
    "held-out test 1" role in every period; any other firm is "calibration"
    in the 2013–2019 period and "held-out test 2" otherwise.
    """
    if f == 'Firm A':
        label, colour = 'Held-out test 1\n(Firm A, full record)', '#c0392b'
    elif p == '2013–2019':
        label, colour = 'Calibration\n(clean reference)', '#27ae60'
    else:
        label, colour = 'Held-out test 2\n(secondary)', '#2980b9'
    return (label, colour)
|
||
# Draw one tinted, labelled unit cell per (firm, period).  The first firm is
# placed in the top row, so row index i maps to y0 = n_rows - 1 - i.
n_rows, n_cols = len(firms), len(periods)
for i, f in enumerate(firms):
    y0 = n_rows - 1 - i
    for j, p in enumerate(periods):
        txt, col = role(f, p)
        ax.add_patch(Rectangle((j, y0), 1, 1, facecolor=col, alpha=0.30,
                               edgecolor='black', lw=1))
        ax.text(j + 0.5, y0 + 0.5, txt, ha='center', va='center', fontsize=6.5)

# Axis extents and tick positions are derived from the grid size instead of
# repeating literal 2x4 numbers, so they stay correct if firms/periods change.
ax.set_xlim(0, n_cols)
ax.set_ylim(0, n_rows)
ax.set_xticks([j + 0.5 for j in range(n_cols)])
ax.set_xticklabels(periods, fontsize=9)
ax.set_yticks([n_rows - 1 - i + 0.5 for i in range(n_rows)])
ax.set_yticklabels(firms, fontsize=9)

# Hide tick marks and the frame so only the coloured grid remains.
ax.tick_params(length=0)
for s in ax.spines.values():
    s.set_visible(False)
ax.set_title('Figure 1. Data split: calibrate on the clean cell, test everything else', fontsize=9)

fig.tight_layout()
fig.savefig('/tmp/fig1.png', dpi=200, bbox_inches='tight')
plt.close(fig)
|
||
|
||
# ============ Figure 2: pipeline ============
fig, ax = plt.subplots(figsize=(9, 2.5))

# One (label, fill colour) entry per pipeline step, in left-to-right order.
stages = [
    ('Raw PDF\nreport', '#ecf0f1'),
    ('Find signature\npage (VLM)', '#d6eaf8'),
    ('Detect signatures\n(YOLOv11)\n+ red-stamp removal', '#d5f5e3'),
    ('Feature extraction\n(ResNet-50, 2048-d)', '#fcf3cf'),
    ('Two similarities\ncosine (style)\nmin dHash (structure)', '#fadbd8'),
    ('Five-way\nlabel', '#e8daef'),
]
n_stages = len(stages)
slot = 1.0 / n_stages   # horizontal share of the axes given to each step
box_w = slot - 0.02     # box width, leaving a 0.01 margin on either side
mid_y = 0.50            # vertical centre of the boxes, labels and arrows

# All geometry below is expressed in axes-fraction coordinates (transAxes).
for idx, (label, fill) in enumerate(stages):
    left = idx * slot + 0.01
    box = FancyBboxPatch(
        (left, 0.30), box_w, 0.40,
        boxstyle='round,pad=0.005,rounding_size=0.02',
        facecolor=fill, edgecolor='black', lw=1, transform=ax.transAxes,
    )
    ax.add_patch(box)
    ax.text(left + box_w / 2, mid_y, label, ha='center', va='center',
            fontsize=6.8, transform=ax.transAxes)
    if idx < n_stages - 1:
        # Short connector from this box towards the next one.
        arrow = FancyArrowPatch(
            (left + slot - 0.012, mid_y), (left + slot + 0.002, mid_y),
            transform=ax.transAxes,
            arrowstyle='-|>', mutation_scale=10, lw=1.2, color='black',
        )
        ax.add_patch(arrow)

ax.axis('off')
ax.set_title('Figure 2. The screening pipeline', fontsize=9, y=0.92)
fig.savefig('/tmp/fig2.png', dpi=200, bbox_inches='tight')
plt.close(fig)
|
||
|
||
# ============ Figure 3: two-measure plane, five regions ============
fig, ax = plt.subplots(figsize=(5.2, 4.2))
LO, HI = 0.8547, 0.95      # cosine cuts (x axis)
DH1, DH2 = 5, 15           # dHash cuts (y axis), applied inside the cos > HI band
xmin, xmax = 0.70, 1.005   # plotted cosine range
ymin, ymax = -1, 30        # plotted dHash range

# LH (cos<=LO): whole column
ax.add_patch(Rectangle((xmin, ymin), LO - xmin, ymax - ymin, facecolor='#bdc3c7', alpha=0.5))
# UN (LO<cos<=HI)
ax.add_patch(Rectangle((LO, ymin), HI - LO, ymax - ymin, facecolor='#f7dc6f', alpha=0.5))
# high-cosine band subdivided by dHash
ax.add_patch(Rectangle((HI, ymin), xmax - HI, DH1 - ymin, facecolor='#cb4335', alpha=0.55))  # HC dHash<=5
ax.add_patch(Rectangle((HI, DH1), xmax - HI, DH2 - DH1, facecolor='#eb984e', alpha=0.55))  # MC 5<dHash<=15
ax.add_patch(Rectangle((HI, DH2), xmax - HI, ymax - DH2, facecolor='#aed6f1', alpha=0.6))  # HSC dHash>15

# Boundary lines: full-height verticals at the cosine cuts, then dashed
# horizontals at the dHash cuts spanning only the high-cosine band.
ax.axvline(LO, color='gray', ls=':', lw=1)
ax.axvline(HI, color='black', ls='--', lw=1)
for dh in (DH1, DH2):
    ax.plot([HI, xmax], [dh, dh], 'k--', lw=0.8)

# Region labels.
ax.text((xmin + LO) / 2, 22, 'LH', ha='center', fontsize=11, weight='bold')
ax.text((LO + HI) / 2, 22, 'UN', ha='center', fontsize=11, weight='bold')
ax.text((HI + xmax) / 2, 2, 'HC', ha='center', fontsize=11, weight='bold', color='white')
ax.text((HI + xmax) / 2, 9.5, 'MC', ha='center', fontsize=11, weight='bold')
ax.text((HI + xmax) / 2, 22, 'HSC', ha='center', fontsize=10, weight='bold')

# Annotate each cosine cut with its value, positioned just below ymin.  The
# text is formatted from LO/HI so it cannot drift from the constants above
# (currently renders as '0.8547' and '0.95').
for cut in (LO, HI):
    ax.text(cut, ymin - 1.5, f'{cut}', ha='center', fontsize=7)

ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
ax.set_xlabel('cosine similarity (style)')
ax.set_ylabel('dHash distance (structure)')
ax.set_title('Figure 3. The two measures and the five regions', fontsize=9)

fig.tight_layout()
fig.savefig('/tmp/fig3.png', dpi=200, bbox_inches='tight')
plt.close(fig)
print('figs 1/2/3 OK')
|