Paper A v13: filled submission draft (rev7) + reproducible build bundle

Fill all 18 placeholders in the condensed v13 submission draft with
data verified against the analysis DB and the LOCKED canonical scripts;
close 12 of 13 co-author review items (only #8b, protocol first-run, remains open).

Key changes (need co-author sign-off; see handoff doc):
- Firm A out-of-sample HC 0.01% -> 0.42% (buggy 0.0001 from Script 49
  same-pair bug, propagated v4.2->v13; never reuse 0.0001)
- §III-D empty cell ~=0 -> 7,681 honest reframe (not degenerate crops)
- low cosine cut 0.837 -> 0.8547 primary (BCD 2013-2019 closed-world,
  held-out discipline; 0.8489 confirmed = BCD all-period); HC/MC/HSC
  unchanged, UN/LH move <=0.4pp

Adds Figures 1-5 (real-data plots + schematics), full references,
Appendix A/B, UN/HSC ICCR, n-reconciliation, #13 MOPS-metadata
survival verification, "參" set-level feasibility probe (negative).
Two codex (gpt-5.5) adversarial rounds applied; no fabrication found.

Bundle: paper/v13_build/ (markdown source, harvest/figure scripts,
figures) for reproducibility. Handoff note for co-author included.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-15 03:24:50 +08:00
parent 1e8466f7a8
commit 66c9194fcf
13 changed files with 749 additions and 0 deletions
+75
View File
@@ -0,0 +1,75 @@
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.patches import FancyBboxPatch, FancyArrowPatch, Rectangle
import numpy as np
# ============ Figure 1: data split grid ============
# Grid axes: one row per firm (listed top to bottom) and one column per
# period key. The period keys are compared verbatim inside role().
firms = [
    'Firm A',
    'Firm B',
    'Firm C',
    'Firm D',
]
periods = [
    '20132019',
    '20202023',
]
# Canvas for the firm-by-period grid (size in inches).
fig, ax = plt.subplots(figsize=(7, 3.2))
# role per (row firm, col period)
def role(f, p):
    """Return the ``(caption, fill colour)`` pair for one Figure 1 grid cell.

    Parameters
    ----------
    f : str
        Firm name of the grid row, e.g. ``'Firm A'``.
    p : str
        Period key of the grid column, e.g. ``'20132019'``.

    Returns
    -------
    tuple of (str, str)
        The multi-line caption drawn inside the cell and the hex colour
        used to fill it.
    """
    # Firm A is a held-out test cell for every period, so it is checked
    # before the period is looked at.
    if f == 'Firm A':
        return ('Held-out test 1\n(Firm A, full record)', '#c0392b')
    # For the other firms the '20132019' column is the calibration cell ...
    if p == '20132019':
        return ('Calibration\n(clean reference)', '#27ae60')
    # ... and every other period is the secondary held-out test.
    return ('Held-out test 2\n(secondary)', '#2980b9')
def _period_label(key):
    """Return a readable tick label for a period key.

    An 8-digit, digits-only key such as ``'20132019'`` is shown as
    ``'2013-2019'``; any other key is returned unchanged.
    """
    # NOTE(review): the period keys are two run-together years; presumably a
    # range separator was intended on the axis -- confirm the preferred dash.
    if len(key) == 8 and key.isdigit():
        return f'{key[:4]}-{key[4:]}'
    return key


# Draw one unit square per (firm, period) cell, coloured and captioned by role().
n_rows, n_cols = len(firms), len(periods)
for i, f in enumerate(firms):
    y0 = n_rows - 1 - i  # the first firm is placed in the top row
    for j, p in enumerate(periods):
        txt, col = role(f, p)
        ax.add_patch(Rectangle((j, y0), 1, 1, facecolor=col, alpha=0.30, edgecolor='black', lw=1))
        ax.text(j + 0.5, y0 + 0.5, txt, ha='center', va='center', fontsize=6.5)

# Axis extents and tick positions follow the grid size instead of being
# hard-coded; ticks sit at the centre of each column / row.
ax.set_xlim(0, n_cols)
ax.set_ylim(0, n_rows)
ax.set_xticks([j + 0.5 for j in range(n_cols)])
ax.set_xticklabels([_period_label(p) for p in periods], fontsize=9)
ax.set_yticks([n_rows - 1 - i + 0.5 for i in range(n_rows)])
ax.set_yticklabels(firms, fontsize=9)

# Hide tick marks and the axes frame so only the grid cells remain.
ax.tick_params(length=0)
for s in ax.spines.values():
    s.set_visible(False)

ax.set_title('Figure 1. Data split: calibrate on the clean cell, test everything else', fontsize=9)
fig.tight_layout()
fig.savefig('/tmp/fig1.png', dpi=200, bbox_inches='tight')
plt.close(fig)
# ============ Figure 2: pipeline ============
# One rounded box per pipeline stage, laid out left to right and joined by
# arrows. All positions below are in axes-fraction coordinates
# (transform=ax.transAxes).
fig, ax = plt.subplots(figsize=(9, 2.5))
steps = [
    'Raw PDF\nreport',
    'Find signature\npage (VLM)',
    'Detect signatures\n(YOLOv11)\n+ red-stamp removal',
    'Feature extraction\n(ResNet-50, 2048-d)',
    'Two similarities\ncosine (style)\nmin dHash (structure)',
    'Five-way\nlabel',
]
cols = ['#ecf0f1', '#d6eaf8', '#d5f5e3', '#fcf3cf', '#fadbd8', '#e8daef']
if len(cols) != len(steps):
    # zip() below would otherwise silently drop the unmatched stages while
    # the slot width is still computed from len(steps).
    raise ValueError('steps and cols must have the same length')

n = len(steps)
w = 1.0 / n  # horizontal slot allotted to each stage
for i, (s, c) in enumerate(zip(steps, cols)):
    x = i * w + 0.01  # left edge of the box, inset 0.01 into its slot
    ax.add_patch(
        FancyBboxPatch(
            (x, 0.30), w - 0.02, 0.40,
            boxstyle='round,pad=0.005,rounding_size=0.02',
            facecolor=c, edgecolor='black', lw=1, transform=ax.transAxes,
        )
    )
    # Stage caption centred in the box.
    ax.text(x + (w - 0.02) / 2, 0.50, s, ha='center', va='center', fontsize=6.8,
            transform=ax.transAxes)
    # Arrow towards the next stage; the last stage has no successor.
    if i < n - 1:
        ax.add_patch(
            FancyArrowPatch(
                (x + w - 0.012, 0.50), (x + w + 0.002, 0.50),
                transform=ax.transAxes,
                arrowstyle='-|>', mutation_scale=10, lw=1.2, color='black',
            )
        )

ax.axis('off')
ax.set_title('Figure 2. The screening pipeline', fontsize=9, y=0.92)
fig.savefig('/tmp/fig2.png', dpi=200, bbox_inches='tight')
plt.close(fig)
# ============ Figure 3: two-measure plane, five regions ============
# Shades the (cosine, dHash) plane into the five labelled regions and marks
# the thresholds that separate them.
fig, ax = plt.subplots(figsize=(5.2, 4.2))
LO, HI = 0.8547, 0.95      # low / high cosine cuts
DH1, DH2 = 5, 15           # dHash cuts inside the high-cosine band
xmin, xmax = 0.70, 1.005   # plotted cosine range
ymin, ymax = -1, 30        # plotted dHash range

# Shaded regions, drawn in this order: (x0, y0, width, height, colour, alpha).
regions = [
    (xmin, ymin, LO - xmin, ymax - ymin, '#bdc3c7', 0.5),   # LH (cos<=LO): whole column
    (LO, ymin, HI - LO, ymax - ymin, '#f7dc6f', 0.5),       # UN (LO<cos<=HI)
    # high-cosine band subdivided by dHash
    (HI, ymin, xmax - HI, DH1 - ymin, '#cb4335', 0.55),     # HC dHash<=5
    (HI, DH1, xmax - HI, DH2 - DH1, '#eb984e', 0.55),       # MC 5<dHash<=15
    (HI, DH2, xmax - HI, ymax - DH2, '#aed6f1', 0.6),       # HSC dHash>15
]
for x0, y0, width, height, colour, alpha in regions:
    ax.add_patch(Rectangle((x0, y0), width, height, facecolor=colour, alpha=alpha))

# Threshold lines: full-height verticals at the cosine cuts, and horizontal
# dHash cuts drawn only across the high-cosine band.
ax.axvline(LO, color='gray', ls=':', lw=1)
ax.axvline(HI, color='black', ls='--', lw=1)
ax.plot([HI, xmax], [DH1, DH1], 'k--', lw=0.8)
ax.plot([HI, xmax], [DH2, DH2], 'k--', lw=0.8)

# Region names, centred horizontally in their region: (x, y, name, extra text kwargs).
region_names = [
    ((xmin + LO) / 2, 22, 'LH', {'fontsize': 11}),
    ((LO + HI) / 2, 22, 'UN', {'fontsize': 11}),
    ((HI + xmax) / 2, 2, 'HC', {'fontsize': 11, 'color': 'white'}),
    ((HI + xmax) / 2, 9.5, 'MC', {'fontsize': 11}),
    ((HI + xmax) / 2, 22, 'HSC', {'fontsize': 10}),
]
for x, y, name, extra in region_names:
    ax.text(x, y, name, ha='center', weight='bold', **extra)

# Threshold captions are derived from LO / HI so they cannot drift from the
# values actually plotted when a cut is revised (str(0.8547) == '0.8547',
# str(0.95) == '0.95', i.e. the same text as before).
for cut in (LO, HI):
    ax.text(cut, ymin - 1.5, str(cut), ha='center', fontsize=7)

ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
ax.set_xlabel('cosine similarity (style)')
ax.set_ylabel('dHash distance (structure)')
ax.set_title('Figure 3. The two measures and the five regions', fontsize=9)
fig.tight_layout()
fig.savefig('/tmp/fig3.png', dpi=200, bbox_inches='tight')
plt.close(fig)
print('figs 1/2/3 OK')