"""Render three schematic figures as PNG files under /tmp.

Figure 1 is a firm-by-period grid showing the role of each data cell,
Figure 2 is a left-to-right pipeline diagram, and Figure 3 is the
cosine-similarity / dHash-distance plane divided into five labelled regions.
"""
import matplotlib

# Select the non-interactive Agg backend before pyplot is imported so the
# script can run without a display.
matplotlib.use('Agg')

import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch, FancyBboxPatch, Rectangle
import numpy as np  # not used below; kept in case other code relies on it

# ============ Figure 1: data split grid ============
fig, ax = plt.subplots(figsize=(7, 3.2))
firms = ['Firm A', 'Firm B', 'Firm C', 'Firm D']
periods = ['2013–2019', '2020–2023']


def role(f, p):
    """Return the ``(label, colour)`` pair for one cell of the split grid.

    Args:
        f: Firm name (row of the grid).
        p: Period label (column of the grid).

    Returns:
        A tuple of the text drawn inside the cell and the hex face colour.
        Every 'Firm A' cell is "Held-out test 1" regardless of period; for
        the other firms the '2013–2019' column is "Calibration" and any
        other period is "Held-out test 2".
    """
    if f == 'Firm A':
        return ('Held-out test 1\n(Firm A, full record)', '#c0392b')
    if p == '2013–2019':
        return ('Calibration\n(clean reference)', '#27ae60')
    return ('Held-out test 2\n(secondary)', '#2980b9')


for i, f in enumerate(firms):
    # Rows are drawn top-down: the first firm occupies the highest row.
    row = len(firms) - 1 - i
    for j, p in enumerate(periods):
        txt, col = role(f, p)
        ax.add_patch(Rectangle((j, row), 1, 1, facecolor=col, alpha=0.30,
                               edgecolor='black', lw=1))
        ax.text(j + 0.5, row + 0.5, txt, ha='center', va='center',
                fontsize=6.5)

# Limits and tick positions follow the grid size (unit cells, ticks centred).
ax.set_xlim(0, len(periods))
ax.set_ylim(0, len(firms))
ax.set_xticks([j + 0.5 for j in range(len(periods))])
ax.set_xticklabels(periods, fontsize=9)
ax.set_yticks([len(firms) - 0.5 - i for i in range(len(firms))])
ax.set_yticklabels(firms, fontsize=9)
ax.tick_params(length=0)
for s in ax.spines.values():
    s.set_visible(False)
ax.set_title('Figure 1. Data split: calibrate on the clean cell, '
             'test everything else', fontsize=9)
fig.tight_layout()
fig.savefig('/tmp/fig1.png', dpi=200, bbox_inches='tight')
plt.close(fig)

# ============ Figure 2: pipeline ============
fig, ax = plt.subplots(figsize=(9, 2.5))
steps = [
    'Raw PDF\nreport',
    'Find signature\npage (VLM)',
    'Detect signatures\n(YOLOv11)\n+ red-stamp removal',
    'Feature extraction\n(ResNet-50, 2048-d)',
    'Two similarities\ncosine (style)\nmin dHash (structure)',
    'Five-way\nlabel',
]
n = len(steps)
w = 1.0 / n  # horizontal slot per step, in axes-fraction units
cols = ['#ecf0f1', '#d6eaf8', '#d5f5e3', '#fcf3cf', '#fadbd8', '#e8daef']
for i, (s, c) in enumerate(zip(steps, cols)):
    # Each box is inset by 0.01 on both sides of its slot.
    x = i * w + 0.01
    ax.add_patch(FancyBboxPatch(
        (x, 0.30), w - 0.02, 0.40,
        boxstyle='round,pad=0.005,rounding_size=0.02',
        facecolor=c, edgecolor='black', lw=1, transform=ax.transAxes))
    ax.text(x + (w - 0.02) / 2, 0.50, s, ha='center', va='center',
            fontsize=6.8, transform=ax.transAxes)
    if i < n - 1:
        # Short arrow bridging the gap to the next box; none after the last.
        ax.add_patch(FancyArrowPatch(
            (x + w - 0.012, 0.50), (x + w + 0.002, 0.50),
            transform=ax.transAxes, arrowstyle='-|>', mutation_scale=10,
            lw=1.2, color='black'))
ax.axis('off')
ax.set_title('Figure 2. The screening pipeline', fontsize=9, y=0.92)
fig.savefig('/tmp/fig2.png', dpi=200, bbox_inches='tight')
plt.close(fig)

# ============ Figure 3: two-measure plane, five regions ============
fig, ax = plt.subplots(figsize=(5.2, 4.2))
LO, HI = 0.8547, 0.95      # cosine-similarity thresholds (vertical lines)
DH1, DH2 = 5, 15           # dHash-distance thresholds (horizontal lines)
xmin, xmax = 0.70, 1.005
ymin, ymax = -1, 30

# LH (cos<=LO): whole column
ax.add_patch(Rectangle((xmin, ymin), LO - xmin, ymax - ymin,
                       facecolor='#bdc3c7', alpha=0.5))
# NOTE(review): the shading for the four regions below was missing from the
# file (text lost right after "# UN (LO"). The rectangles are reconstructed
# from the thresholds, the dashed boundary lines and the label positions
# further down; the colours are placeholder choices (HC is dark because its
# label is drawn in white) -- confirm against the intended figure.
# UN (cosine between LO and HI): whole column
ax.add_patch(Rectangle((LO, ymin), HI - LO, ymax - ymin,
                       facecolor='#ecf0f1', alpha=0.8))
# HC (cosine above HI, dHash up to DH1)
ax.add_patch(Rectangle((HI, ymin), xmax - HI, DH1 - ymin,
                       facecolor='#c0392b', alpha=0.85))
# MC (cosine above HI, dHash between DH1 and DH2)
ax.add_patch(Rectangle((HI, DH1), xmax - HI, DH2 - DH1,
                       facecolor='#f39c12', alpha=0.5))
# HSC (cosine above HI, dHash above DH2)
ax.add_patch(Rectangle((HI, DH2), xmax - HI, ymax - DH2,
                       facecolor='#f9e79f', alpha=0.6))

# Region boundaries: the two cosine thresholds span the full height, while
# the dHash thresholds only subdivide the column to the right of HI.
ax.axvline(LO, color='gray', ls=':', lw=1)
ax.axvline(HI, color='black', ls='--', lw=1)
ax.plot([HI, xmax], [DH1, DH1], 'k--', lw=0.8)
ax.plot([HI, xmax], [DH2, DH2], 'k--', lw=0.8)

# Region labels.
ax.text((xmin + LO) / 2, 22, 'LH', ha='center', fontsize=11, weight='bold')
ax.text((LO + HI) / 2, 22, 'UN', ha='center', fontsize=11, weight='bold')
ax.text((HI + xmax) / 2, 2, 'HC', ha='center', fontsize=11, weight='bold',
        color='white')
ax.text((HI + xmax) / 2, 9.5, 'MC', ha='center', fontsize=11, weight='bold')
ax.text((HI + xmax) / 2, 22, 'HSC', ha='center', fontsize=10, weight='bold')

# Threshold values written just below the lower axis limit.
ax.text(LO, ymin - 1.5, '0.8547', ha='center', fontsize=7)
ax.text(HI, ymin - 1.5, '0.95', ha='center', fontsize=7)

ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
ax.set_xlabel('cosine similarity (style)')
ax.set_ylabel('dHash distance (structure)')
ax.set_title('Figure 3. The two measures and the five regions', fontsize=9)
fig.tight_layout()
fig.savefig('/tmp/fig3.png', dpi=200, bbox_inches='tight')
plt.close(fig)

print('figs 1/2/3 OK')