Add Scripts 39b/c/d/e + 40b + 43: anchor-based FAR diagnostics
Spike checkpoint in response to codex rounds 28-30 review:
- 39b/c: signature-level dip test on Big-4 and non-Big-4 marginals
- 39d: dHash discrete-value robustness (raw vs jittered + histogram
valleys + firm residualization); confirms within-firm dHash dip
rejection is integer-mass-point artefact
- 39e: dHash firm-residualized + jittered 2x2 factorial decomposition;
confirms Big-4 pooled dh "multimodality" is composition + integer
artefact (centered + jittered p=0.35, 0/5 seeds reject)
- 40b: inter-CPA per-pair FAR sweep (cos + dh marginal + joint +
conditional); replicates v3 cos>0.95 FAR=0.0006 and provides
v4-new dh FAR curve
- 43: pool-normalized per-signature FAR (codex round-30 fix for
per-pair vs per-signature conflation); per-sig FAR for deployed
any-pair rule = 11.02%, per-firm structure shows Firm A 20% vs
B/C/D <1%
These scripts replace the distributional path (K=3 mixture / dip /
antimode) with anchor-based threshold derivation. Companion
artefacts in reports/v4_big4/{signature_level_diptest,
midsmall_signature_diptest, dhash_discrete_robustness,
inter_cpa_far_sweep, pool_normalized_far}/.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,446 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script 39d: dHash Discrete-Value Robustness Diagnostics
|
||||
========================================================
|
||||
Codex (gpt-5.5 xhigh) attack on Script 39b/39c findings revealed that
|
||||
the within-firm dHash dip-test rejections are driven by integer mass
|
||||
points (dHash takes integer values 0..64). A uniform jitter of
|
||||
[-0.5, +0.5] eliminates dip rejection in every firm tested. This
|
||||
script consolidates that finding into a permanent diagnostic and adds:
|
||||
|
||||
1. Raw vs jittered dip with multi-seed robustness (5 seeds)
|
||||
2. Integer-histogram valley analysis: locate local minima between
|
||||
adjacent peaks in the binned integer distribution; report whether
|
||||
any valley centers near dh = 5
|
||||
3. Firm-residualized dip on dHash (analog of cosine firm-mean
|
||||
centering that confirmed the cosine reframe)
|
||||
4. Pairwise pair-coincidence: does the same same-CPA pair achieve
|
||||
both max cosine and min dHash, or are the two descriptors
|
||||
attached to different pairs? Foundation for "is (cos, dh) a
|
||||
joint signature regime descriptor or two parallel descriptors"
|
||||
|
||||
This script does not derive operational thresholds; it characterises
|
||||
whether the v4.0 K=3 mixture and v3.x cos>0.95 AND dh<=5 rule are
|
||||
robustly supported once integer-discreteness artifacts are removed.
|
||||
|
||||
Outputs:
|
||||
reports/v4_big4/dhash_discrete_robustness/
|
||||
dhash_discrete_results.json
|
||||
dhash_discrete_report.md
|
||||
"""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import numpy as np
|
||||
import diptest
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# Read-only analysis database holding per-signature similarity metrics.
DB = '/Volumes/NV2/PDF-Processing/signature-analysis/signature_analysis.db'
# Destination directory for this script's JSON results and markdown report.
OUT = Path('/Volumes/NV2/PDF-Processing/signature-analysis/reports/'
           'v4_big4/dhash_discrete_robustness')
OUT.mkdir(parents=True, exist_ok=True)

# Big-4 firm names exactly as stored in the accountants.firm column.
BIG4 = ('勤業眾信聯合', '安侯建業聯合', '資誠聯合', '安永聯合')
# Anonymised firm aliases used in all printed output and report artefacts.
ALIAS = {'勤業眾信聯合': 'Firm A',
         '安侯建業聯合': 'Firm B',
         '資誠聯合': 'Firm C',
         '安永聯合': 'Firm D'}
# Bootstrap replicates per dip-test p-value.
N_BOOT = 2000
# RNG seeds for the uniform [-0.5, +0.5] jitter robustness check (5 repeats).
JITTER_SEEDS = [42, 43, 44, 45, 46]
# NOTE(review): SINGLE_FIRM_MIN_SIG is defined but never referenced in this
# script — possibly a leftover threshold from 39b/c; confirm before removal.
SINGLE_FIRM_MIN_SIG = 500
|
||||
|
||||
|
||||
def load_signatures(db_path=None):
    """Load (firm, accountant, max cosine, min dHash) rows from the DB.

    Opens the SQLite database read-only and returns one row per signature
    that has an assigned accountant, both similarity metrics, and a firm.
    min_dhash_independent is CAST to REAL in SQL so downstream numpy
    arrays are float without a second conversion.

    Args:
        db_path: path to the SQLite database. Defaults to the module-level
            DB constant, so existing no-argument callers are unaffected.

    Returns:
        list of (firm, assigned_accountant, max_cos, min_dhash) tuples.
    """
    if db_path is None:
        db_path = DB
    # mode=ro guards against accidental writes to the analysis database.
    conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
    try:
        cur = conn.cursor()
        cur.execute('''
            SELECT a.firm, s.assigned_accountant,
                   s.max_similarity_to_same_accountant,
                   CAST(s.min_dhash_independent AS REAL)
            FROM signatures s
            JOIN accountants a ON s.assigned_accountant = a.name
            WHERE s.assigned_accountant IS NOT NULL
              AND s.max_similarity_to_same_accountant IS NOT NULL
              AND s.min_dhash_independent IS NOT NULL
              AND a.firm IS NOT NULL
        ''')
        return cur.fetchall()
    finally:
        # Close even if the query raises (original leaked the connection).
        conn.close()
|
||||
|
||||
|
||||
def dip(values, n_boot=N_BOOT):
    """Return (dip statistic, bootstrap p-value) for the finite entries.

    Non-finite values (NaN/inf) are dropped before the Hartigan dip test;
    the p-value is bootstrapped with n_boot replicates.
    """
    finite = np.asarray(values, dtype=float)
    finite = finite[np.isfinite(finite)]
    stat, pval = diptest.diptest(finite, boot_pval=True, n_boot=n_boot)
    return float(stat), float(pval)
|
||||
|
||||
|
||||
def multi_seed_jitter_dip(values, seeds=JITTER_SEEDS, n_boot=N_BOOT):
    """Dip-test the data after uniform [-0.5, 0.5] jitter, once per seed.

    Returns a summary dict with per-seed results plus the min/max/median
    p-value, the dip-statistic range, and the count of seeds rejecting
    unimodality at the 0.05 level.
    """
    finite = np.asarray(values, dtype=float)
    finite = finite[np.isfinite(finite)]
    per_seed = []
    for seed in seeds:
        noise = np.random.default_rng(seed).uniform(-0.5, 0.5, len(finite))
        stat, pval = diptest.diptest(finite + noise,
                                     boot_pval=True, n_boot=n_boot)
        per_seed.append({'seed': seed, 'dip': float(stat), 'p': float(pval)})
    pvals = [entry['p'] for entry in per_seed]
    dips = [entry['dip'] for entry in per_seed]
    return {
        'n_seeds': len(seeds),
        'p_min': min(pvals),
        'p_max': max(pvals),
        'p_median': float(np.median(pvals)),
        'dip_min': min(dips),
        'dip_max': max(dips),
        'reject_at_05_count': int(sum(1 for p in pvals if p <= 0.05)),
        'per_seed': per_seed,
    }
|
||||
|
||||
|
||||
def integer_histogram_valleys(values, max_bin=20):
    """Locate local minima in the integer count histogram on 0..max_bin.

    Bins are [i, i+1), so integer value i lands in bin i. A valley is a
    bin whose count is strictly below both neighbours; depth_rel measures
    how far it dips below the shallower neighbour.

    Fix vs the original: bin positions are reported as the integer value
    itself (bin left edge) instead of the midpoint i + 0.5. The midpoint
    misplaced every valley by half a unit — e.g. a valley at dh=5 printed
    as 5.5 and rounded to 6 by the report's '.0f' format — and was
    inconsistent with left/right_peak_bin, which already truncated to the
    integer value.

    Args:
        values: iterable of (integer-valued) numbers; non-finite dropped.
        max_bin: highest integer value analysed (inclusive).

    Returns:
        dict with 'histogram_bins_0_to_max' (counts for values 0..max_bin),
        'valleys' (list of per-valley dicts), and an interpretive note.
    """
    arr = np.asarray(values, dtype=float)
    arr = arr[np.isfinite(arr)]
    bins = np.arange(0, max_bin + 2)  # edges 0, 1, ..., max_bin+1
    counts, edges = np.histogram(arr, bins=bins)
    # Integer value represented by each bin: the left edge, not the midpoint.
    centers = edges[:-1]
    valleys = []
    for i in range(1, len(counts) - 1):
        if counts[i] < counts[i - 1] and counts[i] < counts[i + 1]:
            left_peak = counts[i - 1]
            right_peak = counts[i + 1]
            min_peak = min(left_peak, right_peak)
            # Relative depth against the shallower flank; guard div-by-zero.
            depth_rel = (min_peak - counts[i]) / min_peak if min_peak else 0
            valleys.append({
                'bin_center': float(centers[i]),
                'count': int(counts[i]),
                'left_peak_bin': int(centers[i - 1]),
                'left_peak_count': int(left_peak),
                'right_peak_bin': int(centers[i + 1]),
                'right_peak_count': int(right_peak),
                'depth_rel': float(depth_rel),
            })
    return {
        'histogram_bins_0_to_max': counts[:max_bin + 1].tolist(),
        'valleys': valleys,
        'note': ('valleys are bins where count < both neighbours; '
                 'depth_rel = (min(neighbour) - bin) / min(neighbour). '
                 'A genuine antimode would have a deep, stable valley '
                 'with depth_rel > 0.1.'),
    }
|
||||
|
||||
|
||||
def firm_residualized(values, firm_labels):
    """Center values on each firm's mean, then shift back to the grand mean.

    Removes between-firm mean differences so a subsequent dip test sees
    only residual within-firm structure.
    """
    arr = np.asarray(values, dtype=float)
    labels = np.asarray(firm_labels)
    grand_mean = float(np.mean(arr))
    centered = arr.copy()
    for firm in np.unique(labels):
        sel = labels == firm
        centered[sel] = arr[sel] - float(np.mean(arr[sel])) + grand_mean
    return centered
|
||||
|
||||
|
||||
def pair_coincidence_rate(db_path=None):
    """Rate at which the max-cosine partner equals the min-dHash partner.

    Runs one aggregate query over the signatures table counting: total
    rows, rows where both pair IDs are present and equal, rows where both
    are present but differ, and rows missing either ID. The rate is
    defined over rows with both IDs present (None when there are none).

    Args:
        db_path: path to the SQLite database. Defaults to the module-level
            DB constant, so existing no-argument callers are unaffected.

    Raises:
        sqlite3.OperationalError: when the pair-ID columns do not exist in
            the schema (the caller in main() handles this case).
    """
    if db_path is None:
        db_path = DB
    conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
    try:
        cur = conn.cursor()
        cur.execute('''
            SELECT COUNT(*) AS n_total,
                   SUM(CASE WHEN max_cosine_pair_id IS NOT NULL
                            AND min_dhash_pair_id IS NOT NULL
                            AND max_cosine_pair_id = min_dhash_pair_id
                            THEN 1 ELSE 0 END) AS n_same_pair,
                   SUM(CASE WHEN max_cosine_pair_id IS NOT NULL
                            AND min_dhash_pair_id IS NOT NULL
                            AND max_cosine_pair_id != min_dhash_pair_id
                            THEN 1 ELSE 0 END) AS n_diff_pair,
                   SUM(CASE WHEN max_cosine_pair_id IS NULL
                            OR min_dhash_pair_id IS NULL
                            THEN 1 ELSE 0 END) AS n_null
            FROM signatures
        ''')
        n_total, n_same, n_diff, n_null = cur.fetchone()
    finally:
        # Close even if the query raises (original leaked the connection).
        conn.close()
    # SUM over zero rows yields NULL -> Python None; coerce to 0.
    n_with_both = (n_same or 0) + (n_diff or 0)
    return {
        'n_total': int(n_total or 0),
        'n_with_both_pair_ids': int(n_with_both),
        'n_same_pair': int(n_same or 0),
        'n_diff_pair': int(n_diff or 0),
        'n_null': int(n_null or 0),
        'same_pair_rate': (float(n_same) / n_with_both
                           if n_with_both else None),
        'note': ('rate computed over signatures where both '
                 'max_cosine_pair_id and min_dhash_pair_id are present'),
    }
|
||||
|
||||
|
||||
def _fmt_p(p):
|
||||
return '< 5e-4' if p == 0.0 else f'{p:.4g}'
|
||||
|
||||
|
||||
def main():
    """Run the 39d dHash discreteness diagnostics end to end.

    Sections:
      A. Raw vs multi-seed jittered dip per scope (Big-4 pooled, each
         Big-4 firm, non-Big-4 pooled).
      B. Integer-histogram valley locations per scope (bins 0..20).
      C. Firm-residualized dHash dip for Big-4 and non-Big-4.
      D. Max-cosine-pair vs min-dHash-pair coincidence (schema-dependent;
         degrades gracefully when the pair-ID columns are absent).

    Side effects: prints a console summary and writes
    dhash_discrete_results.json and dhash_discrete_report.md under OUT.
    """
    print('=' * 72)
    print('Script 39d: dHash Discrete-Value Robustness Diagnostics')
    print('=' * 72)
    rows = load_signatures()
    firms_raw = np.array([r[0] for r in rows])
    # NOTE(review): cos is loaded but never used below — presumably kept
    # for column-parity with scripts 39b/c; confirm before removing.
    cos = np.array([r[2] for r in rows], dtype=float)
    dh = np.array([r[3] for r in rows], dtype=float)
    is_big4 = np.isin(firms_raw, BIG4)
    n = len(rows)
    print(f'\nLoaded {n:,} signatures; Big-4 {is_big4.sum():,}, '
          f'non-Big-4 {(~is_big4).sum():,}')

    # Single accumulator for everything that ends up in the JSON artefact.
    results = {
        'meta': {
            'script': '39d',
            'timestamp': datetime.now().isoformat(timespec='seconds'),
            'n_total_signatures': int(n),
            'n_big4': int(is_big4.sum()),
            'n_non_big4': int((~is_big4).sum()),
            'n_boot': N_BOOT,
            'jitter_seeds': JITTER_SEEDS,
            'note': ('Diagnostic for dHash integer-mass-point artifact '
                     'in dip test; codex round-29 attack on Script 39b/c'),
        },
    }

    # ---- A. Raw vs multi-seed jittered dip ----
    # NOTE(review): the '(5 seeds, n_boot=2000)' label is hard-coded and
    # will go stale if JITTER_SEEDS or N_BOOT change.
    print('\n[A] Raw vs jittered dip (5 seeds, n_boot=2000)')
    panels = {}
    # Big-4 pooled
    print(' Big-4 pooled:')
    raw_d, raw_p = dip(dh[is_big4])
    j = multi_seed_jitter_dip(dh[is_big4])
    panels['big4_pooled'] = {
        'n': int(is_big4.sum()),
        'raw': {'dip': raw_d, 'p': raw_p},
        'jittered': j,
    }
    print(f' raw: dip={raw_d:.5f}, p={_fmt_p(raw_p)}')
    print(f' jitter: p_median={j["p_median"]:.4g}, '
          f'p_range=[{j["p_min"]:.4g}, {j["p_max"]:.4g}], '
          f'reject@.05 in {j["reject_at_05_count"]}/5 seeds')
    # Each Big-4 firm
    for f in BIG4:
        mask = firms_raw == f
        if mask.sum() == 0:
            continue
        raw_d, raw_p = dip(dh[mask])
        j = multi_seed_jitter_dip(dh[mask])
        panels[ALIAS[f]] = {
            'n': int(mask.sum()),
            'raw': {'dip': raw_d, 'p': raw_p},
            'jittered': j,
        }
        print(f' {ALIAS[f]} (n={mask.sum():,}):')
        print(f' raw: dip={raw_d:.5f}, p={_fmt_p(raw_p)}')
        print(f' jitter: p_median={j["p_median"]:.4g}, '
              f'reject@.05 in {j["reject_at_05_count"]}/5 seeds')
    # Non-Big-4 pooled
    print(' Non-Big-4 pooled:')
    raw_d, raw_p = dip(dh[~is_big4])
    j = multi_seed_jitter_dip(dh[~is_big4])
    panels['non_big4_pooled'] = {
        'n': int((~is_big4).sum()),
        'raw': {'dip': raw_d, 'p': raw_p},
        'jittered': j,
    }
    print(f' raw: dip={raw_d:.5f}, p={_fmt_p(raw_p)}')
    print(f' jitter: p_median={j["p_median"]:.4g}, '
          f'reject@.05 in {j["reject_at_05_count"]}/5 seeds')
    results['raw_vs_jittered_dip'] = panels

    # ---- B. Integer-histogram valley analysis ----
    print('\n[B] Integer-histogram valley analysis (bins 0..20)')
    valleys = {}
    valleys['big4_pooled'] = integer_histogram_valleys(dh[is_big4])
    print(f' Big-4 pooled: {len(valleys["big4_pooled"]["valleys"])} valleys')
    for v in valleys['big4_pooled']['valleys']:
        print(f' bin {v["bin_center"]:.1f}: count={v["count"]}, '
              f'depth_rel={v["depth_rel"]:.3f}')
    for f in BIG4:
        mask = firms_raw == f
        if mask.sum() == 0:
            continue
        valleys[ALIAS[f]] = integer_histogram_valleys(dh[mask])
        print(f' {ALIAS[f]}: '
              f'{len(valleys[ALIAS[f]]["valleys"])} valleys')
        for v in valleys[ALIAS[f]]['valleys']:
            print(f' bin {v["bin_center"]:.1f}: count={v["count"]}, '
                  f'depth_rel={v["depth_rel"]:.3f}')
    valleys['non_big4_pooled'] = integer_histogram_valleys(dh[~is_big4])
    print(f' Non-Big-4 pooled: '
          f'{len(valleys["non_big4_pooled"]["valleys"])} valleys')
    for v in valleys['non_big4_pooled']['valleys']:
        print(f' bin {v["bin_center"]:.1f}: count={v["count"]}, '
              f'depth_rel={v["depth_rel"]:.3f}')
    results['integer_histogram_valleys'] = valleys

    # ---- C. Firm-residualized dip on dHash, signature level ----
    print('\n[C] Firm-residualized dHash dip (signature level)')
    # Non-Big-4 firms get distinct 'M:<name>' labels so residualization
    # centers each small/mid firm on its own mean, not one pooled mean.
    firm_labels = np.array([
        ALIAS[f] if f in ALIAS else f'M:{f}'
        for f in firms_raw
    ])
    # Big-4 only residualized over A/B/C/D
    dh_resid_big4 = firm_residualized(dh[is_big4], firm_labels[is_big4])
    raw_d, raw_p = dip(dh[is_big4])
    res_d, res_p = dip(dh_resid_big4)
    print(f' Big-4 raw: dip={raw_d:.5f}, p={_fmt_p(raw_p)}')
    print(f' Big-4 residualized: dip={res_d:.5f}, p={_fmt_p(res_p)}')
    # Also non-Big-4 residualized over their firms
    dh_resid_nbig4 = firm_residualized(dh[~is_big4], firm_labels[~is_big4])
    raw_d_n, raw_p_n = dip(dh[~is_big4])
    res_d_n, res_p_n = dip(dh_resid_nbig4)
    print(f' Non-Big-4 raw: dip={raw_d_n:.5f}, p={_fmt_p(raw_p_n)}')
    print(f' Non-Big-4 residualized: dip={res_d_n:.5f}, p={_fmt_p(res_p_n)}')
    results['firm_residualized_dh_dip'] = {
        'big4': {
            'raw': {'dip': raw_d, 'p': raw_p},
            'firm_residualized': {'dip': res_d, 'p': res_p},
        },
        'non_big4': {
            'raw': {'dip': raw_d_n, 'p': raw_p_n},
            'firm_residualized': {'dip': res_d_n, 'p': res_p_n},
        },
        'note': ('Residualization subtracts each firm mean dh and adds '
                 'back the grand mean. If residual dip rejects, there is '
                 'genuine within-firm dh multimodality independent of '
                 'between-firm mean shifts. If residual fails to reject, '
                 'all dh "multimodality" was between-firm composition.'),
    }

    # ---- D. Pair-coincidence rate ----
    print('\n[D] Pair-coincidence rate (max-cos pair vs min-dh pair)')
    # The pair-ID columns are optional in the schema; a missing column
    # raises OperationalError, which is recorded instead of crashing.
    try:
        pc = pair_coincidence_rate()
        if pc['same_pair_rate'] is not None:
            print(f' n_with_both: {pc["n_with_both_pair_ids"]:,}, '
                  f'same-pair rate: {pc["same_pair_rate"]:.4f}')
        else:
            print(' Pair IDs not stored in signatures table (skipped)')
        results['pair_coincidence'] = pc
    except sqlite3.OperationalError as e:
        print(f' SQL error (pair_id columns may not exist): {e}')
        results['pair_coincidence'] = {
            'error': str(e),
            'note': ('signatures table lacks max_cosine_pair_id / '
                     'min_dhash_pair_id columns; analysis skipped'),
        }

    # ensure_ascii=False keeps the CJK firm names readable in the JSON.
    json_path = OUT / 'dhash_discrete_results.json'
    json_path.write_text(json.dumps(results, indent=2, ensure_ascii=False),
                         encoding='utf-8')
    print(f'\n[json] {json_path}')

    # ---- Report markdown ----
    md = ['# dHash Discrete-Value Robustness Diagnostics (Script 39d)',
          '', f'Generated: {results["meta"]["timestamp"]}',
          f'Bootstrap replicates: {N_BOOT}; jitter seeds: {JITTER_SEEDS}',
          '',
          '## A. Raw vs jittered dHash dip (signature level)',
          '',
          ('dHash is integer-valued in [0, 64]. A raw dip test on '
           'integer mass points may reject unimodality due to discrete '
           'spikes rather than a continuous bimodal density. We add '
           'uniform jitter in [-0.5, +0.5] over 5 seeds and re-test.'),
          '',
          '| Scope | n | raw dip | raw p | jitter p median | jitter reject@.05 / 5 seeds |',
          '|---|---|---|---|---|---|']
    # Table rows follow the same fixed scope order as the console output.
    for key, label in [('big4_pooled', 'Big-4 pooled')] + \
                      [(ALIAS[f], ALIAS[f]) for f in BIG4] + \
                      [('non_big4_pooled', 'Non-Big-4 pooled')]:
        if key in panels:
            p = panels[key]
            md.append(f'| {label} | {p["n"]:,} | '
                      f'{p["raw"]["dip"]:.5f} | '
                      f'{_fmt_p(p["raw"]["p"])} | '
                      f'{p["jittered"]["p_median"]:.4g} | '
                      f'{p["jittered"]["reject_at_05_count"]}/5 |')
    md += ['',
           '**Interpretation.** If jittered dip ceases to reject in all '
           'panels, the raw-data rejection was driven by integer ties '
           'rather than a continuous bimodal density. Codex round-29 '
           'observed this pattern; this script confirms with multi-seed '
           'robustness.',
           '',
           '## B. Integer-histogram valley locations (bins 0..20)',
           '',
           ('For each scope, list bins where count is strictly less '
            'than both neighbours, with relative depth '
            '(min(neighbour) - bin) / min(neighbour). A genuine '
            'antimode would show a deep, stable valley; integer-noise '
            'valleys are shallow and inconsistent across firms.'),
           '']
    for key, label in [('big4_pooled', 'Big-4 pooled')] + \
                      [(ALIAS[f], ALIAS[f]) for f in BIG4] + \
                      [('non_big4_pooled', 'Non-Big-4 pooled')]:
        if key in valleys:
            v_list = valleys[key]['valleys']
            if not v_list:
                md.append(f'- **{label}**: no integer-histogram valleys '
                          f'in 0..20')
            else:
                desc = ', '.join(
                    f'dh={v["bin_center"]:.0f} (depth_rel={v["depth_rel"]:.3f})'
                    for v in v_list)
                md.append(f'- **{label}**: {desc}')
    md += ['',
           '## C. Firm-residualized dHash dip',
           '',
           ('Subtract each firm mean dHash; add back grand mean. If '
            'residual rejects, within-firm multimodality is genuine. '
            'If residual fails to reject, all dh "multimodality" was '
            'between-firm composition.'),
           '',
           '| Scope | raw dip | raw p | residualized dip | residualized p |',
           '|---|---|---|---|---|']
    fr = results['firm_residualized_dh_dip']
    md += [f'| Big-4 | {fr["big4"]["raw"]["dip"]:.5f} | '
           f'{_fmt_p(fr["big4"]["raw"]["p"])} | '
           f'{fr["big4"]["firm_residualized"]["dip"]:.5f} | '
           f'{_fmt_p(fr["big4"]["firm_residualized"]["p"])} |',
           f'| Non-Big-4 | {fr["non_big4"]["raw"]["dip"]:.5f} | '
           f'{_fmt_p(fr["non_big4"]["raw"]["p"])} | '
           f'{fr["non_big4"]["firm_residualized"]["dip"]:.5f} | '
           f'{_fmt_p(fr["non_big4"]["firm_residualized"]["p"])} |']
    md += ['',
           '## D. Max-cos pair vs min-dh pair coincidence',
           '']
    pc = results.get('pair_coincidence', {})
    # Section D has three shapes: rate present, rate None, or SQL error.
    if 'same_pair_rate' in pc and pc['same_pair_rate'] is not None:
        md += [f'- n_signatures with both pair IDs: '
               f'{pc["n_with_both_pair_ids"]:,}',
               f'- same-pair rate: {pc["same_pair_rate"]:.4f} '
               f'({pc["n_same_pair"]:,} of '
               f'{pc["n_with_both_pair_ids"]:,})',
               '',
               ('A high rate (>0.8) supports a single-pair regime '
                'descriptor language (cos and dh attached to the same '
                'partner). A low rate indicates the two descriptors '
                'attach to different partners and should be discussed '
                'as parallel-but-different evidence.')]
    elif 'error' in pc:
        md += [f'- column not present in DB: {pc["error"]}',
               ('- note: schema-dependent; pair IDs not currently stored '
                'in signatures table.')]
    md.append('')
    md_path = OUT / 'dhash_discrete_report.md'
    md_path.write_text('\n'.join(md), encoding='utf-8')
    print(f'[md ] {md_path}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: run the full 39d diagnostic suite.
    main()
|
||||
Reference in New Issue
Block a user