Add Paper A (IEEE TAI) complete draft with Firm A-calibrated dual-method classification
Paper draft includes all sections (Abstract through Conclusion), 36 references, and supporting scripts. Key methodology: Cosine similarity + dHash dual-method verification with thresholds calibrated against known-replication firm (Firm A). Includes: - 8 section markdown files (paper_a_*.md) - Ablation study script (ResNet-50 vs VGG-16 vs EfficientNet-B0) - Recalibrated classification script (84,386 PDFs, 5-tier system) - Figure generation and Word export scripts - Citation renumbering script ([1]-[36]) - Signature analysis pipeline (12 steps) - YOLO extraction scripts Three rounds of AI review completed (GPT-5.4, Claude Opus 4.6, Gemini 3 Pro). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,493 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Ablation Study: Backbone Comparison for Signature Feature Extraction
|
||||
====================================================================
|
||||
Compares ResNet-50 vs VGG-16 vs EfficientNet-B0 on:
|
||||
1. Feature extraction speed
|
||||
2. Intra/Inter class cosine similarity separation (Cohen's d)
|
||||
3. KDE crossover point
|
||||
4. Firm A (known replication) distribution
|
||||
|
||||
Usage:
|
||||
python ablation_backbone_comparison.py # Run all backbones
|
||||
python ablation_backbone_comparison.py --extract # Feature extraction only
|
||||
python ablation_backbone_comparison.py --analyze # Analysis only (features must exist)
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torchvision.models as models
|
||||
import torchvision.transforms as transforms
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
import numpy as np
|
||||
import sqlite3
|
||||
import time
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
from tqdm import tqdm
|
||||
import warnings
|
||||
warnings.filterwarnings('ignore')
|
||||
|
||||
# === Configuration ===
# Input/output locations on the external processing volume.
IMAGES_DIR = Path("/Volumes/NV2/PDF-Processing/yolo-signatures/images")
FEATURES_DIR = Path("/Volumes/NV2/PDF-Processing/signature-analysis/features")
DB_PATH = Path("/Volumes/NV2/PDF-Processing/signature-analysis/signature_analysis.db")
OUTPUT_DIR = Path("/Volumes/NV2/PDF-Processing/signature-analysis/ablation")
# One image filename per line; row order defines feature-matrix row order.
FILENAMES_PATH = FEATURES_DIR / "signature_filenames.txt"

BATCH_SIZE = 64
NUM_WORKERS = 4
# Device preference: Apple Metal (MPS) first, then CUDA, then CPU fallback.
DEVICE = torch.device("mps" if torch.backends.mps.is_available() else
                      "cuda" if torch.cuda.is_available() else "cpu")

# Sampling for analysis
INTER_CLASS_SAMPLE_SIZE = 500_000   # random cross-accountant pairs to draw
INTRA_CLASS_MIN_SIGNATURES = 3      # skip accountants with fewer signatures
RANDOM_SEED = 42                    # fixed seed for reproducible sampling

# Known replication firm (Deloitte Taiwan = 勤業眾信)
FIRM_A_NAME = "勤業眾信聯合"

# Backbone registry: pretrained-model factory, raw feature dimensionality
# (after the classifier head is removed), and a display label.
BACKBONES = {
    "resnet50": {
        "model_fn": lambda: models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2),
        "feature_dim": 2048,
        "description": "ResNet-50 (ImageNet1K_V2)",
    },
    "vgg16": {
        "model_fn": lambda: models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1),
        "feature_dim": 4096,
        "description": "VGG-16 (ImageNet1K_V1)",
    },
    "efficientnet_b0": {
        "model_fn": lambda: models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1),
        "feature_dim": 1280,
        "description": "EfficientNet-B0 (ImageNet1K_V1)",
    },
}
||||
class SignatureDataset(Dataset):
    """Dataset yielding (image, filename) pairs for signature crops.

    Images are loaded with OpenCV, converted to RGB, letterboxed onto a
    white 224x224 canvas, then passed through `transform` if given.
    Unreadable files are replaced by a blank white canvas so a bad image
    never aborts a batch.
    """

    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        import cv2
        path = self.image_paths[idx]
        raw = cv2.imread(str(path))
        if raw is None:
            # Unreadable / corrupt file: substitute an all-white placeholder.
            image = np.full((224, 224, 3), 255, dtype=np.uint8)
        else:
            rgb = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB)
            image = self._resize_with_padding(rgb, 224, 224)
        if self.transform:
            image = self.transform(image)
        return image, str(path.name)

    @staticmethod
    def _resize_with_padding(img, target_w, target_h):
        # Aspect-preserving resize, centered on a white canvas.
        import cv2
        h, w = img.shape[:2]
        ratio = min(target_w / w, target_h / h)
        rw, rh = int(w * ratio), int(h * ratio)
        shrunk = cv2.resize(img, (rw, rh), interpolation=cv2.INTER_AREA)
        canvas = np.full((target_h, target_w, 3), 255, dtype=np.uint8)
        left = (target_w - rw) // 2
        top = (target_h - rh) // 2
        canvas[top:top + rh, left:left + rw] = shrunk
        return canvas
||||
def build_feature_extractor(backbone_name):
    """Build a feature extractor for the given backbone.

    Loads the pretrained model from BACKBONES and strips its classifier so
    the forward pass emits raw feature vectors, then moves it to DEVICE in
    eval mode.

    Args:
        backbone_name: One of the keys of BACKBONES.

    Returns:
        An `nn.Module` producing per-image feature tensors.
    """
    net = BACKBONES[backbone_name]["model_fn"]()

    if backbone_name == "vgg16":
        # Keep the conv stack, pooling, and all classifier layers except the
        # final Linear, so the output is the 4096-dim penultimate activation.
        trimmed_classifier = nn.Sequential(*list(net.classifier.children())[:-1])
        net = nn.Sequential(
            net.features,
            net.avgpool,
            nn.Flatten(1),  # same as torch.flatten(x, 1) before the classifier
            trimmed_classifier,
        )
    elif backbone_name == "resnet50":
        # Drop the final fc layer; global avgpool output is 2048-dim.
        net = nn.Sequential(*list(net.children())[:-1])
    elif backbone_name == "efficientnet_b0":
        # Replace the classifier head with identity; output is 1280-dim.
        net.classifier = nn.Identity()

    net = net.to(DEVICE)
    net.eval()
    return net
||||
def extract_features(backbone_name):
    """Extract features for all signatures using the given backbone.

    Reads the filename list from FILENAMES_PATH, runs batched inference,
    L2-normalizes each feature vector, and caches the (N, D) matrix as
    features_<backbone>.npy under OUTPUT_DIR. If the cache file already
    exists it is loaded and returned without re-extracting.

    Args:
        backbone_name: One of the keys of BACKBONES.

    Returns:
        np.ndarray of shape (num_images, feature_dim), rows L2-normalized.
    """
    print(f"\n{'='*60}")
    print(f"Extracting features: {BACKBONES[backbone_name]['description']}")
    print(f"{'='*60}")

    output_path = OUTPUT_DIR / f"features_{backbone_name}.npy"
    if output_path.exists():
        print(f" Features already exist: {output_path}")
        print(f" Skipping extraction. Delete file to re-extract.")
        return np.load(output_path)

    # Filename order defines the feature-matrix row order.
    with open(FILENAMES_PATH) as handle:
        filenames = [row.strip() for row in handle if row.strip()]
    print(f" Images: {len(filenames):,}")

    paths = [IMAGES_DIR / name for name in filenames]

    model = build_feature_extractor(backbone_name)

    # Standard ImageNet normalization (matches pretrained-weight statistics).
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    loader = DataLoader(
        SignatureDataset(paths, transform=preprocess),
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )

    chunks = []
    t0 = time.time()
    with torch.no_grad():
        for batch, _ in tqdm(loader, desc=f" {backbone_name}"):
            out = model(batch.to(DEVICE))
            out = out.view(out.size(0), -1)                  # flatten to (B, D)
            out = nn.functional.normalize(out, p=2, dim=1)   # unit-length rows
            chunks.append(out.cpu().numpy())

    elapsed = time.time() - t0
    matrix = np.vstack(chunks)

    print(f" Feature shape: {matrix.shape}")
    print(f" Time: {elapsed:.1f}s ({elapsed/60:.1f}min)")
    print(f" Speed: {len(filenames)/elapsed:.1f} images/sec")

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    np.save(output_path, matrix)
    print(f" Saved: {output_path} ({matrix.nbytes / 1e9:.2f} GB)")

    return matrix
||||
def load_accountant_data():
    """Load accountant assignments and firm info from DB.

    Returns:
        Tuple of:
        - filename_to_acct: dict mapping image filename -> assigned
          accountant name (only signatures with a stored feature vector).
        - acct_firm: dict mapping accountant name -> firm name.

    Raises:
        sqlite3.Error: On connection or query failure.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()

        cur.execute('''
            SELECT image_filename, assigned_accountant
            FROM signatures
            WHERE feature_vector IS NOT NULL
            ORDER BY signature_id
        ''')
        filename_to_acct = {r[0]: r[1] for r in cur.fetchall()}

        cur.execute('SELECT name, firm FROM accountants')
        acct_firm = {r[0]: r[1] for r in cur.fetchall()}
    finally:
        # Close even when a query raises, so the connection never leaks.
        conn.close()

    return filename_to_acct, acct_firm
||||
def analyze_backbone(backbone_name, features, filenames, filename_to_acct, acct_firm):
    """Compute intra/inter class stats for a backbone's features.

    Args:
        backbone_name: Key into BACKBONES (used for labels in output).
        features: (N, D) feature matrix, row-aligned with `filenames`;
            rows are L2-normalized upstream, so dot products are cosine
            similarities.
        filenames: Image filenames in feature-row order.
        filename_to_acct: Mapping image filename -> assigned accountant.
        acct_firm: Mapping accountant name -> firm name.

    Returns:
        dict with distribution statistics for intra-class, inter-class and
        Firm A similarities, plus Cohen's d and the KDE crossover point.
    """
    print(f"\n{'='*60}")
    print(f"Analyzing: {BACKBONES[backbone_name]['description']}")
    print(f"{'='*60}")

    # Fixed seed: the exact sequence of np.random calls below determines
    # which inter-class pairs are sampled, keeping runs reproducible.
    np.random.seed(RANDOM_SEED)

    # Keep only signatures that have a known accountant assignment.
    accountants = []
    valid_indices = []
    for i, fn in enumerate(filenames):
        acct = filename_to_acct.get(fn)
        if acct:
            accountants.append(acct)
            valid_indices.append(i)

    valid_features = features[valid_indices]
    print(f" Valid signatures with accountant: {len(valid_indices):,}")

    # Group row indices (into valid_features) by accountant.
    acct_groups = defaultdict(list)
    for i, acct in enumerate(accountants):
        acct_groups[acct].append(i)

    # --- Intra-class ---
    # All pairwise similarities within each accountant's own signatures.
    print(" Computing intra-class similarities...")
    intra_sims = []
    for acct, indices in tqdm(acct_groups.items(), desc=" Intra-class", leave=False):
        if len(indices) < INTRA_CLASS_MIN_SIGNATURES:
            continue
        vecs = valid_features[indices]
        sim_matrix = vecs @ vecs.T
        n = len(indices)
        # Strict upper triangle: each unordered pair once, no self-pairs.
        triu_idx = np.triu_indices(n, k=1)
        intra_sims.extend(sim_matrix[triu_idx].tolist())

    intra_sims = np.array(intra_sims)
    print(f" Intra-class pairs: {len(intra_sims):,}")

    # --- Inter-class ---
    # Monte Carlo sample of cross-accountant pairs; the full cross product
    # would be far too large to enumerate.
    print(" Computing inter-class similarities...")
    all_acct_list = list(acct_groups.keys())
    inter_sims = []
    for _ in range(INTER_CLASS_SAMPLE_SIZE):
        # Two distinct accountants, then one random signature from each.
        a1, a2 = np.random.choice(len(all_acct_list), 2, replace=False)
        i1 = np.random.choice(acct_groups[all_acct_list[a1]])
        i2 = np.random.choice(acct_groups[all_acct_list[a2]])
        sim = float(valid_features[i1] @ valid_features[i2])
        inter_sims.append(sim)
    inter_sims = np.array(inter_sims)
    print(f" Inter-class pairs: {len(inter_sims):,}")

    # --- Firm A (known replication) ---
    # Within-accountant similarities restricted to the known-replication firm.
    print(f" Computing Firm A ({FIRM_A_NAME}) distribution...")
    firm_a_accts = [acct for acct in acct_groups if acct_firm.get(acct) == FIRM_A_NAME]
    firm_a_sims = []
    for acct in firm_a_accts:
        indices = acct_groups[acct]
        if len(indices) < 2:
            continue
        vecs = valid_features[indices]
        sim_matrix = vecs @ vecs.T
        n = len(indices)
        triu_idx = np.triu_indices(n, k=1)
        firm_a_sims.extend(sim_matrix[triu_idx].tolist())
    firm_a_sims = np.array(firm_a_sims) if firm_a_sims else np.array([])
    print(f" Firm A accountants: {len(firm_a_accts)}, pairs: {len(firm_a_sims):,}")

    # --- Statistics ---
    def dist_stats(arr, name):
        # Summary statistics of a similarity distribution.
        return {
            "name": name,
            "n": len(arr),
            "mean": float(np.mean(arr)),
            "std": float(np.std(arr)),
            "median": float(np.median(arr)),
            "p1": float(np.percentile(arr, 1)),
            "p5": float(np.percentile(arr, 5)),
            "p25": float(np.percentile(arr, 25)),
            "p75": float(np.percentile(arr, 75)),
            "p95": float(np.percentile(arr, 95)),
            "p99": float(np.percentile(arr, 99)),
            "min": float(np.min(arr)),
            "max": float(np.max(arr)),
        }

    intra_stats = dist_stats(intra_sims, "intra")
    inter_stats = dist_stats(inter_sims, "inter")
    firm_a_stats = dist_stats(firm_a_sims, "firm_a") if len(firm_a_sims) > 0 else None

    # Cohen's d: standardized separation between intra and inter means
    # (pooled-SD form); 0 when the pooled SD degenerates.
    pooled_std = np.sqrt((intra_stats["std"]**2 + inter_stats["std"]**2) / 2)
    cohens_d = (intra_stats["mean"] - inter_stats["mean"]) / pooled_std if pooled_std > 0 else 0

    # KDE crossover: where the intra and inter density estimates intersect;
    # restricted to (0.5, 1.0), taking the last crossing as the boundary.
    try:
        from scipy.stats import gaussian_kde
        x_grid = np.linspace(0, 1, 1000)
        kde_intra = gaussian_kde(intra_sims)
        kde_inter = gaussian_kde(inter_sims)
        diff = kde_intra(x_grid) - kde_inter(x_grid)
        sign_changes = np.where(np.diff(np.sign(diff)))[0]
        crossovers = x_grid[sign_changes]
        valid_crossovers = crossovers[(crossovers > 0.5) & (crossovers < 1.0)]
        kde_crossover = float(valid_crossovers[-1]) if len(valid_crossovers) > 0 else None
    except Exception as e:
        # Best-effort metric: e.g. gaussian_kde fails on degenerate samples.
        print(f" KDE crossover computation failed: {e}")
        kde_crossover = None

    results = {
        "backbone": backbone_name,
        "description": BACKBONES[backbone_name]["description"],
        "feature_dim": BACKBONES[backbone_name]["feature_dim"],
        "intra": intra_stats,
        "inter": inter_stats,
        "firm_a": firm_a_stats,
        "cohens_d": float(cohens_d),
        "kde_crossover": kde_crossover,
    }

    # Print summary
    print(f"\n --- {backbone_name} Summary ---")
    print(f" Feature dim: {results['feature_dim']}")
    print(f" Intra mean: {intra_stats['mean']:.4f} +/- {intra_stats['std']:.4f}")
    print(f" Inter mean: {inter_stats['mean']:.4f} +/- {inter_stats['std']:.4f}")
    print(f" Cohen's d: {cohens_d:.4f}")
    print(f" KDE crossover: {kde_crossover}")
    if firm_a_stats:
        print(f" Firm A mean: {firm_a_stats['mean']:.4f} +/- {firm_a_stats['std']:.4f}")
        print(f" Firm A 1st pct: {firm_a_stats['p1']:.4f}")

    return results
||||
def generate_comparison_table(all_results):
    """Generate a markdown comparison table.

    Prints the table, writes a markdown report and a JSON dump of the raw
    results to OUTPUT_DIR.

    Args:
        all_results: List of result dicts as returned by analyze_backbone().

    Returns:
        The markdown table as a string.
    """
    print(f"\n{'='*60}")
    print("COMPARISON TABLE")
    print(f"{'='*60}\n")

    results_by_name = {r["backbone"]: r for r in all_results}

    def get_val(backbone, key, sub=None):
        # Fetch a metric from one backbone's results. `sub` selects a nested
        # section ("intra"/"inter"/"firm_a"). Returns None when the backbone
        # was skipped or the section is absent (e.g. firm_a is None).
        r = results_by_name.get(backbone)
        if not r:
            return None
        if sub:
            section = r.get(sub)
            if isinstance(section, dict):
                return section.get(key)
            return None
        return r.get(key)

    def fmt(val, fmt_str=".4f"):
        # Render a table cell; "---" marks missing values, ints print as-is.
        if val is None:
            return "---"
        if isinstance(val, int):
            return str(val)
        return f"{val:{fmt_str}}"

    header = "| Metric | ResNet-50 | VGG-16 | EfficientNet-B0 |"
    sep = "|--------|-----------|--------|-----------------|"

    rows = [
        f"| Feature dim | {fmt(get_val('resnet50','feature_dim'),'')} | {fmt(get_val('vgg16','feature_dim'),'')} | {fmt(get_val('efficientnet_b0','feature_dim'),'')} |",
        f"| Intra mean | {fmt(get_val('resnet50','mean','intra'))} | {fmt(get_val('vgg16','mean','intra'))} | {fmt(get_val('efficientnet_b0','mean','intra'))} |",
        f"| Intra std | {fmt(get_val('resnet50','std','intra'))} | {fmt(get_val('vgg16','std','intra'))} | {fmt(get_val('efficientnet_b0','std','intra'))} |",
        f"| Inter mean | {fmt(get_val('resnet50','mean','inter'))} | {fmt(get_val('vgg16','mean','inter'))} | {fmt(get_val('efficientnet_b0','mean','inter'))} |",
        f"| Inter std | {fmt(get_val('resnet50','std','inter'))} | {fmt(get_val('vgg16','std','inter'))} | {fmt(get_val('efficientnet_b0','std','inter'))} |",
        f"| **Cohen's d** | **{fmt(get_val('resnet50','cohens_d'))}** | **{fmt(get_val('vgg16','cohens_d'))}** | **{fmt(get_val('efficientnet_b0','cohens_d'))}** |",
        f"| KDE crossover | {fmt(get_val('resnet50','kde_crossover'))} | {fmt(get_val('vgg16','kde_crossover'))} | {fmt(get_val('efficientnet_b0','kde_crossover'))} |",
        f"| Firm A mean | {fmt(get_val('resnet50','mean','firm_a'))} | {fmt(get_val('vgg16','mean','firm_a'))} | {fmt(get_val('efficientnet_b0','mean','firm_a'))} |",
        f"| Firm A 1st pct | {fmt(get_val('resnet50','p1','firm_a'))} | {fmt(get_val('vgg16','p1','firm_a'))} | {fmt(get_val('efficientnet_b0','p1','firm_a'))} |",
    ]

    table = "\n".join([header, sep] + rows)
    print(table)

    # Save report. Explicit UTF-8: the JSON dump uses ensure_ascii=False and
    # the data contains non-ASCII firm/accountant names, which would fail on
    # a non-UTF-8 platform default encoding.
    report_path = OUTPUT_DIR / "ablation_comparison.md"
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write("# Ablation Study: Backbone Comparison\n\n")
        f.write(f"Date: {time.strftime('%Y-%m-%d %H:%M')}\n\n")
        f.write("## Comparison Table\n\n")
        f.write(table + "\n\n")
        f.write("## Interpretation\n\n")
        f.write("- **Cohen's d**: Higher = better separation between same-CPA and different-CPA signatures\n")
        f.write("- **KDE crossover**: The Bayes-optimal decision boundary (higher = easier to classify)\n")
        f.write("- **Firm A**: Known replication firm; expect very high mean similarity\n")
        f.write("- **Firm A 1st percentile**: Lower bound of known-replication similarity\n")

    json_path = OUTPUT_DIR / "ablation_results.json"
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(all_results, f, indent=2, ensure_ascii=False)

    print(f"\n Report saved: {report_path}")
    print(f" Raw data saved: {json_path}")

    return table
||||
def main():
    """CLI entry point: run feature extraction and/or analysis phases."""
    parser = argparse.ArgumentParser(description="Ablation: backbone comparison")
    parser.add_argument("--extract", action="store_true", help="Feature extraction only")
    parser.add_argument("--analyze", action="store_true", help="Analysis only")
    parser.add_argument("--backbone", type=str, help="Run single backbone (resnet50/vgg16/efficientnet_b0)")
    args = parser.parse_args()

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Filename order defines feature-matrix row order everywhere downstream.
    with open(FILENAMES_PATH) as handle:
        filenames = [entry.strip() for entry in handle if entry.strip()]

    selected = [args.backbone] if args.backbone else list(BACKBONES.keys())

    if not args.analyze:
        # === Phase 1: Feature Extraction ===
        print("\n" + "=" * 60)
        print("PHASE 1: FEATURE EXTRACTION")
        print("=" * 60)

        # For ResNet-50, reuse previously extracted features when available
        # instead of re-running inference.
        ablation_resnet = OUTPUT_DIR / "features_resnet50.npy"
        existing_resnet = FEATURES_DIR / "signature_features.npy"
        if ("resnet50" in selected
                and not ablation_resnet.exists()
                and existing_resnet.exists()):
            print(f"\nCopying existing ResNet-50 features...")
            import shutil
            ablation_resnet.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(existing_resnet, ablation_resnet)
            print(f" Copied: {ablation_resnet}")

        for backbone in selected:
            if backbone == "resnet50" and ablation_resnet.exists():
                continue
            extract_features(backbone)

        if args.extract:
            print("\nFeature extraction complete. Run with --analyze to compute statistics.")
            return

    # === Phase 2: Analysis ===
    print("\n" + "=" * 60)
    print("PHASE 2: ANALYSIS")
    print("=" * 60)

    filename_to_acct, acct_firm = load_accountant_data()

    collected = []
    for backbone in selected:
        feat_path = OUTPUT_DIR / f"features_{backbone}.npy"
        if not feat_path.exists():
            print(f"\n WARNING: {feat_path} not found, skipping {backbone}")
            continue
        matrix = np.load(feat_path)
        collected.append(
            analyze_backbone(backbone, matrix, filenames, filename_to_acct, acct_firm))

    if len(collected) > 1:
        generate_comparison_table(collected)
    elif len(collected) == 1:
        print(f"\nOnly one backbone analyzed. Run all three for comparison table.")

    print("\nDone!")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user