Add Paper A drafts and docx export script

- export_paper_to_docx.py: build script combining paper_a_*.md sections into docx
- Paper_A_IEEE_TAI_Draft_20260403.docx: intermediate draft before AI review rounds
- Paper_A_IEEE_TAI_Draft_v2.docx: current draft after 3 AI reviews (GPT-5.4, Opus 4.6, Gemini 3 Pro) and Firm A recalibration

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 21:34:31 +08:00
parent a261a22bd2
commit 158f63efb2
3 changed files with 575 additions and 0 deletions
@@ -0,0 +1,575 @@
#!/usr/bin/env python3
"""
Export Paper A draft to a single Word document (.docx)
with IEEE-style formatting, embedded figures, and tables.
"""
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from pathlib import Path
import re
# Paths
PAPER_DIR = Path("/Volumes/NV2/pdf_recognize")
FIGURE_DIR = Path("/Volumes/NV2/PDF-Processing/signature-analysis/paper_figures")
OUTPUT_PATH = PAPER_DIR / "Paper_A_IEEE_TAI_Draft.docx"

def add_heading(doc, text, level=1):
    h = doc.add_heading(text, level=level)
    for run in h.runs:
        run.font.color.rgb = RGBColor(0, 0, 0)
    return h

def add_para(doc, text, bold=False, italic=False, font_size=10, alignment=None, space_after=6):
    p = doc.add_paragraph()
    # Compare against None: WD_ALIGN_PARAGRAPH.LEFT == 0 is falsy, so a
    # bare `if alignment:` would silently drop explicit left alignment.
    if alignment is not None:
        p.alignment = alignment
    p.paragraph_format.space_after = Pt(space_after)
    p.paragraph_format.space_before = Pt(0)
    run = p.add_run(text)
    run.font.size = Pt(font_size)
    run.font.name = 'Times New Roman'
    run.bold = bold
    run.italic = italic
    return p

def add_table(doc, headers, rows, caption=None):
    if caption:
        add_para(doc, caption, bold=True, font_size=9, alignment=WD_ALIGN_PARAGRAPH.CENTER, space_after=4)
    table = doc.add_table(rows=1 + len(rows), cols=len(headers))
    table.style = 'Table Grid'
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    # Header
    for i, h in enumerate(headers):
        cell = table.rows[0].cells[i]
        cell.text = h
        for p in cell.paragraphs:
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            for run in p.runs:
                run.bold = True
                run.font.size = Pt(8)
                run.font.name = 'Times New Roman'
    # Data
    for r_idx, row in enumerate(rows):
        for c_idx, val in enumerate(row):
            cell = table.rows[r_idx + 1].cells[c_idx]
            cell.text = str(val)
            for p in cell.paragraphs:
                p.alignment = WD_ALIGN_PARAGRAPH.CENTER
                for run in p.runs:
                    run.font.size = Pt(8)
                    run.font.name = 'Times New Roman'
    doc.add_paragraph()  # spacing
    return table

def add_figure(doc, image_path, caption, width=5.0):
    if Path(image_path).exists():
        p = doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        run = p.add_run()
        run.add_picture(str(image_path), width=Inches(width))
    else:
        # Fail loudly instead of silently dropping the figure; the caption
        # below still goes in so the gap is visible in the draft.
        print(f"WARNING: figure not found: {image_path}")
    cap = doc.add_paragraph()
    cap.alignment = WD_ALIGN_PARAGRAPH.CENTER
    cap.paragraph_format.space_after = Pt(8)
    run = cap.add_run(caption)
    run.font.size = Pt(9)
    run.font.name = 'Times New Roman'
    run.italic = True

def build_document():
    doc = Document()

    # Set default font
    style = doc.styles['Normal']
    font = style.font
    font.name = 'Times New Roman'
    font.size = Pt(10)

    # ==================== TITLE ====================
    add_para(doc, "Automated Detection of Digitally Replicated Signatures\nin Large-Scale Financial Audit Reports",
        bold=True, font_size=16, alignment=WD_ALIGN_PARAGRAPH.CENTER, space_after=12)
    add_para(doc, "[Authors removed for double-blind review]",
        italic=True, font_size=10, alignment=WD_ALIGN_PARAGRAPH.CENTER, space_after=4)
    add_para(doc, "[Affiliations removed for double-blind review]",
        italic=True, font_size=10, alignment=WD_ALIGN_PARAGRAPH.CENTER, space_after=12)

    # ==================== ABSTRACT ====================
    add_heading(doc, "Abstract", level=1)
    abstract_text = (
        "Regulations in many jurisdictions require Certified Public Accountants (CPAs) to personally sign each audit report they certify. "
        "However, the digitization of financial reporting makes it trivial to reuse a scanned signature image across multiple reports, "
        "bypassing this requirement. Unlike signature forgery, where an impostor imitates another person's handwriting, signature replication "
        "involves a legitimate signer reusing a digital copy of their own genuine signature\u2014a practice that is virtually undetectable through "
        "manual inspection at scale. We present an end-to-end AI pipeline that automatically detects signature replication in financial audit reports. "
        "The pipeline employs a Vision-Language Model for signature page identification, YOLOv11 for signature region detection, and ResNet-50 for "
        "deep feature extraction, followed by a dual-method verification combining cosine similarity with perceptual hashing (pHash). This dual-method "
        "design distinguishes consistent handwriting style (high feature similarity but divergent perceptual hashes) from digital replication "
        "(convergent evidence across both methods), resolving an ambiguity that single-metric approaches cannot address. We apply this pipeline to "
        "90,282 audit reports filed by publicly listed companies in Taiwan over a decade (2013\u20132023), analyzing 182,328 signatures from 758 CPAs. "
        "Using a known-replication accounting firm as a calibration reference, we establish distribution-free detection thresholds validated against "
        "empirical ground truth. Our analysis reveals that cosine similarity alone overestimates replication rates by approximately 25-fold, "
        "underscoring the necessity of multi-method verification. To our knowledge, this is the largest-scale forensic analysis of signature "
        "authenticity in financial documents."
    )
    add_para(doc, abstract_text, font_size=9, space_after=8)

    # ==================== IMPACT STATEMENT ====================
    add_heading(doc, "Impact Statement", level=1)
    impact_text = (
        "Auditor signatures on financial reports are a key safeguard of corporate accountability. When Certified Public Accountants digitally "
        "copy and paste a single signature image across multiple reports instead of signing each one individually, this safeguard is undermined\u2014"
        "yet detecting such practices through manual inspection is infeasible at the scale of modern financial markets. We developed an artificial "
        "intelligence system that automatically extracts and analyzes signatures from over 90,000 audit reports spanning ten years of filings by "
        "publicly listed companies. By combining deep learning-based visual feature analysis with perceptual hashing, the system distinguishes "
        "genuinely handwritten signatures from digitally replicated ones. Our analysis reveals that signature replication practices vary substantially "
        "across accounting firms, with measurable differences between firms known to use digital replication and those that do not. This technology "
        "can be directly deployed by financial regulators to automate signature authenticity monitoring at national scale."
    )
    add_para(doc, impact_text, font_size=9, space_after=8)

    # ==================== I. INTRODUCTION ====================
    add_heading(doc, "I. Introduction", level=1)
    intro_paras = [
        "Financial audit reports serve as a critical mechanism for ensuring corporate accountability and investor protection. "
        "In Taiwan, the Certified Public Accountant Act (\u6703\u8a08\u5e2b\u6cd5 \u00a74) and the Financial Supervisory Commission\u2019s attestation regulations "
        "(\u67e5\u6838\u7c3d\u8b49\u6838\u6e96\u6e96\u5247 \u00a76) require that certifying CPAs affix their signature or seal (\u7c3d\u540d\u6216\u84cb\u7ae0) to each audit report [1]. "
        "While the law permits either a handwritten signature or a seal, the CPA\u2019s attestation on each report is intended to represent a deliberate, "
        "individual act of professional endorsement for that specific audit engagement [2].",
        "The digitization of financial reporting, however, has introduced a practice that challenges this intent. "
        "As audit reports are now routinely generated, transmitted, and archived as PDF documents, it is technically trivial for a CPA to digitally "
        "replicate a single scanned signature image and paste it across multiple reports. Although this practice may not violate the literal statutory "
        "requirement of \u201csignature or seal,\u201d it raises substantive concerns about audit quality: if a CPA\u2019s signature is applied identically across "
        "hundreds of reports without any variation, does it still represent meaningful attestation of individual professional judgment? "
        "Unlike traditional signature forgery, where a third party attempts to imitate another person\u2019s handwriting, signature replication involves "
        "the legitimate signer reusing a digital copy of their own genuine signature. This practice, while potentially widespread, is virtually "
        "undetectable through manual inspection at scale: regulatory agencies overseeing thousands of publicly listed companies cannot feasibly "
        "examine each signature for evidence of digital duplication.",
        "The distinction between signature replication and signature forgery is both conceptually and technically important. "
        "The extensive body of research on offline signature verification [3]\u2013[8] has focused almost exclusively on forgery detection\u2014determining "
        "whether a questioned signature was produced by its purported author or by an impostor. This framing presupposes that the central threat "
        "is identity fraud. In our context, identity is not in question; the CPA is indeed the legitimate signer. The question is whether the "
        "physical act of signing occurred for each individual report, or whether a single signing event was digitally propagated across many reports. "
        "This replication detection problem is, in one sense, simpler than forgery detection\u2014we need not model the variability of skilled forgers\u2014"
        "but it requires a different analytical framework, one focused on detecting abnormally high similarity across documents rather than "
        "distinguishing genuine from forged specimens.",
        "Despite the significance of this problem for audit quality and regulatory oversight, no prior work has addressed signature replication "
        "detection in financial documents at scale. Woodruff et al. [9] developed an automated pipeline for signature analysis in corporate filings "
        "for anti-money laundering investigations, but their work focused on author clustering (grouping signatures by signer identity) rather than "
        "detecting reuse of digital copies. Copy-move forgery detection methods [10], [11] address duplicated regions within or across images, but "
        "are designed for natural images and do not account for the specific characteristics of scanned document signatures, where legitimate visual "
        "similarity between a signer\u2019s authentic signatures is expected and must be distinguished from digital duplication. Research on near-duplicate "
        "image detection using perceptual hashing combined with deep learning [12], [13] provides relevant methodological foundations, but has not "
        "been applied to document forensics or signature analysis.",
        "In this paper, we present a fully automated, end-to-end pipeline for detecting digitally replicated CPA signatures in audit reports at scale. "
        "Our approach processes raw PDF documents through six sequential stages: (1) signature page identification using a Vision-Language Model (VLM), "
        "(2) signature region detection using a trained YOLOv11 object detector, (3) deep feature extraction via a pre-trained ResNet-50 convolutional "
        "neural network, (4) dual-method similarity verification combining cosine similarity of deep features with perceptual hash (pHash) distance, "
        "(5) distribution-free threshold calibration using a known-replication reference group, and (6) statistical classification with cross-method validation.",
        "The dual-method verification is central to our contribution. Cosine similarity of deep feature embeddings captures high-level visual style "
        "similarity\u2014it can identify signatures that share similar stroke patterns and spatial layouts\u2014but cannot distinguish between a CPA who signs "
        "consistently and one who reuses a digital copy. Perceptual hashing, by contrast, captures structural-level similarity that is sensitive to "
        "pixel-level correspondence. By requiring convergent evidence from both methods, we can differentiate style consistency (high cosine similarity "
        "but divergent pHash) from digital replication (high cosine similarity with convergent pHash), resolving an ambiguity that neither method can "
        "address alone.",
        "A distinctive feature of our approach is the use of a known-replication calibration group for threshold validation. Through domain expertise, "
        "we identified a major accounting firm (hereafter \u201cFirm A\u201d) whose signatures are known to be digitally replicated across all audit reports. "
        "This provides an empirical anchor for calibrating detection thresholds: any threshold that fails to classify Firm A\u2019s signatures as replicated "
        "is demonstrably too conservative, while the distributional characteristics of Firm A\u2019s signatures establish an upper bound on the similarity "
        "values achievable through replication in real-world scanned documents. This calibration strategy\u2014using a known-positive subpopulation to "
        "validate detection thresholds\u2014addresses a persistent challenge in document forensics, where ground truth labels are scarce.",
        "We apply this pipeline to 90,282 audit reports filed by publicly listed companies in Taiwan between 2013 and 2023, extracting and analyzing "
        "182,328 individual CPA signatures from 758 unique accountants. To our knowledge, this represents the largest-scale forensic analysis of "
        "signature authenticity in financial documents reported in the literature.",
    ]
    for para in intro_paras:
        add_para(doc, para)

    # Contributions
    add_para(doc, "The contributions of this paper are summarized as follows:", space_after=4)
    contributions = [
        "Problem formulation: We formally define the signature replication detection problem as distinct from signature forgery detection, "
        "and argue that it requires a different analytical framework focused on intra-signer similarity distributions rather than "
        "genuine-versus-forged classification.",
        "End-to-end pipeline: We present a fully automated pipeline that processes raw PDF audit reports through VLM-based page identification, "
        "YOLO-based signature detection, deep feature extraction, and dual-method similarity verification, requiring no manual intervention "
        "after initial model training.",
        "Dual-method verification: We demonstrate that combining deep feature cosine similarity with perceptual hashing resolves the fundamental "
        "ambiguity between style consistency and digital replication, supported by an ablation study comparing three feature extraction backbones.",
        "Calibration methodology: We introduce a threshold calibration approach using a known-replication reference group, providing empirical "
        "validation in a domain where labeled ground truth is scarce.",
        "Large-scale empirical analysis: We report findings from the analysis of over 90,000 audit reports spanning a decade, providing the "
        "first large-scale empirical evidence on signature replication practices in financial reporting.",
    ]
    for c in contributions:
        p = doc.add_paragraph(style='List Number')
        run = p.add_run(c)
        run.font.size = Pt(10)
        run.font.name = 'Times New Roman'
    add_para(doc, "The remainder of this paper is organized as follows. Section II reviews related work on signature verification, "
        "document forensics, and perceptual hashing. Section III describes the proposed methodology. Section IV presents experimental "
        "results including the ablation study and calibration group analysis. Section V discusses the implications and limitations of "
        "our findings. Section VI concludes with directions for future work.")

    # ==================== II. RELATED WORK ====================
    add_heading(doc, "II. Related Work", level=1)

    add_heading(doc, "A. Offline Signature Verification", level=2)
    add_para(doc, "Offline signature verification\u2014determining whether a static signature image is genuine or forged\u2014has been studied "
        "extensively using deep learning. Bromley et al. [3] introduced the Siamese neural network architecture for signature verification, "
        "establishing the pairwise comparison paradigm that remains dominant. Dey et al. [4] proposed SigNet, a convolutional Siamese network "
        "for writer-independent offline verification, demonstrating that deep features learned from signature images generalize across signers "
        "without per-writer retraining. Hadjadj et al. [5] addressed the practical constraint of limited reference samples, achieving competitive "
        "verification accuracy using only a single known genuine signature per writer. More recently, Li et al. [6] introduced TransOSV, "
        "the first Vision Transformer-based approach for offline signature verification, achieving state-of-the-art results. Tehsin et al. [7] "
        "evaluated distance metrics for triplet Siamese networks, finding that Manhattan distance outperformed cosine and Euclidean alternatives.")
    add_para(doc, "A common thread in this literature is the assumption that the primary threat is identity fraud: a forger attempting to produce "
        "a convincing imitation of another person\u2019s signature. Our work addresses a fundamentally different problem\u2014detecting whether the "
        "legitimate signer reused a digital copy of their own signature\u2014which requires analyzing intra-signer similarity distributions "
        "rather than modeling inter-signer discriminability.")
    add_para(doc, "Brimoh and Olisah [8] proposed a consensus-threshold approach that derives classification boundaries from known genuine "
        "reference pairs, the methodology most closely related to our calibration strategy. However, their method operates on standard "
        "verification benchmarks with laboratory-collected signatures, whereas our approach applies threshold calibration using a "
        "known-replication subpopulation identified through domain expertise in real-world regulatory documents.")

    add_heading(doc, "B. Document Forensics and Copy Detection", level=2)
    add_para(doc, "Copy-move forgery detection (CMFD) identifies duplicated regions within or across images, typically targeting manipulated "
        "photographs [10]. Abramova and B\u00f6hme [11] adapted block-based CMFD to scanned text documents, noting that standard methods perform "
        "poorly in this domain because legitimate character repetitions produce high similarity scores that confound duplicate detection.")
    add_para(doc, "Woodruff et al. [9] developed the work most closely related to ours: a fully automated pipeline for extracting and "
        "analyzing signatures from corporate filings in the context of anti-money laundering investigations. Their system uses connected "
        "component analysis for signature detection, GANs for noise removal, and Siamese networks for author clustering. While their "
        "pipeline shares our goal of large-scale automated signature analysis on real regulatory documents, their objective\u2014grouping "
        "signatures by authorship\u2014differs fundamentally from ours, which is detecting digital replication within a single author\u2019s "
        "signatures across documents.")
    add_para(doc, "In the domain of image copy detection, Pizzi et al. [13] proposed SSCD, a self-supervised descriptor using ResNet-50 "
        "with contrastive learning for large-scale copy detection on natural images. Their work demonstrates that pre-trained CNN features "
        "with cosine similarity provide a strong baseline for identifying near-duplicate images, supporting our feature extraction approach.")

    add_heading(doc, "C. Perceptual Hashing", level=2)
    add_para(doc, "Perceptual hashing algorithms generate compact fingerprints that are robust to minor image transformations while remaining "
        "sensitive to substantive content changes [14]. Jakhar and Borah [12] demonstrated that combining perceptual hashing with deep "
        "learning features significantly outperforms either approach alone for near-duplicate image detection, achieving AUROC of 0.99. "
        "Their two-stage architecture\u2014pHash for fast structural comparison followed by deep features for semantic verification\u2014provides "
        "methodological precedent for our dual-method approach, though applied to natural images rather than document signatures.")

    add_heading(doc, "D. Deep Feature Extraction for Signature Analysis", level=2)
    add_para(doc, "Several studies have explored pre-trained CNN features for signature comparison without metric learning or Siamese architectures. "
        "Engin et al. [15] used ResNet-50 features with cosine similarity for offline signature verification on real-world scanned documents, "
        "incorporating CycleGAN-based stamp removal as preprocessing. Tsourounis et al. [16] demonstrated successful transfer from handwritten "
        "text recognition to signature verification. Chamakh and Bounouh [17] confirmed that a simple ResNet backbone with cosine similarity "
        "achieves competitive verification accuracy across multilingual signature datasets without fine-tuning, supporting the viability of "
        "our off-the-shelf feature extraction approach.")

    # ==================== III. METHODOLOGY ====================
    add_heading(doc, "III. Methodology", level=1)

    add_heading(doc, "A. Pipeline Overview", level=2)
    add_para(doc, "We propose a six-stage pipeline for large-scale signature replication detection in scanned financial documents. "
        "Fig. 1 illustrates the overall architecture. The pipeline takes as input a corpus of PDF audit reports and produces, for each "
        "document, a classification of its CPA signatures as genuine, uncertain, or replicated, along with confidence scores and "
        "supporting evidence from multiple verification methods.")
    add_figure(doc, FIGURE_DIR / "fig1_pipeline.png",
        "Fig. 1. Pipeline architecture for automated signature replication detection.", width=6.5)

    add_heading(doc, "B. Data Collection", level=2)
    add_para(doc, "The dataset comprises 90,282 annual financial audit reports filed by publicly listed companies in Taiwan, covering fiscal "
        "years 2013 to 2023. The reports were collected from the Market Observation Post System (MOPS) operated by the Taiwan Stock Exchange "
        "Corporation, the official repository for mandatory corporate filings. CPA names, affiliated accounting firms, and audit engagement "
        "tenure were obtained from a publicly available audit firm tenure registry encompassing 758 unique CPAs.")
    add_table(doc,
        ["Attribute", "Value"],
        [
            ["Total PDF documents", "90,282"],
            ["Date range", "2013\u20132023"],
            ["Documents with signatures", "86,072 (95.4%)"],
            ["Unique CPAs identified", "758"],
            ["Accounting firms", ">50"],
        ],
        caption="TABLE I: Dataset Summary")

    add_heading(doc, "C. Signature Page Identification", level=2)
    add_para(doc, "To identify which page of each multi-page PDF contains the auditor\u2019s signatures, we employed the Qwen2.5-VL "
        "vision-language model (32B parameters) [18] as an automated pre-screening mechanism. Each PDF page was rendered to JPEG at "
        "180 DPI and submitted to the VLM with a structured prompt requesting a binary determination of whether the page contains "
        "a Chinese handwritten signature. The scanning range was restricted to the first quartile of each document\u2019s page count, "
        "reflecting the regulatory structure of Taiwanese audit reports. This process identified 86,072 documents with signature pages. "
        "Cross-validation between the VLM and subsequent YOLO detection confirmed high agreement: YOLO successfully detected signature "
        "regions in 98.8% of VLM-positive documents.")
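The first-quartile page restriction is simple to state precisely. A minimal standalone sketch (`scan_range` is a hypothetical helper, not part of the pipeline code above):

```python
import math

def scan_range(n_pages: int) -> range:
    """Pages to submit to the VLM: the first quartile of the document's
    page count, rounded up, and always at least one page."""
    return range(0, max(1, math.ceil(n_pages / 4)))

# e.g. for a 40-page report, only pages 0-9 are rendered and screened
first_quartile = list(scan_range(40))
```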

    add_heading(doc, "D. Signature Detection", level=2)
    add_para(doc, "We adopted YOLOv11n (nano variant) [19] for signature region localization. A training set of 500 randomly sampled signature "
        "pages was annotated using a custom web-based interface following a two-stage protocol: primary annotation followed by independent "
        "review and correction.")
    add_table(doc,
        ["Metric", "Value"],
        [
            ["Precision", "0.97\u20130.98"],
            ["Recall", "0.95\u20130.98"],
            ["mAP@0.50", "0.98\u20130.99"],
            ["mAP@0.50:0.95", "0.85\u20130.90"],
        ],
        caption="TABLE II: YOLO Detection Performance")
    add_para(doc, "Batch inference on 86,071 documents extracted 182,328 signature images at 43.1 documents/second (8 workers). "
        "A red stamp removal step was applied using HSV color space filtering. Each signature was matched to its corresponding CPA "
        "using positional order against the official registry, achieving a 92.6% match rate.")
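The positional matching step can be sketched as follows; `match_signatures_to_cpas` is a hypothetical helper, and the left-to-right signing order it assumes is an illustration, not a statement of the pipeline's actual layout logic:

```python
def match_signatures_to_cpas(boxes, registry_names):
    """Pair detected signature boxes with the registry's ordered CPA
    names by position. `boxes` are (x, y, w, h) tuples; we assume the
    signing order runs left to right. Returns None when the counts
    disagree, so the document can be flagged instead of mismatched."""
    ordered = sorted(boxes, key=lambda b: b[0])  # sort by x-coordinate
    if len(ordered) != len(registry_names):
        return None  # ambiguous match; handled as 'unknown' downstream
    return list(zip(registry_names, ordered))
```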

    add_heading(doc, "E. Feature Extraction", level=2)
    add_para(doc, "Each extracted signature was encoded into a 2048-dimensional feature vector using a pre-trained ResNet-50 CNN [20] with "
        "ImageNet-1K V2 weights, used as a fixed feature extractor without fine-tuning. Preprocessing consisted of resizing to "
        "224\u00d7224 pixels with aspect ratio preservation and white padding, followed by ImageNet channel normalization. All feature "
        "vectors were L2-normalized, ensuring that cosine similarity equals the dot product. The choice of ResNet-50 without fine-tuning "
        "was motivated by three considerations: (1) the task is similarity comparison rather than classification; (2) ImageNet features "
        "transfer effectively to document analysis [15], [16]; and (3) the absence of fine-tuning preserves generalizability. "
        "This design choice is validated by an ablation study (Section IV-F).")
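The normalization property relied on above can be checked in a few lines: after L2 normalization, cosine similarity reduces to a plain dot product. A numpy-only sketch (the ResNet-50 forward pass itself is omitted; random vectors stand in for embeddings):

```python
import numpy as np

def l2_normalize(v):
    # scale a feature vector to unit Euclidean length
    return v / np.linalg.norm(v)

rng = np.random.default_rng(0)
fa = rng.normal(size=2048)  # stand-in for one 2048-d embedding
fb = rng.normal(size=2048)  # stand-in for another

cosine = float(np.dot(fa, fb) / (np.linalg.norm(fa) * np.linalg.norm(fb)))
dot = float(np.dot(l2_normalize(fa), l2_normalize(fb)))
# cosine and dot are equal, so normalized vectors allow similarity
# search via fast matrix products
```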

    add_heading(doc, "F. Dual-Method Similarity Verification", level=2)
    add_para(doc, "For each signature, the most similar signature from the same CPA across all other documents was identified via cosine "
        "similarity. Two complementary measures were then computed against this closest match:")
    add_para(doc, "Cosine similarity captures high-level visual style similarity: sim(fA, fB) = fA \u00b7 fB, where fA and fB are L2-normalized "
        "feature vectors. A high cosine similarity indicates shared visual characteristics but does not distinguish between consistent "
        "handwriting style and digital duplication.")
    add_para(doc, "Perceptual hash (pHash) distance captures structural-level similarity. Each signature is converted to a 64-bit binary "
        "fingerprint via a difference-hash construction: the image is resized to 9\u00d78 pixels and horizontal gradient differences are "
        "computed. The Hamming distance between two hashes quantifies perceptual dissimilarity: 0 indicates perceptually identical images, "
        "while distances exceeding 15 indicate clearly different images.")
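The 9×8 gradient fingerprint can be sketched directly. This assumes the resize step has already produced an 8-row by 9-column grayscale array; function names are illustrative, not the pipeline's:

```python
import numpy as np

def gradient_hash(gray):
    """64-bit fingerprint of an 8x9 grayscale array (9 wide x 8 high):
    bit = 1 where a pixel is brighter than its right-hand neighbour,
    giving 8 rows x 8 horizontal differences = 64 comparisons."""
    bits = gray[:, 1:] > gray[:, :-1]
    return int("".join("1" if b else "0" for b in bits.flatten()), 2)

def hamming(h1, h2):
    # number of differing bits between two 64-bit fingerprints
    return bin(h1 ^ h2).count("1")

rng = np.random.default_rng(0)
sig = rng.random((8, 9))  # stand-in for a resized signature crop
identical = hamming(gradient_hash(sig), gradient_hash(sig))  # 0
```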
    add_para(doc, "The complementarity of these measures resolves the style-versus-replication ambiguity: high cosine + low pHash = converging "
        "evidence of replication; high cosine + high pHash = consistent style, not replication. SSIM was excluded as a primary method "
        "because scan-induced pixel variations caused a known-replication firm to exhibit a mean SSIM of only 0.70.")

    add_heading(doc, "G. Threshold Selection and Calibration", level=2)
    add_para(doc, "Intra-class (same CPA, 41.3M pairs) and inter-class (different CPAs, 500K pairs) cosine similarity distributions were "
        "computed. Shapiro-Wilk tests rejected normality (p < 0.001), motivating distribution-free, percentile-based thresholds. "
        "The primary threshold was derived via KDE crossover\u2014the point where intra- and inter-class density functions intersect.")
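The KDE-crossover rule can be illustrated on synthetic similarity samples. A numpy-only sketch; the Gaussian kernel, bandwidth, and grid here are illustrative choices, not necessarily the pipeline's estimator:

```python
import numpy as np

def kde(samples, grid, bw=0.02):
    # Gaussian kernel density estimate evaluated on `grid`
    z = (grid[:, None] - samples[None, :]) / bw
    return np.exp(-0.5 * z**2).mean(axis=1) / (bw * np.sqrt(2 * np.pi))

def kde_crossover(intra, inter, grid):
    # grid point between the two sample means where the estimated
    # intra- and inter-class densities intersect
    diff = kde(intra, grid) - kde(inter, grid)
    lo, hi = sorted((inter.mean(), intra.mean()))
    crossings = [grid[i] for i in np.nonzero(np.diff(np.sign(diff)))[0]
                 if lo <= grid[i] <= hi]
    return crossings[0] if crossings else None

rng = np.random.default_rng(0)
intra = rng.normal(0.82, 0.05, 4000)  # synthetic same-CPA similarities
inter = rng.normal(0.70, 0.05, 4000)  # synthetic cross-CPA similarities
threshold = kde_crossover(intra, inter, np.linspace(0.5, 1.0, 501))
```

With equal spreads and sample sizes, the crossover lands near the midpoint of the two means, which is the behavior the threshold rule relies on.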
    add_para(doc, "A distinctive aspect is the use of Firm A\u2014a major firm whose signatures are known to be digitally replicated\u2014as a "
        "calibration reference. Firm A\u2019s distribution provides: (1) lower bound validation\u2014any threshold must classify the vast majority "
        "of Firm A as replicated; and (2) upper bound estimation\u2014Firm A\u2019s 1st percentile establishes the floor of similarity achievable "
        "through replication in scanned documents.")
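Both calibration checks reduce to two simple statistics over the known-replication group's best-match scores. A sketch with synthetic scores (helper name and the synthetic distribution are illustrative):

```python
import numpy as np

def calibration_report(known_replicated_sims, threshold):
    """Lower-bound check: share of the known-replication group that a
    candidate threshold actually flags. Upper-bound estimate: the
    group's 1st-percentile similarity, i.e. the replication 'floor'."""
    caught = float(np.mean(known_replicated_sims >= threshold))
    floor = float(np.percentile(known_replicated_sims, 1))
    return caught, floor

rng = np.random.default_rng(0)
# synthetic stand-in for a known-replication firm's best-match scores
sims = np.clip(rng.normal(0.98, 0.019, 50000), None, 1.0)
caught, floor = calibration_report(sims, 0.90)
# a threshold that misses a large share of `sims` would be rejected
# as too conservative under check (1)
```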

    add_heading(doc, "H. Classification", level=2)
    add_para(doc, "The final per-document classification integrates evidence from both methods: (1) Definite replication: pixel-identical match "
        "or SSIM > 0.95 with pHash \u2264 5; (2) Likely replication: cosine > 0.95 with pHash \u2264 5, or multiple methods indicate replication; "
        "(3) Uncertain: cosine between KDE crossover and 0.95 without structural evidence; (4) Likely genuine: cosine below KDE crossover.")
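The four rules can be transcribed as a decision function. This is a simplified sketch: the "multiple methods indicate replication" clause of rule 2 is dropped, and high-cosine pairs without structural evidence are treated as uncertain (an assumption; the text does not spell out that case):

```python
def classify(cosine, phash_dist, ssim, pixel_identical, crossover=0.837):
    """Per-document verdict from dual-method evidence (sketch)."""
    if pixel_identical or (ssim > 0.95 and phash_dist <= 5):
        return "definite replication"   # pixel/structural convergence
    if cosine > 0.95 and phash_dist <= 5:
        return "likely replication"     # feature + structural evidence
    if cosine > crossover:
        return "uncertain"              # style similarity only
    return "likely genuine"             # below the KDE crossover
```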

    # ==================== IV. RESULTS ====================
    add_heading(doc, "IV. Experiments and Results", level=1)

    add_heading(doc, "A. Experimental Setup", level=2)
    add_para(doc, "All experiments were conducted using PyTorch 2.9 with Apple Silicon MPS GPU acceleration. "
        "Feature extraction used torchvision model implementations with identical preprocessing across all backbones.")

    add_heading(doc, "B. Distribution Analysis", level=2)
    add_para(doc, "Fig. 2 presents the cosine similarity distributions for intra-class and inter-class pairs.")
    add_figure(doc, FIGURE_DIR / "fig2_intra_inter_kde.png",
        "Fig. 2. Cosine similarity distributions: intra-class (same CPA) vs. inter-class (different CPAs). "
        "KDE crossover at 0.837 marks the equal-density decision boundary (Bayes-optimal under equal class priors).", width=3.5)
    add_table(doc,
        ["Statistic", "Intra-class", "Inter-class"],
        [
            ["N (pairs)", "41,352,824", "500,000"],
            ["Mean", "0.821", "0.758"],
            ["Std. Dev.", "0.098", "0.090"],
            ["Median", "0.836", "0.774"],
        ],
        caption="TABLE IV: Cosine Similarity Distribution Statistics")
    add_para(doc, "Cohen\u2019s d of 0.669 indicates a medium effect size, confirming that the distributional difference is not merely "
        "statistically significant but also practically meaningful.")
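The reported Cohen's d can be reproduced from Table IV's statistics. This sketch uses the equal-weight pooled standard deviation, which matches the reported 0.669; an n-weighted pooling over the very unequal pair counts would give a slightly smaller value:

```python
import math

def cohens_d(mean1, sd1, mean2, sd2):
    # equal-weight pooled standard deviation across the two groups
    pooled = math.sqrt((sd1**2 + sd2**2) / 2)
    return (mean1 - mean2) / pooled

# Table IV: intra-class (0.821, 0.098) vs. inter-class (0.758, 0.090)
d = cohens_d(0.821, 0.098, 0.758, 0.090)
```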
add_heading(doc, "C. Calibration Group Analysis", level=2)
add_para(doc, "Fig. 3 presents the per-signature best-match cosine similarity distribution of Firm A compared to other CPAs.")
add_figure(doc, FIGURE_DIR / "fig3_firm_a_calibration.png",
"Fig. 3. Per-signature best-match cosine similarity: Firm A (known replication) vs. other CPAs. "
"Firm A\u2019s 1st percentile (0.908) validates threshold selection.", width=3.5)
add_table(doc,
["Statistic", "Firm A", "All CPAs"],
[
["N (signatures)", "60,448", "168,740"],
["Mean", "0.980", "0.961"],
["Std. Dev.", "0.019", "0.029"],
["1st percentile", "0.908", "\u2014"],
["% > 0.95", "92.5%", "\u2014"],
["% > 0.90", "99.3%", "\u2014"],
],
caption="TABLE VI: Firm A Calibration Statistics (Per-Signature Best Match)")
add_para(doc, "Firm A\u2019s per-signature best-match cosine similarity (mean = 0.980, std = 0.019) is notably higher and more concentrated "
"than the overall CPA population (mean = 0.961, std = 0.029). Critically, 99.3% of Firm A\u2019s signatures exhibit a best-match "
"similarity exceeding 0.90, and the 1st percentile is 0.908\u2014establishing that any threshold below 0.91 would fail to capture "
"even the most dissimilar replicated signatures in the calibration group.")
add_heading(doc, "D. Classification Results", level=2)
add_table(doc,
["Verdict", "N (PDFs)", "%", "Description"],
[
["Definite replication", "2,403", "2.8%", "Pixel-level evidence"],
["Likely replication", "69,255", "81.4%", "Feature-level evidence"],
["Uncertain", "12,681", "14.9%", "Between thresholds"],
["Likely genuine", "47", "0.1%", "Below KDE crossover"],
["Unknown", "656", "0.8%", "Unmatched CPA"],
],
caption="TABLE VII: Classification Results (85,042 Documents)")
add_para(doc, "The most striking finding is the discrepancy between feature-level and pixel-level evidence. Of the 71,656 documents with "
"cosine similarity exceeding 0.95, only 3.4% (2,427) simultaneously exhibited SSIM > 0.95, and only 4.3% (3,081) had a pHash "
"distance of 0. This gap demonstrates that the vast majority of high cosine similarity scores reflect consistent signing style "
"rather than digital replication, vindicating the dual-method approach.")
add_para(doc, "The 267 pixel-identical signatures (0.4%) constitute the strongest evidence of digital replication, as it is physically "
"impossible for two instances of genuine handwriting to produce identical pixel arrays.")
add_heading(doc, "E. Ablation Study: Feature Backbone Comparison", level=2)
add_para(doc, "To validate the choice of ResNet-50, we compared three pre-trained architectures (Fig. 4).")
add_figure(doc, FIGURE_DIR / "fig4_ablation.png",
"Fig. 4. Ablation study comparing three feature extraction backbones: "
"(a) intra/inter-class mean similarity, (b) Cohen\u2019s d, (c) KDE crossover point.", width=6.5)
add_table(doc,
["Metric", "ResNet-50", "VGG-16", "EfficientNet-B0"],
[
["Feature dim", "2048", "4096", "1280"],
["Intra mean", "0.821", "0.822", "0.786"],
["Inter mean", "0.758", "0.767", "0.699"],
["Cohen\u2019s d", "0.669", "0.564", "0.707"],
["KDE crossover", "0.837", "0.850", "0.792"],
["Firm A mean", "0.826", "0.820", "0.810"],
["Firm A 1st pct", "0.543", "0.520", "0.454"],
],
caption="TABLE VIII: Backbone Comparison")
add_para(doc, "EfficientNet-B0 achieves the highest Cohen\u2019s d (0.707), but exhibits the widest distributional spread, resulting in "
"lower per-sample classification confidence. VGG-16 performs worst despite the highest dimensionality. ResNet-50 provides the "
"best balance: competitive Cohen\u2019s d, tightest distributions, highest Firm A 1st percentile (0.543), and practical feature "
"dimensionality.")
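# Sketch of the pooled-standard-deviation Cohen's d used in the backbone
# comparison above (the standard two-sample formula; we assume this is the
# variant computed upstream).

```python
from statistics import mean, stdev

def cohens_d(sample_a, sample_b):
    """Effect size between two similarity samples via pooled standard deviation."""
    n1, n2 = len(sample_a), len(sample_b)
    s1, s2 = stdev(sample_a), stdev(sample_b)
    pooled = (((n1 - 1) * s1 ** 2 + (n2 - 1) * s2 ** 2) / (n1 + n2 - 2)) ** 0.5
    return (mean(sample_a) - mean(sample_b)) / pooled
```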
# ==================== V. DISCUSSION ====================
add_heading(doc, "V. Discussion", level=1)
add_heading(doc, "A. Replication Detection as a Distinct Problem", level=2)
add_para(doc, "Our results highlight the importance of distinguishing signature replication detection from forgery detection. "
"Forgery detection optimizes for inter-class discriminability\u2014maximizing the gap between genuine and forged signatures. "
"Replication detection requires sensitivity to the upper tail of the intra-class similarity distribution, where the boundary "
"between consistent handwriting and digital copies becomes ambiguous. The dual-method framework addresses this ambiguity "
"in a way that single-method approaches cannot.")
add_heading(doc, "B. The Style-Replication Gap", level=2)
add_para(doc, "The most important empirical finding is the magnitude of the gap between style similarity and digital replication. "
"Of documents with cosine similarity exceeding 0.95, only 3.4% exhibited pixel-level evidence of actual replication via SSIM, "
"and only 4.3% via pHash. This implies that a naive cosine-only approach would overestimate the replication rate by approximately "
"25-fold. This gap likely reflects the nature of CPA signing practices: many accountants develop highly consistent signing habits, "
"resulting in signatures that appear nearly identical at the feature level while retaining microscopic handwriting variations.")
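# The "approximately 25-fold" figure above is simple arithmetic on the
# Section IV-D counts (it brackets ~23x via pHash and ~30x via SSIM):

```python
high_cos = 71_656   # documents with cosine similarity > 0.95
ssim_hits = 2_427   # ...that also exhibit SSIM > 0.95
phash_hits = 3_081  # ...that also have pHash distance 0

overestimate_via_ssim = high_cos / ssim_hits
overestimate_via_phash = high_cos / phash_hits
```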
add_heading(doc, "C. Value of Known-Replication Calibration", level=2)
add_para(doc, "The use of Firm A as a calibration reference addresses a fundamental challenge in document forensics: the scarcity of "
"ground truth labels. Our approach leverages domain knowledge\u2014the established practice of digital signature replication at "
"a specific firm\u2014to create a naturally occurring positive control group. This calibration strategy has broader applicability: "
"any forensic detection system can benefit from identifying subpopulations with known characteristics to anchor threshold selection.")
add_heading(doc, "D. Limitations", level=2)
add_para(doc, "Several limitations should be acknowledged. First, comprehensive ground truth labels are not available for the full dataset. "
"While pixel-identical cases and Firm A provide anchor points, a small-scale manual verification study would strengthen confidence "
"in classification boundaries. Second, the ResNet-50 feature extractor was not fine-tuned on domain-specific data. Third, scanning "
"equipment and compression algorithms may have changed over the 10-year study period. Fourth, the classification framework does not "
"account for potential changes in signing practice over time. Finally, whether digital replication constitutes a violation of signing "
"requirements is a legal question that our technical analysis can inform but cannot resolve.")
# ==================== VI. CONCLUSION ====================
add_heading(doc, "VI. Conclusion and Future Work", level=1)
add_para(doc, "We have presented an end-to-end AI pipeline for detecting digitally replicated signatures in financial audit reports at scale. "
"Applied to 90,282 audit reports from Taiwanese publicly listed companies spanning 2013\u20132023, our system extracted and analyzed "
"182,328 CPA signatures using VLM-based page identification, YOLO-based signature detection, deep feature extraction, and "
"dual-method similarity verification.")
add_para(doc, "Our key findings are threefold. First, signature replication detection is a distinct problem from forgery detection, requiring "
"different analytical tools. Second, combining cosine similarity with perceptual hashing is essential for distinguishing consistent "
"handwriting style from digital duplication\u2014a single-metric approach overestimates replication rates by approximately 25-fold. "
"Third, a calibration methodology using a known-replication reference group provides empirical threshold validation in the absence "
"of comprehensive ground truth.")
add_para(doc, "An ablation study confirmed that ResNet-50 offers the best balance of discriminative power, classification stability, and "
"computational efficiency among three evaluated backbones.")
add_para(doc, "Future directions include domain-adapted feature extractors, temporal analysis of signing practice evolution, cross-country "
"generalization, regulatory system integration, and small-scale ground truth validation through expert review.")
# ==================== REFERENCES ====================
add_heading(doc, "References", level=1)
refs = [
'[1] Taiwan Certified Public Accountant Act (\u6703\u8a08\u5e2b\u6cd5), Art. 4; FSC Attestation Regulations (\u67e5\u6838\u7c3d\u8b49\u6838\u6e96\u6e96\u5247), Art. 6.',
'[2] S.-H. Yen, Y.-S. Chang, and H.-L. Chen, \u201cDoes the signature of a CPA matter? Evidence from Taiwan,\u201d Res. Account. Regul., vol. 25, no. 2, pp. 230\u2013235, 2013.',
'[3] J. Bromley et al., \u201cSignature verification using a Siamese time delay neural network,\u201d in Proc. NeurIPS, 1993.',
'[4] S. Dey et al., \u201cSigNet: Convolutional Siamese network for writer independent offline signature verification,\u201d arXiv:1707.02131, 2017.',
'[5] I. Hadjadj et al., \u201cAn offline signature verification method based on a single known sample and an explainable deep learning approach,\u201d Appl. Sci., vol. 10, no. 11, p. 3716, 2020.',
'[6] H. Li et al., \u201cTransOSV: Offline signature verification with transformers,\u201d Pattern Recognit., vol. 145, p. 109882, 2024.',
'[7] S. Tehsin et al., \u201cEnhancing signature verification using triplet Siamese similarity networks in digital documents,\u201d Mathematics, vol. 12, no. 17, p. 2757, 2024.',
'[8] P. Brimoh and C. C. Olisah, \u201cConsensus-threshold criterion for offline signature verification using CNN learned representations,\u201d arXiv:2401.03085, 2024.',
'[9] N. Woodruff et al., \u201cFully-automatic pipeline for document signature analysis to detect money laundering activities,\u201d arXiv:2107.14091, 2021.',
'[10] \u201cCopy-move forgery detection in digital image forensics: A survey,\u201d Multimedia Tools Appl., 2024.',
'[11] S. Abramova and R. B\u00f6hme, \u201cDetecting copy-move forgeries in scanned text documents,\u201d in Proc. Electronic Imaging, 2016.',
'[12] Y. Jakhar and M. D. Borah, \u201cEffective near-duplicate image detection using perceptual hashing and deep learning,\u201d Inf. Process. Manage., p. 104086, 2025.',
'[13] E. Pizzi et al., \u201cA self-supervised descriptor for image copy detection,\u201d in Proc. CVPR, 2022.',
'[14] \u201cA survey of perceptual hashing for multimedia,\u201d ACM Trans. Multimedia Comput. Commun. Appl., vol. 21, no. 7, 2025.',
'[15] D. Engin et al., \u201cOffline signature verification on real-world documents,\u201d in Proc. CVPRW, 2020.',
'[16] D. Tsourounis et al., \u201cFrom text to signatures: Knowledge transfer for efficient deep feature learning in offline signature verification,\u201d Expert Syst. Appl., 2022.',
'[17] B. Chamakh and O. Bounouh, \u201cA unified ResNet18-based approach for offline signature classification and verification,\u201d Procedia Comput. Sci., vol. 270, 2025.',
'[18] \u201cQwen2.5-VL technical report,\u201d Alibaba Group, 2025.',
'[19] Ultralytics, \u201cYOLOv11 documentation,\u201d 2024. [Online]. Available: https://docs.ultralytics.com/',
'[20] K. He, X. Zhang, S. Ren, and J. Sun, \u201cDeep residual learning for image recognition,\u201d in Proc. CVPR, 2016.',
'[21] J. V. Carcello and C. Li, \u201cCosts and benefits of requiring an engagement partner signature: Recent experience in the United Kingdom,\u201d The Accounting Review, vol. 88, no. 5, pp. 1511\u20131546, 2013.',
'[22] A. D. Blay, M. Notbohm, C. Schelleman, and A. Valencia, \u201cAudit quality effects of an individual audit engagement partner signature mandate,\u201d Int. J. Auditing, vol. 18, no. 3, pp. 172\u2013192, 2014.',
'[23] W. Chi, H. Huang, Y. Liao, and H. Xie, \u201cMandatory audit partner rotation, audit quality, and market perception: Evidence from Taiwan,\u201d Contemp. Account. Res., vol. 26, no. 2, pp. 359\u2013391, 2009.',
'[24] L. G. Hafemann, R. Sabourin, and L. S. Oliveira, \u201cLearning features for offline handwritten signature verification using deep convolutional neural networks,\u201d Pattern Recognit., vol. 70, pp. 163\u2013176, 2017.',
'[25] L. G. Hafemann, R. Sabourin, and L. S. Oliveira, \u201cMeta-learning for fast classifier adaptation to new users of signature verification systems,\u201d IEEE Trans. Inf. Forensics Security, vol. 15, pp. 1735\u20131745, 2019.',
'[26] E. N. Zois, D. Tsourounis, and D. Kalivas, \u201cSimilarity distance learning on SPD manifold for writer independent offline signature verification,\u201d IEEE Trans. Inf. Forensics Security, vol. 19, pp. 1342\u20131356, 2024.',
'[27] H. Farid, \u201cImage forgery detection,\u201d IEEE Signal Process. Mag., vol. 26, no. 2, pp. 16\u201325, 2009.',
'[28] F. Z. Mehrjardi, A. M. Latif, M. S. Zarchi, and R. Sheikhpour, \u201cA survey on deep learning-based image forgery detection,\u201d Pattern Recognit., vol. 144, art. no. 109778, 2023.',
'[29] A. Babenko, A. Slesarev, A. Chigorin, and V. Lempitsky, \u201cNeural codes for image retrieval,\u201d in Proc. ECCV, 2014, pp. 584\u2013599.',
'[30] J. Redmon, S. Divvala, R. Girshick, and A. Farhadi, \u201cYou only look once: Unified, real-time object detection,\u201d in Proc. CVPR, 2016, pp. 779\u2013788.',
'[31] J. Zhang, J. Huang, S. Jin, and S. Lu, \u201cVision-language models for vision tasks: A survey,\u201d IEEE Trans. Pattern Anal. Mach. Intell., vol. 46, no. 8, pp. 5625\u20135644, 2024.',
'[32] Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, \u201cImage quality assessment: From error visibility to structural similarity,\u201d IEEE Trans. Image Process., vol. 13, no. 4, pp. 600\u2013612, 2004.',
'[33] B. W. Silverman, Density Estimation for Statistics and Data Analysis. London: Chapman & Hall, 1986.',
'[34] J. Cohen, Statistical Power Analysis for the Behavioral Sciences, 2nd ed. Hillsdale, NJ: Lawrence Erlbaum, 1988.',
'[35] H. B. Mann and D. R. Whitney, \u201cOn a test of whether one of two random variables is stochastically larger than the other,\u201d Ann. Math. Statist., vol. 18, no. 1, pp. 50\u201360, 1947.',
]
for ref in refs:
add_para(doc, ref, font_size=8, space_after=2)
# Save
doc.save(str(OUTPUT_PATH))
print(f"Saved: {OUTPUT_PATH}")
if __name__ == "__main__":
build_document()