12f716ddf1
Fully addresses the partial-resolution / unfixed items from codex
gpt-5.4 round-4 review (codex_review_gpt54_v3_4.md):
Critical
- Table XI z/p columns now reproduce from displayed counts. Earlier
table had 1-4-unit transcription errors in k values and a fabricated
cos > 0.9407 calibration row; both fixed by rerunning Script 24
with cos = 0.9407 added to COS_RULES and copying exact values from
the JSON output.
- Section III-L classifier now defined entirely in terms of the
independent-minimum dHash statistic that the deployed code (Scripts
21, 23, 24) actually uses; the legacy "cosine-conditional dHash"
language is removed. Tables IX, XI, XII, XVI are now arithmetically
consistent with the III-L classifier definition.
- "0.95 not calibrated to Firm A" inconsistency reconciled: Section
III-H now correctly says 0.95 is the whole-sample Firm A P95 of the
per-signature cosine distribution, matching III-L and IV-F.
Major
- Abstract trimmed to 246 words (from 367) to meet IEEE Access 250-word
limit. Removed "we break the circularity" overclaim; replaced with
"report capture rates on both folds with Wilson 95% intervals to
make fold-level variance visible".
- Conclusion mirrors the Abstract reframe: 70/30 split documents
within-firm sampling variance, not external generalization.
- Introduction no longer promises precision / F1 / EER metrics that
Methods/Results don't deliver; replaced with anchor-based capture /
FAR + Wilson CI language.
- Section III-G within-auditor-year empirical-check wording corrected:
intra-report consistency (IV-H.3) is a different test (two co-signers
on the same report, firm-level homogeneity) and is not a within-CPA
year-level mixing check; the assumption is maintained as a bounded
identification convention.
- Section III-H "two analyses fully threshold-free" corrected to "only
the partner-level ranking is threshold-free"; longitudinal-stability
uses 0.95 cutoff, intra-report uses the operational classifier.
Minor
- Impact Statement removed from export_v3.py SECTIONS list (IEEE Access
Regular Papers do not have a standalone Impact Statement). The file
itself is retained as an archived non-paper note for cover-letter /
grant-report reuse, with a clear archive header.
- All 7 previously unused references ([27] dHash, [31][32] partner-
signature mandates, [33] Taiwan partner rotation, [34] YOLO original,
[35] VLM survey, [36] Mann-Whitney) are now cited in-text:
[27] in Methodology III-E (dHash definition)
[31][32][33] in Introduction (audit-quality regulation context)
[34][35] in Methodology III-C/III-D
[36] in Results IV-C (Mann-Whitney result)
Updated Script 24 to include cos = 0.9407 in COS_RULES so Table XI's
calibration-fold P5 row is computed from the same data file as the
other rows.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
235 lines
8.1 KiB
Python
235 lines
8.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Export Paper A v3 (IEEE Access target) to Word, reading from v3 md section files."""
|
|
|
|
from docx import Document
|
|
from docx.shared import Inches, Pt, RGBColor
|
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
from pathlib import Path
|
|
import re
|
|
|
|
# Directory that holds the v3 Markdown section files listed in SECTIONS;
# the generated .docx is written into the same directory.
PAPER_DIR = Path("/Volumes/NV2/pdf_recognize/paper")
# Primary directory of figure images referenced by FIGURES.
FIG_DIR = Path("/Volumes/NV2/PDF-Processing/signature-analysis/paper_figures")
# Secondary figure root; FIGURES resolves sub-folders beneath it.
EXTRA_FIG_DIR = Path("/Volumes/NV2/PDF-Processing/signature-analysis/reports")
# Destination path of the exported Word draft.
OUTPUT = PAPER_DIR / "Paper_A_IEEE_Access_Draft_v3.docx"
|
|
|
|
# Markdown section files, relative to PAPER_DIR, in the order they are
# appended to the exported document.
SECTIONS = [
    "paper_a_abstract_v3.md",
    # paper_a_impact_statement_v3.md removed: not a standard IEEE Access
    # Regular Paper section. Content folded into cover letter / abstract.
    "paper_a_introduction_v3.md",
    "paper_a_related_work_v3.md",
    "paper_a_methodology_v3.md",
    "paper_a_results_v3.md",
    "paper_a_discussion_v3.md",
    "paper_a_conclusion_v3.md",
    "paper_a_references_v3.md",
]
|
|
|
|
# Figure insertion hooks (trigger phrase -> (file, caption, width inches)).
# A figure is placed after a paragraph whose text contains its trigger phrase.
# New figures for v3: dip test, BD/McCrary overlays, accountant GMM 2D + marginals.
FIGURES = {
    "Fig. 1 illustrates": (
        FIG_DIR / "fig1_pipeline.png",
        "Fig. 1. Pipeline architecture for automated non-hand-signed signature detection.",
        6.5,
    ),
    "Fig. 2 presents the cosine similarity distributions for intra-class": (
        FIG_DIR / "fig2_intra_inter_kde.png",
        "Fig. 2. Cosine similarity distributions: intra-class vs. inter-class with KDE crossover at 0.837.",
        3.5,
    ),
    "Fig. 3 presents the per-signature cosine and dHash distributions of Firm A": (
        FIG_DIR / "fig3_firm_a_calibration.png",
        "Fig. 3. Firm A per-signature cosine and dHash distributions against the overall CPA population.",
        3.5,
    ),
    "Fig. 4 visualizes the accountant-level clusters": (
        EXTRA_FIG_DIR / "accountant_mixture" / "accountant_mixture_2d.png",
        "Fig. 4. Accountant-level 3-component Gaussian mixture in the (cosine-mean, dHash-mean) plane.",
        4.5,
    ),
    # NOTE(review): the image file is named fig4_* but captioned "Fig. 5" --
    # presumably the file name predates a renumbering; confirm before renaming.
    "conducted an ablation study comparing three": (
        FIG_DIR / "fig4_ablation.png",
        "Fig. 5. Ablation study comparing three feature extraction backbones.",
        6.5,
    ),
}
|
|
|
|
|
|
def strip_comments(text: str) -> str:
    """Return *text* with every HTML comment (``<!-- ... -->``) removed.

    Matching is non-greedy and spans newlines, so multi-line comments are
    removed and separate comments are removed independently. An opening
    ``<!--`` with no closing ``-->`` is left untouched.
    """
    return re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
|
|
|
|
|
|
# One cell of a Markdown table delimiter row: hyphens with optional
# alignment colons, e.g. "---", ":--", "--:", ":-:".
_MD_DELIMITER_CELL = re.compile(r":?-+:?")


def _split_md_row(line):
    """Split one Markdown table line into a list of trimmed cell strings."""
    # Trim surrounding whitespace first so indented lines or lines with
    # trailing spaces still lose their outer pipes.
    return [cell.strip() for cell in line.strip().strip("|").split("|")]


def _is_md_delimiter_row(cells):
    """Return True when every cell looks like a header/body delimiter."""
    return bool(cells) and all(_MD_DELIMITER_CELL.fullmatch(c) for c in cells)


def add_md_table(doc, table_lines):
    """Append a Markdown pipe table to *doc* as a "Table Grid" Word table.

    Args:
        doc: Document object providing ``add_table(rows=, cols=)`` and
            ``add_paragraph()``.
        table_lines: Raw Markdown lines of a single table, header row first.
            Delimiter rows (``|---|:--:|``) are discarded.

    Returns:
        None. Nothing is added when fewer than two non-delimiter rows remain.

    The first row is rendered bold; all cell text is centred, 8 pt
    Times New Roman. An empty paragraph is appended after the table.
    """
    rows_data = []
    for line in table_lines:
        cells = _split_md_row(line)
        # Only a row made up entirely of delimiter cells is dropped, so a
        # data row that merely starts with "-" is kept.
        if _is_md_delimiter_row(cells):
            continue
        rows_data.append(cells)

    if len(rows_data) < 2:
        return

    # Size the grid to the widest row so ragged rows do not lose cells.
    ncols = max(len(row) for row in rows_data)
    table = doc.add_table(rows=len(rows_data), cols=ncols)
    table.style = "Table Grid"

    for r_idx, row in enumerate(rows_data):
        row_cells = table.rows[r_idx].cells
        is_header = r_idx == 0
        for c_idx, value in enumerate(row):
            cell = row_cells[c_idx]
            cell.text = value
            for p in cell.paragraphs:
                p.alignment = WD_ALIGN_PARAGRAPH.CENTER
                for run in p.runs:
                    run.font.size = Pt(8)
                    run.font.name = "Times New Roman"
                    if is_header:
                        run.bold = True

    # Spacer paragraph after the table.
    doc.add_paragraph()
|
|
|
|
|
|
def _insert_figures(doc, para_text):
    """Append every figure whose trigger phrase occurs in *para_text*.

    Args:
        doc: Document object to append the picture and caption paragraphs to.
        para_text: Text of the paragraph that was just written.

    For each entry of FIGURES whose trigger is a substring of *para_text*,
    a centred picture paragraph is added, followed by a centred italic
    9 pt Times New Roman caption. A matching figure whose image file does
    not exist is skipped with a printed warning.
    """
    for trigger, (fig_path, caption, width) in FIGURES.items():
        if trigger not in para_text:
            continue

        fig_path = Path(fig_path)
        if not fig_path.exists():
            # Report the gap instead of silently producing a draft that
            # lacks a referenced figure.
            print(f"WARNING: missing figure file: {fig_path}")
            continue

        picture_para = doc.add_paragraph()
        picture_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        picture_para.add_run().add_picture(str(fig_path), width=Inches(width))

        caption_para = doc.add_paragraph()
        caption_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        caption_run = caption_para.add_run(caption)
        caption_run.font.size = Pt(9)
        caption_run.font.name = "Times New Roman"
        caption_run.italic = True
|
|
|
|
|
|
# "#".."######" followed by one space, then the heading text.
_HEADING_RE = re.compile(r"(#{1,6}) (.*)")
# Leading marker of an ordered-list item, e.g. "12. ".
_ORDERED_ITEM_RE = re.compile(r"\d+\.\s")
# Delimiter line that follows a Markdown table header row.
_TABLE_DELIMITER_RE = re.compile(r"\s*\|[-|: ]+\|")


def _strip_inline_markdown(text):
    """Remove inline Markdown markup that the Word export renders as plain text."""
    # Longest emphasis marker first so "***x***" is not half-consumed by
    # the "**" rule.
    text = re.sub(r"\*\*\*(.+?)\*\*\*", r"\1", text)
    text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
    text = re.sub(r"\*(.+?)\*", r"\1", text)
    text = re.sub(r"`(.+?)`", r"\1", text)
    text = text.replace("$$", "")
    return text.replace("---", "\u2014")


def _add_body_run(paragraph, text):
    """Add *text* to *paragraph* as a 10 pt Times New Roman run."""
    run = paragraph.add_run(text)
    run.font.size = Pt(10)
    run.font.name = "Times New Roman"
    return run


def process_section(doc, filepath):
    """Append one Markdown section file to *doc*.

    Args:
        doc: Document object to append to.
        filepath: Path-like object with ``read_text``; read as UTF-8.

    HTML comments are stripped first. The remaining lines are handled as:

    * ``#`` .. ``######`` headings -> Word headings of the same level,
      with run colour forced to black;
    * pipe tables (header line followed by a delimiter line) -> Word tables;
    * ``N. `` items -> "List Number" paragraphs;
    * ``- `` items -> "List Bullet" paragraphs;
    * anything else -> consecutive lines joined with spaces into one
      paragraph, after which matching figures are inserted.

    Inline markup (emphasis asterisks, backticks, ``$$``) is removed and
    ``---`` becomes an em dash in paragraphs and list items alike.
    """
    text = strip_comments(filepath.read_text(encoding="utf-8"))
    lines = text.split("\n")
    total = len(lines)
    i = 0

    while i < total:
        stripped = lines[i].strip()

        if not stripped:
            i += 1
            continue

        heading = _HEADING_RE.fullmatch(stripped)
        if heading:
            h = doc.add_heading(heading.group(2), level=len(heading.group(1)))
            for run in h.runs:
                run.font.color.rgb = RGBColor(0, 0, 0)
            i += 1
            continue

        # A table starts on a line containing "|" whose next line is a
        # delimiter row; it extends over every following line with a "|".
        if "|" in stripped and i + 1 < total and _TABLE_DELIMITER_RE.match(lines[i + 1]):
            table_lines = []
            while i < total and "|" in lines[i]:
                table_lines.append(lines[i])
                i += 1
            add_md_table(doc, table_lines)
            continue

        ordered = _ORDERED_ITEM_RE.match(stripped)
        if ordered:
            p = doc.add_paragraph(style="List Number")
            _add_body_run(p, _strip_inline_markdown(stripped[ordered.end():]))
            i += 1
            continue

        if stripped.startswith("- "):
            p = doc.add_paragraph(style="List Bullet")
            _add_body_run(p, _strip_inline_markdown(stripped[2:]))
            i += 1
            continue

        # Regular paragraph: absorb following lines until a blank line or
        # the start of another construct.
        para_lines = [stripped]
        i += 1
        while i < total:
            nxt = lines[i].strip()
            if (
                not nxt
                or nxt.startswith("#")
                or nxt.startswith("|")
                or nxt.startswith("- ")
                or _ORDERED_ITEM_RE.match(nxt)
            ):
                break
            para_lines.append(nxt)
            i += 1

        para_text = _strip_inline_markdown(" ".join(para_lines))

        p = doc.add_paragraph()
        p.paragraph_format.space_after = Pt(6)
        _add_body_run(p, para_text)

        _insert_figures(doc, para_text)
|
|
|
|
|
|
def _add_title_paragraph(doc, text, *, size, space_after, bold=False,
                         italic=False, font_name=None):
    """Append one centred title-page paragraph and return its run."""
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_after = Pt(space_after)
    run = p.add_run(text)
    run.font.size = Pt(size)
    if font_name is not None:
        run.font.name = font_name
    if bold:
        run.bold = True
    if italic:
        run.italic = True
    return run


def main():
    """Build the Word draft from the files in SECTIONS and save it to OUTPUT.

    Sets the "Normal" style to 10 pt Times New Roman, writes a three-line
    title page (title, anonymised author line, target journal), appends
    each section file found under PAPER_DIR in SECTIONS order, and saves
    the document. A missing section file is reported with a warning and
    skipped.
    """
    doc = Document()
    style = doc.styles["Normal"]
    style.font.name = "Times New Roman"
    style.font.size = Pt(10)

    # Title page
    _add_title_paragraph(
        doc,
        "Automated Identification of Non-Hand-Signed Auditor Signatures\n"
        "in Large-Scale Financial Audit Reports:\n"
        "A Dual-Descriptor Framework with Three-Method Convergent Thresholding",
        size=16,
        space_after=12,
        bold=True,
        font_name="Times New Roman",
    )
    _add_title_paragraph(
        doc,
        "[Authors removed for double-blind review]",
        size=10,
        space_after=6,
        italic=True,
    )
    _add_title_paragraph(
        doc,
        "Target journal: IEEE Access (Regular Paper)",
        size=10,
        space_after=20,
        italic=True,
    )

    for section_file in SECTIONS:
        filepath = PAPER_DIR / section_file
        if filepath.exists():
            process_section(doc, filepath)
        else:
            print(f"WARNING: missing section file: {filepath}")

    doc.save(str(OUTPUT))
    print(f"Saved: {OUTPUT}")
|
|
|
|
|
|
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|