#!/usr/bin/env python3
"""Export Paper A v3 (IEEE Access target) to Word, reading from v3 md section files.

The script walks the markdown section files listed in ``SECTIONS`` (in order),
converts a small markdown subset (headings, pipe tables, ordered/bulleted
lists, plain paragraphs) to python-docx elements, inserts figures after
paragraphs containing a trigger phrase from ``FIGURES``, and saves the result
to ``OUTPUT``.
"""

import re
from pathlib import Path

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Inches, Pt, RGBColor

PAPER_DIR = Path("/Volumes/NV2/pdf_recognize/paper")
FIG_DIR = Path("/Volumes/NV2/PDF-Processing/signature-analysis/paper_figures")
EXTRA_FIG_DIR = Path("/Volumes/NV2/PDF-Processing/signature-analysis/reports")
OUTPUT = PAPER_DIR / "Paper_A_IEEE_Access_Draft_v3.docx"

SECTIONS = [
    "paper_a_abstract_v3.md",
    # paper_a_impact_statement_v3.md removed: not a standard IEEE Access
    # Regular Paper section. Content folded into cover letter / abstract.
    "paper_a_introduction_v3.md",
    "paper_a_related_work_v3.md",
    "paper_a_methodology_v3.md",
    "paper_a_results_v3.md",
    "paper_a_discussion_v3.md",
    "paper_a_conclusion_v3.md",
    "paper_a_references_v3.md",
]

# Figure insertion hooks (trigger phrase -> (file, caption, width inches)).
# New figures for v3: dip test, BD/McCrary overlays, accountant GMM 2D + marginals.
FIGURES = {
    "Fig. 1 illustrates": (
        FIG_DIR / "fig1_pipeline.png",
        "Fig. 1. Pipeline architecture for automated non-hand-signed signature detection.",
        6.5,
    ),
    "Fig. 2 presents the cosine similarity distributions for intra-class": (
        FIG_DIR / "fig2_intra_inter_kde.png",
        "Fig. 2. Cosine similarity distributions: intra-class vs. inter-class with KDE crossover at 0.837.",
        3.5,
    ),
    "Fig. 3 presents the per-signature cosine and dHash distributions of Firm A": (
        FIG_DIR / "fig3_firm_a_calibration.png",
        "Fig. 3. Firm A per-signature cosine and dHash distributions against the overall CPA population.",
        3.5,
    ),
    "Fig. 4 visualizes the accountant-level clusters": (
        EXTRA_FIG_DIR / "accountant_mixture" / "accountant_mixture_2d.png",
        "Fig. 4. Accountant-level 3-component Gaussian mixture in the (cosine-mean, dHash-mean) plane.",
        4.5,
    ),
    "conducted an ablation study comparing three": (
        FIG_DIR / "fig4_ablation.png",
        "Fig. 5. Ablation study comparing three feature extraction backbones.",
        6.5,
    ),
}

BODY_FONT = "Times New Roman"

# HTML-style comment, non-greedy, spanning newlines.  The dashes are written
# as ``-{2}`` so this file never contains a literal comment opener; the
# previous pattern here was an empty string, presumably because some tooling
# stripped the comment-shaped text out of the source -- TODO confirm.
_HTML_COMMENT_RE = re.compile(r"<!-{2}.*?-{2}>", re.DOTALL)
# Second line of a pipe table, e.g. "|---|:---:|".
_TABLE_SEPARATOR_RE = re.compile(r"\s*\|[-|: ]+\|")
# "1. item" style ordered-list prefix.
_ORDERED_ITEM_RE = re.compile(r"^\d+\.\s")
# Markdown heading of level 1-3: hashes, one space, then the title.
_HEADING_RE = re.compile(r"^(#{1,3}) (.*)$")


def strip_comments(text: str) -> str:
    """Return *text* with every HTML-style comment removed.

    Comments may span several lines; matching is non-greedy so two separate
    comments do not swallow the text between them.
    """
    return _HTML_COMMENT_RE.sub("", text)


def _strip_inline_md(text: str) -> str:
    """Drop inline markdown emphasis/code markers, keeping the enclosed text.

    Order matters: triple asterisks are unwrapped before double, and double
    before single, so nested markers are not left half-stripped.
    """
    text = re.sub(r"\*\*\*(.+?)\*\*\*", r"\1", text)
    text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
    text = re.sub(r"\*(.+?)\*", r"\1", text)
    return re.sub(r"`(.+?)`", r"\1", text)


def _is_separator_row(cells) -> bool:
    """Return True when every cell looks like a table rule (``---``, ``:--:``).

    All cells are inspected (not just the first) so that a data row whose
    first cell happens to be a lone dash is not mistaken for the separator.
    """
    allowed = set("-: ")
    return bool(cells) and all(
        cell and "-" in cell and set(cell) <= allowed for cell in cells
    )


def add_md_table(doc, table_lines):
    """Render a markdown pipe table into *doc* as a "Table Grid" table.

    Args:
        doc: python-docx document the table is appended to.
        table_lines: raw source lines of the table (header, separator, rows).

    Nothing is added when fewer than two non-separator rows are present.
    The column count is taken from the first row; extra cells in later rows
    are ignored and short rows leave their trailing cells empty.  The first
    row is rendered bold.  An empty paragraph is appended after the table.
    """
    rows_data = []
    for line in table_lines:
        # Strip surrounding whitespace *before* the outer pipes; otherwise an
        # indented row or one with trailing blanks keeps its edge pipes and
        # produces spurious empty cells.
        cells = [c.strip() for c in line.strip().strip("|").split("|")]
        if not _is_separator_row(cells):
            rows_data.append(cells)

    if len(rows_data) < 2:
        return

    ncols = len(rows_data[0])
    table = doc.add_table(rows=len(rows_data), cols=ncols)
    table.style = "Table Grid"

    for r_idx, row in enumerate(rows_data):
        is_header = r_idx == 0
        for c_idx, value in enumerate(row[:ncols]):
            cell = table.rows[r_idx].cells[c_idx]
            cell.text = value
            for p in cell.paragraphs:
                p.alignment = WD_ALIGN_PARAGRAPH.CENTER
                for run in p.runs:
                    run.font.size = Pt(8)
                    run.font.name = BODY_FONT
                    if is_header:
                        run.bold = True

    doc.add_paragraph()


def _insert_figures(doc, para_text: str) -> None:
    """Append every figure whose trigger phrase occurs in *para_text*.

    For each matching entry of ``FIGURES`` a centered picture paragraph and a
    centered italic 9 pt caption paragraph are added.  A matching figure
    whose file does not exist is skipped with a warning on stdout.
    """
    for trigger, (fig_path, caption, width) in FIGURES.items():
        if trigger not in para_text:
            continue
        if not Path(fig_path).exists():
            # Mirror main()'s missing-section warning instead of dropping the
            # figure silently.
            print(f"WARNING: missing figure file: {fig_path}")
            continue

        fp = doc.add_paragraph()
        fp.alignment = WD_ALIGN_PARAGRAPH.CENTER
        fp.add_run().add_picture(str(fig_path), width=Inches(width))

        cp = doc.add_paragraph()
        cp.alignment = WD_ALIGN_PARAGRAPH.CENTER
        cr = cp.add_run(caption)
        cr.font.size = Pt(9)
        cr.font.name = BODY_FONT
        cr.italic = True


def _add_list_item(doc, style: str, content: str) -> None:
    """Add one list paragraph in *style* with inline markdown removed."""
    p = doc.add_paragraph(style=style)
    run = p.add_run(_strip_inline_md(content))
    run.font.size = Pt(10)
    run.font.name = BODY_FONT


def process_section(doc, filepath: Path) -> None:
    """Convert one markdown section file and append it to *doc*.

    The file is read as UTF-8, HTML-style comments are removed, and the
    remaining lines are consumed top to bottom as one of:

    * blank line            -> skipped
    * ``#``/``##``/``###``  -> heading level 1/2/3, forced to black
    * pipe table            -> ``add_md_table`` (needs a separator 2nd line)
    * ``1. `` item          -> "List Number" paragraph
    * ``- `` item           -> "List Bullet" paragraph
    * anything else         -> paragraph; following lines are joined with a
                               space until a blank line or another construct

    After each regular paragraph, ``_insert_figures`` is given the cleaned
    paragraph text so figures land directly below their first mention.
    """
    text = strip_comments(filepath.read_text(encoding="utf-8"))
    lines = text.split("\n")
    total = len(lines)

    i = 0
    while i < total:
        stripped = lines[i].strip()

        if not stripped:
            i += 1
            continue

        heading = _HEADING_RE.match(stripped)
        if heading:
            h = doc.add_heading(heading.group(2), level=len(heading.group(1)))
            for run in h.runs:
                run.font.color.rgb = RGBColor(0, 0, 0)
            i += 1
            continue

        if (
            "|" in stripped
            and i + 1 < total
            and _TABLE_SEPARATOR_RE.match(lines[i + 1])
        ):
            table_lines = []
            while i < total and "|" in lines[i]:
                table_lines.append(lines[i])
                i += 1
            add_md_table(doc, table_lines)
            continue

        if _ORDERED_ITEM_RE.match(stripped):
            _add_list_item(
                doc, "List Number", _ORDERED_ITEM_RE.sub("", stripped, count=1)
            )
            i += 1
            continue

        if stripped.startswith("- "):
            _add_list_item(doc, "List Bullet", stripped[2:])
            i += 1
            continue

        # Regular paragraph: gather continuation lines until the next
        # blank line or the start of a heading / table / list item.
        para_lines = [stripped]
        i += 1
        while i < total:
            nxt = lines[i].strip()
            if (
                not nxt
                or nxt.startswith(("#", "|", "- "))
                or _ORDERED_ITEM_RE.match(nxt)
            ):
                break
            para_lines.append(nxt)
            i += 1

        para_text = _strip_inline_md(" ".join(para_lines))
        para_text = para_text.replace("$$", "")
        para_text = para_text.replace("---", "\u2014")

        p = doc.add_paragraph()
        p.paragraph_format.space_after = Pt(6)
        run = p.add_run(para_text)
        run.font.size = Pt(10)
        run.font.name = BODY_FONT

        _insert_figures(doc, para_text)


def _add_centered_run(doc, text: str, space_after):
    """Add a centered paragraph holding *text*; return its run for styling."""
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_after = space_after
    return p.add_run(text)


def main() -> None:
    """Build the Word draft: title page, then every section, then save.

    Section files that do not exist are reported on stdout and skipped; the
    document is still written to ``OUTPUT``.
    """
    doc = Document()
    style = doc.styles["Normal"]
    style.font.name = BODY_FONT
    style.font.size = Pt(10)

    # Title page
    run = _add_centered_run(
        doc,
        "Automated Identification of Non-Hand-Signed Auditor Signatures\n"
        "in Large-Scale Financial Audit Reports:\n"
        "A Dual-Descriptor Framework with Three-Method Convergent Thresholding",
        Pt(12),
    )
    run.font.size = Pt(16)
    run.font.name = BODY_FONT
    run.bold = True

    run = _add_centered_run(
        doc, "[Authors removed for double-blind review]", Pt(6)
    )
    run.font.size = Pt(10)
    run.italic = True

    run = _add_centered_run(
        doc, "Target journal: IEEE Access (Regular Paper)", Pt(20)
    )
    run.font.size = Pt(10)
    run.italic = True

    for section_file in SECTIONS:
        filepath = PAPER_DIR / section_file
        if filepath.exists():
            process_section(doc, filepath)
        else:
            print(f"WARNING: missing section file: {filepath}")

    doc.save(str(OUTPUT))
    print(f"Saved: {OUTPUT}")


if __name__ == "__main__":
    main()