#!/usr/bin/env python3
"""Export Paper A v3 (IEEE Access target) to Word, reading from v3 md section files."""

import hashlib
import re
from pathlib import Path

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Inches, Pt, RGBColor

import matplotlib

matplotlib.use("Agg")  # select the non-interactive Agg backend up front
import matplotlib.pyplot as plt  # noqa: E402

PAPER_DIR = Path("/Volumes/NV2/pdf_recognize/paper")
EQUATION_CACHE_DIR = PAPER_DIR / "equations"
EQUATION_CACHE_DIR.mkdir(exist_ok=True)
FIG_DIR = Path("/Volumes/NV2/PDF-Processing/signature-analysis/paper_figures")
EXTRA_FIG_DIR = Path("/Volumes/NV2/PDF-Processing/signature-analysis/reports")
OUTPUT = PAPER_DIR / "Paper_A_IEEE_Access_Draft_v3.docx"

SECTIONS = [
    "paper_a_abstract_v3.md",
    # paper_a_impact_statement_v3.md removed: not a standard IEEE Access
    # Regular Paper section. Content folded into cover letter / abstract.
    "paper_a_introduction_v3.md",
    "paper_a_related_work_v3.md",
    "paper_a_methodology_v3.md",
    "paper_a_results_v3.md",
    "paper_a_discussion_v3.md",
    "paper_a_conclusion_v3.md",
    # Appendix A: BD/McCrary bin-width sensitivity (see v3.7 notes).
    "paper_a_appendix_v3.md",
    # Declarations (COI / data availability / funding) before References,
    # per IEEE Access convention.
    "paper_a_declarations_v3.md",
    "paper_a_references_v3.md",
]

# Figure insertion hooks (trigger phrase -> (file, caption, width inches)).
# New figures for v3: dip test, BD/McCrary overlays, accountant GMM 2D + marginals.
FIGURES = {
    "Fig. 1 illustrates": (
        FIG_DIR / "fig1_pipeline.png",
        "Fig. 1. Pipeline architecture for automated non-hand-signed signature detection.",
        6.5,
    ),
    "Fig. 2 presents the cosine similarity distributions for intra-class": (
        FIG_DIR / "fig2_intra_inter_kde.png",
        "Fig. 2. Cosine similarity distributions: intra-class vs. inter-class with KDE crossover at 0.837.",
        3.5,
    ),
    "Fig. 3 presents the per-signature cosine and dHash distributions of Firm A": (
        FIG_DIR / "fig3_firm_a_calibration.png",
        "Fig. 3. Firm A per-signature cosine and dHash distributions against the overall CPA population.",
        3.5,
    ),
    "Fig. 4 summarises the per-firm yearly per-signature": (
        EXTRA_FIG_DIR / "figures" / "fig_yearly_big4_comparison.png",
        "Fig. 4. Per-firm yearly per-signature best-match cosine, 2013-2023. (a) Mean per-signature best-match cosine by firm bucket and fiscal year (threshold-free). (b) Share of per-signature best-match cosine ≥ 0.95 (operational cut of Section III-K). Five lines: Firm A, B, C, D, Non-Big-4. Firm A is above the other Big-4 firms in every year; Non-Big-4 is below all four Big-4 firms in every year.",
        6.5,
    ),
    "conducted an ablation study comparing three": (
        FIG_DIR / "fig4_ablation.png",
        "Fig. 5. Ablation study comparing three feature extraction backbones.",
        6.5,
    ),
}


def strip_comments(text):
    """Remove HTML comments, but UNWRAP comments whose first non-blank line
    starts with `TABLE ` (or `TABLE\\t`).

    The v3 markdown sources wrap every numerical table in an HTML comment of
    the form

        <!-- TABLE V: Hartigan Dip Test Results
        | ... markdown table rows ... |
        -->

    The caption (`TABLE V: Hartigan Dip Test Results`) is on the same line as
    the opening `<!--`, and `-->` closes the block.

    The previous implementation wholesale-deleted these comments, which
    silently dropped every table from the rendered DOCX. We now
    (i) detect comments whose first non-empty line starts with `TABLE `,
    (ii) emit a synthetic caption marker line `__TABLE_CAPTION__:<caption>`
    so process_section can render the caption as a centered bold paragraph
    above the table, and
    (iii) keep the table body so the existing markdown-table detector picks
    it up.

    Non-TABLE comments (figure placeholders, editorial notes) are stripped as
    before.

    Args:
        text: Raw markdown text of one section file.

    Returns:
        The text with every HTML comment either removed or replaced by its
        caption marker followed by the comment's remaining lines.
    """

    def _replace(match):
        body_lines = match.group(1).splitlines()
        # Index of the first non-blank line; a comment without one is dropped.
        first_idx = next(
            (idx for idx, line in enumerate(body_lines) if line.strip()),
            None,
        )
        if first_idx is None:
            return ""
        caption = body_lines[first_idx].strip()
        if not caption.startswith(("TABLE ", "TABLE\t")):
            return ""
        rest = "\n".join(body_lines[first_idx + 1:])
        # Emit caption marker + body. Surround with blank lines so the
        # paragraph/table detector treats the marker as its own paragraph.
        return f"\n\n__TABLE_CAPTION__:{caption}\n{rest}\n"

    # Non-greedy match across lines; group 1 is the comment body that
    # `_replace` inspects.
    return re.sub(r"<!--(.*?)-->", _replace, text, flags=re.DOTALL)


# ---------------------------------------------------------------------------
# LaTeX → plain text + Unicode conversion
# ---------------------------------------------------------------------------
# The v3 markdown sources contain inline LaTeX ($...$) and a small number of
# display-math blocks ($$...$$). Pandoc would render these natively; the
# python-docx pipeline used here does not, so without preprocessing every
# `\leq`, `\text{dHash}_\text{indep}`, `\Delta\text{BIC}`, `60{,}448`, etc.
# leaks into the DOCX as raw LaTeX. The helpers below convert the common
# inline cases to Unicode and split subscripts/superscripts into proper Word
# runs. Display-math (rare; 3 equations in this paper) gets a best-effort
# linearisation and is acceptable for a partner-handoff DOCX; final IEEE
# typesetting is handled by the publisher's LaTeX/MathType pipeline.
# (regex pattern, replacement) pairs applied in order by `latex_to_unicode`.
# The `(?![A-Za-z])` lookahead stops a command from matching as the prefix of
# a longer one (e.g. `\to` inside `\top`).
LATEX_TOKEN_REPLACEMENTS = [
    # Greek letters (lower)
    (r"\\alpha(?![A-Za-z])", "α"),
    (r"\\beta(?![A-Za-z])", "β"),
    (r"\\gamma(?![A-Za-z])", "γ"),
    (r"\\delta(?![A-Za-z])", "δ"),
    (r"\\epsilon(?![A-Za-z])", "ε"),
    (r"\\zeta(?![A-Za-z])", "ζ"),
    (r"\\eta(?![A-Za-z])", "η"),
    (r"\\theta(?![A-Za-z])", "θ"),
    (r"\\iota(?![A-Za-z])", "ι"),
    (r"\\kappa(?![A-Za-z])", "κ"),
    (r"\\lambda(?![A-Za-z])", "λ"),
    (r"\\mu(?![A-Za-z])", "μ"),
    (r"\\nu(?![A-Za-z])", "ν"),
    (r"\\xi(?![A-Za-z])", "ξ"),
    (r"\\pi(?![A-Za-z])", "π"),
    (r"\\rho(?![A-Za-z])", "ρ"),
    (r"\\sigma(?![A-Za-z])", "σ"),
    (r"\\tau(?![A-Za-z])", "τ"),
    (r"\\phi(?![A-Za-z])", "φ"),
    (r"\\chi(?![A-Za-z])", "χ"),
    (r"\\psi(?![A-Za-z])", "ψ"),
    (r"\\omega(?![A-Za-z])", "ω"),
    # Greek letters (upper, only those distinguishable from Latin)
    (r"\\Gamma(?![A-Za-z])", "Γ"),
    (r"\\Delta(?![A-Za-z])", "Δ"),
    (r"\\Theta(?![A-Za-z])", "Θ"),
    (r"\\Lambda(?![A-Za-z])", "Λ"),
    (r"\\Xi(?![A-Za-z])", "Ξ"),
    (r"\\Pi(?![A-Za-z])", "Π"),
    (r"\\Sigma(?![A-Za-z])", "Σ"),
    (r"\\Phi(?![A-Za-z])", "Φ"),
    (r"\\Psi(?![A-Za-z])", "Ψ"),
    (r"\\Omega(?![A-Za-z])", "Ω"),
    # Relations / arrows
    (r"\\leq(?![A-Za-z])", "≤"),
    (r"\\geq(?![A-Za-z])", "≥"),
    (r"\\neq(?![A-Za-z])", "≠"),
    (r"\\approx(?![A-Za-z])", "≈"),
    (r"\\equiv(?![A-Za-z])", "≡"),
    (r"\\sim(?![A-Za-z])", "~"),
    (r"\\to(?![A-Za-z])", "→"),
    (r"\\rightarrow(?![A-Za-z])", "→"),
    (r"\\leftarrow(?![A-Za-z])", "←"),
    (r"\\Rightarrow(?![A-Za-z])", "⇒"),
    (r"\\Leftarrow(?![A-Za-z])", "⇐"),
    # Binary operators
    (r"\\times(?![A-Za-z])", "×"),
    (r"\\cdot(?![A-Za-z])", "·"),
    (r"\\pm(?![A-Za-z])", "±"),
    (r"\\mp(?![A-Za-z])", "∓"),
    (r"\\div(?![A-Za-z])", "÷"),
    # Misc
    (r"\\infty(?![A-Za-z])", "∞"),
    (r"\\partial(?![A-Za-z])", "∂"),
    (r"\\sum(?![A-Za-z])", "∑"),
    (r"\\prod(?![A-Za-z])", "∏"),
    (r"\\int(?![A-Za-z])", "∫"),
    (r"\\ldots(?![A-Za-z])", "…"),
    (r"\\dots(?![A-Za-z])", "…"),
    # Spacing commands (drop or replace with single space)
    (r"\\,", " "),
    (r"\\;", " "),
    (r"\\:", " "),
    (r"\\!", ""),
    (r"\\ ", " "),
    (r"\\quad(?![A-Za-z])", " "),
    (r"\\qquad(?![A-Za-z])", " "),
    # Escaped punctuation
    (r"\\%", "%"),
    (r"\\#", "#"),
    (r"\\&", "&"),
    (r"\\\$", "$"),
    (r"\\_", "_"),
]


def _unwrap_command(text, cmd):
    """Repeatedly replace `\\cmd{X}` → `X` until stable.

    Only brace groups without nested braces match, so nested commands are
    unwrapped innermost-first over successive passes.
    """
    pat = re.compile(r"\\" + re.escape(cmd) + r"\{([^{}]*)\}")
    prev = None
    while prev != text:
        prev = text
        text = pat.sub(r"\1", text)
    return text


# Sentinels bracketing math regions. Written as escape sequences so the
# literals cannot be lost by tools that drop Private Use Area glyphs; an empty
# sentinel would make `add_text_with_subsup` skip characters.
MATH_START = "\ue000"  # Private Use Area: XML-safe
MATH_END = "\ue001"


def latex_to_unicode(text):
    """Convert a LaTeX-laced markdown paragraph into plain text.

    Math context is preserved with private-use sentinel characters
    (MATH_START / MATH_END) so the downstream run-splitter only treats
    `_X` / `^X` as subscript / superscript inside math regions; in body
    text underscores in identifiers like `signature_analysis` survive.

    Args:
        text: One paragraph (or table cell / heading) of markdown source.

    Returns:
        The text with LaTeX commands replaced by Unicode or removed, and
        each `$...$` / `$$...$$` region wrapped in MATH_START / MATH_END.
        Text containing neither `$` nor a backslash is returned unchanged.
    """
    if "$" not in text and "\\" not in text:
        return text

    def _wrap(match):
        return MATH_START + match.group(1) + MATH_END

    # 1. Strip display-math delimiters first (keep the inner content for
    #    best-effort linearisation), wrapping math regions with sentinels.
    #    Then strip inline math delimiters with the same sentinel wrapping.
    #    The `(?<!\\)` lookbehind keeps an escaped `\$` (a literal dollar
    #    sign, converted in step 2) from being taken as a math delimiter.
    text = re.sub(r"(?<!\\)\$\$([\s\S]+?)(?<!\\)\$\$", _wrap, text)
    text = re.sub(r"(?<!\\)\$([^$]+?)(?<!\\)\$", _wrap, text)

    # 2. Replace token-level commands with Unicode glyphs *before* unwrapping
    #    `\text{...}` and friends, so that `\Delta\text{BIC}` becomes
    #    `Δ\text{BIC}` (then `ΔBIC`) rather than `\DeltaBIC` which would be
    #    stripped wholesale by the cleanup pass.
    for pat, repl in LATEX_TOKEN_REPLACEMENTS:
        text = re.sub(pat, repl, text)

    # 3. Unwrap formatting / text commands (innermost first via _unwrap loop).
    for cmd in ("text", "mathbf", "mathit", "mathrm", "mathsf", "mathtt",
                "operatorname", "emph", "textbf", "textit"):
        text = _unwrap_command(text, cmd)

    # 4. \frac{a}{b} → (a)/(b); \sqrt{x} → √(x). Apply repeatedly to handle
    #    one level of nesting; deeper nesting is rare in this paper.
    for _ in range(3):
        text = re.sub(
            r"\\t?frac\{([^{}]+)\}\{([^{}]+)\}",
            r"(\1)/(\2)",
            text,
        )
        text = re.sub(r"\\sqrt\{([^{}]+)\}", r"√(\1)", text)

    # 5. TeX braces used purely for spacing/grouping: K{=}3 → K=3,
    #    60{,}448 → 60,448, 10{,}175 → 10,175.
    text = re.sub(r"\{([=<>+\-,])\}", r"\1", text)

    # 6. Strip any remaining `\cmd{...}` (best effort) and `\cmd ` tokens.
    text = re.sub(r"\\[a-zA-Z]+\{([^{}]*)\}", r"\1", text)
    text = re.sub(r"\\[a-zA-Z]+(?![A-Za-z])", "", text)

    # 7. Collapse runs of whitespace introduced by command stripping.
    text = re.sub(r"[ \t]{2,}", " ", text)
    return text


_SUBSUP_PATTERN = re.compile(
    r"_\{([^{}]*)\}"            # _{...}
    r"|\^\{([^{}]*)\}"          # ^{...}
    r"|_([A-Za-z0-9+\-])"       # _X  (single token)
    r"|\^([A-Za-z0-9+\-])"      # ^X  (single token)
)


def _emit_plain(paragraph, text, font_name, font_size, bold, italic):
    """Append `text` to `paragraph` as one formatted run; no-op when empty."""
    if not text:
        return
    run = paragraph.add_run(text)
    run.font.name = font_name
    run.font.size = font_size
    run.bold = bold
    run.italic = italic


def _emit_math(paragraph, text, font_name, font_size, bold, italic):
    """Emit `text` from a math region: split on `_X` / `_{X}` / `^X` / `^{X}`
    and render those as Word subscripts / superscripts."""
    if "_" not in text and "^" not in text:
        _emit_plain(paragraph, text, font_name, font_size, bold, italic)
        return
    pos = 0
    for m in _SUBSUP_PATTERN.finditer(text):
        if m.start() > pos:
            _emit_plain(paragraph, text[pos:m.start()],
                        font_name, font_size, bold, italic)
        braced_sub, braced_sup, bare_sub, bare_sup = m.groups()
        # Compare against None rather than truthiness: an empty `_{}` matches
        # with "" and must stay on the subscript branch.
        sub_text = braced_sub if braced_sub is not None else bare_sub
        if sub_text is not None:
            run = paragraph.add_run(sub_text)
            run.font.subscript = True
        else:
            run = paragraph.add_run(
                braced_sup if braced_sup is not None else bare_sup)
            run.font.superscript = True
        run.font.name = font_name
        run.font.size = font_size
        run.bold = bold
        run.italic = italic
        pos = m.end()
    if pos < len(text):
        _emit_plain(paragraph, text[pos:], font_name, font_size, bold, italic)


def add_text_with_subsup(paragraph, text, font_name="Times New Roman",
                         font_size=Pt(10), bold=False, italic=False):
    """Add `text` to `paragraph`.

    Subscript/superscript handling is scoped to math regions delimited by
    MATH_START / MATH_END sentinels (set up by `latex_to_unicode`). Outside
    math regions, underscores and carets are preserved literally so
    identifiers like `signature_analysis` and `paper_a_results_v3.md`
    survive intact.

    Args:
        paragraph: Target paragraph; runs are appended via `add_run`.
        text: Text to emit, optionally containing sentinel-wrapped math.
        font_name: Font name applied to every emitted run.
        font_size: Font size applied to every emitted run.
        bold: Bold flag applied to every emitted run.
        italic: Italic flag applied to every emitted run.
    """
    if MATH_START not in text:
        _emit_plain(paragraph, text, font_name, font_size, bold, italic)
        return

    pos = 0
    while pos < len(text):
        start = text.find(MATH_START, pos)
        if start == -1:
            _emit_plain(paragraph, text[pos:], font_name, font_size, bold, italic)
            break
        if start > pos:
            _emit_plain(paragraph, text[pos:start],
                        font_name, font_size, bold, italic)
        body_start = start + len(MATH_START)
        end = text.find(MATH_END, body_start)
        if end == -1:
            # Unterminated math region — emit rest as plain.
            _emit_plain(paragraph, text[body_start:],
                        font_name, font_size, bold, italic)
            break
        _emit_math(paragraph, text[body_start:end],
                   font_name, font_size, bold, italic)
        pos = end + len(MATH_END)


# ---------------------------------------------------------------------------
# Display-equation rendering (matplotlib mathtext → PNG → embedded image)
# ---------------------------------------------------------------------------
# matplotlib mathtext is a subset of LaTeX. A few common TeX-only macros need
# to be substituted with mathtext-supported equivalents before parsing.
_MATHTEXT_SUBS = [
    (re.compile(r"\\tfrac\b"), r"\\frac"),  # text-frac → frac
    (re.compile(r"\\dfrac\b"), r"\\frac"),  # display-frac → frac
    (re.compile(r"\\operatorname\{([^{}]+)\}"),
     lambda m: r"\mathrm{" + m.group(1) + "}"),  # operatorname → mathrm
    (re.compile(r"\\,"), " "),  # thin space
    (re.compile(r"\\;"), " "),
    (re.compile(r"\\!"), ""),
]

# Markdown emphasis / code markers, removed longest-first so `***x***` is not
# half-consumed by the `**` or `*` patterns.
_MD_EMPHASIS_PATTERNS = tuple(
    re.compile(p)
    for p in (r"\*\*\*(.+?)\*\*\*", r"\*\*(.+?)\*\*", r"\*(.+?)\*", r"`(.+?)`")
)

# One cell of a markdown table separator row, e.g. `---`, `:--`, `:-:`.
_TABLE_SEPARATOR_CELL = re.compile(r":?-+:?")

_NUMBERED_ITEM = re.compile(r"^(\d+)\.\s+(.*)$")


def _sanitise_for_mathtext(latex: str) -> str:
    """Apply every `_MATHTEXT_SUBS` substitution to `latex`, in order."""
    out = latex
    for pat, repl in _MATHTEXT_SUBS:
        out = pat.sub(repl, out)
    return out


def render_equation_png(latex: str, fontsize: int = 14) -> Path:
    """Render a LaTeX math expression to a tightly-cropped PNG using
    matplotlib mathtext, with content-addressed caching so a re-build only
    re-renders changed equations. Returns the cached PNG path.

    The cache key is a SHA-1 digest of the sanitised expression and the font
    size. If rendering raises, the figure is still closed, any partially
    written PNG is removed so it cannot be served from cache later, and the
    exception propagates to the caller.
    """
    sanitised = _sanitise_for_mathtext(latex.strip())
    digest = hashlib.sha1(
        (sanitised + f"|fs{fontsize}").encode("utf-8")).hexdigest()[:16]
    out_path = EQUATION_CACHE_DIR / f"eq_{digest}.png"
    if out_path.exists():
        return out_path

    fig = plt.figure(figsize=(8, 1.6))
    try:
        fig.text(0.5, 0.5, f"${sanitised}$",
                 fontsize=fontsize, ha="center", va="center")
        fig.savefig(str(out_path), dpi=220, bbox_inches="tight",
                    pad_inches=0.05)
    except Exception:
        if out_path.exists():
            out_path.unlink()
        raise
    finally:
        plt.close(fig)
    return out_path


def add_equation_block(doc, latex: str, equation_number: int,
                       width_inches: float = 4.5):
    """Insert a centered display equation (rendered as PNG) followed by a
    right-aligned equation number `(N)`. Width keeps the equation visually
    proportional within the IEEE Access body column."""
    img_path = render_equation_png(latex)
    p = doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_before = Pt(6)
    p.paragraph_format.space_after = Pt(6)
    run = p.add_run()
    run.add_picture(str(img_path), width=Inches(width_inches))
    # Equation number on the same paragraph, tab-aligned to the right.
    num_run = p.add_run(f"\t({equation_number})")
    num_run.font.name = "Times New Roman"
    num_run.font.size = Pt(10)


def _strip_md_emphasis(text):
    """Remove `***`, `**`, `*` and backtick markers, keeping the inner text."""
    for pat in _MD_EMPHASIS_PATTERNS:
        text = pat.sub(r"\1", text)
    return text


def _parse_table_rows(table_lines):
    """Split markdown table lines into lists of cell strings.

    Surrounding whitespace is removed before the outer pipes, so lines with
    leading or trailing spaces do not produce phantom empty columns. A line
    is skipped as the header separator only when every cell looks like
    `---` / `:-:`; a data row whose first cell is a lone `-` is kept.
    """
    rows = []
    for line in table_lines:
        cells = [c.strip() for c in line.strip().strip("|").split("|")]
        if all(_TABLE_SEPARATOR_CELL.fullmatch(c) for c in cells):
            continue
        rows.append(cells)
    return rows


def add_md_table(doc, table_lines):
    """Render a markdown table as a "Table Grid" Word table.

    The first parsed row fixes the column count and is rendered bold; cells
    beyond that count in later rows are ignored. Nothing is added when fewer
    than two rows remain after dropping the separator row. An empty paragraph
    is appended after the table.
    """
    rows_data = _parse_table_rows(table_lines)
    if len(rows_data) < 2:
        return
    ncols = len(rows_data[0])
    table = doc.add_table(rows=len(rows_data), cols=ncols)
    table.style = "Table Grid"
    for r_idx, row in enumerate(rows_data):
        for c_idx, raw in enumerate(row[:ncols]):
            cell = table.rows[r_idx].cells[c_idx]
            # Strip markdown emphasis markers; convert LaTeX before rendering.
            cell_text = latex_to_unicode(_strip_md_emphasis(raw))
            # Replace the default empty paragraph with one we control.
            cell.text = ""
            cp = cell.paragraphs[0]
            cp.alignment = WD_ALIGN_PARAGRAPH.CENTER
            add_text_with_subsup(
                cp, cell_text,
                font_name="Times New Roman", font_size=Pt(8),
                bold=(r_idx == 0),
            )
    doc.add_paragraph()


def _insert_figures(doc, para_text):
    """Append every FIGURES entry whose trigger phrase occurs in `para_text`
    and whose image file exists: a centered picture, then an italic caption."""
    for trigger, (fig_path, caption, width) in FIGURES.items():
        if trigger not in para_text or not Path(fig_path).exists():
            continue
        fp = doc.add_paragraph()
        fp.alignment = WD_ALIGN_PARAGRAPH.CENTER
        fr = fp.add_run()
        fr.add_picture(str(fig_path), width=Inches(width))
        cp = doc.add_paragraph()
        cp.alignment = WD_ALIGN_PARAGRAPH.CENTER
        cr = cp.add_run(caption)
        cr.font.size = Pt(9)
        cr.font.name = "Times New Roman"
        cr.italic = True


def _strip_blockquote_marker(line):
    """Drop a leading markdown blockquote marker (`>` or `> body`)."""
    s = line.lstrip()
    if s == ">":
        return ""
    if s.startswith("> "):
        return s[1:].lstrip()
    return line


def _collect_display_math(lines, start):
    """Collect the `$$` block that opens on `lines[start]`.

    Returns `(latex, last_index)`: the text between the delimiters with
    surrounding whitespace and trailing commas removed, and the index of the
    last line consumed. A second `$$` on the opening line closes the block
    there even when punctuation follows it (`$$ ... $$,`); otherwise lines
    are accumulated up to and including the next one containing `$$`.
    """
    first = lines[start].strip()
    buf = [first]
    last = start
    if first.count("$$") < 2:
        while last + 1 < len(lines):
            last += 1
            buf.append(lines[last])
            if "$$" in lines[last]:
                break
    joined = "\n".join(buf).strip()
    closing = joined.rfind("$$")
    # closing == 0 means only the opening delimiter exists (unterminated).
    inner = joined[2:closing] if closing > 0 else joined[2:]
    return inner.strip().rstrip(", "), last


def _starts_new_block(stripped_line):
    """True when a line cannot continue the paragraph being accumulated."""
    return (
        stripped_line.startswith(("#", "|", "- ", "$$", "__TABLE_CAPTION__:"))
        or re.match(r"^\d+\.\s", stripped_line) is not None
    )


def _add_list_item(doc, marker, content):
    """Add one hanging-indent list paragraph reading `<marker> <content>`."""
    p = doc.add_paragraph()
    p.paragraph_format.left_indent = Inches(0.4)
    p.paragraph_format.first_line_indent = Inches(-0.25)
    p.paragraph_format.space_after = Pt(4)
    content = latex_to_unicode(_strip_md_emphasis(content))
    add_text_with_subsup(p, f"{marker} {content}")


def process_section(doc, filepath, equation_counter=None):
    """Process one v3 markdown section.

    `equation_counter` is a single-element list (used as a mutable counter
    shared across sections) tracking the running display-equation number.

    The file is read as UTF-8, passed through `strip_comments`, and walked
    line by line. Recognised constructs, in order of precedence: `#`/`##`/
    `###` headings, `__TABLE_CAPTION__:` markers, pipe tables, `$$` display
    equations, numbered items, `- ` bullets, and finally plain paragraphs
    (followed by any figures whose trigger phrase they contain).
    """
    if equation_counter is None:
        equation_counter = [0]

    text = strip_comments(filepath.read_text(encoding="utf-8"))
    # Defensive blockquote handling: markdown blockquote lines (`> body`) are
    # not rendered as Word callout blocks here, but stripping the leading
    # `> ` keeps the body text from leaking the literal `>` and the empty
    # `>` separator lines into the DOCX.
    lines = [_strip_blockquote_marker(ln) for ln in text.split("\n")]

    i = 0
    while i < len(lines):
        stripped = lines[i].strip()

        if not stripped:
            i += 1
            continue

        level = next(
            (n for n in (1, 2, 3) if stripped.startswith("#" * n + " ")),
            None,
        )
        if level is not None:
            title = latex_to_unicode(stripped[level + 1:])
            title = title.replace(MATH_START, "").replace(MATH_END, "")
            heading = doc.add_heading(title, level=level)
            for run in heading.runs:
                run.font.color.rgb = RGBColor(0, 0, 0)
            i += 1
            continue

        if stripped.startswith("__TABLE_CAPTION__:"):
            caption_text = stripped[len("__TABLE_CAPTION__:"):].strip()
            caption_text = latex_to_unicode(caption_text)
            cp = doc.add_paragraph()
            cp.alignment = WD_ALIGN_PARAGRAPH.CENTER
            cp.paragraph_format.space_before = Pt(6)
            cp.paragraph_format.space_after = Pt(2)
            add_text_with_subsup(
                cp, caption_text,
                font_name="Times New Roman", font_size=Pt(9), bold=True,
            )
            i += 1
            continue

        if ("|" in stripped and i + 1 < len(lines)
                and re.match(r"\s*\|[-|: ]+\|", lines[i + 1])):
            table_lines = []
            while i < len(lines) and "|" in lines[i]:
                table_lines.append(lines[i])
                i += 1
            add_md_table(doc, table_lines)
            continue

        # Display math: a line starting with `$$` opens an equation block
        # that is rendered as an embedded mathtext PNG with an
        # auto-incrementing equation number.
        if stripped.startswith("$$"):
            inner, i = _collect_display_math(lines, i)
            equation_counter[0] += 1
            try:
                add_equation_block(doc, inner, equation_counter[0])
            except Exception as exc:
                # Fallback: render as plain centered Times-Roman line so the
                # build doesn't fail on a single un-renderable equation.
                p = doc.add_paragraph()
                p.alignment = WD_ALIGN_PARAGRAPH.CENTER
                run = p.add_run(f"[equation render failed: {exc}] {inner}")
                run.font.name = "Times New Roman"
                run.font.size = Pt(10)
                run.italic = True
            i += 1
            continue

        numbered = _NUMBERED_ITEM.match(stripped)
        if numbered:
            # Manual numbering: keep the number from the markdown source and
            # apply a hanging-indent paragraph format. Avoids python-docx's
            # `style='List Number'` which depends on a properly-set-up
            # numbering definition that the default Document() lacks.
            _add_list_item(doc, f"{numbered.group(1)}.", numbered.group(2))
            i += 1
            continue

        if stripped.startswith("- "):
            # Manual bullets with hanging indent (same rationale as numbered).
            _add_list_item(doc, "•", stripped[2:])
            i += 1
            continue

        # Regular paragraph: join consecutive lines until a blank line or the
        # start of another construct.
        para_lines = [stripped]
        i += 1
        while i < len(lines):
            nxt = lines[i].strip()
            if not nxt or _starts_new_block(nxt):
                break
            para_lines.append(nxt)
            i += 1

        para_text = _strip_md_emphasis(" ".join(para_lines))
        para_text = para_text.replace("---", "\u2014")
        para_text = latex_to_unicode(para_text)

        p = doc.add_paragraph()
        p.paragraph_format.space_after = Pt(6)
        add_text_with_subsup(p, para_text)

        _insert_figures(doc, para_text)


def main():
    """Build the DOCX: title page, every section in SECTIONS order, save to
    OUTPUT, then run the linter on the result."""
    doc = Document()
    style = doc.styles["Normal"]
    style.font.name = "Times New Roman"
    style.font.size = Pt(10)

    def add_title_line(text, size, space_after, *,
                       bold=False, italic=False, font_name=None):
        # Centered title-page paragraph; run attributes are only set when
        # requested so unset ones keep inheriting from the style.
        p = doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        p.paragraph_format.space_after = Pt(space_after)
        run = p.add_run(text)
        run.font.size = Pt(size)
        if font_name is not None:
            run.font.name = font_name
        if bold:
            run.bold = True
        if italic:
            run.italic = True

    # Title page
    add_title_line(
        "Automated Identification of Non-Hand-Signed Auditor Signatures\n"
        "in Large-Scale Financial Audit Reports:\n"
        "A Dual-Descriptor Framework with Replication-Dominated Calibration",
        16, 12, bold=True, font_name="Times New Roman",
    )

    # IEEE Access uses single-anonymized review: author / affiliation
    # / corresponding-author block must appear on the title page in the
    # final submission. Fill these placeholders with real metadata
    # before submitting the generated DOCX.
    add_title_line("[AUTHOR NAMES — fill in before submission]", 11, 6)
    add_title_line(
        "[Affiliations and corresponding-author email — fill in before submission]",
        10, 6, italic=True,
    )
    add_title_line(
        "Target journal: IEEE Access (Regular Paper, single-anonymized review)",
        10, 20, italic=True,
    )

    equation_counter = [0]
    for section_file in SECTIONS:
        filepath = PAPER_DIR / section_file
        if filepath.exists():
            process_section(doc, filepath, equation_counter=equation_counter)
        else:
            print(f"WARNING: missing section file: {filepath}")

    doc.save(str(OUTPUT))
    print(f"Saved: {OUTPUT}")

    _run_linter()


def _run_linter():
    """Run the leak linter on the freshly built DOCX. Non-fatal: prints a
    summary line. For full output run `python3 paper/lint_paper_v3.py`.

    Both a failing import of `lint_paper_v3` and an exception raised by
    `lint_docx` are reported on stdout and otherwise ignored, since the DOCX
    has already been saved by the time this runs.
    """
    try:
        import lint_paper_v3  # local module
    except Exception as exc:  # pragma: no cover
        print(f"(lint skipped: {exc})")
        return
    try:
        findings = lint_paper_v3.lint_docx(OUTPUT)
    except Exception as exc:  # pragma: no cover
        print(f"(lint failed: {exc})")
        return

    counts = {"ERROR": 0, "WARN": 0, "INFO": 0}
    for finding in findings:
        if finding.severity in counts:
            counts[finding.severity] += 1
    errors, warns, infos = counts["ERROR"], counts["WARN"], counts["INFO"]

    if errors:
        print(f"\n[lint] {errors} ERROR finding(s) in DOCX — run "
              f"`python3 paper/lint_paper_v3.py --docx` for details.")
    elif warns or infos:
        print(f"[lint] DOCX clean of ERRORs ({warns} WARN, {infos} INFO).")
    else:
        print("[lint] DOCX clean.")


if __name__ == "__main__":
    main()