#!/usr/bin/env python3
"""
Renumber all in-text citations to sequential order by first appearance.

Also rewrites references.md with the final numbering.
"""

import re
from pathlib import Path

PAPER_DIR = Path("/Volumes/NV2/pdf_recognize/paper")

# === FINAL NUMBERING (by order of first appearance in paper) ===
# Format: new_number: (short_key, full_citation)
FINAL_REFS = {
    1: ("cpa_act", 'Taiwan Certified Public Accountant Act (會計師法), Art. 4; FSC Attestation Regulations (查核簽證核准準則), Art. 6. Available: https://law.moj.gov.tw/ENG/LawClass/LawAll.aspx?pcode=G0400067'),
    2: ("yen2013", 'S.-H. Yen, Y.-S. Chang, and H.-L. Chen, "Does the signature of a CPA matter? Evidence from Taiwan," *Res. Account. Regul.*, vol. 25, no. 2, pp. 230–235, 2013.'),
    3: ("bromley1993", 'J. Bromley et al., "Signature verification using a Siamese time delay neural network," in *Proc. NeurIPS*, 1993.'),
    4: ("dey2017", 'S. Dey et al., "SigNet: Convolutional Siamese network for writer independent offline signature verification," arXiv:1707.02131, 2017.'),
    5: ("hadjadj2020", 'I. Hadjadj et al., "An offline signature verification method based on a single known sample and an explainable deep learning approach," *Appl. Sci.*, vol. 10, no. 11, p. 3716, 2020.'),
    6: ("li2024", 'H. Li et al., "TransOSV: Offline signature verification with transformers," *Pattern Recognit.*, vol. 145, p. 109882, 2024.'),
    7: ("tehsin2024", 'S. Tehsin et al., "Enhancing signature verification using triplet Siamese similarity networks in digital documents," *Mathematics*, vol. 12, no. 17, p. 2757, 2024.'),
    8: ("brimoh2024", 'P. Brimoh and C. C. Olisah, "Consensus-threshold criterion for offline signature verification using CNN learned representations," arXiv:2401.03085, 2024.'),
    9: ("woodruff2021", 'N. Woodruff et al., "Fully-automatic pipeline for document signature analysis to detect money laundering activities," arXiv:2107.14091, 2021.'),
    10: ("abramova2016", 'S. Abramova and R. Bohme, "Detecting copy-move forgeries in scanned text documents," in *Proc. Electronic Imaging*, 2016.'),
    11: ("cmfd_survey", 'Y. Li et al., "Copy-move forgery detection in digital image forensics: A survey," *Multimedia Tools Appl.*, 2024.'),
    12: ("jakhar2025", 'Y. Jakhar and M. D. Borah, "Effective near-duplicate image detection using perceptual hashing and deep learning," *Inf. Process. Manage.*, p. 104086, 2025.'),
    13: ("pizzi2022", 'E. Pizzi et al., "A self-supervised descriptor for image copy detection," in *Proc. CVPR*, 2022.'),
    14: ("hafemann2017", 'L. G. Hafemann, R. Sabourin, and L. S. Oliveira, "Learning features for offline handwritten signature verification using deep convolutional neural networks," *Pattern Recognit.*, vol. 70, pp. 163–176, 2017.'),
    15: ("zois2024", 'E. N. Zois, D. Tsourounis, and D. Kalivas, "Similarity distance learning on SPD manifold for writer independent offline signature verification," *IEEE Trans. Inf. Forensics Security*, vol. 19, pp. 1342–1356, 2024.'),
    16: ("hafemann2019", 'L. G. Hafemann, R. Sabourin, and L. S. Oliveira, "Meta-learning for fast classifier adaptation to new users of signature verification systems," *IEEE Trans. Inf. Forensics Security*, vol. 15, pp. 1735–1745, 2019.'),
    17: ("farid2009", 'H. Farid, "Image forgery detection," *IEEE Signal Process. Mag.*, vol. 26, no. 2, pp. 16–25, 2009.'),
    18: ("mehrjardi2023", 'F. Z. Mehrjardi, A. M. Latif, M. S. Zarchi, and R. Sheikhpour, "A survey on deep learning-based image forgery detection," *Pattern Recognit.*, vol. 144, art. no. 109778, 2023.'),
    19: ("phash_survey", 'J. Luo et al., "A survey of perceptual hashing for multimedia," *ACM Trans. Multimedia Comput. Commun. Appl.*, vol. 21, no. 7, 2025.'),
    20: ("engin2020", 'D. Engin et al., "Offline signature verification on real-world documents," in *Proc. CVPRW*, 2020.'),
    21: ("tsourounis2022", 'D. Tsourounis et al., "From text to signatures: Knowledge transfer for efficient deep feature learning in offline signature verification," *Expert Syst. Appl.*, 2022.'),
    22: ("chamakh2025", 'B. Chamakh and O. Bounouh, "A unified ResNet18-based approach for offline signature classification and verification," *Procedia Comput. Sci.*, vol. 270, 2025.'),
    23: ("babenko2014", 'A. Babenko, A. Slesarev, A. Chigorin, and V. Lempitsky, "Neural codes for image retrieval," in *Proc. ECCV*, 2014, pp. 584–599.'),
    24: ("qwen2025", 'Qwen2.5-VL Technical Report, Alibaba Group, 2025.'),
    25: ("yolov11", 'Ultralytics, "YOLOv11 documentation," 2024. [Online]. Available: https://docs.ultralytics.com/'),
    26: ("he2016", 'K. He, X. Zhang, S. Ren, and J. Sun, "Deep residual learning for image recognition," in *Proc. CVPR*, 2016.'),
    27: ("krawetz2013", 'N. Krawetz, "Kind of like that," The Hacker Factor Blog, 2013. [Online]. Available: https://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html'),
    28: ("silverman1986", 'B. W. Silverman, *Density Estimation for Statistics and Data Analysis*. London: Chapman & Hall, 1986.'),
    29: ("cohen1988", 'J. Cohen, *Statistical Power Analysis for the Behavioral Sciences*, 2nd ed. Hillsdale, NJ: Lawrence Erlbaum, 1988.'),
    30: ("wang2004", 'Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image quality assessment: From error visibility to structural similarity," *IEEE Trans. Image Process.*, vol. 13, no. 4, pp. 600–612, 2004.'),
    31: ("carcello2013", 'J. V. Carcello and C. Li, "Costs and benefits of requiring an engagement partner signature: Recent experience in the United Kingdom," *The Accounting Review*, vol. 88, no. 5, pp. 1511–1546, 2013.'),
    32: ("blay2014", 'A. D. Blay, M. Notbohm, C. Schelleman, and A. Valencia, "Audit quality effects of an individual audit engagement partner signature mandate," *Int. J. Auditing*, vol. 18, no. 3, pp. 172–192, 2014.'),
    33: ("chi2009", 'W. Chi, H. Huang, Y. Liao, and H. Xie, "Mandatory audit partner rotation, audit quality, and market perception: Evidence from Taiwan," *Contemp. Account. Res.*, vol. 26, no. 2, pp. 359–391, 2009.'),
    34: ("redmon2016", 'J. Redmon, S. Divvala, R. Girshick, and A. Farhadi, "You only look once: Unified, real-time object detection," in *Proc. CVPR*, 2016, pp. 779–788.'),
    35: ("vlm_survey", 'J. Zhang, J. Huang, S. Jin, and S. Lu, "Vision-language models for vision tasks: A survey," *IEEE Trans. Pattern Anal. Mach. Intell.*, vol. 46, no. 8, pp. 5625–5644, 2024.'),
    36: ("mann1947", 'H. B. Mann and D. R. Whitney, "On a test of whether one of two random variables is stochastically larger than the other," *Ann. Math. Statist.*, vol. 18, no. 1, pp. 50–60, 1947.'),
}

# === LINE-SPECIFIC REPLACEMENTS PER FILE ===
# Each entry: (unique_context_string, old_text, new_text)
# The "Line N" notes below refer to line numbers in the target markdown file.
# `old_text` must be a substring of `unique_context_string`: the context
# pins down WHICH occurrence of `old_text` gets rewritten.

INTRO_FIXES = [
    # Line 16: SV range should start at [3] not [2] (since [2] is Yen)
    ("offline signature verification [2]--[7]",
     "offline signature verification [2]--[7]",
     "offline signature verification [3]--[8]"),
    # Line 23: Woodruff
    ("Woodruff et al. [8]",
     "Woodruff et al. [8]",
     "Woodruff et al. [9]"),
    # Line 24: CMFD refs
    ("Copy-move forgery detection methods [9], [10]",
     "methods [9], [10]",
     "methods [10], [11]"),
    # Line 25: pHash+DL refs
    ("perceptual hashing combined with deep learning [11], [12]",
     "deep learning [11], [12]",
     "deep learning [12], [13]"),
    # Line 28: pHash -> dHash in pipeline description
    ("perceptual hash (pHash) distance",
     "perceptual hash (pHash) distance",
     "difference hash (dHash) distance"),
]

RW_FIXES = [
    # Line 7: Hafemann 2017
    ("Hafemann et al. [24]", "et al. [24]", "et al. [14]"),
    # Line 12: Zois
    ("Zois et al. [26]", "et al. [26]", "et al. [15]"),
    # Line 13: Hafemann 2019
    ("Hafemann et al. [25]", "et al. [25]", "et al. [16]"),
    # Line 18: Brimoh (wrongly [7], should be [8])
    ("Brimoh and Olisah [7]", "Olisah [7]", "Olisah [8]"),
    # Line 23: Farid
    ("manipulated visual content [27]", "content [27]", "content [17]"),
    # Line 23: Mehrjardi
    ("forgery detection [28]", "detection [28]", "detection [18]"),
    # Line 24: CMFD survey
    ("manipulated photographs [10]", "photographs [10]", "photographs [11]"),
    # Line 25: Abramova (was [11], should be [10])
    ("Abramova and Bohme [11]", "Bohme [11]", "Bohme [10]"),
    # Line 27: Woodruff (was [8], should be [9])
    ("Woodruff et al. [8]", "et al. [8]", "et al. [9]"),
    # Line 31: Pizzi (was [12], should be [13])
    ("Pizzi et al. [12]", "et al. [12]", "et al. [13]"),
    # Line 36: pHash survey (was [13], should be [19])
    ("substantive content changes [13]", "changes [13]", "changes [19]"),
    # Line 39: Jakhar (was [11], should be [12])
    ("Jakhar and Borah [11]", "Borah [11]", "Borah [12]"),
    # Line 47: Engin (was [14], should be [20])
    ("Engin et al. [14]", "et al. [14]", "et al. [20]"),
    # Line 48: Tsourounis (was [15], should be [21])
    ("Tsourounis et al. [15]", "et al. [15]", "et al. [21]"),
    # Line 49: Chamakh (was [16], should be [22])
    ("Chamakh and Bounouh [16]", "Bounouh [16]", "Bounouh [22]"),
    # Line 51: Babenko (was [29], should be [23])
    ("Babenko et al. [29]", "et al. [29]", "et al. [23]"),
]

METH_FIXES = [
    # Line 40: Qwen (was [17], should be [24])
    ("parameters) [17]", ") [17]", ") [24]"),
    # Line 53: YOLO (was [18], should be [25])
    ("(nano variant) [18]", "variant) [18]", "variant) [25]"),
    # Line 75: ResNet (was [19], should be [26])
    ("neural network [19]", "network [19]", "network [26]"),
    # Line 81: Engin, Tsourounis (was [14], [15], should be [20], [21])
    ("document analysis tasks [14], [15]", "tasks [14], [15]", "tasks [20], [21]"),
    # Line 98: Krawetz dHash (was [36], should be [27])
    ("(dHash) [36]", ") [36]", ") [27]"),
    # Line 101: pHash survey ref (was [14], should be [19])
    ("scan-induced variations [14]", "variations [14]", "variations [19]"),
    # Line 122: Silverman KDE (was [33], should be [28])
    ("(KDE) [33]", ") [33]", ") [28]"),
]

RESULTS_FIXES = [
    # Cohen's d citation (was [34], should be [29])
    ("effect size [34]", "size [34]", "size [29]"),
]

DISCUSSION_FIXES = [
    # Engin/Tsourounis/Chamakh range (was [14]--[16], should be [20]--[22])
    ("prior literature [14]--[16]", "literature [14]--[16]", "literature [20]--[22]"),
]


def apply_fixes(filepath: Path, fixes) -> int:
    """Apply context-anchored text replacements to one file, in place.

    Every fix is resolved against the text as it was read from disk, and
    all resolved edits are then applied in a single pass. This matters for
    renumbering: once "Hafemann et al. [24]" has become "[14]", a later fix
    whose ``old`` is "et al. [14]" must not match that freshly written
    citation. Replacing the first occurrence of ``old`` in a progressively
    mutated string (the previous behaviour) did exactly that.

    Args:
        filepath: File to rewrite; read and written as UTF-8.
        fixes: Iterable of ``(context, old, new)`` string triples. The
            first occurrence of ``context`` in the file selects the
            location, and the first occurrence of ``old`` inside that
            context is replaced by ``new``.

    Returns:
        The number of fixes that were applied.

    A fix is skipped with a printed warning when its context is missing
    from the file, when ``old`` does not occur inside ``context``, or when
    its target span overlaps a fix accepted earlier in the list.
    """
    original = filepath.read_text(encoding='utf-8')

    # Accepted edits as (start, end, replacement) spans into `original`.
    edits = []
    for context, old, new in fixes:
        context_at = original.find(context)
        if context_at < 0:
            print(f"  WARNING: context not found in {filepath.name}: {context[:60]}...")
            continue

        old_offset = context.find(old)
        if old_offset < 0:
            print(f"  WARNING: old text not inside its context in {filepath.name}: {context[:60]}...")
            continue

        start = context_at + old_offset
        end = start + len(old)
        if any(start < prev_end and prev_start < end for prev_start, prev_end, _ in edits):
            print(f"  WARNING: fix overlaps an earlier fix in {filepath.name}: {context[:60]}...")
            continue

        edits.append((start, end, new))

    # Stitch the result together left to right; the spans are disjoint, so
    # sorting by start offset is enough to walk the original text once.
    pieces = []
    cursor = 0
    for start, end, replacement in sorted(edits):
        pieces.append(original[cursor:start])
        pieces.append(replacement)
        cursor = end
    pieces.append(original[cursor:])

    filepath.write_text("".join(pieces), encoding='utf-8')
    changes = len(edits)
    print(f"  {filepath.name}: {changes} fixes applied")
    return changes


def rewrite_references():
    """Rewrite paper_a_references.md with the final sequential numbering.

    The file is regenerated from FINAL_REFS: a "# References" heading
    followed by one "[n] citation" paragraph per entry in ascending
    numeric order. Any existing file content is overwritten.
    """
    parts = ["# References\n\n", "\n\n"]
    parts.extend(
        f"[{num}] {citation}\n\n"
        for num, (_key, citation) in sorted(FINAL_REFS.items())
    )
    parts.append("\n")

    ref_path = PAPER_DIR / "paper_a_references.md"
    ref_path.write_text("".join(parts), encoding='utf-8')
    print(f"  paper_a_references.md: rewritten with {len(FINAL_REFS)} references")


def main():
    """Apply every per-section fix list, then regenerate the reference list."""
    print("Renumbering citations...\n")

    # (markdown file under PAPER_DIR, fixes to apply to it)
    plan = [
        ("paper_a_introduction.md", INTRO_FIXES),
        ("paper_a_related_work.md", RW_FIXES),
        ("paper_a_methodology.md", METH_FIXES),
        ("paper_a_results.md", RESULTS_FIXES),
        ("paper_a_discussion.md", DISCUSSION_FIXES),
    ]
    total = 0
    for filename, fixes in plan:
        total += apply_fixes(PAPER_DIR / filename, fixes)
    print(f"\nTotal fixes: {total}")

    print("\nRewriting references.md...")
    rewrite_references()

    print("\nDone! Verify with: grep -n '\\[.*\\]' paper/paper_a_*.md")


if __name__ == "__main__":
    main()