939a348da4
Paper draft includes all sections (Abstract through Conclusion), 36 references, and supporting scripts. Key methodology: Cosine similarity + dHash dual-method verification with thresholds calibrated against known-replication firm (Firm A). Includes: - 8 section markdown files (paper_a_*.md) - Ablation study script (ResNet-50 vs VGG-16 vs EfficientNet-B0) - Recalibrated classification script (84,386 PDFs, 5-tier system) - Figure generation and Word export scripts - Citation renumbering script ([1]-[36]) - Signature analysis pipeline (12 steps) - YOLO extraction scripts Three rounds of AI review completed (GPT-5.4, Claude Opus 4.6, Gemini 3 Pro). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
196 lines
12 KiB
Python
196 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Renumber all in-text citations to sequential order by first appearance.
|
||
Also rewrites paper_a_references.md with the final numbering.
|
||
"""
|
||
import re
|
||
from pathlib import Path
|
||
|
||
# Directory containing the paper_a_*.md files that this script reads and
# overwrites. This is a hard-coded, machine-specific absolute path.
# NOTE(review): must be adjusted before running on another machine.
PAPER_DIR = Path("/Volumes/NV2/pdf_recognize/paper")
|
||
|
||
# === FINAL NUMBERING (by order of first appearance in paper) ===
# Format: new_number: (short_key, full_citation)
#
# Maps each final citation number to a (short_key, full_citation) pair.
# rewrite_references() emits the entries in ascending numeric order as
# "[number] full_citation"; short_key is a human-readable label and is not
# written to the output file.
FINAL_REFS = {
    1: ("cpa_act", 'Taiwan Certified Public Accountant Act (會計師法), Art. 4; FSC Attestation Regulations (查核簽證核准準則), Art. 6. Available: https://law.moj.gov.tw/ENG/LawClass/LawAll.aspx?pcode=G0400067'),
    2: ("yen2013", 'S.-H. Yen, Y.-S. Chang, and H.-L. Chen, "Does the signature of a CPA matter? Evidence from Taiwan," *Res. Account. Regul.*, vol. 25, no. 2, pp. 230–235, 2013.'),
    3: ("bromley1993", 'J. Bromley et al., "Signature verification using a Siamese time delay neural network," in *Proc. NeurIPS*, 1993.'),
    4: ("dey2017", 'S. Dey et al., "SigNet: Convolutional Siamese network for writer independent offline signature verification," arXiv:1707.02131, 2017.'),
    5: ("hadjadj2020", 'I. Hadjadj et al., "An offline signature verification method based on a single known sample and an explainable deep learning approach," *Appl. Sci.*, vol. 10, no. 11, p. 3716, 2020.'),
    6: ("li2024", 'H. Li et al., "TransOSV: Offline signature verification with transformers," *Pattern Recognit.*, vol. 145, p. 109882, 2024.'),
    7: ("tehsin2024", 'S. Tehsin et al., "Enhancing signature verification using triplet Siamese similarity networks in digital documents," *Mathematics*, vol. 12, no. 17, p. 2757, 2024.'),
    8: ("brimoh2024", 'P. Brimoh and C. C. Olisah, "Consensus-threshold criterion for offline signature verification using CNN learned representations," arXiv:2401.03085, 2024.'),
    9: ("woodruff2021", 'N. Woodruff et al., "Fully-automatic pipeline for document signature analysis to detect money laundering activities," arXiv:2107.14091, 2021.'),
    10: ("abramova2016", 'S. Abramova and R. Bohme, "Detecting copy-move forgeries in scanned text documents," in *Proc. Electronic Imaging*, 2016.'),
    11: ("cmfd_survey", 'Y. Li et al., "Copy-move forgery detection in digital image forensics: A survey," *Multimedia Tools Appl.*, 2024.'),
    12: ("jakhar2025", 'Y. Jakhar and M. D. Borah, "Effective near-duplicate image detection using perceptual hashing and deep learning," *Inf. Process. Manage.*, p. 104086, 2025.'),
    13: ("pizzi2022", 'E. Pizzi et al., "A self-supervised descriptor for image copy detection," in *Proc. CVPR*, 2022.'),
    14: ("hafemann2017", 'L. G. Hafemann, R. Sabourin, and L. S. Oliveira, "Learning features for offline handwritten signature verification using deep convolutional neural networks," *Pattern Recognit.*, vol. 70, pp. 163–176, 2017.'),
    15: ("zois2024", 'E. N. Zois, D. Tsourounis, and D. Kalivas, "Similarity distance learning on SPD manifold for writer independent offline signature verification," *IEEE Trans. Inf. Forensics Security*, vol. 19, pp. 1342–1356, 2024.'),
    16: ("hafemann2019", 'L. G. Hafemann, R. Sabourin, and L. S. Oliveira, "Meta-learning for fast classifier adaptation to new users of signature verification systems," *IEEE Trans. Inf. Forensics Security*, vol. 15, pp. 1735–1745, 2019.'),
    17: ("farid2009", 'H. Farid, "Image forgery detection," *IEEE Signal Process. Mag.*, vol. 26, no. 2, pp. 16–25, 2009.'),
    18: ("mehrjardi2023", 'F. Z. Mehrjardi, A. M. Latif, M. S. Zarchi, and R. Sheikhpour, "A survey on deep learning-based image forgery detection," *Pattern Recognit.*, vol. 144, art. no. 109778, 2023.'),
    19: ("phash_survey", 'J. Luo et al., "A survey of perceptual hashing for multimedia," *ACM Trans. Multimedia Comput. Commun. Appl.*, vol. 21, no. 7, 2025.'),
    20: ("engin2020", 'D. Engin et al., "Offline signature verification on real-world documents," in *Proc. CVPRW*, 2020.'),
    21: ("tsourounis2022", 'D. Tsourounis et al., "From text to signatures: Knowledge transfer for efficient deep feature learning in offline signature verification," *Expert Syst. Appl.*, 2022.'),
    22: ("chamakh2025", 'B. Chamakh and O. Bounouh, "A unified ResNet18-based approach for offline signature classification and verification," *Procedia Comput. Sci.*, vol. 270, 2025.'),
    23: ("babenko2014", 'A. Babenko, A. Slesarev, A. Chigorin, and V. Lempitsky, "Neural codes for image retrieval," in *Proc. ECCV*, 2014, pp. 584–599.'),
    24: ("qwen2025", 'Qwen2.5-VL Technical Report, Alibaba Group, 2025.'),
    25: ("yolov11", 'Ultralytics, "YOLOv11 documentation," 2024. [Online]. Available: https://docs.ultralytics.com/'),
    26: ("he2016", 'K. He, X. Zhang, S. Ren, and J. Sun, "Deep residual learning for image recognition," in *Proc. CVPR*, 2016.'),
    27: ("krawetz2013", 'N. Krawetz, "Kind of like that," The Hacker Factor Blog, 2013. [Online]. Available: https://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html'),
    28: ("silverman1986", 'B. W. Silverman, *Density Estimation for Statistics and Data Analysis*. London: Chapman & Hall, 1986.'),
    29: ("cohen1988", 'J. Cohen, *Statistical Power Analysis for the Behavioral Sciences*, 2nd ed. Hillsdale, NJ: Lawrence Erlbaum, 1988.'),
    30: ("wang2004", 'Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image quality assessment: From error visibility to structural similarity," *IEEE Trans. Image Process.*, vol. 13, no. 4, pp. 600–612, 2004.'),
    31: ("carcello2013", 'J. V. Carcello and C. Li, "Costs and benefits of requiring an engagement partner signature: Recent experience in the United Kingdom," *The Accounting Review*, vol. 88, no. 5, pp. 1511–1546, 2013.'),
    32: ("blay2014", 'A. D. Blay, M. Notbohm, C. Schelleman, and A. Valencia, "Audit quality effects of an individual audit engagement partner signature mandate," *Int. J. Auditing*, vol. 18, no. 3, pp. 172–192, 2014.'),
    33: ("chi2009", 'W. Chi, H. Huang, Y. Liao, and H. Xie, "Mandatory audit partner rotation, audit quality, and market perception: Evidence from Taiwan," *Contemp. Account. Res.*, vol. 26, no. 2, pp. 359–391, 2009.'),
    34: ("redmon2016", 'J. Redmon, S. Divvala, R. Girshick, and A. Farhadi, "You only look once: Unified, real-time object detection," in *Proc. CVPR*, 2016, pp. 779–788.'),
    35: ("vlm_survey", 'J. Zhang, J. Huang, S. Jin, and S. Lu, "Vision-language models for vision tasks: A survey," *IEEE Trans. Pattern Anal. Mach. Intell.*, vol. 46, no. 8, pp. 5625–5644, 2024.'),
    36: ("mann1947", 'H. B. Mann and D. R. Whitney, "On a test of whether one of two random variables is stochastically larger than the other," *Ann. Math. Statist.*, vol. 18, no. 1, pp. 50–60, 1947.'),
}
|
||
|
||
# === LINE-SPECIFIC REPLACEMENTS PER FILE ===
# Each entry: (unique_context_string, old_text, new_text)
#
# apply_fixes() processes the entries of a list in order: the context string
# must be present in the target file for the entry to be applied, and
# old_text is then replaced by new_text. The "Line N" notes below are the
# author's pointers into the corresponding markdown file.

# Fixes for paper_a_introduction.md (see main()).
INTRO_FIXES = [
    # Line 16: SV range should start at [3] not [2] (since [2] is Yen)
    ("offline signature verification [2]--[7]",
     "offline signature verification [2]--[7]",
     "offline signature verification [3]--[8]"),
    # Line 23: Woodruff
    ("Woodruff et al. [8]",
     "Woodruff et al. [8]",
     "Woodruff et al. [9]"),
    # Line 24: CMFD refs
    ("Copy-move forgery detection methods [9], [10]",
     "methods [9], [10]",
     "methods [10], [11]"),
    # Line 25: pHash+DL refs
    ("perceptual hashing combined with deep learning [11], [12]",
     "deep learning [11], [12]",
     "deep learning [12], [13]"),
    # Line 28: pHash -> dHash in pipeline description
    # (a wording fix, not a citation renumbering)
    ("perceptual hash (pHash) distance",
     "perceptual hash (pHash) distance",
     "difference hash (dHash) distance"),
]
|
||
|
||
# Fixes for paper_a_related_work.md (see main()).
# Each entry: (context_string, old_text, new_text). Several old_text values
# here are short, generic fragments such as "et al. [14]"; the author-name
# context is what distinguishes one citation from another.
RW_FIXES = [
    # Line 7: Hafemann 2017
    ("Hafemann et al. [24]", "et al. [24]", "et al. [14]"),
    # Line 12: Zois
    ("Zois et al. [26]", "et al. [26]", "et al. [15]"),
    # Line 13: Hafemann 2019
    ("Hafemann et al. [25]", "et al. [25]", "et al. [16]"),
    # Line 18: Brimoh (wrongly [7], should be [8])
    ("Brimoh and Olisah [7]", "Olisah [7]", "Olisah [8]"),
    # Line 23: Farid
    ("manipulated visual content [27]", "content [27]", "content [17]"),
    # Line 23: Mehrjardi
    ("forgery detection [28]", "detection [28]", "detection [18]"),
    # Line 24: CMFD survey
    ("manipulated photographs [10]", "photographs [10]", "photographs [11]"),
    # Line 25: Abramova (was [11], should be [10])
    ("Abramova and Bohme [11]", "Bohme [11]", "Bohme [10]"),
    # Line 27: Woodruff (was [8], should be [9])
    ("Woodruff et al. [8]", "et al. [8]", "et al. [9]"),
    # Line 31: Pizzi (was [12], should be [13])
    ("Pizzi et al. [12]", "et al. [12]", "et al. [13]"),
    # Line 36: pHash survey (was [13], should be [19])
    ("substantive content changes [13]", "changes [13]", "changes [19]"),
    # Line 39: Jakhar (was [11], should be [12])
    ("Jakhar and Borah [11]", "Borah [11]", "Borah [12]"),
    # Line 47: Engin (was [14], should be [20])
    ("Engin et al. [14]", "et al. [14]", "et al. [20]"),
    # Line 48: Tsourounis (was [15], should be [21])
    ("Tsourounis et al. [15]", "et al. [15]", "et al. [21]"),
    # Line 49: Chamakh (was [16], should be [22])
    ("Chamakh and Bounouh [16]", "Bounouh [16]", "Bounouh [22]"),
    # Line 51: Babenko (was [29], should be [23])
    ("Babenko et al. [29]", "et al. [29]", "et al. [23]"),
]
|
||
|
||
# Fixes for paper_a_methodology.md (see main()).
# Each entry: (context_string, old_text, new_text).
METH_FIXES = [
    # Line 40: Qwen (was [17], should be [24])
    ("parameters) [17]", ") [17]", ") [24]"),
    # Line 53: YOLO (was [18], should be [25])
    ("(nano variant) [18]", "variant) [18]", "variant) [25]"),
    # Line 75: ResNet (was [19], should be [26])
    ("neural network [19]", "network [19]", "network [26]"),
    # Line 81: Engin, Tsourounis (was [14], [15], should be [20], [21])
    ("document analysis tasks [14], [15]",
     "tasks [14], [15]",
     "tasks [20], [21]"),
    # Line 98: Krawetz dHash (was [36], should be [27])
    ("(dHash) [36]", ") [36]", ") [27]"),
    # Line 101: pHash survey ref (was [14], should be [19])
    ("scan-induced variations [14]",
     "variations [14]",
     "variations [19]"),
    # Line 122: Silverman KDE (was [33], should be [28])
    ("(KDE) [33]", ") [33]", ") [28]"),
]
|
||
|
||
# Fixes for paper_a_results.md (see main()).
# Each entry: (context_string, old_text, new_text).
RESULTS_FIXES = [
    # Cohen's d citation (was [34], should be [29])
    ("effect size [34]", "size [34]", "size [29]"),
]
|
||
|
||
# Fixes for paper_a_discussion.md (see main()).
# Each entry: (context_string, old_text, new_text).
DISCUSSION_FIXES = [
    # Engin/Tsourounis/Chamakh range (was [14]--[16], should be [20]--[22])
    ("prior literature [14]--[16]",
     "literature [14]--[16]",
     "literature [20]--[22]"),
]
|
||
|
||
|
||
def apply_fixes(filepath, fixes):
    """Apply context-anchored text replacements to a file, rewriting it in place.

    Args:
        filepath: ``pathlib.Path`` of the UTF-8 text file to edit.
        fixes: Iterable of ``(context, old, new)`` string triples, applied in
            order. ``context`` identifies the passage to edit, ``old`` is the
            text to replace and ``new`` is its replacement.

    Returns:
        The number of fixes whose context was found (and which were applied).
        A warning is printed for every fix whose context is missing.
    """
    text = filepath.read_text(encoding='utf-8')
    changes = 0
    for context, old, new in fixes:
        if context not in text:
            print(f" WARNING: context not found in {filepath.name}: {context[:60]}...")
            continue
        if old and old in context:
            # Replace *inside the matched context* rather than at the first
            # occurrence of ``old`` anywhere in the file. Short fragments such
            # as "et al. [14]" may also occur earlier in the text -- including
            # ones produced by a previous fix in this same run -- and must not
            # be rewritten by mistake.
            text = text.replace(context, context.replace(old, new, 1), 1)
        else:
            # ``old`` lies outside the context string: the context only gates
            # the fix, so fall back to replacing the first occurrence of
            # ``old`` in the file.
            text = text.replace(old, new, 1)
        changes += 1
    filepath.write_text(text, encoding='utf-8')
    print(f" {filepath.name}: {changes} fixes applied")
    return changes
|
||
|
||
|
||
def rewrite_references():
    """Regenerate paper_a_references.md from FINAL_REFS.

    The output is a markdown heading, an HTML comment describing the citation
    style, one "[number] citation" paragraph per reference in ascending
    numeric order, and a closing HTML comment with the reference count.
    The file under PAPER_DIR is overwritten.
    """
    header = (
        "# References\n\n"
        "<!-- IEEE numbered style, sequential by first appearance in text -->\n\n"
    )
    # Only the full citation (second tuple element) is written; the short key
    # is not part of the output.
    entries = "".join(
        f"[{number}] {FINAL_REFS[number][1]}\n\n" for number in sorted(FINAL_REFS)
    )
    footer = f"<!-- Total: {len(FINAL_REFS)} references -->\n"

    target = PAPER_DIR / "paper_a_references.md"
    target.write_text(header + entries + footer, encoding='utf-8')
    print(f" paper_a_references.md: rewritten with {len(FINAL_REFS)} references")
|
||
|
||
|
||
def main():
    """Apply every per-section fix list, then regenerate the reference list.

    Progress, the total number of applied fixes, and a suggested grep command
    for manual verification are printed to stdout.
    """
    print("Renumbering citations...\n")

    # (section file name, fix list) pairs, processed in this order.
    section_jobs = (
        ("paper_a_introduction.md", INTRO_FIXES),
        ("paper_a_related_work.md", RW_FIXES),
        ("paper_a_methodology.md", METH_FIXES),
        ("paper_a_results.md", RESULTS_FIXES),
        ("paper_a_discussion.md", DISCUSSION_FIXES),
    )
    total = sum(
        apply_fixes(PAPER_DIR / file_name, fix_list)
        for file_name, fix_list in section_jobs
    )

    print(f"\nTotal fixes: {total}")

    print("\nRewriting references.md...")
    rewrite_references()

    print("\nDone! Verify with: grep -n '\\[.*\\]' paper/paper_a_*.md")
|
||
|
||
|
||
# Run the renumbering only when this file is executed as a script, so that
# importing the module has no side effects on the paper files.
if __name__ == "__main__":
    main()
|