"""Table VI: HC flag rate by firm under any-pair (deployed) vs strict same-pair rule.

same-pair dHash = Hamming distance between a signature's dHash and the dHash
of its cosine-closest same-accountant partner (``closest_match_file``).

Reproduces the table from signature_analysis.db.  Run as a script to print the
table; importing the module does not touch the database.
"""
import sqlite3
from collections import defaultdict
from contextlib import closing

DB = "/Volumes/NV2/PDF-Processing/signature-analysis/signature_analysis.db"
BIG4 = ('勤業眾信聯合', '資誠聯合', '安侯建業聯合', '安永聯合')
# Maps each firm name stored in ``excel_firm`` to the letter shown in the table.
FM = {'勤業眾信聯合': 'A', '安侯建業聯合': 'B', '資誠聯合': 'C', '安永聯合': 'D'}

# Order in which firm rows are printed.
FIRM_ORDER = 'ABCD'
# A signature is flagged when cosine similarity is strictly above the first
# threshold and the dHash distance is at most the second.
COS_THRESHOLD = 0.95
DHASH_THRESHOLD = 5


def ham(a, b):
    """Return the Hamming distance between two byte strings.

    Both arguments are read as big-endian unsigned integers, XOR-ed, and the
    set bits of the result are counted.
    """
    return bin(int.from_bytes(a, 'big') ^ int.from_bytes(b, 'big')).count('1')


def collect_stats(db_path=DB, cos_threshold=COS_THRESHOLD,
                  dhash_threshold=DHASH_THRESHOLD):
    """Count signatures and flags per firm letter.

    Args:
        db_path: path of the SQLite database holding the ``signatures`` table.
        cos_threshold: similarity must be strictly greater than this to flag.
        dhash_threshold: dHash distance must be at most this to flag.

    Returns:
        A dict mapping each letter in ``FM`` to ``[n, any_pair, same_pair]``:
        the number of selected signatures, the number flagged using
        ``min_dhash_independent``, and the number flagged using the Hamming
        distance to ``closest_match_file``.  Firms with no selected rows are
        present with ``[0, 0, 0]``.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    stats = defaultdict(lambda: [0, 0, 0])
    for label in FM.values():
        stats[label]  # touch the key so every firm appears in the result

    # One "?" per firm so the IN (...) list always matches BIG4.
    placeholders = ','.join('?' * len(BIG4))
    rows_sql = f"""
        SELECT excel_firm,
               max_similarity_to_same_accountant,
               min_dhash_independent,
               closest_match_file,
               image_filename
        FROM signatures
        WHERE is_valid=1
          AND max_similarity_to_same_accountant IS NOT NULL
          AND min_dhash_independent IS NOT NULL
          AND excel_firm IN ({placeholders})"""

    # closing() guarantees the connection is released even if a query or a
    # row raises; sqlite3's own ``with con:`` only manages transactions.
    with closing(sqlite3.connect(db_path)) as con:
        cur = con.cursor()
        cur.execute(
            "SELECT image_filename, dhash_vector FROM signatures "
            "WHERE dhash_vector IS NOT NULL"
        )
        hashes = {name: bytes(blob) for name, blob in cur.fetchall()}

        cur.execute(rows_sql, BIG4)
        for firm, cos, min_dhash, closest_file, image_file in cur.fetchall():
            counts = stats[FM[firm]]
            counts[0] += 1

            similar = cos > cos_threshold
            counts[1] += int(similar and min_dhash <= dhash_threshold)

            own_hash = hashes.get(image_file)
            partner_hash = hashes.get(closest_file)
            # When either hash is unavailable the same-pair distance cannot
            # be computed, so the signature is never same-pair flagged.
            if similar and own_hash is not None and partner_hash is not None:
                counts[2] += int(ham(own_hash, partner_hash) <= dhash_threshold)

    return dict(stats)


def _format_row(label, n, any_pair, same_pair):
    """Format one table row; percentages read ``n/a`` when ``n`` is zero."""
    if n:
        return (f"{label:5}{n:>8}{100*any_pair/n:>10.1f}%"
                f"{100*same_pair/n:>11.1f}%")
    # Avoid dividing by zero for a firm with no selected signatures while
    # keeping the same column widths as a populated row.
    return f"{label:5}{n:>8}{'n/a':>11}{'n/a':>12}"


def format_table(stats):
    """Return the report as a list of lines.

    The list holds a header, one row per letter in ``FIRM_ORDER``, and a
    final ``all`` row with the summed counts.

    Args:
        stats: mapping of firm letter to ``[n, any_pair, same_pair]`` as
            returned by :func:`collect_stats`; missing letters count as zero.
    """
    lines = [f"{'firm':5}{'n':>8}{'any-pair%':>11}{'same-pair%':>12}"]
    totals = [0, 0, 0]
    for label in FIRM_ORDER:
        n, any_pair, same_pair = stats.get(label, (0, 0, 0))
        totals = [totals[0] + n, totals[1] + any_pair, totals[2] + same_pair]
        lines.append(_format_row(label, n, any_pair, same_pair))
    lines.append(_format_row('all', *totals))
    return lines


def main():
    """Query the default database and print the table to stdout."""
    for line in format_table(collect_stats()):
        print(line)


if __name__ == "__main__":
    main()