#!/usr/bin/env python3
"""
Pharmacogenomics Analysis Script
Analyzes drug-gene interactions based on PharmGKB and CPIC guidelines.

Usage:
    script.py [VCF_PATH [OUTPUT_PATH [PROBAND_IDX]]]

The script scans a (optionally gzip-compressed) VCF for a fixed table of
pharmacogenomic positions, classifies the genotype of one sample (the
"proband", selected by its 0-based sample column index) and writes a text
report.

Limitation: records are matched on chromosome and position only. The ALT
allele of the VCF record is not compared against any expected allele, so
"has_variant" means "the proband carries a non-reference allele at this
position", not "the proband carries the specific star allele listed".
"""

import gzip
import re
import sys
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

# Key pharmacogenomic variants (curated from PharmGKB/CPIC)
# Format: rsid -> (chrom, pos, gene, drug_class, effect, clinical_recommendation)
# NOTE(review): positions look like GRCh37/hg19 coordinates - confirm that the
# input VCF uses the same genome build, otherwise nothing will match.
PHARMGKB_VARIANTS = {
    # CYP2D6 - Codeine, Tramadol, Tamoxifen, many antidepressants
    "rs3892097": ("22", 42526694, "CYP2D6", "Codeine/Tramadol/Antidepressants",
                  "*4 allele - Poor metabolizer",
                  "Reduced efficacy of codeine (no conversion to morphine); Consider alternative analgesics"),
    "rs1065852": ("22", 42525772, "CYP2D6", "Codeine/Tramadol/Antidepressants",
                  "*10 allele - Reduced function",
                  "Intermediate metabolizer; May need dose adjustment"),
    "rs16947": ("22", 42523943, "CYP2D6", "Codeine/Tramadol",
                "*2 allele - Normal function",
                "Normal metabolism"),

    # CYP2C19 - Clopidogrel, PPIs, some antidepressants
    "rs4244285": ("10", 96541616, "CYP2C19", "Clopidogrel/PPIs/Antidepressants",
                  "*2 allele - Loss of function",
                  "Poor metabolizer; Clopidogrel may have reduced efficacy; Consider prasugrel or ticagrelor"),
    "rs4986893": ("10", 96540410, "CYP2C19", "Clopidogrel/PPIs",
                  "*3 allele - Loss of function",
                  "Poor metabolizer; Reduced clopidogrel activation"),
    "rs12248560": ("10", 96522463, "CYP2C19", "Clopidogrel/PPIs",
                   "*17 allele - Increased function",
                   "Ultra-rapid metabolizer; May need lower PPI doses"),

    # CYP2C9 - Warfarin, NSAIDs, Phenytoin
    "rs1799853": ("10", 96702047, "CYP2C9", "Warfarin/NSAIDs/Phenytoin",
                  "*2 allele - Reduced function",
                  "Slower warfarin metabolism; Lower dose may be needed"),
    "rs1057910": ("10", 96741053, "CYP2C9", "Warfarin/NSAIDs/Phenytoin",
                  "*3 allele - Reduced function",
                  "Significantly slower warfarin metabolism; Require ~50% lower dose"),

    # VKORC1 - Warfarin sensitivity
    "rs9923231": ("16", 31107689, "VKORC1", "Warfarin",
                  "-1639G>A - Warfarin sensitivity",
                  "A allele: Increased sensitivity, need lower warfarin dose"),

    # CYP3A4/CYP3A5 - Many drugs (statins, immunosuppressants, etc.)
    "rs776746": ("7", 99270539, "CYP3A5", "Tacrolimus/Cyclosporine/Statins",
                 "*3 allele - Non-expressor",
                 "Most common; Normal tacrolimus dosing"),
    "rs2740574": ("7", 99382096, "CYP3A4", "Statins/Many drugs",
                  "*1B allele",
                  "May affect drug metabolism"),

    # SLCO1B1 - Statin-induced myopathy
    "rs4149056": ("12", 21331549, "SLCO1B1", "Simvastatin/Statins",
                  "*5 allele - Reduced function",
                  "C allele: Increased risk of statin myopathy; Consider lower dose or alternative statin"),

    # TPMT - Thiopurines (Azathioprine, 6-MP)
    "rs1800460": ("6", 18130918, "TPMT", "Azathioprine/6-Mercaptopurine",
                  "*3B allele - Reduced function",
                  "Intermediate/Poor metabolizer; High risk of myelosuppression; Reduce dose"),
    "rs1142345": ("6", 18130725, "TPMT", "Azathioprine/6-Mercaptopurine",
                  "*3C allele - Reduced function",
                  "Intermediate/Poor metabolizer; High risk of myelosuppression; Reduce dose"),

    # DPYD - Fluoropyrimidines (5-FU, Capecitabine)
    "rs3918290": ("1", 97915614, "DPYD", "5-Fluorouracil/Capecitabine",
                  "*2A allele - No function",
                  "CRITICAL: Complete DPD deficiency; Contraindicated - severe toxicity risk"),
    "rs55886062": ("1", 98205966, "DPYD", "5-Fluorouracil/Capecitabine",
                   "*13 allele - No function",
                   "CRITICAL: DPD deficiency; Contraindicated"),
    "rs67376798": ("1", 97981395, "DPYD", "5-Fluorouracil/Capecitabine",
                   "D949V - Reduced function",
                   "Intermediate metabolizer; Consider dose reduction"),

    # UGT1A1 - Irinotecan
    "rs8175347": ("2", 234668879, "UGT1A1", "Irinotecan",
                  "*28 allele (TA repeat)",
                  "7/7 genotype: Reduced glucuronidation; Increased toxicity risk; Consider dose reduction"),

    # HLA-B*57:01 - Abacavir hypersensitivity
    "rs2395029": ("6", 31431780, "HLA-B", "Abacavir (HIV)",
                  "HLA-B*57:01 tag SNP",
                  "CRITICAL: If positive, abacavir contraindicated - hypersensitivity reaction risk"),

    # HLA-B*15:02 - Carbamazepine/Phenytoin (SJS/TEN)
    "rs144012689": ("6", 31356867, "HLA-B", "Carbamazepine/Phenytoin",
                    "HLA-B*15:02 tag SNP",
                    "CRITICAL: If positive in Asian ancestry, carbamazepine contraindicated - SJS/TEN risk"),

    # HLA-A*31:01 - Carbamazepine
    "rs1061235": ("6", 29912280, "HLA-A", "Carbamazepine",
                  "HLA-A*31:01 tag SNP",
                  "If positive, increased carbamazepine hypersensitivity risk"),

    # F5 - Oral contraceptives, HRT (Factor V Leiden)
    "rs6025": ("1", 169519049, "F5", "Oral Contraceptives/HRT",
               "Factor V Leiden",
               "CRITICAL: Increased thrombosis risk; Oral contraceptives relatively contraindicated"),

    # F2 - Oral contraceptives (Prothrombin)
    "rs1799963": ("11", 46761055, "F2", "Oral Contraceptives/HRT",
                  "Prothrombin G20210A",
                  "Increased thrombosis risk; Caution with oral contraceptives"),

    # MTHFR - Methotrexate, Folate metabolism
    "rs1801133": ("1", 11856378, "MTHFR", "Methotrexate/Folate",
                  "C677T - Reduced function",
                  "T/T genotype: Reduced MTHFR activity; May need folate supplementation with methotrexate"),
    "rs1801131": ("1", 11854476, "MTHFR", "Methotrexate/Folate",
                  "A1298C",
                  "May affect folate metabolism"),

    # OPRM1 - Opioid response
    "rs1799971": ("6", 154039662, "OPRM1", "Opioids (Morphine, etc.)",
                  "A118G",
                  "G allele: May need higher opioid doses for pain relief"),

    # COMT - Pain medications, ADHD drugs
    "rs4680": ("22", 19951271, "COMT", "Pain medications/ADHD drugs",
               "Val158Met",
               "Met/Met: Lower COMT activity; May affect pain perception and stimulant response"),

    # IFNL3 (IL28B) - Hepatitis C treatment
    "rs12979860": ("19", 39738787, "IFNL3", "Hepatitis C treatment (Interferon)",
                   "IL28B genotype",
                   "C/C genotype: Better response to interferon-based HCV treatment"),

    # NAT2 - Isoniazid, Hydralazine
    "rs1801280": ("8", 18257854, "NAT2", "Isoniazid/Hydralazine/Sulfonamides",
                  "*5 allele - Slow acetylator",
                  "Slow acetylator; Increased isoniazid toxicity risk; Monitor for peripheral neuropathy"),
    "rs1799930": ("8", 18258103, "NAT2", "Isoniazid/Hydralazine",
                  "*6 allele - Slow acetylator",
                  "Slow acetylator; May need dose adjustment"),

    # G6PD - Primaquine, Dapsone, Sulfonamides
    "rs1050828": ("X", 153764217, "G6PD", "Primaquine/Dapsone/Sulfonamides",
                  "G6PD A- variant",
                  "CRITICAL: G6PD deficiency; Avoid oxidant drugs - hemolysis risk"),

    # CYP2B6 - Efavirenz
    "rs3745274": ("19", 41512841, "CYP2B6", "Efavirenz (HIV)",
                  "*6 allele - Reduced function",
                  "T/T genotype: Slow metabolizer; Consider lower efavirenz dose; CNS side effects more likely"),
}

# Separator between allele indices in a VCF GT value ('/' unphased, '|' phased).
_GT_SEPARATOR = re.compile(r'[/|]')

# Lower-cased substrings that mark a recommendation as actionable.
# 'adjust' is included so that "May need dose adjustment" lands in the
# "May Require Dose Adjustment" section of the report.
_ACTIONABLE_KEYWORDS = ('reduce', 'consider', 'lower', 'avoid',
                        'contraindicated', 'adjust')


def get_genotype_class(gt: str) -> str:
    """Classify a VCF GT string.

    Args:
        gt: Genotype value such as '0/1', '1|1', './.' or '1' (haploid).

    Returns:
        'MISSING'  - no allele was called, or only reference alleles were
                     called alongside a no-call (e.g. '0/.'), so neither a
                     variant nor a full reference genotype can be asserted.
        'HOM_REF'  - every allele is called and is the reference ('0').
        'HOM_ALT'  - every allele is called and all carry the same ALT index.
        'HET'      - anything else that includes at least one ALT allele,
                     including two different ALT alleles ('1/2') and
                     half-called genotypes with an ALT ('./1').
    """
    alleles = _GT_SEPARATOR.split(gt)
    called = [a for a in alleles if a not in ('.', '')]
    if not called:
        return 'MISSING'

    fully_called = len(called) == len(alleles)
    if all(a == '0' for a in called):
        return 'HOM_REF' if fully_called else 'MISSING'
    # Homozygous only when every allele is the *same* ALT; '1/2' is not.
    if fully_called and len(set(called)) == 1:
        return 'HOM_ALT'
    return 'HET'


def _normalize_chrom(chrom: str) -> str:
    """Return the chromosome name without a leading 'chr' (any case)."""
    return chrom[3:] if chrom.lower().startswith('chr') else chrom


def analyze_pharmacogenomics(vcf_path: str, proband_idx: int = 2) -> Tuple[Dict[str, Dict], List[str]]:
    """Analyze VCF for pharmacogenomic variants.

    Args:
        vcf_path: Path to a VCF file; '.gz'/'.bgz' files are read via gzip.
        proband_idx: 0-based index into the sample columns (the columns
            after FORMAT) of the sample to genotype.

    Returns:
        A ``(results, samples)`` tuple. ``results`` maps rsid to a dict with
        the table annotation ('rsid', 'gene', 'drug', 'effect',
        'recommendation') plus 'chrom', 'pos', 'ref', 'alt' as read from the
        VCF, 'genotype' (raw GT), 'genotype_class', 'alleles' (GT translated
        to bases, or 'N/A') and 'has_variant'. ``samples`` is the list of
        sample names from the #CHROM header line (empty if none was seen).

    If several records share a matched position, the last one wins.
    """
    print("Scanning for pharmacogenomic variants...")

    # Build position lookup
    pos_to_variant = {}
    for rsid, (chrom, pos, gene, drug, effect, recommendation) in PHARMGKB_VARIANTS.items():
        pos_to_variant[f"{chrom}-{pos}"] = {
            'rsid': rsid,
            'gene': gene,
            'drug': drug,
            'effect': effect,
            'recommendation': recommendation,
        }

    results: Dict[str, Dict] = {}
    samples: List[str] = []

    open_func = gzip.open if vcf_path.endswith(('.gz', '.bgz')) else open
    with open_func(vcf_path, 'rt', encoding='utf-8') as f:
        for line in f:
            if line.startswith('##'):
                continue

            parts = line.strip().split('\t')
            if line.startswith('#CHROM'):
                samples = parts[9:]
                if proband_idx >= len(samples):
                    # Without this hint every genotype would silently be './.'.
                    print(f"WARNING: proband_idx {proband_idx} is out of range for "
                          f"{len(samples)} sample(s); genotypes will be reported as missing",
                          file=sys.stderr)
                continue

            if len(parts) < 10:
                continue

            chrom, pos, _rsid_vcf, ref, alt, _qual, _filt, _info, fmt = parts[:9]
            gt_fields = parts[9:]

            # The table uses bare chromosome names; accept 'chr'-prefixed VCFs too.
            variant_info = pos_to_variant.get(f"{_normalize_chrom(chrom)}-{pos}")
            if variant_info is None:
                continue

            # Get proband genotype
            fmt_parts = fmt.split(':')
            gt_idx = fmt_parts.index('GT') if 'GT' in fmt_parts else 0

            gt = './.'
            # Bounds check keeps Python's negative indexing but never raises.
            if -len(gt_fields) <= proband_idx < len(gt_fields):
                gt_data = gt_fields[proband_idx].split(':')
                if gt_idx < len(gt_data):
                    gt = gt_data[gt_idx]

            gt_class = get_genotype_class(gt)

            # Determine alleles: index 0 is REF, 1..n are the ALT alleles.
            alleles = [ref] + alt.split(',')
            gt_alleles_str = []
            if gt_class != 'MISSING':
                gt_alleles_str = [
                    alleles[int(idx)]
                    for idx in _GT_SEPARATOR.split(gt)
                    if idx.isdigit() and int(idx) < len(alleles)
                ]

            results[variant_info['rsid']] = {
                **variant_info,
                'chrom': chrom,
                'pos': pos,
                'ref': ref,
                'alt': alt,
                'genotype': gt,
                'genotype_class': gt_class,
                'alleles': '/'.join(gt_alleles_str) if gt_alleles_str else 'N/A',
                'has_variant': gt_class in ('HET', 'HOM_ALT'),
            }

    return results, samples


def _write_findings(f, banner: str, findings: List[Dict], recommendation_prefix: str) -> None:
    """Write one banner-delimited findings section; writes nothing if empty."""
    if not findings:
        return
    f.write("=" * 80 + "\n")
    f.write(banner + "\n")
    f.write("=" * 80 + "\n\n")
    for data in findings:
        f.write(f"GENE: {data['gene']} ({data['rsid']})\n")
        f.write(f" Drug(s): {data['drug']}\n")
        f.write(f" Genotype: {data['alleles']} ({data['genotype_class']})\n")
        f.write(f" Effect: {data['effect']}\n")
        f.write(f" {recommendation_prefix}{data['recommendation']}\n\n")


def generate_pgx_report(results: Dict, output_path: str) -> Tuple[List[Dict], List[Dict]]:
    """Generate pharmacogenomics report.

    Args:
        results: Mapping rsid -> result dict as produced by
            ``analyze_pharmacogenomics``.
        output_path: File to (over)write with the UTF-8 text report.

    Returns:
        ``(critical, actionable)``: the result dicts with ``has_variant``
        whose recommendation contains 'CRITICAL', and those whose
        recommendation contains one of the actionable keywords, respectively.
        Variants matching neither are only shown in the summary and table.
    """
    # Categorize by drug class
    drug_classes = defaultdict(list)
    for data in results.values():
        drug_classes[data['drug']].append(data)

    # Identify actionable results
    critical = []
    actionable = []
    for data in results.values():
        if not data['has_variant']:
            continue
        recommendation = data['recommendation']
        if 'CRITICAL' in recommendation:
            critical.append(data)
        elif any(word in recommendation.lower() for word in _ACTIONABLE_KEYWORDS):
            actionable.append(data)

    # Explicit UTF-8: the report contains non-ASCII symbols that a
    # locale-dependent default encoding may be unable to encode.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("# Pharmacogenomics Analysis Report\n")
        f.write("# Based on PharmGKB and CPIC Guidelines\n\n")

        # Critical findings first
        _write_findings(f, "⚠️ CRITICAL FINDINGS - Immediate Clinical Relevance",
                        critical, "⚠️ ")

        # Actionable findings
        _write_findings(f, "📋 ACTIONABLE FINDINGS - May Require Dose Adjustment",
                        actionable, "Recommendation: ")

        # Summary by drug class
        f.write("=" * 80 + "\n")
        f.write("SUMMARY BY DRUG CLASS\n")
        f.write("=" * 80 + "\n\n")

        for drug_class in sorted(drug_classes):
            variants = drug_classes[drug_class]
            has_risk = any(v['has_variant'] for v in variants)
            status = "⚠️ VARIANT DETECTED" if has_risk else "✓ Normal"

            f.write(f"\n## {drug_class}\n")
            f.write(f"Status: {status}\n")
            for v in variants:
                marker = "→" if v['has_variant'] else " "
                f.write(f" {marker} {v['gene']} ({v['rsid']}): {v['alleles']} - {v['genotype_class']}\n")

        # Detailed table
        f.write("\n" + "=" * 80 + "\n")
        f.write("DETAILED RESULTS\n")
        f.write("=" * 80 + "\n\n")
        f.write("RSID\tGENE\tGENOTYPE\tALLELES\tHAS_VARIANT\tDRUG\tEFFECT\n")

        for rsid in sorted(results):
            data = results[rsid]
            f.write(f"{rsid}\t{data['gene']}\t{data['genotype']}\t{data['alleles']}\t")
            f.write(f"{data['has_variant']}\t{data['drug']}\t{data['effect']}\n")

    print(f"Report saved to: {output_path}")
    return critical, actionable


def main():
    """Command-line entry point: analyze a VCF and print a console summary.

    Positional arguments (all optional): VCF path, report output path and
    0-based proband sample index (default 2).
    """
    vcf_path = sys.argv[1] if len(sys.argv) > 1 else '/Volumes/NV2/genomics_analysis/vcf/trio_joint.snpeff.vcf'
    output_path = sys.argv[2] if len(sys.argv) > 2 else '/Volumes/NV2/genomics_analysis/pharmacogenomics_report.txt'
    try:
        proband_idx = int(sys.argv[3]) if len(sys.argv) > 3 else 2
    except ValueError:
        sys.exit(f"proband_idx must be an integer, got {sys.argv[3]!r}")

    print("=" * 60)
    print("PHARMACOGENOMICS ANALYSIS")
    print("=" * 60)
    print(f"VCF: {vcf_path}")
    print(f"Searching for {len(PHARMGKB_VARIANTS)} pharmacogenomic variants...\n")

    results, samples = analyze_pharmacogenomics(vcf_path, proband_idx)

    if 0 <= proband_idx < len(samples):
        print(f"Proband sample: {samples[proband_idx]}")
    print(f"Found {len(results)} pharmacogenomic variants in VCF")

    critical, actionable = generate_pgx_report(results, output_path)

    # Console summary
    print("\n" + "=" * 60)
    print("QUICK SUMMARY")
    print("=" * 60)

    variants_with_effect = [r for r in results.values() if r['has_variant']]
    print(f"\nVariants detected: {len(variants_with_effect)}/{len(results)}")

    if critical:
        print("\n⚠️ CRITICAL FINDINGS:")
        for c in critical:
            print(f" - {c['gene']}: {c['drug']}")
            print(f" {c['recommendation']}")

    if actionable:
        print("\n📋 ACTIONABLE FINDINGS:")
        for a in actionable:
            print(f" - {a['gene']} ({a['rsid']}): {a['drug']}")
            print(f" Genotype: {a['alleles']}")
            print(f" {a['recommendation']}")

    if not critical and not actionable:
        print("\n✓ No critical or actionable pharmacogenomic variants detected")


if __name__ == '__main__':
    main()