Files
genomic-consultant/pharmacogenomics.py
gbanyan d13d58df8b Refactor: Replace scaffolding with working analysis scripts
- Add trio_analysis.py for trio-based variant analysis with de novo detection
- Add clinvar_acmg_annotate.py for ClinVar/ACMG annotation
- Add gwas_comprehensive.py with 201 SNPs across 18 categories
- Add pharmgkb_full_analysis.py for pharmacogenomics analysis
- Add gwas_trait_lookup.py for basic GWAS trait lookup
- Add pharmacogenomics.py for basic PGx analysis
- Remove unused scaffolding code (src/, configs/, docs/, tests/)
- Update README.md with new documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-01 22:36:02 +08:00

377 lines
16 KiB
Python

#!/usr/bin/env python3
"""
Pharmacogenomics Analysis Script
Analyzes drug-gene interactions based on PharmGKB and CPIC guidelines.
"""
import gzip
import sys
import re
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, List, Optional
# Key pharmacogenomic variants (curated from PharmGKB/CPIC)
# Format: rsid -> (chrom, pos, gene, drug_class, effect, clinical_recommendation)
PHARMGKB_VARIANTS = {
# CYP2D6 - Codeine, Tramadol, Tamoxifen, many antidepressants
"rs3892097": ("22", 42526694, "CYP2D6", "Codeine/Tramadol/Antidepressants",
"*4 allele - Poor metabolizer",
"Reduced efficacy of codeine (no conversion to morphine); Consider alternative analgesics"),
"rs1065852": ("22", 42525772, "CYP2D6", "Codeine/Tramadol/Antidepressants",
"*10 allele - Reduced function",
"Intermediate metabolizer; May need dose adjustment"),
"rs16947": ("22", 42523943, "CYP2D6", "Codeine/Tramadol",
"*2 allele - Normal function",
"Normal metabolism"),
# CYP2C19 - Clopidogrel, PPIs, some antidepressants
"rs4244285": ("10", 96541616, "CYP2C19", "Clopidogrel/PPIs/Antidepressants",
"*2 allele - Loss of function",
"Poor metabolizer; Clopidogrel may have reduced efficacy; Consider prasugrel or ticagrelor"),
"rs4986893": ("10", 96540410, "CYP2C19", "Clopidogrel/PPIs",
"*3 allele - Loss of function",
"Poor metabolizer; Reduced clopidogrel activation"),
"rs12248560": ("10", 96522463, "CYP2C19", "Clopidogrel/PPIs",
"*17 allele - Increased function",
"Ultra-rapid metabolizer; May need lower PPI doses"),
# CYP2C9 - Warfarin, NSAIDs, Phenytoin
"rs1799853": ("10", 96702047, "CYP2C9", "Warfarin/NSAIDs/Phenytoin",
"*2 allele - Reduced function",
"Slower warfarin metabolism; Lower dose may be needed"),
"rs1057910": ("10", 96741053, "CYP2C9", "Warfarin/NSAIDs/Phenytoin",
"*3 allele - Reduced function",
"Significantly slower warfarin metabolism; Require ~50% lower dose"),
# VKORC1 - Warfarin sensitivity
"rs9923231": ("16", 31107689, "VKORC1", "Warfarin",
"-1639G>A - Warfarin sensitivity",
"A allele: Increased sensitivity, need lower warfarin dose"),
# CYP3A4/CYP3A5 - Many drugs (statins, immunosuppressants, etc.)
"rs776746": ("7", 99270539, "CYP3A5", "Tacrolimus/Cyclosporine/Statins",
"*3 allele - Non-expressor",
"Most common; Normal tacrolimus dosing"),
"rs2740574": ("7", 99382096, "CYP3A4", "Statins/Many drugs",
"*1B allele",
"May affect drug metabolism"),
# SLCO1B1 - Statin-induced myopathy
"rs4149056": ("12", 21331549, "SLCO1B1", "Simvastatin/Statins",
"*5 allele - Reduced function",
"C allele: Increased risk of statin myopathy; Consider lower dose or alternative statin"),
# TPMT - Thiopurines (Azathioprine, 6-MP)
"rs1800460": ("6", 18130918, "TPMT", "Azathioprine/6-Mercaptopurine",
"*3B allele - Reduced function",
"Intermediate/Poor metabolizer; High risk of myelosuppression; Reduce dose"),
"rs1142345": ("6", 18130725, "TPMT", "Azathioprine/6-Mercaptopurine",
"*3C allele - Reduced function",
"Intermediate/Poor metabolizer; High risk of myelosuppression; Reduce dose"),
# DPYD - Fluoropyrimidines (5-FU, Capecitabine)
"rs3918290": ("1", 97915614, "DPYD", "5-Fluorouracil/Capecitabine",
"*2A allele - No function",
"CRITICAL: Complete DPD deficiency; Contraindicated - severe toxicity risk"),
"rs55886062": ("1", 98205966, "DPYD", "5-Fluorouracil/Capecitabine",
"*13 allele - No function",
"CRITICAL: DPD deficiency; Contraindicated"),
"rs67376798": ("1", 97981395, "DPYD", "5-Fluorouracil/Capecitabine",
"D949V - Reduced function",
"Intermediate metabolizer; Consider dose reduction"),
# UGT1A1 - Irinotecan
"rs8175347": ("2", 234668879, "UGT1A1", "Irinotecan",
"*28 allele (TA repeat)",
"7/7 genotype: Reduced glucuronidation; Increased toxicity risk; Consider dose reduction"),
# HLA-B*57:01 - Abacavir hypersensitivity
"rs2395029": ("6", 31431780, "HLA-B", "Abacavir (HIV)",
"HLA-B*57:01 tag SNP",
"CRITICAL: If positive, abacavir contraindicated - hypersensitivity reaction risk"),
# HLA-B*15:02 - Carbamazepine/Phenytoin (SJS/TEN)
"rs144012689": ("6", 31356867, "HLA-B", "Carbamazepine/Phenytoin",
"HLA-B*15:02 tag SNP",
"CRITICAL: If positive in Asian ancestry, carbamazepine contraindicated - SJS/TEN risk"),
# HLA-A*31:01 - Carbamazepine
"rs1061235": ("6", 29912280, "HLA-A", "Carbamazepine",
"HLA-A*31:01 tag SNP",
"If positive, increased carbamazepine hypersensitivity risk"),
# F5 - Oral contraceptives, HRT (Factor V Leiden)
"rs6025": ("1", 169519049, "F5", "Oral Contraceptives/HRT",
"Factor V Leiden",
"CRITICAL: Increased thrombosis risk; Oral contraceptives relatively contraindicated"),
# F2 - Oral contraceptives (Prothrombin)
"rs1799963": ("11", 46761055, "F2", "Oral Contraceptives/HRT",
"Prothrombin G20210A",
"Increased thrombosis risk; Caution with oral contraceptives"),
# MTHFR - Methotrexate, Folate metabolism
"rs1801133": ("1", 11856378, "MTHFR", "Methotrexate/Folate",
"C677T - Reduced function",
"T/T genotype: Reduced MTHFR activity; May need folate supplementation with methotrexate"),
"rs1801131": ("1", 11854476, "MTHFR", "Methotrexate/Folate",
"A1298C",
"May affect folate metabolism"),
# OPRM1 - Opioid response
"rs1799971": ("6", 154039662, "OPRM1", "Opioids (Morphine, etc.)",
"A118G",
"G allele: May need higher opioid doses for pain relief"),
# COMT - Pain medications, ADHD drugs
"rs4680": ("22", 19951271, "COMT", "Pain medications/ADHD drugs",
"Val158Met",
"Met/Met: Lower COMT activity; May affect pain perception and stimulant response"),
# IFNL3 (IL28B) - Hepatitis C treatment
"rs12979860": ("19", 39738787, "IFNL3", "Hepatitis C treatment (Interferon)",
"IL28B genotype",
"C/C genotype: Better response to interferon-based HCV treatment"),
# NAT2 - Isoniazid, Hydralazine
"rs1801280": ("8", 18257854, "NAT2", "Isoniazid/Hydralazine/Sulfonamides",
"*5 allele - Slow acetylator",
"Slow acetylator; Increased isoniazid toxicity risk; Monitor for peripheral neuropathy"),
"rs1799930": ("8", 18258103, "NAT2", "Isoniazid/Hydralazine",
"*6 allele - Slow acetylator",
"Slow acetylator; May need dose adjustment"),
# G6PD - Primaquine, Dapsone, Sulfonamides
"rs1050828": ("X", 153764217, "G6PD", "Primaquine/Dapsone/Sulfonamides",
"G6PD A- variant",
"CRITICAL: G6PD deficiency; Avoid oxidant drugs - hemolysis risk"),
# CYP2B6 - Efavirenz
"rs3745274": ("19", 41512841, "CYP2B6", "Efavirenz (HIV)",
"*6 allele - Reduced function",
"T/T genotype: Slow metabolizer; Consider lower efavirenz dose; CNS side effects more likely"),
}
def get_genotype_class(gt: str) -> str:
"""Classify genotype"""
if gt in ['./.', '.|.', '.']:
return 'MISSING'
alleles = re.split('[/|]', gt)
if all(a == '0' for a in alleles):
return 'HOM_REF'
elif all(a != '0' and a != '.' for a in alleles):
return 'HOM_ALT'
else:
return 'HET'
def analyze_pharmacogenomics(vcf_path: str, proband_idx: int = 2) -> Dict:
"""Analyze VCF for pharmacogenomic variants"""
print("Scanning for pharmacogenomic variants...")
# Build position lookup
pos_to_variant = {}
for rsid, data in PHARMGKB_VARIANTS.items():
chrom, pos, gene, drug, effect, recommendation = data
key = f"{chrom}-{pos}"
pos_to_variant[key] = {
'rsid': rsid,
'gene': gene,
'drug': drug,
'effect': effect,
'recommendation': recommendation
}
results = {}
samples = []
open_func = gzip.open if vcf_path.endswith('.gz') else open
mode = 'rt' if vcf_path.endswith('.gz') else 'r'
with open_func(vcf_path, mode) as f:
for line in f:
if line.startswith('##'):
continue
elif line.startswith('#CHROM'):
parts = line.strip().split('\t')
samples = parts[9:]
continue
parts = line.strip().split('\t')
if len(parts) < 10:
continue
chrom, pos, rsid_vcf, ref, alt, qual, filt, info, fmt = parts[:9]
gt_fields = parts[9:]
key = f"{chrom}-{pos}"
if key not in pos_to_variant:
continue
variant_info = pos_to_variant[key]
# Get proband genotype
fmt_parts = fmt.split(':')
gt_idx = fmt_parts.index('GT') if 'GT' in fmt_parts else 0
if proband_idx < len(gt_fields):
gt_data = gt_fields[proband_idx].split(':')
gt = gt_data[gt_idx] if gt_idx < len(gt_data) else './.'
else:
gt = './.'
gt_class = get_genotype_class(gt)
# Determine alleles
alleles = [ref] + alt.split(',')
gt_alleles_str = []
if gt_class != 'MISSING':
gt_indices = re.split('[/|]', gt)
for idx in gt_indices:
if idx.isdigit() and int(idx) < len(alleles):
gt_alleles_str.append(alleles[int(idx)])
results[variant_info['rsid']] = {
**variant_info,
'chrom': chrom,
'pos': pos,
'ref': ref,
'alt': alt,
'genotype': gt,
'genotype_class': gt_class,
'alleles': '/'.join(gt_alleles_str) if gt_alleles_str else 'N/A',
'has_variant': gt_class in ['HET', 'HOM_ALT']
}
return results, samples
def generate_pgx_report(results: Dict, output_path: str):
"""Generate pharmacogenomics report"""
# Categorize by drug class
drug_classes = defaultdict(list)
for rsid, data in results.items():
drug_classes[data['drug']].append(data)
# Identify actionable results
critical = []
actionable = []
informational = []
for rsid, data in results.items():
if data['has_variant']:
if 'CRITICAL' in data['recommendation']:
critical.append(data)
elif any(word in data['recommendation'].lower() for word in ['reduce', 'consider', 'lower', 'avoid', 'contraindicated']):
actionable.append(data)
else:
informational.append(data)
with open(output_path, 'w') as f:
f.write("# Pharmacogenomics Analysis Report\n")
f.write("# Based on PharmGKB and CPIC Guidelines\n\n")
# Critical findings first
if critical:
f.write("=" * 80 + "\n")
f.write("⚠️ CRITICAL FINDINGS - Immediate Clinical Relevance\n")
f.write("=" * 80 + "\n\n")
for data in critical:
f.write(f"GENE: {data['gene']} ({data['rsid']})\n")
f.write(f" Drug(s): {data['drug']}\n")
f.write(f" Genotype: {data['alleles']} ({data['genotype_class']})\n")
f.write(f" Effect: {data['effect']}\n")
f.write(f" ⚠️ {data['recommendation']}\n\n")
# Actionable findings
if actionable:
f.write("=" * 80 + "\n")
f.write("📋 ACTIONABLE FINDINGS - May Require Dose Adjustment\n")
f.write("=" * 80 + "\n\n")
for data in actionable:
f.write(f"GENE: {data['gene']} ({data['rsid']})\n")
f.write(f" Drug(s): {data['drug']}\n")
f.write(f" Genotype: {data['alleles']} ({data['genotype_class']})\n")
f.write(f" Effect: {data['effect']}\n")
f.write(f" Recommendation: {data['recommendation']}\n\n")
# Summary by drug class
f.write("=" * 80 + "\n")
f.write("SUMMARY BY DRUG CLASS\n")
f.write("=" * 80 + "\n\n")
for drug_class in sorted(drug_classes.keys()):
variants = drug_classes[drug_class]
has_risk = any(v['has_variant'] for v in variants)
status = "⚠️ VARIANT DETECTED" if has_risk else "✓ Normal"
f.write(f"\n## {drug_class}\n")
f.write(f"Status: {status}\n")
for v in variants:
marker = "" if v['has_variant'] else " "
f.write(f" {marker} {v['gene']} ({v['rsid']}): {v['alleles']} - {v['genotype_class']}\n")
# Detailed table
f.write("\n" + "=" * 80 + "\n")
f.write("DETAILED RESULTS\n")
f.write("=" * 80 + "\n\n")
f.write("RSID\tGENE\tGENOTYPE\tALLELES\tHAS_VARIANT\tDRUG\tEFFECT\n")
for rsid in sorted(results.keys()):
data = results[rsid]
f.write(f"{rsid}\t{data['gene']}\t{data['genotype']}\t{data['alleles']}\t")
f.write(f"{data['has_variant']}\t{data['drug']}\t{data['effect']}\n")
print(f"Report saved to: {output_path}")
return critical, actionable
def main():
vcf_path = sys.argv[1] if len(sys.argv) > 1 else '/Volumes/NV2/genomics_analysis/vcf/trio_joint.snpeff.vcf'
output_path = sys.argv[2] if len(sys.argv) > 2 else '/Volumes/NV2/genomics_analysis/pharmacogenomics_report.txt'
proband_idx = int(sys.argv[3]) if len(sys.argv) > 3 else 2
print("=" * 60)
print("PHARMACOGENOMICS ANALYSIS")
print("=" * 60)
print(f"VCF: {vcf_path}")
print(f"Searching for {len(PHARMGKB_VARIANTS)} pharmacogenomic variants...\n")
results, samples = analyze_pharmacogenomics(vcf_path, proband_idx)
print(f"Found {len(results)} pharmacogenomic variants in VCF")
critical, actionable = generate_pgx_report(results, output_path)
# Console summary
print("\n" + "=" * 60)
print("QUICK SUMMARY")
print("=" * 60)
variants_with_effect = [r for r in results.values() if r['has_variant']]
print(f"\nVariants detected: {len(variants_with_effect)}/{len(results)}")
if critical:
print("\n⚠️ CRITICAL FINDINGS:")
for c in critical:
print(f" - {c['gene']}: {c['drug']}")
print(f" {c['recommendation']}")
if actionable:
print("\n📋 ACTIONABLE FINDINGS:")
for a in actionable:
print(f" - {a['gene']} ({a['rsid']}): {a['drug']}")
print(f" Genotype: {a['alleles']}")
print(f" {a['recommendation']}")
if not critical and not actionable:
print("\n✓ No critical or actionable pharmacogenomic variants detected")
if __name__ == '__main__':
main()