Refactor: Replace scaffolding with working analysis scripts
- Add trio_analysis.py for trio-based variant analysis with de novo detection - Add clinvar_acmg_annotate.py for ClinVar/ACMG annotation - Add gwas_comprehensive.py with 201 SNPs across 18 categories - Add pharmgkb_full_analysis.py for pharmacogenomics analysis - Add gwas_trait_lookup.py for basic GWAS trait lookup - Add pharmacogenomics.py for basic PGx analysis - Remove unused scaffolding code (src/, configs/, docs/, tests/) - Update README.md with new documentation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
376
pharmacogenomics.py
Normal file
376
pharmacogenomics.py
Normal file
@@ -0,0 +1,376 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Pharmacogenomics Analysis Script
|
||||
Analyzes drug-gene interactions based on PharmGKB and CPIC guidelines.
|
||||
"""
|
||||
|
||||
import gzip
|
||||
import sys
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
# Key pharmacogenomic variants (curated from PharmGKB/CPIC)
|
||||
# Format: rsid -> (chrom, pos, gene, drug_class, effect, clinical_recommendation)
|
||||
|
||||
PHARMGKB_VARIANTS = {
|
||||
# CYP2D6 - Codeine, Tramadol, Tamoxifen, many antidepressants
|
||||
"rs3892097": ("22", 42526694, "CYP2D6", "Codeine/Tramadol/Antidepressants",
|
||||
"*4 allele - Poor metabolizer",
|
||||
"Reduced efficacy of codeine (no conversion to morphine); Consider alternative analgesics"),
|
||||
"rs1065852": ("22", 42525772, "CYP2D6", "Codeine/Tramadol/Antidepressants",
|
||||
"*10 allele - Reduced function",
|
||||
"Intermediate metabolizer; May need dose adjustment"),
|
||||
"rs16947": ("22", 42523943, "CYP2D6", "Codeine/Tramadol",
|
||||
"*2 allele - Normal function",
|
||||
"Normal metabolism"),
|
||||
|
||||
# CYP2C19 - Clopidogrel, PPIs, some antidepressants
|
||||
"rs4244285": ("10", 96541616, "CYP2C19", "Clopidogrel/PPIs/Antidepressants",
|
||||
"*2 allele - Loss of function",
|
||||
"Poor metabolizer; Clopidogrel may have reduced efficacy; Consider prasugrel or ticagrelor"),
|
||||
"rs4986893": ("10", 96540410, "CYP2C19", "Clopidogrel/PPIs",
|
||||
"*3 allele - Loss of function",
|
||||
"Poor metabolizer; Reduced clopidogrel activation"),
|
||||
"rs12248560": ("10", 96522463, "CYP2C19", "Clopidogrel/PPIs",
|
||||
"*17 allele - Increased function",
|
||||
"Ultra-rapid metabolizer; May need lower PPI doses"),
|
||||
|
||||
# CYP2C9 - Warfarin, NSAIDs, Phenytoin
|
||||
"rs1799853": ("10", 96702047, "CYP2C9", "Warfarin/NSAIDs/Phenytoin",
|
||||
"*2 allele - Reduced function",
|
||||
"Slower warfarin metabolism; Lower dose may be needed"),
|
||||
"rs1057910": ("10", 96741053, "CYP2C9", "Warfarin/NSAIDs/Phenytoin",
|
||||
"*3 allele - Reduced function",
|
||||
"Significantly slower warfarin metabolism; Require ~50% lower dose"),
|
||||
|
||||
# VKORC1 - Warfarin sensitivity
|
||||
"rs9923231": ("16", 31107689, "VKORC1", "Warfarin",
|
||||
"-1639G>A - Warfarin sensitivity",
|
||||
"A allele: Increased sensitivity, need lower warfarin dose"),
|
||||
|
||||
# CYP3A4/CYP3A5 - Many drugs (statins, immunosuppressants, etc.)
|
||||
"rs776746": ("7", 99270539, "CYP3A5", "Tacrolimus/Cyclosporine/Statins",
|
||||
"*3 allele - Non-expressor",
|
||||
"Most common; Normal tacrolimus dosing"),
|
||||
"rs2740574": ("7", 99382096, "CYP3A4", "Statins/Many drugs",
|
||||
"*1B allele",
|
||||
"May affect drug metabolism"),
|
||||
|
||||
# SLCO1B1 - Statin-induced myopathy
|
||||
"rs4149056": ("12", 21331549, "SLCO1B1", "Simvastatin/Statins",
|
||||
"*5 allele - Reduced function",
|
||||
"C allele: Increased risk of statin myopathy; Consider lower dose or alternative statin"),
|
||||
|
||||
# TPMT - Thiopurines (Azathioprine, 6-MP)
|
||||
"rs1800460": ("6", 18130918, "TPMT", "Azathioprine/6-Mercaptopurine",
|
||||
"*3B allele - Reduced function",
|
||||
"Intermediate/Poor metabolizer; High risk of myelosuppression; Reduce dose"),
|
||||
"rs1142345": ("6", 18130725, "TPMT", "Azathioprine/6-Mercaptopurine",
|
||||
"*3C allele - Reduced function",
|
||||
"Intermediate/Poor metabolizer; High risk of myelosuppression; Reduce dose"),
|
||||
|
||||
# DPYD - Fluoropyrimidines (5-FU, Capecitabine)
|
||||
"rs3918290": ("1", 97915614, "DPYD", "5-Fluorouracil/Capecitabine",
|
||||
"*2A allele - No function",
|
||||
"CRITICAL: Complete DPD deficiency; Contraindicated - severe toxicity risk"),
|
||||
"rs55886062": ("1", 98205966, "DPYD", "5-Fluorouracil/Capecitabine",
|
||||
"*13 allele - No function",
|
||||
"CRITICAL: DPD deficiency; Contraindicated"),
|
||||
"rs67376798": ("1", 97981395, "DPYD", "5-Fluorouracil/Capecitabine",
|
||||
"D949V - Reduced function",
|
||||
"Intermediate metabolizer; Consider dose reduction"),
|
||||
|
||||
# UGT1A1 - Irinotecan
|
||||
"rs8175347": ("2", 234668879, "UGT1A1", "Irinotecan",
|
||||
"*28 allele (TA repeat)",
|
||||
"7/7 genotype: Reduced glucuronidation; Increased toxicity risk; Consider dose reduction"),
|
||||
|
||||
# HLA-B*57:01 - Abacavir hypersensitivity
|
||||
"rs2395029": ("6", 31431780, "HLA-B", "Abacavir (HIV)",
|
||||
"HLA-B*57:01 tag SNP",
|
||||
"CRITICAL: If positive, abacavir contraindicated - hypersensitivity reaction risk"),
|
||||
|
||||
# HLA-B*15:02 - Carbamazepine/Phenytoin (SJS/TEN)
|
||||
"rs144012689": ("6", 31356867, "HLA-B", "Carbamazepine/Phenytoin",
|
||||
"HLA-B*15:02 tag SNP",
|
||||
"CRITICAL: If positive in Asian ancestry, carbamazepine contraindicated - SJS/TEN risk"),
|
||||
|
||||
# HLA-A*31:01 - Carbamazepine
|
||||
"rs1061235": ("6", 29912280, "HLA-A", "Carbamazepine",
|
||||
"HLA-A*31:01 tag SNP",
|
||||
"If positive, increased carbamazepine hypersensitivity risk"),
|
||||
|
||||
# F5 - Oral contraceptives, HRT (Factor V Leiden)
|
||||
"rs6025": ("1", 169519049, "F5", "Oral Contraceptives/HRT",
|
||||
"Factor V Leiden",
|
||||
"CRITICAL: Increased thrombosis risk; Oral contraceptives relatively contraindicated"),
|
||||
|
||||
# F2 - Oral contraceptives (Prothrombin)
|
||||
"rs1799963": ("11", 46761055, "F2", "Oral Contraceptives/HRT",
|
||||
"Prothrombin G20210A",
|
||||
"Increased thrombosis risk; Caution with oral contraceptives"),
|
||||
|
||||
# MTHFR - Methotrexate, Folate metabolism
|
||||
"rs1801133": ("1", 11856378, "MTHFR", "Methotrexate/Folate",
|
||||
"C677T - Reduced function",
|
||||
"T/T genotype: Reduced MTHFR activity; May need folate supplementation with methotrexate"),
|
||||
"rs1801131": ("1", 11854476, "MTHFR", "Methotrexate/Folate",
|
||||
"A1298C",
|
||||
"May affect folate metabolism"),
|
||||
|
||||
# OPRM1 - Opioid response
|
||||
"rs1799971": ("6", 154039662, "OPRM1", "Opioids (Morphine, etc.)",
|
||||
"A118G",
|
||||
"G allele: May need higher opioid doses for pain relief"),
|
||||
|
||||
# COMT - Pain medications, ADHD drugs
|
||||
"rs4680": ("22", 19951271, "COMT", "Pain medications/ADHD drugs",
|
||||
"Val158Met",
|
||||
"Met/Met: Lower COMT activity; May affect pain perception and stimulant response"),
|
||||
|
||||
# IFNL3 (IL28B) - Hepatitis C treatment
|
||||
"rs12979860": ("19", 39738787, "IFNL3", "Hepatitis C treatment (Interferon)",
|
||||
"IL28B genotype",
|
||||
"C/C genotype: Better response to interferon-based HCV treatment"),
|
||||
|
||||
# NAT2 - Isoniazid, Hydralazine
|
||||
"rs1801280": ("8", 18257854, "NAT2", "Isoniazid/Hydralazine/Sulfonamides",
|
||||
"*5 allele - Slow acetylator",
|
||||
"Slow acetylator; Increased isoniazid toxicity risk; Monitor for peripheral neuropathy"),
|
||||
"rs1799930": ("8", 18258103, "NAT2", "Isoniazid/Hydralazine",
|
||||
"*6 allele - Slow acetylator",
|
||||
"Slow acetylator; May need dose adjustment"),
|
||||
|
||||
# G6PD - Primaquine, Dapsone, Sulfonamides
|
||||
"rs1050828": ("X", 153764217, "G6PD", "Primaquine/Dapsone/Sulfonamides",
|
||||
"G6PD A- variant",
|
||||
"CRITICAL: G6PD deficiency; Avoid oxidant drugs - hemolysis risk"),
|
||||
|
||||
# CYP2B6 - Efavirenz
|
||||
"rs3745274": ("19", 41512841, "CYP2B6", "Efavirenz (HIV)",
|
||||
"*6 allele - Reduced function",
|
||||
"T/T genotype: Slow metabolizer; Consider lower efavirenz dose; CNS side effects more likely"),
|
||||
}
|
||||
|
||||
|
||||
def get_genotype_class(gt: str) -> str:
|
||||
"""Classify genotype"""
|
||||
if gt in ['./.', '.|.', '.']:
|
||||
return 'MISSING'
|
||||
alleles = re.split('[/|]', gt)
|
||||
if all(a == '0' for a in alleles):
|
||||
return 'HOM_REF'
|
||||
elif all(a != '0' and a != '.' for a in alleles):
|
||||
return 'HOM_ALT'
|
||||
else:
|
||||
return 'HET'
|
||||
|
||||
|
||||
def analyze_pharmacogenomics(vcf_path: str, proband_idx: int = 2) -> Dict:
|
||||
"""Analyze VCF for pharmacogenomic variants"""
|
||||
|
||||
print("Scanning for pharmacogenomic variants...")
|
||||
|
||||
# Build position lookup
|
||||
pos_to_variant = {}
|
||||
for rsid, data in PHARMGKB_VARIANTS.items():
|
||||
chrom, pos, gene, drug, effect, recommendation = data
|
||||
key = f"{chrom}-{pos}"
|
||||
pos_to_variant[key] = {
|
||||
'rsid': rsid,
|
||||
'gene': gene,
|
||||
'drug': drug,
|
||||
'effect': effect,
|
||||
'recommendation': recommendation
|
||||
}
|
||||
|
||||
results = {}
|
||||
samples = []
|
||||
|
||||
open_func = gzip.open if vcf_path.endswith('.gz') else open
|
||||
mode = 'rt' if vcf_path.endswith('.gz') else 'r'
|
||||
|
||||
with open_func(vcf_path, mode) as f:
|
||||
for line in f:
|
||||
if line.startswith('##'):
|
||||
continue
|
||||
elif line.startswith('#CHROM'):
|
||||
parts = line.strip().split('\t')
|
||||
samples = parts[9:]
|
||||
continue
|
||||
|
||||
parts = line.strip().split('\t')
|
||||
if len(parts) < 10:
|
||||
continue
|
||||
|
||||
chrom, pos, rsid_vcf, ref, alt, qual, filt, info, fmt = parts[:9]
|
||||
gt_fields = parts[9:]
|
||||
|
||||
key = f"{chrom}-{pos}"
|
||||
if key not in pos_to_variant:
|
||||
continue
|
||||
|
||||
variant_info = pos_to_variant[key]
|
||||
|
||||
# Get proband genotype
|
||||
fmt_parts = fmt.split(':')
|
||||
gt_idx = fmt_parts.index('GT') if 'GT' in fmt_parts else 0
|
||||
|
||||
if proband_idx < len(gt_fields):
|
||||
gt_data = gt_fields[proband_idx].split(':')
|
||||
gt = gt_data[gt_idx] if gt_idx < len(gt_data) else './.'
|
||||
else:
|
||||
gt = './.'
|
||||
|
||||
gt_class = get_genotype_class(gt)
|
||||
|
||||
# Determine alleles
|
||||
alleles = [ref] + alt.split(',')
|
||||
gt_alleles_str = []
|
||||
if gt_class != 'MISSING':
|
||||
gt_indices = re.split('[/|]', gt)
|
||||
for idx in gt_indices:
|
||||
if idx.isdigit() and int(idx) < len(alleles):
|
||||
gt_alleles_str.append(alleles[int(idx)])
|
||||
|
||||
results[variant_info['rsid']] = {
|
||||
**variant_info,
|
||||
'chrom': chrom,
|
||||
'pos': pos,
|
||||
'ref': ref,
|
||||
'alt': alt,
|
||||
'genotype': gt,
|
||||
'genotype_class': gt_class,
|
||||
'alleles': '/'.join(gt_alleles_str) if gt_alleles_str else 'N/A',
|
||||
'has_variant': gt_class in ['HET', 'HOM_ALT']
|
||||
}
|
||||
|
||||
return results, samples
|
||||
|
||||
|
||||
def generate_pgx_report(results: Dict, output_path: str):
|
||||
"""Generate pharmacogenomics report"""
|
||||
|
||||
# Categorize by drug class
|
||||
drug_classes = defaultdict(list)
|
||||
for rsid, data in results.items():
|
||||
drug_classes[data['drug']].append(data)
|
||||
|
||||
# Identify actionable results
|
||||
critical = []
|
||||
actionable = []
|
||||
informational = []
|
||||
|
||||
for rsid, data in results.items():
|
||||
if data['has_variant']:
|
||||
if 'CRITICAL' in data['recommendation']:
|
||||
critical.append(data)
|
||||
elif any(word in data['recommendation'].lower() for word in ['reduce', 'consider', 'lower', 'avoid', 'contraindicated']):
|
||||
actionable.append(data)
|
||||
else:
|
||||
informational.append(data)
|
||||
|
||||
with open(output_path, 'w') as f:
|
||||
f.write("# Pharmacogenomics Analysis Report\n")
|
||||
f.write("# Based on PharmGKB and CPIC Guidelines\n\n")
|
||||
|
||||
# Critical findings first
|
||||
if critical:
|
||||
f.write("=" * 80 + "\n")
|
||||
f.write("⚠️ CRITICAL FINDINGS - Immediate Clinical Relevance\n")
|
||||
f.write("=" * 80 + "\n\n")
|
||||
for data in critical:
|
||||
f.write(f"GENE: {data['gene']} ({data['rsid']})\n")
|
||||
f.write(f" Drug(s): {data['drug']}\n")
|
||||
f.write(f" Genotype: {data['alleles']} ({data['genotype_class']})\n")
|
||||
f.write(f" Effect: {data['effect']}\n")
|
||||
f.write(f" ⚠️ {data['recommendation']}\n\n")
|
||||
|
||||
# Actionable findings
|
||||
if actionable:
|
||||
f.write("=" * 80 + "\n")
|
||||
f.write("📋 ACTIONABLE FINDINGS - May Require Dose Adjustment\n")
|
||||
f.write("=" * 80 + "\n\n")
|
||||
for data in actionable:
|
||||
f.write(f"GENE: {data['gene']} ({data['rsid']})\n")
|
||||
f.write(f" Drug(s): {data['drug']}\n")
|
||||
f.write(f" Genotype: {data['alleles']} ({data['genotype_class']})\n")
|
||||
f.write(f" Effect: {data['effect']}\n")
|
||||
f.write(f" Recommendation: {data['recommendation']}\n\n")
|
||||
|
||||
# Summary by drug class
|
||||
f.write("=" * 80 + "\n")
|
||||
f.write("SUMMARY BY DRUG CLASS\n")
|
||||
f.write("=" * 80 + "\n\n")
|
||||
|
||||
for drug_class in sorted(drug_classes.keys()):
|
||||
variants = drug_classes[drug_class]
|
||||
has_risk = any(v['has_variant'] for v in variants)
|
||||
status = "⚠️ VARIANT DETECTED" if has_risk else "✓ Normal"
|
||||
f.write(f"\n## {drug_class}\n")
|
||||
f.write(f"Status: {status}\n")
|
||||
for v in variants:
|
||||
marker = "→" if v['has_variant'] else " "
|
||||
f.write(f" {marker} {v['gene']} ({v['rsid']}): {v['alleles']} - {v['genotype_class']}\n")
|
||||
|
||||
# Detailed table
|
||||
f.write("\n" + "=" * 80 + "\n")
|
||||
f.write("DETAILED RESULTS\n")
|
||||
f.write("=" * 80 + "\n\n")
|
||||
f.write("RSID\tGENE\tGENOTYPE\tALLELES\tHAS_VARIANT\tDRUG\tEFFECT\n")
|
||||
|
||||
for rsid in sorted(results.keys()):
|
||||
data = results[rsid]
|
||||
f.write(f"{rsid}\t{data['gene']}\t{data['genotype']}\t{data['alleles']}\t")
|
||||
f.write(f"{data['has_variant']}\t{data['drug']}\t{data['effect']}\n")
|
||||
|
||||
print(f"Report saved to: {output_path}")
|
||||
return critical, actionable
|
||||
|
||||
|
||||
def main():
|
||||
vcf_path = sys.argv[1] if len(sys.argv) > 1 else '/Volumes/NV2/genomics_analysis/vcf/trio_joint.snpeff.vcf'
|
||||
output_path = sys.argv[2] if len(sys.argv) > 2 else '/Volumes/NV2/genomics_analysis/pharmacogenomics_report.txt'
|
||||
proband_idx = int(sys.argv[3]) if len(sys.argv) > 3 else 2
|
||||
|
||||
print("=" * 60)
|
||||
print("PHARMACOGENOMICS ANALYSIS")
|
||||
print("=" * 60)
|
||||
print(f"VCF: {vcf_path}")
|
||||
print(f"Searching for {len(PHARMGKB_VARIANTS)} pharmacogenomic variants...\n")
|
||||
|
||||
results, samples = analyze_pharmacogenomics(vcf_path, proband_idx)
|
||||
|
||||
print(f"Found {len(results)} pharmacogenomic variants in VCF")
|
||||
|
||||
critical, actionable = generate_pgx_report(results, output_path)
|
||||
|
||||
# Console summary
|
||||
print("\n" + "=" * 60)
|
||||
print("QUICK SUMMARY")
|
||||
print("=" * 60)
|
||||
|
||||
variants_with_effect = [r for r in results.values() if r['has_variant']]
|
||||
print(f"\nVariants detected: {len(variants_with_effect)}/{len(results)}")
|
||||
|
||||
if critical:
|
||||
print("\n⚠️ CRITICAL FINDINGS:")
|
||||
for c in critical:
|
||||
print(f" - {c['gene']}: {c['drug']}")
|
||||
print(f" {c['recommendation']}")
|
||||
|
||||
if actionable:
|
||||
print("\n📋 ACTIONABLE FINDINGS:")
|
||||
for a in actionable:
|
||||
print(f" - {a['gene']} ({a['rsid']}): {a['drug']}")
|
||||
print(f" Genotype: {a['alleles']}")
|
||||
print(f" {a['recommendation']}")
|
||||
|
||||
if not critical and not actionable:
|
||||
print("\n✓ No critical or actionable pharmacogenomic variants detected")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user