Initial commit

2025-11-28 11:52:04 +08:00
commit f74dc351f7
51 changed files with 2402 additions and 0 deletions

.gitignore vendored Normal file

@@ -0,0 +1,46 @@
# Python and environments
__pycache__/
*.py[cod]
.venv/
env/
.env
.git-mcp/
.pytest_cache/
.mypy_cache/
.ruff_cache/
.coverage
# Notebooks
.ipynb_checkpoints/
# Editor
.DS_Store
.idea/
.vscode/
# Genomics data (avoid accidental commits)
*.bam
*.bai
*.sam
*.cram
*.crai
*.vcf
*.vcf.gz
*.vcf.gz.tbi
*.g.vcf
*.g.vcf.gz
*.g.vcf.gz.tbi
*.bed
*.bed.gz
*.bed.gz.tbi
*.fastq
*.fastq.gz
*.fq
*.fq.gz
# Reports / tmp
*.log
*.tmp
tmp/
reports/
runtime/

Makefile Normal file

@@ -0,0 +1,14 @@
PYTHON := .venv/bin/python
PIP := .venv/bin/pip

.PHONY: venv install test

venv:
	python3 -m venv .venv
	$(PIP) install -e .

install: venv
	$(PIP) install -e .[store]

test:
	$(PYTHON) -m pytest

README.md Normal file

@@ -0,0 +1,112 @@
# Genomic Consultant
Early design for a personal genomic risk and drug-interaction decision support system. Specs are sourced from `genomic_decision_support_system_spec_v0.1.md`.
## Vision (per spec)
- Phase 1: trio variant calling, annotation, queryable genomic DB, initial ACMG evidence tagging.
- Phase 2: pharmacogenomics genotype-to-phenotype mapping plus drug-drug interaction checks.
- Phase 3: supplement/herb normalization and interaction risk layering.
- Phase 4: LLM-driven query orchestration and report generation.
## Repository Layout
- `docs/` — system architecture notes, phase plans, data models (work in progress).
- `configs/` — example ACMG config and gene panel JSON.
- `configs/phenotype_to_genes.example.json` — placeholder phenotype/HPO → gene mappings.
- `configs/phenotype_to_genes.hpo_seed.json` — seed HPO mappings (replace with full HPO/GenCC derived panels).
- `sample_data/` — tiny annotated TSV for demo.
- `src/genomic_consultant/` — Python scaffolding (pipelines, store, panel lookup, ACMG tagging, reporting).
- `genomic_decision_support_system_spec_v0.1.md` — original requirements draft.
## Contributing/next steps
1. Finalize Phase 1 tech selection (variant caller, annotation stack, reference/DB versions).
2. Stand up the Phase 1 pipelines and minimal query API surface.
3. Add ACMG evidence tagging config and human-review logging.
4. Layer in PGx/DDI and supplement modules per later phases.
Data safety: keep genomic/clinical data local; the `.gitignore` blocks common genomic outputs by default.
## Quickstart (CLI scaffolding)
```bash
pip install -e .
# 1) Show trio calling plan (commands only; not executed)
genomic-consultant plan-call \
--sample proband:/data/proband.bam \
--sample father:/data/father.bam \
--sample mother:/data/mother.bam \
--reference /refs/GRCh38.fa \
--workdir /tmp/trio
# 1b) Execute calling plan (requires GATK installed) and emit run log
genomic-consultant run-call \
--sample proband:/data/proband.bam \
--sample father:/data/father.bam \
--sample mother:/data/mother.bam \
--reference /refs/GRCh38.fa \
--workdir /tmp/trio \
--log /tmp/trio/run_call_log.json \
--probe-tools
# 2) Show annotation plan for a joint VCF
genomic-consultant plan-annotate \
--vcf /tmp/trio/trio.joint.vcf.gz \
--workdir /tmp/trio/annot \
--prefix trio \
--reference /refs/GRCh38.fa
# 2b) Execute annotation plan (requires VEP, bcftools) with run log
genomic-consultant run-annotate \
--vcf /tmp/trio/trio.joint.vcf.gz \
--workdir /tmp/trio/annot \
--prefix trio \
--reference /refs/GRCh38.fa \
--log /tmp/trio/annot/run_annot_log.json \
--probe-tools
# 3) Demo panel report using sample data (panel file)
genomic-consultant panel-report \
--tsv sample_data/example_annotated.tsv \
--panel configs/panel.example.json \
--acmg-config configs/acmg_config.example.yaml \
--individual-id demo \
--format markdown \
--log /tmp/panel_log.json
# 3b) Demo panel report using phenotype mapping (HPO)
genomic-consultant panel-report \
--tsv sample_data/example_annotated.tsv \
--phenotype-id HP:0000365 \
--phenotype-mapping configs/phenotype_to_genes.hpo_seed.json \
--acmg-config configs/acmg_config.example.yaml \
--individual-id demo \
--format markdown
# 3c) Merge multiple phenotype→gene mappings into one
genomic-consultant build-phenotype-mapping \
--output configs/phenotype_to_genes.merged.json \
configs/phenotype_to_genes.example.json configs/phenotype_to_genes.hpo_seed.json
# 4) End-to-end Phase 1 pipeline (optionally skip call/annotate; use sample TSV)
genomic-consultant phase1-run \
--tsv sample_data/example_annotated.tsv \
--skip-call --skip-annotate \
--panel configs/panel.example.json \
--acmg-config configs/acmg_config.example.yaml \
--workdir runtime \
--prefix demo
# Run tests
pytest
```
### Optional Parquet-backed store
Install pandas to enable Parquet ingestion:
```bash
pip install -e .[store]
```
### Notes on VEP plugins (SpliceAI/CADD)
- The annotation plan already queries `SpliceAI` and `CADD_PHRED` fields; ensure your VEP run includes plugins/flags that produce them, e.g.:
- `--plugin SpliceAI,snv=/path/to/spliceai.snv.vcf.gz,indel=/path/to/spliceai.indel.vcf.gz`
- `--plugin CADD,/path/to/whole_genome_SNVs.tsv.gz,/path/to/InDels.tsv.gz`
- Pass these via `--plugin` and/or `--extra-flag` on `run-annotate` / `plan-annotate` to embed fields into the TSV.

configs/acmg_config.example.yaml Normal file

@@ -0,0 +1,12 @@
# ACMG evidence thresholds (example, adjust per lab policy)
ba1_af: 0.05 # Stand-alone benign if AF >= this
bs1_af: 0.01 # Strong benign if AF >= this (and not meeting BA1)
pm2_af: 0.0005 # Moderate pathogenic if AF <= this
bp7_splice_ai_max: 0.1 # Supporting benign if synonymous and predicted low splice impact
# Genes considered loss-of-function sensitive for PVS1 auto-tagging
lof_genes:
- BRCA1
- BRCA2
- TTN
- CFTR
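
A quick way to sanity-check these thresholds is to load them and replay the frequency tiers the tagger applies (this mirrors the mutually exclusive BA1/BS1/PM2 logic in `genomic_consultant.acmg.tagger`; the AF values are made-up examples):

```python
from pathlib import Path

from genomic_consultant.acmg.tagger import load_acmg_config

config = load_acmg_config(Path("configs/acmg_config.example.yaml"))

# BA1/BS1/PM2 are mutually exclusive tiers keyed on allele frequency (AF).
for af in (0.06, 0.02, 0.005, 0.0001):
    if af >= config.ba1_af:
        tag = "BA1"  # stand-alone benign
    elif af >= config.bs1_af:
        tag = "BS1"  # strong benign
    elif af <= config.pm2_af:
        tag = "PM2"  # moderate pathogenic
    else:
        tag = "no frequency tag"
    print(f"AF={af}: {tag}")
```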

configs/panel.example.json Normal file

@@ -0,0 +1,10 @@
{
"name": "Hearing loss core",
"version": "0.1",
"source": "curated",
"last_updated": "2024-06-01",
"genes": ["GJB2", "SLC26A4", "MITF", "OTOF"],
"metadata": {
"notes": "Example panel for demo; replace with curated panel and provenance."
}
}

configs/phenotype_to_genes.example.json Normal file

@@ -0,0 +1,11 @@
{
"version": "0.1",
"source": "example-curated",
"phenotype_to_genes": {
"HP:0000365": ["GJB2", "SLC26A4", "OTOF"],
"HP:0000510": ["MITF", "SOX10"]
},
"metadata": {
"notes": "Placeholder mapping; replace with curated HPO/OMIM/GenCC panels."
}
}

configs/phenotype_to_genes.hpo_seed.json Normal file

@@ -0,0 +1,13 @@
{
"version": "2024-11-01",
"source": "HPO-curated-seed",
"phenotype_to_genes": {
"HP:0000365": ["GJB2", "SLC26A4", "OTOF", "TECTA"],
"HP:0001250": ["SCN1A", "KCNQ2", "STXBP1"],
"HP:0001631": ["MYH7", "TNNT2", "MYBPC3"],
"HP:0001156": ["COL1A1", "COL1A2", "PLOD2"]
},
"metadata": {
"notes": "Seed mapping using common phenotype IDs; replace with full HPO-derived panels."
}
}

docs/phase1_howto.md Normal file

@@ -0,0 +1,101 @@
# Phase 1 How-To: BAM → VCF → Annotate → Panel Report
This document explains how to use the existing CLI to go from BAM files to a report, what the options mean, and how to skip steps. It assumes GATK, VEP, and bcftools/tabix are installed, and that the project itself is installed with `pip install -e .`.
## Workflow Overview
1) Trio variant calling → joint VCF
2) VEP annotation → annotated VCF + flattened TSV
3) Panel/phenotype query + ACMG tagging → Markdown/JSON report
Run everything at once with `phase1-run` (call/annotate can be skipped), or step by step with `run-call` / `run-annotate` / `panel-report`.
## Step-by-Step Execution
### 1) Variant calling (GATK)
```bash
genomic-consultant run-call \
--sample proband:/path/proband.bam \
--sample father:/path/father.bam \
--sample mother:/path/mother.bam \
--reference /refs/GRCh38.fa \
--workdir /tmp/trio \
--prefix trio \
--log /tmp/trio/run_call_log.json \
--probe-tools
```
- `--sample`: `sample_id:/path/to.bam`; repeatable.
- `--reference`: reference FASTA.
- `--workdir`: where outputs and intermediate files are written.
- `--prefix`: output filename prefix.
- `--log`: path for the run log (JSON).
- Outputs: joint VCF (`/tmp/trio/trio.joint.vcf.gz`) and a run log (commands/return codes).
### 2) Annotation (VEP + bcftools)
```bash
genomic-consultant run-annotate \
--vcf /tmp/trio/trio.joint.vcf.gz \
--workdir /tmp/trio/annot \
--prefix trio \
--reference /refs/GRCh38.fa \
--plugin 'SpliceAI,snv=/path/spliceai.snv.vcf.gz,indel=/path/spliceai.indel.vcf.gz' \
--plugin 'CADD,/path/whole_genome_SNVs.tsv.gz,/path/InDels.tsv.gz' \
--extra-flag "--cache --offline" \
--log /tmp/trio/annot/run_annot_log.json \
--probe-tools
```
- `--plugin`: VEP plugin spec; repeatable (SpliceAI/CADD shown here).
- `--extra-flag`: extra flags passed through to VEP (e.g. cache/offline).
- Outputs: annotated VCF (`trio.vep.vcf.gz`), flattened TSV (`trio.vep.tsv`), and a run log.
### 3) Panel/Phenotype reports
Using a panel file:
```bash
genomic-consultant panel-report \
--tsv /tmp/trio/annot/trio.vep.tsv \
--panel configs/panel.example.json \
--acmg-config configs/acmg_config.example.yaml \
--individual-id proband \
--max-af 0.05 \
--format markdown \
--log /tmp/trio/panel_log.json
```
Resolving the panel directly from a phenotype:
```bash
genomic-consultant panel-report \
--tsv /tmp/trio/annot/trio.vep.tsv \
--phenotype-id HP:0000365 \
--phenotype-mapping configs/phenotype_to_genes.hpo_seed.json \
--acmg-config configs/acmg_config.example.yaml \
--individual-id proband \
--max-af 0.05 \
--format markdown
```
- `--max-af`: maximum allele frequency filter.
- `--format`: `markdown` or `json`.
- Outputs: the report text plus a run log (records hashes of the panel/ACMG configs).
## One-Shot Mode (call/annotate skippable)
If you already have a joint VCF/TSV, skip the first two steps:
```bash
genomic-consultant phase1-run \
--workdir /tmp/trio \
--prefix trio \
--tsv /tmp/trio/annot/trio.vep.tsv \
--skip-call --skip-annotate \
--panel configs/panel.example.json \
--acmg-config configs/acmg_config.example.yaml \
--max-af 0.05 \
--format markdown \
--log-dir /tmp/trio/runtime
```
To actually run VEP, drop `--skip-annotate` and supply `--plugins`/`--extra-flag`; to run calling, drop `--skip-call` and supply `--sample`/`--reference`.
## Key Outputs
- Joint VCF (calling result)
- Annotated VCF + flattened TSV (gene/consequence/ClinVar/AF/SpliceAI/CADD columns)
- Run logs (JSON with commands, return codes, and config hashes; `--log` / `--log-dir`); see the sketch below for a quick way to inspect one
- Panel report (Markdown or JSON, with automatic ACMG tags; human review required)
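
A minimal sketch for inspecting one of these run logs (field names follow the `RunLog` dataclass in this repo; the path is just an example):

```python
import json
from pathlib import Path

# Example path; substitute whatever you passed to --log.
log = json.loads(Path("/tmp/trio/run_call_log.json").read_text())

print(log["run_id"], log["started_at"])
print("config hashes:", log["config_hashes"])

# Each executed command is recorded with its exit status.
for step in log["outputs"].get("command_results", []):
    status = "ok" if step["returncode"] == 0 else f"failed ({step['returncode']})"
    print(f"step {step['step']}: {status}  {step['command']}")
```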
## Notes
- Call/annotate depend on external tools and their resources (reference FASTA, VEP cache, SpliceAI/CADD data).
- Without BAMs/resources, you can demo the report with the sample TSV: `phase1-run --tsv sample_data/example_annotated.tsv --skip-call --skip-annotate ...`.
- Safety: `.gitignore` already excludes large genomic files; run in a local, controlled environment.


@@ -0,0 +1,80 @@
# Phase 1 Implementation Plan (Genomic Foundation)
Scope: deliver a working trio-based variant pipeline, annotated genomic store, query APIs, initial ACMG evidence tagging, and reporting/logging scaffolding. This assumes local execution with Python 3.11+.
## Objectives
- Trio BAM → joint VCF with QC artifacts (`Auto`).
- Annotate variants with population frequency, ClinVar, consequence/prediction (`Auto`).
- Provide queryable interfaces for gene/region lookups with filters (`Auto`).
- Disease/phenotype → gene panel lookup and filtered variant outputs (`Auto+Review`).
- Auto-tag subset of ACMG criteria; human-only final classification (`Auto+Review`).
- Produce machine-readable run logs with versions, configs, and overrides.
## Work Breakdown
1) **Data & references**
- Reference genome: GRCh38 (primary) with option for GRCh37; pin version hash.
- Resource bundles: known sites for BQSR (if using GATK), gnomAD/ClinVar versions for annotation.
- Test fixtures: small trio BAM/CRAM subset or GIAB trio downsampled for CI-like checks.
2) **Variant calling pipeline (wrapper, `Auto`)**
- Tooling: GATK HaplotypeCaller → gVCF; GenotypeGVCFs for joint calls. Alt: DeepVariant + joint genotyper (parameterized).
- Steps:
- Validate inputs (file presence, reference match).
- Optional QC: coverage, duplicates, on-target.
- Generate per-sample gVCF; joint genotyping to trio VCF.
- Outputs: joint VCF + index; QC summary JSON/TSV; log with tool versions and params.
3) **Annotation pipeline (`Auto`)**
- Tooling: Ensembl VEP with plugins for gnomAD, ClinVar, CADD, SpliceAI where available; alt path: ANNOVAR.
- Steps:
- Normalize variants (bcftools norm) if needed.
- Annotate with gene, transcript, protein change; population AF; ClinVar significance; consequence; predictions (SIFT/PolyPhen/CADD); inheritance flags.
- Include SpliceAI/CADD plugins if installed; CLI accepts extra flags/plugins to embed SpliceAI/CADD fields.
- Outputs: annotated VCF; flattened TSV/Parquet for faster querying; manifest of DB versions used.
4) **Genomic store + query API (`Auto`)**
- Early option: tabix-indexed VCF with Python wrapper.
- Functions (Python module `genomic_store`):
- `get_variants_by_gene(individual_id, genes, filters)`
Filters: AF thresholds, consequence categories, ClinVar significance, inheritance pattern.
- `get_variants_by_region(individual_id, chrom, start, end, filters)`
- `list_genes_with_variants(individual_id, filters)` (optional).
- Filters are defined in a `FilterConfig` dataclass and are serializable for logging (see the usage sketch after this list).
- Future option: import to SQLite/Postgres via Arrow/Parquet for richer predicates.
5) **Disease/phenotype → gene panel (`Auto+Review`)**
- Data: HPO/OMIM/GenCC lookup or curated JSON panels.
- Function: `get_gene_panel(disease_or_hpo_id, version=None)` returning gene list + provenance.
- Phenotype resolver: curated JSON mapping (e.g., `phenotype_to_genes.example.json`) as placeholder until upstream data is wired; allow dynamic panel synthesis by phenotype ID in CLI; support merging multiple sources into one mapping.
- Flow: resolve panel → call genomic store → apply simple ranking (AF low, ClinVar pathogenicity high).
- Manual review points: panel curation, rank threshold tuning.
6) **ACMG evidence tagging (subset, `Auto+Review`)**
- Criteria to auto-evaluate initially: PVS1 (LoF in LoF-sensitive gene list), PM2 (absent/rare in population), BA1/BS1 (common frequency), possible BP7 (synonymous, no splicing impact).
- Config: YAML/JSON with thresholds (AF cutoffs, LoF gene list, transcript precedence).
- Output schema per variant: `{variant_id, evidence: [tag, strength, rationale], suggested_class}`, with the suggested class computed purely from auto tags and the final class left blank for a human (an example instance follows this list).
- Logging: capture rule version and reasons for each fired rule.
7) **Run logging and versioning**
- Every pipeline run emits `run_log.json` containing:
- Inputs (sample IDs, file paths, reference build).
- Tool versions and parameters; DB versions; config hashes (panel/ACMG configs).
- Automation level per step; manual overrides (`who/when/why`).
- Derived artifacts paths and checksums.
- Embed run_log reference in reports.
8) **Report template (minimal)**
- Input: disease/gene panel name, variant query results, ACMG evidence tags.
- Output: Markdown + JSON summary with sections: context, methods, variants table, limitations.
- Mark human-only decisions clearly.
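A minimal usage sketch of the query surface from item 4, using the names this repo's scaffolding actually exposes (`GenomicStore`, `FilterConfig`); the TSV path and gene list are illustrative:

```python
from pathlib import Path

from genomic_consultant.store.query import GenomicStore
from genomic_consultant.utils.models import FilterConfig

# Load the flattened annotated TSV produced by the annotation pipeline.
store = GenomicStore.from_tsv(Path("sample_data/example_annotated.tsv"))

# FilterConfig is serializable, so the active filters can be echoed into run logs.
filters = FilterConfig(max_af=0.01)

variants = store.get_variants_by_gene(
    individual_id="demo", genes=["GJB2", "OTOF"], filters=filters
)
for v in variants:
    print(v.gene, v.consequence, v.allele_frequency)
```

And a hypothetical instance of the per-variant ACMG output schema from item 6 (values are illustrative; the `variant_id` format is an assumption):

```json
{
  "variant_id": "2-234567-G-C",
  "evidence": [
    {"tag": "PVS1", "strength": "Very strong", "rationale": "Predicted LoF in LoF-sensitive gene"},
    {"tag": "PM2", "strength": "Moderate", "rationale": "AF 0.0001 <= 0.0005"}
  ],
  "suggested_class": "Likely pathogenic",
  "final_class": null
}
```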
## Milestones
- **M1**: Repo scaffolding + configs; tiny trio test data wired; `make pipeline` runs through variant calling + annotation on fixture.
- **M2**: Genomic store wrapper with gene/region queries; filter config; basic CLI/Notebook demo.
- **M3**: Panel lookup + ranked variant listing; ACMG auto tags on outputs; run_log generation.
- **M4**: Minimal report generator + acceptance of human-reviewed classifications.
## Validation Strategy
- Unit tests for filter logic, panel resolution, ACMG tagger decisions (synthetic variants).
- Integration test on small fixture trio to ensure call → annotate → query path works.
- Determinism checks: hash configs and verify outputs stable across runs given same inputs.


@@ -0,0 +1,69 @@
# System Architecture Blueprint (v0.1)
This document turns `genomic_decision_support_system_spec_v0.1.md` into a buildable architecture and phased roadmap. Automation levels follow `Auto / Auto+Review / Human-only`.
## High-Level Views
- **Core layers**: (1) sequencing ingest → variant calling/annotation (`Auto`), (2) genomic query layer (`Auto`), (3) rule engines (ACMG, PGx, DDI, supplements; mixed automation), (4) orchestration/LLM and report generation (`Auto` tool calls, `Auto+Review` outputs).
- **Data custody**: all PHI/genomic artifacts remain local; external calls require de-identification or private models.
- **Traceability**: every run records tool versions, database snapshots, configs, and manual overrides in machine-readable logs.
### End-to-end flow
```
BAM (proband + parents)
↓ Variant Calling (gVCF) [Auto]
Joint Genotyper → joint VCF
↓ Annotation (VEP/ANNOVAR + ClinVar/gnomAD etc.) [Auto]
↓ Genomic Store (VCF+tabix or SQL) + Query API [Auto]
├─ Disease/Phenotype → Gene Panel lookup [Auto+Review]
│ └─ Panel variants with basic ranking (freq, ClinVar) [Auto+Review]
├─ ACMG evidence tagging subset (PVS1, PM2, BA1, BS1…) [Auto+Review]
├─ PGx genotype→phenotype and recommendation rules [Auto → Auto+Review]
├─ DDI rule evaluation [Auto]
└─ Supplement/Herb normalization + interaction rules [Auto+Review → Human-only]
LLM/Orchestrator routes user questions to tools, produces JSON + Markdown drafts [Auto tools, Auto+Review narratives]
```
## Phase Roadmap (build-first view)
- **Phase 1: Genomic foundation**
- Deliverables: trio joint VCF + annotation; query functions (`get_variants_by_gene/region`); disease→gene panel lookup; partial ACMG evidence tagging.
- Data stores: tabix-backed VCF wrapper initially; optional SQLite/Postgres import later.
- Interfaces: Python CLI/SDK first; machine-readable run logs with versions and automation levels.
- **Phase 2: PGx & DDI**
- Drug vocabulary normalization (ATC/RxNorm).
- PGx engine: star-allele calling or rule-based genotype→phenotype; guideline-mapped advice with review gates.
- DDI engine: rule base with severity tiers; combine with PGx outputs.
- **Phase 3: Supplements & Herbs**
- Name/ingredient normalization; herb formula expansion.
- Rule tables for CYP/transporters, coagulation, CNS effects.
- Evidence grading and conservative messaging; human-only final clinical language.
- **Phase 4: LLM Interface & Reports**
- Tool-calling schema for queries listed above.
- JSON + Markdown report templates with traceability to rules, data versions, and overrides.
## Module Boundaries
- **Variant Calling Pipeline** (`Auto`): wrapper around GATK or DeepVariant + joint genotyper; pluggable reference genome; QC summaries.
- **Annotation Pipeline** (`Auto`): VEP/ANNOVAR with pinned database versions (gnomAD, ClinVar, transcript set); emits annotated VCF + flat table.
- **Genomic Query Layer** (`Auto`): abstraction over tabix or SQL; minimal APIs: `get_variants_by_gene`, `get_variants_by_region`, filters (freq, consequence, clinvar).
- **Disease/Phenotype to Panel** (`Auto+Review`): HPO/OMIM lookups or curated panels; panel versioned; feeds queries.
- **Phenotype Resolver** (`Auto+Review`): JSON/DB mapping of phenotype/HPO IDs to gene lists as a placeholder until upstream sources are integrated; can synthesize panels dynamically and merge multiple sources.
- **ACMG Evidence Tagger** (`Auto+Review`): auto-evaluable criteria only; config-driven thresholds; human-only final classification.
- **PGx Engine** (`Auto → Auto+Review`): star-allele calling where possible; guideline rules (CPIC/DPWG) with conservative defaults; flag items needing review.
- **DDI Engine** (`Auto`): rule tables keyed by normalized drug IDs; outputs severity and rationale.
- **Supplements/Herbs** (`Auto+Review → Human-only`): ingredient extraction + mapping; interaction rules; human sign-off for clinical language.
- **Orchestrator/LLM** (`Auto tools, Auto+Review outputs`): intent parsing, tool sequencing, safety guardrails, report drafting.
## Observability and Versioning
- Every pipeline run writes a JSON log: tool versions, reference genome, DB versions, config hashes, automation level per step, manual overrides (who/when/why).
- Reports embed references to those logs so outputs remain reproducible.
- Configs (ACMG thresholds, gene panels, PGx rules) are versioned artifacts stored alongside code.
## Security/Privacy Notes
- Default to local processing; if external LLMs are used, strip identifiers and avoid full VCF uploads.
- Secrets kept out of repo; rely on environment variables or local config files (excluded by `.gitignore`).
## Initial Tech Bets (to be validated)
- Language/runtime: Python 3.11+ for pipelines, rules, and orchestration stubs.
- Bio stack candidates: GATK or DeepVariant; VEP; tabix for early querying; SQLAlchemy + SQLite/Postgres when scaling.
- Infra: containerized runners for pipelines; makefiles or workflow engine (Nextflow/Snakemake) later if needed.

genomic_decision_support_system_spec_v0.1.md Normal file

@@ -0,0 +1,356 @@
# Personal Genomic Risk and Drug Interaction Decision Support System: Specification
- Version: v0.1-draft
- Author: Gbanyan + assistant planning
- Status: draft; expected to iterate alongside implementation
- Purpose: written for LLMs (e.g. Claude, Codex) and developers as the baseline specification for system design and implementation.
---
## 0. System Goals and Scope
### 0.1 Goals
Build a personalized, gene-driven decision support system with the following core functions:
1. **Starting from exome sequencing data (BAM) for the individual and both parents**, produce a queryable variant database.
2. **For a given disease, symptom, or phenotype**, automatically look up associated genes and known pathogenic variants, and search the individual's data for matching variants.
3. Provide **machine-assisted variant interpretation with human intervention points**, based on the **ACMG/AMP guidelines** and public databases.
4. Further integrate:
   - Pharmacogenomics (PGx)
   - Drug-drug interactions (DDI)
   - Potential interactions and risks from supplements, herbal medicines, and similar ingredients
5. Provide a natural-language Q&A interface, so the user can ask directly:
   - "Do I carry a hereditary risk for XXX?"
   - "Given the drugs and supplements I currently take, are there interactions I should watch for?"
> The system is positioned as a **personal decision-support / research tool**, not a formal medical diagnostic system.
### 0.2 Core Design Philosophy
- **Phased development**: solidify the genomic foundation and variant interpretation first, then expand toward PGx and interactions.
- **Explicit human-machine division of labor**: every module is labeled `Auto / Auto+Review / Human-only`.
- **Traceable and reproducible**: every conclusion can be traced to the rules applied, the database versions used, and any manual overrides.
---
## 1. Development Phases and Overall Architecture
### 1.1 Phase Overview
| Phase | Name | Core deliverables | Primary focus |
|------|------|----------|----------|
| Phase 1 | Genomic foundation and variant interpretation | Personal + trio VCF, annotation, disease/gene queries | Monogenic disease risk |
| Phase 2 | Pharmacogenomics and DDI | Gene-drug mapping, drug-drug interaction analysis | Precision medication advice |
| Phase 3 | Supplement and herbal interactions | Ingredient normalization and interaction risk tiers | Overall medication + supplement safety net |
| Phase 4 | NLP/LLM Q&A interface | Natural-language Q&A, report generation | General users / clinical dialogue |
### 1.2 High-Level Architecture Diagram (Mermaid)
```mermaid
flowchart TD
subgraph P1[Phase 1: Genomic foundation]
A1["BAM (proband + parents)"] --> A2[Variant Calling Pipeline]
A2 --> A3["Joint VCF (Trio)"]
A3 --> A4["Variant Annotation (ClinVar, gnomAD, VEP...)"]
A4 --> A5[Genomic DB / Query API]
end
subgraph P2[Phase 2: PGx & DDI]
B1[Medication list] --> B2[PGx Engine]
A5 --> B2
B1 --> B3[DDI Engine]
end
subgraph P3[Phase 3: Supplements & herbs]
C1[Supplement/herb list] --> C2[Ingredient normalization]
C2 --> C3[Ingredient interaction engine]
B1 --> C3
A5 --> C3
end
subgraph P4[Phase 4: Q&A and reports]
D1[Frontend UI / CLI / API Client] --> D2[LLM Orchestrator]
D2 -->|Disease/symptom questions| A5
D2 -->|Drug/ingredient questions| B2
D2 --> B3
D2 --> C3
A5 --> D3[Report generator]
B2 --> D3
B3 --> D3
C3 --> D3
end
```
---
## 2. Cross-Cutting Design: Human-Machine Labels
Every module must carry an automation level:
- `Auto`: steps that can run fully automatically (e.g. variant calling, basic annotation).
- `Auto+Review`: the system proposes first; a human reviews or conditionally accepts (e.g. parts of ACMG evidence scoring).
- `Human-only`: final medical judgments, wording, and management decisions must come from a human (e.g. final Pathogenic classification, clinical management advice).
Each analysis must produce a **machine-readable log** recording:
- Modules used and their versions
- The automation level of each step
- Any manual overrides (who, when, why)
---
## 3. Phase 1: Genomic Foundation and Variant Interpretation
### 3.1 Functional Requirements
1. **Inputs**
   - Exome sequencing BAM files for the individual and both parents.
2. **Outputs**
   - A high-quality joint VCF (trio).
   - Annotation for each variant:
     - Gene, transcript, protein change
     - Population frequency (gnomAD etc.)
     - ClinVar annotations
     - Functional predictions (SIFT/PolyPhen/CADD etc.)
   - Filtered variant results for specific disease/gene lists.
3. **Services**
   - Provide via API / function interfaces:
     - Given a gene list → return the individual's variants in those genes
     - Support the disease name/HPO → genes → variants query flow (initially as separate calls)
### 3.2 Module Design
#### 3.2.1 Variant Calling Pipeline
- **Input**: BAM (proband + parents)
- **Output**: per-sample gVCF → joint VCF
- **Candidate tools**:
  - GATK (HaplotypeCaller + GenotypeGVCFs)
  - or DeepVariant + a joint genotyper
- **Automation level**: `Auto`
- **Requirements**:
  - Basic QC (coverage, duplicate rate, on-target rate)
  - Version labeling (e.g. reference genome version)
#### 3.2.2 Annotation Pipeline
- **Input**: joint VCF
- **Output**: annotated VCF / variant table
- **Candidate tools**:
  - VEP, ANNOVAR, or similar
- **Databases**:
  - ClinVar
  - gnomAD
  - Gene function and transcript databases
- **Automation level**: `Auto`
#### 3.2.3 Genomic DB / Query API
- **Purpose**: provide efficient queries as the foundation for downstream modules (disease risk, PGx, etc.).
- **Form**:
  - Option A: VCF + tabix, wrapped in functions
  - Option B: import into SQLite / PostgreSQL / a dedicated genomic DB
- **Key queries**:
  - `get_variants_by_gene(individual_id, gene_list, filters)`
  - `get_variants_by_region(individual_id, chr, start, end, filters)`
- **Automation level**: `Auto`
#### 3.2.4 Disease/Phenotype → Genes → Variants Flow
- Initially split into three steps:
  1. Use external knowledge bases or manual panels: disease/phenotype → gene list
  2. Query the individual's variants through the Genomic DB
  3. Rank initially with simple rules (frequency, ClinVar annotations)
- **Automation level**: `Auto+Review`
### 3.3 ACMG Rules (Initial Version)
- **Scope**: implement only the subset of evidence that can be determined automatically (e.g. PVS1, PM2, BA1, BS1).
- **Output**:
  - Per-variant evidence tag list plus a suggested classification (e.g. `suggested_class = "VUS"`).
- **Human intervention points**:
  - Final variant classification (Pathogenic / Likely pathogenic / VUS / Likely benign / Benign): `Human-only`
  - Rule thresholds (e.g. frequency cutoffs) managed via config files → `Auto+Review`
---
## 4. Phase 2: Pharmacogenomics (PGx) and Drug-Drug Interactions (DDI)
### 4.1 Functional Requirements
1. Accept the user's current medication list (prescription and OTC drugs).
2. Use the genomic data to determine PGx-relevant genotypes (e.g. CYP2D6, CYP2C9, HLA).
3. Based on CPIC / DPWG and similar guidelines, provide:
   - Indication-related risks (e.g. HLA-B*58:01 and allopurinol)
   - Dose-adjustment / drug-substitution suggestions (decision-support level only)
4. Compute basic drug-drug interactions (DDI), for example:
   - Additive CYP inhibition / induction
   - Additive QT prolongation
   - Additive bleeding risk
### 4.2 Module Design
#### 4.2.1 Medication Data Normalization
- Use ATC / RxNorm / custom IDs.
- **Automation level**: `Auto`
#### 4.2.2 PGx Engine
- **Input**: personal variants (Phase 1 DB), medication list
- **Output**: a PGx assessment per drug (genotype → phenotype → advice)
- **Databases**:
  - CPIC guidelines
  - PharmGKB association data
- **Automation levels**:
  - genotype → phenotype: `Auto`
  - phenotype → clinical advice: `Auto+Review`
#### 4.2.3 DDI Engine
- **Input**: medication list
- **Output**: list of known DDIs with severity grades
- **Data sources**: public or commercial DDI databases (subject to availability)
- **Automation level**: `Auto`
---
## 5. Phase 3: Supplement and Herbal Interaction Module
### 5.1 Functional Requirements
1. Accept the user's supplement and herbal medicine usage data.
2. Resolve names into:
   - Standardized active ingredients (e.g. EPA/DHA mg, vitamin D IU, ginkgo leaf extract mg)
   - Herbal ingredient names (e.g. Huangqi/astragalus, Danggui/angelica, Chuanxiong, ...)
3. Assess:
   - Interaction risks among ingredients, drugs, and genes
   - Additive effects between ingredients (e.g. anticoagulant, CNS-depressant)
4. Grade outputs by evidence level:
   - High-priority warnings (stronger clinical evidence)
   - General reminders (animal studies / case reports etc.)
   - Insufficient data: can only flag the uncertainty
### 5.2 Module Design
#### 5.2.1 Ingredient Normalization Engine
- **Input**: user-entered product names / prescriptions
- **Output**:
  - Standardized ingredient list
  - Estimated dose ranges (when precise data is unavailable)
- **Data**:
  - Tables of common supplement ingredients
  - Mapping of herbal formulas to constituent herbs
- **Automation level**: `Auto+Review`
#### 5.2.2 Ingredient Interaction Engine
- **Input**: ingredient list, medication list, genomic data
- **Output**: interaction list with risk tiers
- **Logic**:
  - Ingredient effects on CYP / P-gp / OATP and similar pathways
  - Ingredient effects on coagulation, blood pressure, the central nervous system, etc.
- **Automation levels**:
  - Rule inference: `Auto`
  - Final clinical wording: `Human-only`
---
## 6. Phase 4: NLP/LLM Q&A Interface and Report Generation
### 6.1 Functional Requirements
1. Support natural-language questions about:
   - Disease/symptom-related risks
   - Medication safety
   - Risks of combining supplements and herbal medicines
2. The LLM is responsible for:
   - Parsing questions → structured queries (disease, HPO, drug, ingredient, etc.)
   - Coordinating calls to the underlying APIs (Phases 1-3)
   - Integrating results and drafting reports
3. Report formats:
   - Machine-readable JSON (for post-processing)
   - Human-readable Markdown / PDF reports
### 6.2 Orchestration Design
- Adopt an "LLM + tool/function calling" pattern.
- Tools include:
  - `query_variants_by_gene`
  - `query_disease_gene_panel`
  - `run_pgx_analysis`
  - `run_ddi_analysis`
  - `run_supplement_herb_interaction`
- The LLM is mainly responsible for:
  - Intent recognition and decomposition
  - Planning the tool-call sequence
  - Interpreting results and adjusting wording (must follow safety-first, conservative principles)
- **Automation levels**:
  - Tool calls: `Auto`
  - Clinically sensitive conclusions: `Auto+Review` / `Human-only` (depending on context)
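
To make the tool-calling pattern concrete, a hypothetical function-calling exchange (tool names follow the list above; the argument schema is an assumption for illustration, reusing the demo phenotype and panel genes from `configs/`):

```json
{
  "tool_call": {
    "name": "query_disease_gene_panel",
    "arguments": {"phenotype_id": "HP:0000365"}
  },
  "followed_by": {
    "name": "query_variants_by_gene",
    "arguments": {
      "individual_id": "proband",
      "gene_list": ["GJB2", "SLC26A4", "OTOF"],
      "filters": {"max_af": 0.05}
    }
  }
}
```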
---
## 7. Security, Privacy, and Version Management
### 7.1 Data Security and Privacy
- All genomic data, medication lists, and reports:
  - Stored locally or in a controlled environment
- If interaction with external services (e.g. cloud LLMs) is required:
  - De-identify (strip personal information)
  - Or use a local/private LLM instead
### 7.2 Version Management
- Version-control the following:
  - Reference genome version
  - Variant calling pipeline version
  - Database versions (ClinVar, gnomAD, etc.)
  - ACMG rule config version
  - Gene panel / PGx rule versions
- Every analysis report must record the versions used, for traceability and reruns.
### 7.3 Manual Intervention Records
- Every manual override or review must record:
  - Variant ID / analysis item
  - The original automated suggestion
  - The manual adjustment
  - Rationale and references (if any)
  - Reviewer and timestamp
---
## 8. Future Extensions (Optional)
- Integrate a polygenic risk score (PRS) module
- Integrate longitudinal data (lab results, symptom diaries) for dynamic risk tracking
- Build deeper expert-curated knowledge bases for specific disease areas
- Integrate wearables / other health data sources
---
## 9. Suggested Phase 1 Implementation Path (Actionable TODO)
1. **Settle the Phase 1 technology choices**
   - Pick a variant caller (e.g. GATK) and a reference genome version
   - Pick an annotation tool (e.g. VEP or ANNOVAR)
2. **Build the basic pipeline**
   - BAM → gVCF → joint VCF (trio)
   - Add basic QC reporting
3. **Build a simple genomic query interface**
   - Start with CLI/notebook functions (e.g. a Python library)
4. **Pick the disease area you care about most**
   - Build the first gene panel (e.g. vision/hearing related)
   - Implement panel-based queries and variant list output
5. **Write the first report template**
   - Input: disease name + gene panel + query results
   - Output: a simple Markdown report (variant table + limitations)
6. **Gradually add automatic ACMG evidence tagging and a human review workflow**
This specification is expected to evolve throughout implementation; treat it as the v0.1 starting point.

pyproject.toml Normal file

@@ -0,0 +1,31 @@
[build-system]
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "genomic-consultant"
version = "0.1.0"
description = "Personal genomic risk and druginteraction decision support scaffolding"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"pyyaml>=6",
]
[project.scripts]
genomic-consultant = "genomic_consultant.cli:main"
[project.optional-dependencies]
dev = [
"pytest",
"ruff",
]
store = [
"pandas>=2",
]
[tool.setuptools]
package-dir = {"" = "src"}
[tool.setuptools.packages.find]
where = ["src"]

sample_data/example_annotated.tsv Normal file

@@ -0,0 +1,3 @@
#CHROM POS REF ALT SYMBOL Consequence Protein_position PolyPhen SIFT CLIN_SIG AF gnomAD_AF SpliceAI CADD_PHRED
1 123456 A T GJB2 missense_variant p.Val37Ile benign tolerated Benign 0.012 0.012 0.02 5.1
2 234567 G C OTOF stop_gained p.* probably_damaging deleterious Uncertain_significance 0.0001 0.0001 0.6 28.5

src/genomic_consultant.egg-info/PKG-INFO Normal file

@@ -0,0 +1,63 @@
Metadata-Version: 2.4
Name: genomic-consultant
Version: 0.1.0
Summary: Personal genomic risk and drug-interaction decision support scaffolding
Requires-Python: >=3.11
Description-Content-Type: text/markdown
Requires-Dist: pyyaml>=6
Provides-Extra: dev
Requires-Dist: pytest; extra == "dev"
Requires-Dist: ruff; extra == "dev"
# Genomic Consultant
Early design for a personal genomic risk and drug-interaction decision support system. Specs are sourced from `genomic_decision_support_system_spec_v0.1.md`.
## Vision (per spec)
- Phase 1: trio variant calling, annotation, queryable genomic DB, initial ACMG evidence tagging.
- Phase 2: pharmacogenomics genotype-to-phenotype mapping plus drug-drug interaction checks.
- Phase 3: supplement/herb normalization and interaction risk layering.
- Phase 4: LLM-driven query orchestration and report generation.
## Repository Layout
- `docs/` — system architecture notes, phase plans, data models (work in progress).
- `configs/` — example ACMG config and gene panel JSON.
- `sample_data/` — tiny annotated TSV for demo.
- `src/genomic_consultant/` — Python scaffolding (pipelines, store, panel lookup, ACMG tagging, reporting).
- `genomic_decision_support_system_spec_v0.1.md` — original requirements draft.
## Contributing/next steps
1. Finalize Phase 1 tech selection (variant caller, annotation stack, reference/DB versions).
2. Stand up the Phase 1 pipelines and minimal query API surface.
3. Add ACMG evidence tagging config and human-review logging.
4. Layer in PGx/DDI and supplement modules per later phases.
Data safety: keep genomic/clinical data local; the `.gitignore` blocks common genomic outputs by default.
## Quickstart (CLI scaffolding)
```
pip install -e .
# 1) Show trio calling plan (commands only; not executed)
genomic-consultant plan-call \
--sample proband:/data/proband.bam \
--sample father:/data/father.bam \
--sample mother:/data/mother.bam \
--reference /refs/GRCh38.fa \
--workdir /tmp/trio
# 2) Show annotation plan for a joint VCF
genomic-consultant plan-annotate \
--vcf /tmp/trio/trio.joint.vcf.gz \
--workdir /tmp/trio/annot \
--prefix trio \
--reference /refs/GRCh38.fa
# 3) Demo panel report using sample data
genomic-consultant panel-report \
--tsv sample_data/example_annotated.tsv \
--panel configs/panel.example.json \
--acmg-config configs/acmg_config.example.yaml \
--individual-id demo \
--format markdown
```

src/genomic_consultant.egg-info/SOURCES.txt Normal file

@@ -0,0 +1,27 @@
README.md
pyproject.toml
src/genomic_consultant/__init__.py
src/genomic_consultant/cli.py
src/genomic_consultant.egg-info/PKG-INFO
src/genomic_consultant.egg-info/SOURCES.txt
src/genomic_consultant.egg-info/dependency_links.txt
src/genomic_consultant.egg-info/entry_points.txt
src/genomic_consultant.egg-info/requires.txt
src/genomic_consultant.egg-info/top_level.txt
src/genomic_consultant/acmg/__init__.py
src/genomic_consultant/acmg/tagger.py
src/genomic_consultant/audit/__init__.py
src/genomic_consultant/audit/run_log.py
src/genomic_consultant/orchestration/__init__.py
src/genomic_consultant/orchestration/workflows.py
src/genomic_consultant/panels/__init__.py
src/genomic_consultant/panels/panels.py
src/genomic_consultant/pipelines/__init__.py
src/genomic_consultant/pipelines/annotation.py
src/genomic_consultant/pipelines/variant_calling.py
src/genomic_consultant/reporting/__init__.py
src/genomic_consultant/reporting/report.py
src/genomic_consultant/store/__init__.py
src/genomic_consultant/store/query.py
src/genomic_consultant/utils/__init__.py
src/genomic_consultant/utils/models.py

src/genomic_consultant.egg-info/dependency_links.txt Normal file

@@ -0,0 +1 @@

src/genomic_consultant.egg-info/entry_points.txt Normal file

@@ -0,0 +1,2 @@
[console_scripts]
genomic-consultant = genomic_consultant.cli:main

src/genomic_consultant.egg-info/requires.txt Normal file

@@ -0,0 +1,5 @@
pyyaml>=6
[dev]
pytest
ruff

src/genomic_consultant.egg-info/top_level.txt Normal file

@@ -0,0 +1 @@
genomic_consultant

src/genomic_consultant/__init__.py Normal file

@@ -0,0 +1,4 @@
"""Genomic Consultant: genomic decision support scaffolding."""
__all__ = ["__version__"]
__version__ = "0.1.0"

src/genomic_consultant/acmg/__init__.py Normal file

@@ -0,0 +1 @@

src/genomic_consultant/acmg/tagger.py Normal file

@@ -0,0 +1,91 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import List, Set
import yaml
from genomic_consultant.utils.models import EvidenceTag, SuggestedClassification, Variant
@dataclass
class ACMGConfig:
ba1_af: float = 0.05
bs1_af: float = 0.01
pm2_af: float = 0.0005
lof_genes: Set[str] | None = None
bp7_splice_ai_max: float = 0.1
def load_acmg_config(path: Path) -> ACMGConfig:
data = yaml.safe_load(Path(path).read_text())
return ACMGConfig(
ba1_af=data.get("ba1_af", 0.05),
bs1_af=data.get("bs1_af", 0.01),
pm2_af=data.get("pm2_af", 0.0005),
lof_genes=set(data.get("lof_genes", [])) if data.get("lof_genes") else set(),
bp7_splice_ai_max=data.get("bp7_splice_ai_max", 0.1),
)
def tag_variant(variant: Variant, config: ACMGConfig) -> SuggestedClassification:
evidence: List[EvidenceTag] = []
af = variant.allele_frequency
if af is not None:
if af >= config.ba1_af:
evidence.append(EvidenceTag(tag="BA1", strength="Stand-alone", rationale=f"AF {af} >= {config.ba1_af}"))
elif af >= config.bs1_af:
evidence.append(EvidenceTag(tag="BS1", strength="Strong", rationale=f"AF {af} >= {config.bs1_af}"))
elif af <= config.pm2_af:
evidence.append(EvidenceTag(tag="PM2", strength="Moderate", rationale=f"AF {af} <= {config.pm2_af}"))
    # Guard against an unset LoF gene list (the dataclass default is None).
    if _is_lof(variant) and variant.gene and config.lof_genes and variant.gene in config.lof_genes:
evidence.append(
EvidenceTag(tag="PVS1", strength="Very strong", rationale="Predicted LoF in LoF-sensitive gene")
)
splice_ai = _get_float(variant.annotations.get("splice_ai_delta_score"))
if _is_synonymous(variant) and (splice_ai is None or splice_ai <= config.bp7_splice_ai_max):
evidence.append(
EvidenceTag(
tag="BP7",
strength="Supporting",
rationale=f"Synonymous with low predicted splice impact (spliceAI {splice_ai})",
)
)
suggested = _suggest_class(evidence)
return SuggestedClassification(suggested_class=suggested, evidence=evidence)
def _is_lof(variant: Variant) -> bool:
consequence = (variant.consequence or "").lower()
lof_keywords = ["frameshift", "stop_gained", "splice_acceptor", "splice_donor", "start_lost"]
return any(k in consequence for k in lof_keywords)
def _suggest_class(evidence: List[EvidenceTag]) -> str:
tags = {e.tag for e in evidence}
if "BA1" in tags:
return "Benign"
if "BS1" in tags and "PM2" not in tags and "PVS1" not in tags:
return "Likely benign"
if "PVS1" in tags and "PM2" in tags:
return "Likely pathogenic"
return "VUS"
def _is_synonymous(variant: Variant) -> bool:
consequence = (variant.consequence or "").lower()
return "synonymous_variant" in consequence
def _get_float(value: str | float | None) -> float | None:
if value is None:
return None
try:
return float(value)
except (TypeError, ValueError):
return None

src/genomic_consultant/audit/__init__.py Normal file

@@ -0,0 +1 @@

src/genomic_consultant/audit/run_log.py Normal file

@@ -0,0 +1,30 @@
from __future__ import annotations
import json
from dataclasses import asdict, is_dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
from genomic_consultant.utils.models import RunLog
def _serialize(obj: Any) -> Any:
    if isinstance(obj, datetime):
        return obj.isoformat()
    if isinstance(obj, Path):
        # Paths (e.g. plan outputs recorded by the runner) are not JSON-serializable.
        return str(obj)
    if is_dataclass(obj):
        return {k: _serialize(v) for k, v in asdict(obj).items()}
    if isinstance(obj, dict):
        return {k: _serialize(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [_serialize(v) for v in obj]
    return obj
def write_run_log(run_log: RunLog, path: str | Path) -> Path:
"""Persist a RunLog to JSON."""
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
payload = _serialize(run_log)
path.write_text(json.dumps(payload, indent=2))
return path

src/genomic_consultant/cli.py Normal file

@@ -0,0 +1,273 @@
from __future__ import annotations
import argparse
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List
from genomic_consultant.acmg.tagger import ACMGConfig, load_acmg_config
from genomic_consultant.orchestration.workflows import run_panel_variant_review
from genomic_consultant.panels.aggregate import merge_mappings
from genomic_consultant.panels.panels import load_panel
from genomic_consultant.panels.resolver import PhenotypeGeneResolver
from genomic_consultant.pipelines.annotation import build_vep_plan
from genomic_consultant.pipelines.variant_calling import build_gatk_trio_plan
from genomic_consultant.pipelines.runner import execute_plan
from genomic_consultant.reporting.report import panel_report_json, panel_report_markdown
from genomic_consultant.orchestration.phase1_pipeline import run_phase1_pipeline
from genomic_consultant.store.query import GenomicStore
from genomic_consultant.utils.hashing import sha256sum
from genomic_consultant.utils.models import FilterConfig
from genomic_consultant.utils.tooling import probe_tool_versions
def parse_samples(sample_args: List[str]) -> Dict[str, Path]:
samples: Dict[str, Path] = {}
for arg in sample_args:
if ":" not in arg:
raise ValueError(f"Sample must be sample_id:/path/to.bam, got {arg}")
sample_id, bam_path = arg.split(":", 1)
samples[sample_id] = Path(bam_path)
return samples
def main(argv: List[str] | None = None) -> int:
parser = argparse.ArgumentParser(prog="genomic-consultant", description="Genomic decision support scaffolding")
sub = parser.add_subparsers(dest="command", required=True)
# Variant calling plan
call = sub.add_parser("plan-call", help="Build variant calling command plan (GATK trio).")
call.add_argument("--sample", action="append", required=True, help="sample_id:/path/to.bam (repeatable)")
call.add_argument("--reference", required=True, help="Path to reference FASTA")
call.add_argument("--workdir", required=True, help="Working directory for outputs")
call.add_argument("--prefix", default="trio", help="Output prefix for joint VCF")
run_call = sub.add_parser("run-call", help="Execute variant calling plan (GATK trio).")
run_call.add_argument("--sample", action="append", required=True, help="sample_id:/path/to.bam (repeatable)")
run_call.add_argument("--reference", required=True, help="Path to reference FASTA")
run_call.add_argument("--workdir", required=True, help="Working directory for outputs")
run_call.add_argument("--prefix", default="trio", help="Output prefix for joint VCF")
run_call.add_argument("--log", required=False, help="Path to write run log JSON")
run_call.add_argument("--probe-tools", action="store_true", help="Attempt to record tool versions")
# Annotation plan
ann = sub.add_parser("plan-annotate", help="Build annotation command plan (VEP).")
ann.add_argument("--vcf", required=True, help="Path to joint VCF")
ann.add_argument("--workdir", required=True, help="Working directory for annotated outputs")
ann.add_argument("--prefix", default="annotated", help="Output prefix")
ann.add_argument("--reference", required=False, help="Reference FASTA (optional)")
ann.add_argument("--plugin", action="append", help="VEP plugin spec, repeatable", default=[])
run_ann = sub.add_parser("run-annotate", help="Execute annotation plan (VEP).")
run_ann.add_argument("--vcf", required=True, help="Path to joint VCF")
run_ann.add_argument("--workdir", required=True, help="Working directory for annotated outputs")
run_ann.add_argument("--prefix", default="annotated", help="Output prefix")
run_ann.add_argument("--reference", required=False, help="Reference FASTA (optional)")
run_ann.add_argument("--plugin", action="append", help="VEP plugin spec, repeatable", default=[])
run_ann.add_argument("--extra-flag", action="append", help="Extra flags appended to VEP command", default=[])
run_ann.add_argument("--log", required=False, help="Path to write run log JSON")
run_ann.add_argument("--probe-tools", action="store_true", help="Attempt to record tool versions")
# Panel report
panel = sub.add_parser("panel-report", help="Run panel query + ACMG tagging and emit report.")
panel.add_argument("--tsv", required=True, help="Flattened annotated TSV")
panel.add_argument("--panel", required=False, help="Panel JSON file")
panel.add_argument("--phenotype-id", required=False, help="Phenotype/HPO ID to resolve to a panel")
panel.add_argument("--phenotype-mapping", required=False, help="Phenotype→genes mapping JSON")
panel.add_argument("--acmg-config", required=True, help="ACMG config YAML")
panel.add_argument("--individual-id", required=True, help="Individual identifier")
panel.add_argument("--max-af", type=float, default=None, help="Max allele frequency filter")
panel.add_argument("--format", choices=["markdown", "json"], default="markdown", help="Output format")
panel.add_argument("--log", required=False, help="Path to write run log JSON for the analysis")
panel.add_argument("--phenotype-panel", required=False, help="Phenotype→genes mapping JSON (optional)")
phase1 = sub.add_parser("phase1-run", help="End-to-end Phase 1 pipeline (call→annotate→panel).")
phase1.add_argument("--sample", action="append", help="sample_id:/path/to.bam (repeatable)")
phase1.add_argument("--reference", required=False, help="Path to reference FASTA")
phase1.add_argument("--workdir", required=True, help="Working directory for outputs")
phase1.add_argument("--prefix", default="trio", help="Output prefix")
phase1.add_argument("--plugins", action="append", default=[], help="VEP plugin specs")
phase1.add_argument("--extra-flag", action="append", default=[], help="Extra flags for VEP")
phase1.add_argument("--joint-vcf", required=False, help="Existing joint VCF (skip calling)")
phase1.add_argument("--tsv", required=False, help="Existing annotated TSV (skip annotation)")
phase1.add_argument("--skip-call", action="store_true", help="Skip variant calling step")
phase1.add_argument("--skip-annotate", action="store_true", help="Skip annotation step")
phase1.add_argument("--panel", required=False, help="Panel JSON file")
phase1.add_argument("--phenotype-id", required=False, help="Phenotype/HPO ID to resolve to a panel")
phase1.add_argument("--phenotype-mapping", required=False, help="Phenotype→genes mapping JSON")
phase1.add_argument("--acmg-config", required=True, help="ACMG config YAML")
phase1.add_argument("--max-af", type=float, default=None, help="Max allele frequency filter")
phase1.add_argument("--format", choices=["markdown", "json"], default="markdown", help="Report format")
phase1.add_argument("--log-dir", required=False, help="Directory to write run logs")
build_map = sub.add_parser("build-phenotype-mapping", help="Merge phenotype→gene mapping JSON files.")
build_map.add_argument("--output", required=True, help="Output JSON path")
build_map.add_argument("inputs", nargs="+", help="Input mapping JSON files")
args = parser.parse_args(argv)
if args.command == "plan-call":
samples = parse_samples(args.sample)
plan = build_gatk_trio_plan(
samples=samples,
reference_fasta=Path(args.reference),
workdir=Path(args.workdir),
output_prefix=args.prefix,
)
print("# Variant calling command plan")
for cmd in plan.commands:
print(cmd)
return 0
if args.command == "run-call":
samples = parse_samples(args.sample)
plan = build_gatk_trio_plan(
samples=samples,
reference_fasta=Path(args.reference),
workdir=Path(args.workdir),
output_prefix=args.prefix,
)
tool_versions = probe_tool_versions({"gatk": "gatk"}) if args.probe_tools else {}
run_log = execute_plan(
plan, automation_level="Auto", log_path=Path(args.log) if args.log else None
)
run_log.tool_versions.update(tool_versions)
if args.log:
from genomic_consultant.audit.run_log import write_run_log
write_run_log(run_log, Path(args.log))
print(f"Run finished with {len(run_log.outputs.get('command_results', []))} steps. Log ID: {run_log.run_id}")
if args.log:
print(f"Run log written to {args.log}")
return 0
if args.command == "plan-annotate":
plan = build_vep_plan(
vcf_path=Path(args.vcf),
workdir=Path(args.workdir),
reference_fasta=Path(args.reference) if args.reference else None,
output_prefix=args.prefix,
plugins=args.plugin,
)
print("# Annotation command plan")
for cmd in plan.commands:
print(cmd)
return 0
if args.command == "run-annotate":
plan = build_vep_plan(
vcf_path=Path(args.vcf),
workdir=Path(args.workdir),
reference_fasta=Path(args.reference) if args.reference else None,
output_prefix=args.prefix,
plugins=args.plugin,
extra_flags=args.extra_flag,
)
tool_versions = probe_tool_versions({"vep": "vep", "bcftools": "bcftools", "tabix": "tabix"}) if args.probe_tools else {}
run_log = execute_plan(
plan, automation_level="Auto", log_path=Path(args.log) if args.log else None
)
run_log.tool_versions.update(tool_versions)
if args.log:
from genomic_consultant.audit.run_log import write_run_log
write_run_log(run_log, Path(args.log))
print(f"Run finished with {len(run_log.outputs.get('command_results', []))} steps. Log ID: {run_log.run_id}")
if args.log:
print(f"Run log written to {args.log}")
return 0
if args.command == "panel-report":
if not args.panel and not (args.phenotype_id and args.phenotype_mapping):
raise SystemExit("Provide either --panel or (--phenotype-id and --phenotype-mapping).")
store = GenomicStore.from_tsv(Path(args.tsv))
if args.panel:
panel_obj = load_panel(Path(args.panel))
panel_config_hash = sha256sum(Path(args.panel))
else:
resolver = PhenotypeGeneResolver.from_json(Path(args.phenotype_mapping))
panel_obj = resolver.build_panel(args.phenotype_id)
if panel_obj is None:
raise SystemExit(f"No genes found for phenotype {args.phenotype_id}")
panel_config_hash = sha256sum(Path(args.phenotype_mapping))
acmg_config = load_acmg_config(Path(args.acmg_config))
filters = FilterConfig(max_af=args.max_af)
result = run_panel_variant_review(
individual_id=args.individual_id,
panel=panel_obj,
store=store,
acmg_config=acmg_config,
filters=filters,
)
output = panel_report_json(result) if args.format == "json" else panel_report_markdown(result)
print(output)
if args.log:
from genomic_consultant.audit.run_log import write_run_log
from genomic_consultant.utils.models import RunLog
run_log = RunLog(
run_id=f"panel-{args.individual_id}",
            started_at=datetime.now(timezone.utc),
inputs={
"tsv": str(args.tsv),
"panel": str(args.panel) if args.panel else f"phenotype:{args.phenotype_id}",
"acmg_config": str(args.acmg_config),
},
parameters={
"max_af": args.max_af,
"format": args.format,
"phenotype_id": args.phenotype_id,
},
tool_versions={},
database_versions={},
config_hashes={
"panel": panel_config_hash,
"acmg_config": sha256sum(Path(args.acmg_config)),
},
automation_levels={"panel_report": "Auto+Review"},
overrides=[],
outputs={"report": output},
notes=None,
)
write_run_log(run_log, Path(args.log))
print(f"Analysis log written to {args.log}")
return 0
if args.command == "build-phenotype-mapping":
merge_mappings(inputs=[Path(p) for p in args.inputs], output=Path(args.output))
print(f"Merged mapping written to {args.output}")
return 0
if args.command == "phase1-run":
samples = parse_samples(args.sample) if args.sample else None
log_dir = Path(args.log_dir) if args.log_dir else None
artifacts = run_phase1_pipeline(
samples=samples,
reference_fasta=Path(args.reference) if args.reference else None,
workdir=Path(args.workdir),
output_prefix=args.prefix,
acmg_config_path=Path(args.acmg_config),
max_af=args.max_af,
panel_path=Path(args.panel) if args.panel else None,
phenotype_id=args.phenotype_id,
phenotype_mapping=Path(args.phenotype_mapping) if args.phenotype_mapping else None,
report_format=args.format,
plugins=args.plugins,
extra_flags=args.extra_flag,
existing_joint_vcf=Path(args.joint_vcf) if args.joint_vcf else None,
existing_tsv=Path(args.tsv) if args.tsv else None,
skip_call=args.skip_call,
skip_annotate=args.skip_annotate,
log_dir=log_dir,
)
print(artifacts.panel_report)
return 0
return 1
if __name__ == "__main__":
sys.exit(main())

src/genomic_consultant/orchestration/__init__.py Normal file

@@ -0,0 +1 @@

src/genomic_consultant/orchestration/phase1_pipeline.py Normal file

@@ -0,0 +1,146 @@
from __future__ import annotations
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional
from datetime import datetime, timezone
from genomic_consultant.acmg.tagger import ACMGConfig, load_acmg_config
from genomic_consultant.audit.run_log import write_run_log
from genomic_consultant.orchestration.workflows import run_panel_variant_review
from genomic_consultant.panels.panels import load_panel
from genomic_consultant.panels.resolver import PhenotypeGeneResolver
from genomic_consultant.pipelines.annotation import AnnotationPlan, build_vep_plan
from genomic_consultant.pipelines.runner import execute_plan
from genomic_consultant.pipelines.variant_calling import VariantCallingPlan, build_gatk_trio_plan
from genomic_consultant.reporting.report import panel_report_json, panel_report_markdown
from genomic_consultant.store.query import GenomicStore
from genomic_consultant.utils.hashing import sha256sum
from genomic_consultant.utils.models import FilterConfig, RunLog
@dataclass
class Phase1Artifacts:
call_log: Optional[RunLog]
annotate_log: Optional[RunLog]
panel_report: str
panel_report_format: str
panel_result_log: RunLog
tsv_path: Path
joint_vcf: Optional[Path]
def run_phase1_pipeline(
samples: Optional[Dict[str, Path]],
reference_fasta: Optional[Path],
workdir: Path,
output_prefix: str,
acmg_config_path: Path,
max_af: Optional[float],
panel_path: Optional[Path],
phenotype_id: Optional[str],
phenotype_mapping: Optional[Path],
report_format: str = "markdown",
plugins: Optional[List[str]] = None,
extra_flags: Optional[List[str]] = None,
existing_joint_vcf: Optional[Path] = None,
existing_tsv: Optional[Path] = None,
skip_call: bool = False,
skip_annotate: bool = False,
log_dir: Optional[Path] = None,
) -> Phase1Artifacts:
"""
Orchestrate Phase 1: optional call -> annotate -> panel report.
Allows skipping call/annotate when precomputed artifacts are supplied.
"""
log_dir = log_dir or workdir / "runtime"
log_dir.mkdir(parents=True, exist_ok=True)
call_log = None
annotate_log = None
# Variant calling
joint_vcf: Optional[Path] = existing_joint_vcf
if not skip_call:
if not samples or not reference_fasta:
raise ValueError("samples and reference_fasta are required unless skip_call is True")
call_plan: VariantCallingPlan = build_gatk_trio_plan(
samples=samples, reference_fasta=reference_fasta, workdir=workdir, output_prefix=output_prefix
)
call_log_path = log_dir / f"{output_prefix}_call_runlog.json"
call_log = execute_plan(call_plan, automation_level="Auto", log_path=call_log_path)
joint_vcf = call_plan.joint_vcf
elif joint_vcf is None:
joint_vcf = existing_joint_vcf
# Annotation
tsv_path: Optional[Path] = existing_tsv
if not skip_annotate:
if joint_vcf is None:
raise ValueError("joint VCF must be provided (via call step or existing_joint_vcf)")
ann_plan: AnnotationPlan = build_vep_plan(
vcf_path=joint_vcf,
workdir=workdir,
reference_fasta=reference_fasta,
output_prefix=output_prefix,
plugins=plugins or [],
extra_flags=extra_flags or [],
)
ann_log_path = log_dir / f"{output_prefix}_annotate_runlog.json"
annotate_log = execute_plan(ann_plan, automation_level="Auto", log_path=ann_log_path)
tsv_path = ann_plan.flat_table
if tsv_path is None:
raise ValueError("No TSV available; provide existing_tsv or run annotation.")
# Panel selection
if panel_path:
panel_obj = load_panel(panel_path)
panel_hash = sha256sum(panel_path)
elif phenotype_id and phenotype_mapping:
resolver = PhenotypeGeneResolver.from_json(phenotype_mapping)
panel_obj = resolver.build_panel(phenotype_id)
if panel_obj is None:
raise ValueError(f"No genes found for phenotype {phenotype_id}")
panel_hash = sha256sum(phenotype_mapping)
else:
raise ValueError("Provide panel_path or (phenotype_id and phenotype_mapping).")
acmg_config = load_acmg_config(acmg_config_path)
store = GenomicStore.from_tsv(tsv_path)
filters = FilterConfig(max_af=max_af)
panel_result = run_panel_variant_review(
individual_id=output_prefix, panel=panel_obj, store=store, acmg_config=acmg_config, filters=filters
)
report = panel_report_markdown(panel_result) if report_format == "markdown" else panel_report_json(panel_result)
panel_log = RunLog(
run_id=f"phase1-panel-{uuid.uuid4()}",
started_at=datetime.now(timezone.utc),
inputs={
"tsv": str(tsv_path),
"panel": str(panel_path) if panel_path else f"phenotype:{phenotype_id}",
"acmg_config": str(acmg_config_path),
},
parameters={"max_af": max_af, "report_format": report_format, "phenotype_id": phenotype_id},
tool_versions={},
database_versions={},
config_hashes={"panel": panel_hash, "acmg_config": sha256sum(acmg_config_path)},
automation_levels={"panel_report": "Auto+Review"},
overrides=[],
outputs={"report": report},
notes=None,
)
panel_log_path = log_dir / f"{output_prefix}_panel_runlog.json"
write_run_log(panel_log, panel_log_path)
return Phase1Artifacts(
call_log=call_log,
annotate_log=annotate_log,
panel_report=report,
panel_report_format=report_format,
panel_result_log=panel_log,
tsv_path=tsv_path,
joint_vcf=joint_vcf,
)
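
For reference, a sketch of driving this orchestrator directly from Python instead of via the `phase1-run` CLI (mirrors the README demo; the sample-data paths assume the repo root as working directory):

```python
from pathlib import Path

from genomic_consultant.orchestration.phase1_pipeline import run_phase1_pipeline

# Reuse the demo TSV and skip call/annotate, as in the README quickstart.
artifacts = run_phase1_pipeline(
    samples=None,
    reference_fasta=None,
    workdir=Path("runtime"),
    output_prefix="demo",
    acmg_config_path=Path("configs/acmg_config.example.yaml"),
    max_af=0.05,
    panel_path=Path("configs/panel.example.json"),
    phenotype_id=None,
    phenotype_mapping=None,
    existing_tsv=Path("sample_data/example_annotated.tsv"),
    skip_call=True,
    skip_annotate=True,
)
print(artifacts.panel_report)
```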

src/genomic_consultant/orchestration/workflows.py Normal file

@@ -0,0 +1,35 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import List
from genomic_consultant.acmg.tagger import ACMGConfig, tag_variant
from genomic_consultant.panels.panels import GenePanel
from genomic_consultant.store.query import GenomicStore
from genomic_consultant.utils.models import FilterConfig, SuggestedClassification, Variant
@dataclass
class PanelVariantResult:
variant: Variant
acmg: SuggestedClassification
@dataclass
class PanelAnalysisResult:
individual_id: str
panel: GenePanel
variants: List[PanelVariantResult]
def run_panel_variant_review(
individual_id: str,
panel: GenePanel,
store: GenomicStore,
acmg_config: ACMGConfig,
filters: FilterConfig | None = None,
) -> PanelAnalysisResult:
"""Query variants for a panel and attach ACMG evidence suggestions."""
variants = store.get_variants_by_gene(individual_id=individual_id, genes=panel.genes, filters=filters)
enriched = [PanelVariantResult(variant=v, acmg=tag_variant(v, acmg_config)) for v in variants]
return PanelAnalysisResult(individual_id=individual_id, panel=panel, variants=enriched)

src/genomic_consultant/panels/__init__.py Normal file

@@ -0,0 +1 @@

src/genomic_consultant/panels/aggregate.py Normal file

@@ -0,0 +1,31 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Dict, Iterable, List, Set
def merge_mappings(inputs: Iterable[Path], output: Path, version: str = "merged", sources: List[str] | None = None) -> Path:
"""
Merge multiple phenotype→gene mapping JSON files into one.
Input schema: {"phenotype_to_genes": {"HP:xxxx": ["GENE1", ...]}, "version": "...", "source": "..."}
"""
merged: Dict[str, Set[str]] = {}
source_list: List[str] = sources or []
for path in inputs:
data = json.loads(Path(path).read_text())
phenos = data.get("phenotype_to_genes", {})
for pid, genes in phenos.items():
merged.setdefault(pid, set()).update(genes)
src_label = data.get("source") or path.name
source_list.append(src_label)
out = {
"version": version,
"source": ",".join(source_list),
"phenotype_to_genes": {pid: sorted(list(genes)) for pid, genes in merged.items()},
"metadata": {"merged_from": [str(p) for p in inputs]},
}
output.parent.mkdir(parents=True, exist_ok=True)
output.write_text(json.dumps(out, indent=2))
return output

src/genomic_consultant/panels/panels.py Normal file

@@ -0,0 +1,38 @@
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Optional
from genomic_consultant.utils.models import GenePanel
@dataclass
class PanelRepository:
"""Loads curated gene panels stored as JSON files."""
panels: Dict[str, GenePanel]
@classmethod
def from_directory(cls, path: Path) -> "PanelRepository":
panels: Dict[str, GenePanel] = {}
for json_file in Path(path).glob("*.json"):
panel = load_panel(json_file)
panels[panel.name] = panel
return cls(panels=panels)
def get(self, name: str) -> Optional[GenePanel]:
return self.panels.get(name)
def load_panel(path: Path) -> GenePanel:
data = json.loads(Path(path).read_text())
return GenePanel(
name=data["name"],
genes=data["genes"],
source=data.get("source", "unknown"),
version=data.get("version", "unknown"),
last_updated=data.get("last_updated", ""),
metadata=data.get("metadata", {}),
)

src/genomic_consultant/panels/resolver.py Normal file

@@ -0,0 +1,42 @@
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional
from genomic_consultant.utils.models import GenePanel
@dataclass
class PhenotypeGeneResolver:
"""Resolves phenotype/HPO terms to gene lists using a curated mapping file."""
mapping: Dict[str, List[str]]
version: str
source: str
@classmethod
def from_json(cls, path: Path) -> "PhenotypeGeneResolver":
data = json.loads(Path(path).read_text())
mapping = data.get("phenotype_to_genes", {})
version = data.get("version", "unknown")
source = data.get("source", "unknown")
return cls(mapping=mapping, version=version, source=source)
def resolve(self, phenotype_id: str) -> Optional[List[str]]:
"""Return gene list for a phenotype/HPO ID if present."""
return self.mapping.get(phenotype_id)
def build_panel(self, phenotype_id: str) -> Optional[GenePanel]:
genes = self.resolve(phenotype_id)
if not genes:
return None
return GenePanel(
name=f"Phenotype:{phenotype_id}",
genes=genes,
source=self.source,
version=self.version,
last_updated="",
metadata={"phenotype_id": phenotype_id},
)

src/genomic_consultant/pipelines/__init__.py Normal file

@@ -0,0 +1 @@

src/genomic_consultant/pipelines/annotation.py Normal file

@@ -0,0 +1,72 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import List, Sequence
@dataclass
class AnnotationPlan:
"""Command plan for annotating a VCF with VEP (or similar)."""
annotated_vcf: Path
flat_table: Path
commands: List[str]
workdir: Path
def build_vep_plan(
vcf_path: Path,
workdir: Path,
reference_fasta: Path | None = None,
output_prefix: str = "annotated",
plugins: Sequence[str] | None = None,
extra_flags: Sequence[str] | None = None,
) -> AnnotationPlan:
"""
Build shell commands for running VEP on a VCF. Produces compressed VCF and a flattened TSV.
This is a plan only; execution is left to a runner.
"""
workdir = Path(workdir)
workdir.mkdir(parents=True, exist_ok=True)
annotated_vcf = workdir / f"{output_prefix}.vep.vcf.gz"
flat_table = workdir / f"{output_prefix}.vep.tsv"
plugin_arg = ""
if plugins:
plugin_arg = " ".join(f"--plugin {p}" for p in plugins)
extra_arg = " ".join(extra_flags) if extra_flags else ""
ref_arg = f"--fasta {reference_fasta}" if reference_fasta else ""
commands: List[str] = [
(
"vep "
f"-i {vcf_path} "
f"-o {annotated_vcf} "
"--vcf --compress_output bgzip "
"--symbol --canonical "
"--af --af_gnomad "
"--polyphen b --sift b "
"--everything "
f"{plugin_arg} "
f"{extra_arg} "
f"{ref_arg}"
),
f"tabix -p vcf {annotated_vcf}",
        (
            # Plain `bcftools query` cannot see VEP CSQ subfields such as SYMBOL;
            # the +split-vep plugin exposes them in the query format (-d: one row per transcript).
            "bcftools +split-vep -d "
            f"-f '%CHROM\\t%POS\\t%REF\\t%ALT\\t%SYMBOL\\t%Consequence\\t%Protein_position\\t"
            f"%PolyPhen\\t%SIFT\\t%CLIN_SIG\\t%AF\\t%gnomAD_AF\\t%SpliceAI\\t%CADD_PHRED\\n' "
            f"{annotated_vcf} > {flat_table}"
        ),
]
return AnnotationPlan(
annotated_vcf=annotated_vcf,
flat_table=flat_table,
commands=commands,
workdir=workdir,
)
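
Building and inspecting a plan without running it might look like this (module path assumed as `genomic_consultant.pipelines.annotate`):

```
# Sketch: build an annotation plan and print its shell commands.
from pathlib import Path

from genomic_consultant.pipelines.annotate import build_vep_plan  # assumed module path

plan = build_vep_plan(
    vcf_path=Path("/tmp/trio/trio.joint.vcf.gz"),
    workdir=Path("/tmp/trio/annot"),
    reference_fasta=Path("/refs/GRCh38.fa"),
    output_prefix="trio",
)
print(plan.annotated_vcf)  # /tmp/trio/annot/trio.vep.vcf.gz
for cmd in plan.commands:  # vep, tabix, bcftools +split-vep
    print(cmd)
```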

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
import subprocess
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Protocol
from genomic_consultant.audit.run_log import write_run_log
from genomic_consultant.utils.models import RunLog
class HasCommands(Protocol):
commands: List[str]
workdir: Path
def execute_plan(plan: HasCommands, automation_level: str, log_path: Path | None = None) -> RunLog:
"""
Execute shell commands defined in a plan sequentially. Captures stdout/stderr and exit codes.
Suitable for VariantCallingPlan or AnnotationPlan.
"""
run_id = str(uuid.uuid4())
    started_at = datetime.now(timezone.utc)  # timezone-aware; datetime.utcnow() is deprecated
outputs: Dict[str, Any] = {}
    # The plan object may expose output paths (e.g., joint_vcf, annotated_vcf) that we can introspect.
for attr in ("joint_vcf", "per_sample_gvcf", "annotated_vcf", "flat_table"):
if hasattr(plan, attr):
outputs[attr] = getattr(plan, attr)
results: List[Dict[str, Any]] = []
for idx, cmd in enumerate(plan.commands):
proc = subprocess.run(cmd, shell=True, cwd=plan.workdir, capture_output=True, text=True)
results.append(
{
"step": idx,
"command": cmd,
"returncode": proc.returncode,
"stdout": proc.stdout,
"stderr": proc.stderr,
}
)
if proc.returncode != 0:
break
run_log = RunLog(
run_id=run_id,
started_at=started_at,
inputs={"workdir": str(plan.workdir)},
parameters={},
tool_versions={}, # left for caller to fill if known
database_versions={},
config_hashes={},
automation_levels={"pipeline": automation_level},
overrides=[],
outputs=outputs | {"command_results": results},
notes=None,
)
if log_path:
write_run_log(run_log, log_path)
return run_log
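
Because `execute_plan` only requires the `HasCommands` protocol, any object with `commands` and `workdir` works; a toy stand-in plan keeps the sketch runnable without GATK or VEP installed (runner module path assumed):

```
# Sketch: run a trivial stand-in plan through the executor and inspect the log.
from dataclasses import dataclass, field
from pathlib import Path
from typing import List

from genomic_consultant.pipelines.runner import execute_plan  # assumed module path

@dataclass
class EchoPlan:  # satisfies the HasCommands protocol
    commands: List[str] = field(default_factory=lambda: ["echo step-one", "echo step-two"])
    workdir: Path = Path(".")

log = execute_plan(EchoPlan(), automation_level="manual", log_path=Path("echo_run_log.json"))
for step in log.outputs["command_results"]:
    print(step["step"], step["returncode"], step["stdout"].strip())
```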

View File

@@ -0,0 +1,65 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Sequence
@dataclass
class VariantCallingPlan:
"""Command plan for running a trio variant calling pipeline."""
per_sample_gvcf: Dict[str, Path]
joint_vcf: Path
commands: List[str]
workdir: Path
def build_gatk_trio_plan(
samples: Dict[str, Path],
reference_fasta: Path,
workdir: Path,
output_prefix: str = "trio",
intervals: Sequence[Path] | None = None,
) -> VariantCallingPlan:
"""
Build shell commands for a GATK-based trio pipeline (HaplotypeCaller gVCF + joint genotyping).
Does not execute commands; returns a plan for orchestration layers to run.
"""
workdir = Path(workdir)
workdir.mkdir(parents=True, exist_ok=True)
per_sample_gvcf: Dict[str, Path] = {}
commands: List[str] = []
interval_args = ""
if intervals:
interval_args = " ".join(f"-L {i}" for i in intervals)
for sample_id, bam_path in samples.items():
gvcf_path = workdir / f"{sample_id}.g.vcf.gz"
per_sample_gvcf[sample_id] = gvcf_path
        cmd = (
            "gatk HaplotypeCaller "
            f"-R {reference_fasta} "
            f"-I {bam_path} "
            f"-O {gvcf_path} "
            "-ERC GVCF "
            f"{interval_args}"
        )
commands.append(cmd)
joint_vcf = workdir / f"{output_prefix}.joint.vcf.gz"
joint_cmd = (
"gatk GenotypeGVCFs "
f"-R {reference_fasta} "
+ " ".join(f"--variant {p}" for p in per_sample_gvcf.values())
+ f" -O {joint_vcf}"
)
commands.append(joint_cmd)
return VariantCallingPlan(
per_sample_gvcf=per_sample_gvcf,
joint_vcf=joint_vcf,
commands=commands,
workdir=workdir,
)
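
A sketch of assembling the trio plan (nothing executes; BAM and reference paths are hypothetical, module path assumed):

```
# Sketch: build the trio calling plan and inspect outputs/commands.
from pathlib import Path

from genomic_consultant.pipelines.call import build_gatk_trio_plan  # assumed module path

plan = build_gatk_trio_plan(
    samples={
        "proband": Path("/data/proband.bam"),
        "father": Path("/data/father.bam"),
        "mother": Path("/data/mother.bam"),
    },
    reference_fasta=Path("/refs/GRCh38.fa"),
    workdir=Path("/tmp/trio"),
)
print(plan.joint_vcf)      # /tmp/trio/trio.joint.vcf.gz
print(len(plan.commands))  # 4: three HaplotypeCaller calls + one GenotypeGVCFs
```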

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,75 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from typing import List
from genomic_consultant.orchestration.workflows import PanelAnalysisResult, PanelVariantResult
def panel_report_markdown(result: PanelAnalysisResult) -> str:
lines: List[str] = []
lines.append(f"# Panel Report: {result.panel.name}")
lines.append("")
lines.append(f"- Individual: `{result.individual_id}`")
lines.append(f"- Panel version: `{result.panel.version}` (source: {result.panel.source})")
lines.append(f"- Generated: {datetime.now(timezone.utc).isoformat()}")
lines.append("")
lines.append("## Variants")
if not result.variants:
lines.append("No variants found for this panel with current filters.")
return "\n".join(lines)
header = "| Variant | Consequence | ClinVar | AF | ACMG suggestion | Evidence |"
lines.append(header)
lines.append("|---|---|---|---|---|---|")
for pv in result.variants:
v = pv.variant
ev_summary = "; ".join(f"{e.tag}({e.strength})" for e in pv.acmg.evidence) or "None"
lines.append(
"|"
+ f"{v.id} ({v.gene or 'NA'})"
+ f"|{v.consequence or 'NA'}"
+ f"|{v.clinvar_significance or 'NA'}"
+ f"|{v.allele_frequency if v.allele_frequency is not None else 'NA'}"
+ f"|{pv.acmg.suggested_class}"
+ f"|{ev_summary}"
+ "|"
)
lines.append("")
lines.append("> Note: ACMG suggestions here are auto-generated and require human review for clinical decisions.")
return "\n".join(lines)
def panel_report_json(result: PanelAnalysisResult) -> str:
payload = {
"individual_id": result.individual_id,
"panel": {
"name": result.panel.name,
"version": result.panel.version,
"source": result.panel.source,
},
"generated": datetime.utcnow().isoformat(),
"variants": [
{
"id": pv.variant.id,
"gene": pv.variant.gene,
"consequence": pv.variant.consequence,
"clinvar_significance": pv.variant.clinvar_significance,
"allele_frequency": pv.variant.allele_frequency,
"acmg": {
"suggested_class": pv.acmg.suggested_class,
"evidence": [
{"tag": e.tag, "strength": e.strength, "rationale": e.rationale}
for e in pv.acmg.evidence
],
"human_classification": pv.acmg.human_classification,
"human_reviewer": pv.acmg.human_reviewer,
"human_notes": pv.acmg.human_notes,
},
}
for pv in result.variants
],
"disclaimer": "Auto-generated suggestions; human review required.",
}
return json.dumps(payload, indent=2)
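
A rendering sketch; the `PanelAnalysisResult`/`PanelVariantResult` constructor signatures are assumed from the fields the renderers read, so treat this as illustrative only:

```
# Sketch: render a one-variant report (constructor kwargs are assumed).
from genomic_consultant.orchestration.workflows import PanelAnalysisResult, PanelVariantResult
from genomic_consultant.reporting.reports import panel_report_markdown  # assumed module path
from genomic_consultant.utils.models import GenePanel, SuggestedClassification, Variant

pv = PanelVariantResult(
    variant=Variant(chrom="1", pos=100, ref="A", alt="T", gene="GENE1",
                    consequence="missense_variant", allele_frequency=0.0001),
    acmg=SuggestedClassification(suggested_class="VUS"),
)
result = PanelAnalysisResult(
    individual_id="proband",
    panel=GenePanel(name="demo", genes=["GENE1"], source="example", version="0.1", last_updated=""),
    variants=[pv],
)
print(panel_report_markdown(result))
```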

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,83 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import List, Sequence
from genomic_consultant.store.query import _matches_any
from genomic_consultant.utils.models import FilterConfig, Variant
try:
import pandas as pd
except ImportError as exc: # pragma: no cover - import guard
raise ImportError("ParquetGenomicStore requires pandas. Install with `pip install pandas`.") from exc
@dataclass
class ParquetGenomicStore:
"""Parquet-backed store for larger datasets (optional dependency: pandas)."""
df: "pd.DataFrame"
@classmethod
def from_parquet(cls, path: Path) -> "ParquetGenomicStore":
df = pd.read_parquet(path)
return cls(df=df)
def get_variants_by_gene(
self, individual_id: str, genes: Sequence[str], filters: FilterConfig | None = None
) -> List[Variant]:
filters = filters or FilterConfig()
subset = self.df[self.df["SYMBOL"].str.upper().isin([g.upper() for g in genes])]
return self._apply_filters(subset, filters)
def get_variants_by_region(
self, individual_id: str, chrom: str, start: int, end: int, filters: FilterConfig | None = None
) -> List[Variant]:
filters = filters or FilterConfig()
subset = self.df[(self.df["CHROM"] == chrom) & (self.df["POS"] >= start) & (self.df["POS"] <= end)]
return self._apply_filters(subset, filters)
def _apply_filters(self, df: "pd.DataFrame", filters: FilterConfig) -> List[Variant]:
mask = pd.Series(True, index=df.index)
        # Use cohort AF, falling back to gnomAD AF where available; missing values count as 0.
        # (df.get() returns None when the column is absent, and Series.fillna(None) raises.)
        af = df["AF"]
        if "gnomAD_AF" in df.columns:
            af = af.fillna(df["gnomAD_AF"])
        af = af.fillna(0)
        if filters.max_af is not None:
            mask &= af <= filters.max_af
        if filters.min_af is not None:
            mask &= af >= filters.min_af
if filters.clinvar_significance:
mask &= df["CLIN_SIG"].str.lower().isin([s.lower() for s in filters.clinvar_significance])
if filters.consequence_includes:
mask &= df["Consequence"].str.lower().apply(
lambda v: _matches_any(v, [c.lower() for c in filters.consequence_includes])
)
if filters.consequence_excludes:
mask &= ~df["Consequence"].str.lower().apply(
lambda v: _matches_any(v, [c.lower() for c in filters.consequence_excludes])
)
filtered = df[mask]
variants: List[Variant] = []
for _, row in filtered.iterrows():
variants.append(
Variant(
chrom=row["CHROM"],
pos=int(row["POS"]),
ref=row["REF"],
alt=row["ALT"],
gene=row.get("SYMBOL"),
consequence=row.get("Consequence"),
protein_change=row.get("Protein_position"),
clinvar_significance=row.get("CLIN_SIG"),
allele_frequency=_maybe_float(row.get("AF")) or _maybe_float(row.get("gnomAD_AF")),
annotations={"gnomad_af": _maybe_float(row.get("gnomAD_AF"))},
)
)
return variants
def _maybe_float(value) -> float | None:
try:
if value is None:
return None
return float(value)
except (TypeError, ValueError):
return None
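
One way to get data into this store is to convert the flattened TSV with pandas (Parquet I/O additionally needs pyarrow or fastparquet; store module path assumed):

```
# Sketch: TSV -> Parquet -> filtered query.
from pathlib import Path

import pandas as pd

from genomic_consultant.store.parquet_store import ParquetGenomicStore  # assumed module path
from genomic_consultant.utils.models import FilterConfig

pd.read_csv("trio.vep.tsv", sep="\t").to_parquet("trio.vep.parquet")

store = ParquetGenomicStore.from_parquet(Path("trio.vep.parquet"))
rare = store.get_variants_by_gene("proband", ["BRCA1", "BRCA2"], FilterConfig(max_af=0.001))
print(len(rare))
```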

View File

@@ -0,0 +1,109 @@
from __future__ import annotations
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Sequence
from genomic_consultant.utils.models import FilterConfig, Variant
@dataclass
class GenomicStore:
"""Lightweight wrapper around annotated variants."""
variants: List[Variant]
@classmethod
def from_tsv(cls, path: Path) -> "GenomicStore":
"""
Load variants from a flattened TSV generated by the annotation plan.
Expected columns (flexible, missing columns are tolerated):
CHROM POS REF ALT SYMBOL Consequence Protein_position PolyPhen SIFT CLIN_SIG AF gnomAD_AF SpliceAI CADD_PHRED
"""
variants: List[Variant] = []
with Path(path).open() as fh:
reader = csv.DictReader(fh, delimiter="\t")
            for row in reader:
                # Guard against blank or truncated lines; POS is required downstream.
                if not row or not row.get("POS"):
                    continue
                variants.append(_row_to_variant(row))
return cls(variants=variants)
def get_variants_by_gene(
self, individual_id: str, genes: Sequence[str], filters: FilterConfig | None = None
) -> List[Variant]:
filters = filters or FilterConfig()
gene_set = {g.upper() for g in genes}
return self._apply_filters((v for v in self.variants if (v.gene or "").upper() in gene_set), filters)
def get_variants_by_region(
self, individual_id: str, chrom: str, start: int, end: int, filters: FilterConfig | None = None
) -> List[Variant]:
filters = filters or FilterConfig()
return self._apply_filters(
(v for v in self.variants if v.chrom == chrom and start <= v.pos <= end),
filters,
)
def _apply_filters(self, variants: Iterable[Variant], filters: FilterConfig) -> List[Variant]:
out: List[Variant] = []
for v in variants:
if filters.max_af is not None and v.allele_frequency is not None and v.allele_frequency > filters.max_af:
continue
if filters.min_af is not None and v.allele_frequency is not None and v.allele_frequency < filters.min_af:
continue
if filters.clinvar_significance and (v.clinvar_significance or "").lower() not in {
sig.lower() for sig in filters.clinvar_significance
}:
continue
if filters.consequence_includes and not _matches_any(v.consequence, filters.consequence_includes):
continue
if filters.consequence_excludes and _matches_any(v.consequence, filters.consequence_excludes):
continue
out.append(v)
return out
def _matches_any(value: str | None, patterns: Sequence[str]) -> bool:
if value is None:
return False
v = value.lower()
return any(pat.lower() in v for pat in patterns)
def _parse_float(val: str | None) -> float | None:
if val in (None, "", "."):
return None
try:
return float(val)
except ValueError:
return None
def _row_to_variant(row: Dict[str, str]) -> Variant:
chrom = row.get("CHROM") or row.get("#CHROM")
pos = int(row["POS"])
af = _parse_float(row.get("AF"))
gnomad_af = _parse_float(row.get("gnomAD_AF"))
splice_ai = _parse_float(row.get("SpliceAI"))
cadd = _parse_float(row.get("CADD_PHRED"))
return Variant(
chrom=chrom,
pos=pos,
ref=row.get("REF"),
alt=row.get("ALT"),
gene=row.get("SYMBOL") or None,
consequence=row.get("Consequence") or None,
protein_change=row.get("Protein_position") or None,
clinvar_significance=row.get("CLIN_SIG") or None,
allele_frequency=af if af is not None else gnomad_af,
annotations={
"polyphen": row.get("PolyPhen"),
"sift": row.get("SIFT"),
"gnomad_af": gnomad_af,
"splice_ai_delta_score": splice_ai,
"cadd_phred": cadd,
},
)
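
End to end with the demo data, this store can be exercised like so (the gene symbol and thresholds are illustrative):

```
# Sketch: load the demo TSV and run a filtered gene query.
from pathlib import Path

from genomic_consultant.store.query import GenomicStore
from genomic_consultant.utils.models import FilterConfig

store = GenomicStore.from_tsv(Path("sample_data/example_annotated.tsv"))
hits = store.get_variants_by_gene(
    "proband",
    ["GENE1"],  # illustrative symbol
    filters=FilterConfig(max_af=0.01, consequence_excludes=["synonymous"]),
)
for v in hits:
    print(v.id, v.consequence, v.clinvar_significance, v.allele_frequency)
```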

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,15 @@
from __future__ import annotations
import hashlib
from pathlib import Path
from typing import Optional
def sha256sum(path: Path) -> Optional[str]:
if not Path(path).exists():
return None
h = hashlib.sha256()
with Path(path).open("rb") as fh:
for chunk in iter(lambda: fh.read(8192), b""):
h.update(chunk)
return h.hexdigest()
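
This pairs naturally with `RunLog.config_hashes`; a sketch (module path assumed):

```
# Sketch: pin config files by content hash for the run log.
from pathlib import Path

from genomic_consultant.utils.hashing import sha256sum  # assumed module path

config_hashes = {
    p.name: sha256sum(p)
    for p in [Path("configs/acmg_config.example.yaml"), Path("configs/panel.example.json")]
}
print(config_hashes)  # missing files hash to None rather than raising
```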

View File

@@ -0,0 +1,81 @@
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional
@dataclass
class Variant:
"""Lightweight variant representation used by the query layer."""
chrom: str
pos: int
ref: str
alt: str
gene: Optional[str] = None
transcript: Optional[str] = None
consequence: Optional[str] = None
protein_change: Optional[str] = None
clinvar_significance: Optional[str] = None
allele_frequency: Optional[float] = None
annotations: Dict[str, Any] = field(default_factory=dict)
@property
def id(self) -> str:
return f"{self.chrom}-{self.pos}-{self.ref}-{self.alt}"
@dataclass
class FilterConfig:
"""Common filters for variant queries."""
max_af: Optional[float] = None
min_af: Optional[float] = None
clinvar_significance: Optional[List[str]] = None
consequence_includes: Optional[List[str]] = None
consequence_excludes: Optional[List[str]] = None
inheritances: Optional[List[str]] = None
@dataclass
class EvidenceTag:
tag: str
strength: str
rationale: str
@dataclass
class SuggestedClassification:
suggested_class: str
evidence: List[EvidenceTag] = field(default_factory=list)
human_classification: Optional[str] = None
human_reviewer: Optional[str] = None
human_notes: Optional[str] = None
@dataclass
class GenePanel:
name: str
genes: List[str]
source: str
version: str
last_updated: str
metadata: Dict[str, Any] = field(default_factory=dict)
@dataclass
class RunLog:
"""Machine-readable record of a pipeline or analysis run."""
run_id: str
started_at: datetime
inputs: Dict[str, Any]
parameters: Dict[str, Any]
tool_versions: Dict[str, str]
database_versions: Dict[str, str]
config_hashes: Dict[str, str]
automation_levels: Dict[str, str]
overrides: List[Dict[str, Any]] = field(default_factory=list)
outputs: Dict[str, Any] = field(default_factory=dict)
notes: Optional[str] = None
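
For orientation, a small sketch of the two dataclasses the query layer leans on most (coordinates and gene are illustrative):

```
# Sketch: a "rare, potentially damaging" filter plus the derived variant ID.
from genomic_consultant.utils.models import FilterConfig, Variant

rare_damaging = FilterConfig(
    max_af=0.001,
    consequence_includes=["stop_gained", "frameshift", "splice"],
    consequence_excludes=["synonymous"],
)
v = Variant(chrom="17", pos=43045712, ref="G", alt="A", gene="BRCA1")
print(v.id)  # "17-43045712-G-A"
```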

View File

@@ -0,0 +1,22 @@
from __future__ import annotations
import subprocess
from typing import Dict
def probe_tool_versions(commands: Dict[str, str]) -> Dict[str, str]:
"""
Attempt to get tool versions by running the provided commands with --version.
Returns best-effort outputs; missing tools are skipped.
"""
results: Dict[str, str] = {}
for name, cmd in commands.items():
try:
proc = subprocess.run(f"{cmd} --version", shell=True, capture_output=True, text=True, timeout=10)
if proc.returncode == 0 and proc.stdout:
results[name] = proc.stdout.strip().splitlines()[0]
elif proc.returncode == 0 and proc.stderr:
results[name] = proc.stderr.strip().splitlines()[0]
except Exception:
continue
return results
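
Typical use is to probe the external toolchain once and stash the result in `RunLog.tool_versions` (module path assumed):

```
# Sketch: best-effort version probing; absent tools simply drop out.
from genomic_consultant.utils.versions import probe_tool_versions  # assumed module path

versions = probe_tool_versions({"gatk": "gatk", "vep": "vep", "bcftools": "bcftools"})
print(versions)  # e.g. {"bcftools": "bcftools 1.19"} for whatever is on PATH
```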

41
tests/test_acmg.py Normal file
View File

@@ -0,0 +1,41 @@
from genomic_consultant.acmg.tagger import ACMGConfig, tag_variant
from genomic_consultant.utils.models import Variant
def test_ba1_trumps():
cfg = ACMGConfig(ba1_af=0.05, bs1_af=0.01, pm2_af=0.0005, lof_genes=set())
v = Variant(chrom="1", pos=1, ref="A", alt="T", allele_frequency=0.2)
result = tag_variant(v, cfg)
assert result.suggested_class == "Benign"
assert any(e.tag == "BA1" for e in result.evidence)
def test_pvs1_pm2_likely_pathogenic():
cfg = ACMGConfig(lof_genes={"GENE1"}, pm2_af=0.0005, ba1_af=0.05, bs1_af=0.01)
v = Variant(
chrom="1",
pos=1,
ref="A",
alt="T",
gene="GENE1",
consequence="stop_gained",
allele_frequency=0.0001,
)
result = tag_variant(v, cfg)
assert result.suggested_class == "Likely pathogenic"
tags = {e.tag for e in result.evidence}
assert {"PVS1", "PM2"} <= tags
def test_bp7_supporting():
cfg = ACMGConfig(bp7_splice_ai_max=0.1)
v = Variant(
chrom="1",
pos=1,
ref="A",
alt="T",
consequence="synonymous_variant",
annotations={"splice_ai_delta_score": 0.05},
)
result = tag_variant(v, cfg)
assert any(e.tag == "BP7" for e in result.evidence)

16
tests/test_aggregate.py Normal file
View File

@@ -0,0 +1,16 @@
import json
from pathlib import Path

from genomic_consultant.panels.aggregate import merge_mappings
def test_merge_mappings(tmp_path: Path):
a = tmp_path / "a.json"
b = tmp_path / "b.json"
a.write_text('{"source":"A","phenotype_to_genes":{"HP:1":["G1","G2"]}}')
b.write_text('{"source":"B","phenotype_to_genes":{"HP:1":["G2","G3"],"HP:2":["G4"]}}')
out = tmp_path / "out.json"
merge_mappings([a, b], out)
data = json.loads(out.read_text())
assert sorted(data["phenotype_to_genes"]["HP:1"]) == ["G1", "G2", "G3"]
assert data["phenotype_to_genes"]["HP:2"] == ["G4"]

View File

@@ -0,0 +1,27 @@
from pathlib import Path
from genomic_consultant.orchestration.phase1_pipeline import run_phase1_pipeline
def test_phase1_pipeline_with_existing_tsv():
root = Path(__file__).resolve().parents[1]
tsv = root / "sample_data/example_annotated.tsv"
panel = root / "configs/panel.example.json"
acmg = root / "configs/acmg_config.example.yaml"
result = run_phase1_pipeline(
samples=None,
reference_fasta=None,
workdir=root / "runtime",
output_prefix="demo",
acmg_config_path=acmg,
max_af=0.05,
panel_path=panel,
phenotype_id=None,
phenotype_mapping=None,
report_format="markdown",
existing_tsv=tsv,
skip_call=True,
skip_annotate=True,
)
assert "Panel Report" in result.panel_report
assert result.tsv_path == tsv

18
tests/test_resolver.py Normal file
View File

@@ -0,0 +1,18 @@
import json
from pathlib import Path

from genomic_consultant.panels.resolver import PhenotypeGeneResolver
def test_resolver_build_panel(tmp_path: Path):
data = {
"version": "test",
"source": "example",
"phenotype_to_genes": {"HP:0001": ["GENE1", "GENE2"]},
}
path = tmp_path / "map.json"
path.write_text('{"version":"test","source":"example","phenotype_to_genes":{"HP:0001":["GENE1","GENE2"]}}')
resolver = PhenotypeGeneResolver.from_json(path)
panel = resolver.build_panel("HP:0001")
assert panel is not None
assert panel.genes == ["GENE1", "GENE2"]
assert "phenotype_id" in panel.metadata

28
tests/test_store.py Normal file
View File

@@ -0,0 +1,28 @@
from genomic_consultant.store.query import GenomicStore
from genomic_consultant.utils.models import FilterConfig, Variant
def test_filter_by_gene_and_af():
store = GenomicStore(
variants=[
Variant(chrom="1", pos=1, ref="A", alt="T", gene="GENE1", allele_frequency=0.02),
Variant(chrom="1", pos=2, ref="G", alt="C", gene="GENE2", allele_frequency=0.0001),
]
)
res = store.get_variants_by_gene("ind", ["GENE2"], filters=FilterConfig(max_af=0.001))
assert len(res) == 1
assert res[0].gene == "GENE2"
def test_consequence_include_exclude():
store = GenomicStore(
variants=[
Variant(chrom="1", pos=1, ref="A", alt="T", gene="GENE1", consequence="missense_variant"),
Variant(chrom="1", pos=2, ref="G", alt="C", gene="GENE1", consequence="synonymous_variant"),
]
)
res = store.get_variants_by_gene(
"ind", ["GENE1"], filters=FilterConfig(consequence_includes=["missense"], consequence_excludes=["synonymous"])
)
assert len(res) == 1
assert res[0].consequence == "missense_variant"

33
tests/test_store_tsv.py Normal file
View File

@@ -0,0 +1,33 @@
from pathlib import Path
from genomic_consultant.store.query import GenomicStore
def test_from_tsv_with_extra_columns(tmp_path: Path):
content = "\t".join(
[
"#CHROM",
"POS",
"REF",
"ALT",
"SYMBOL",
"Consequence",
"Protein_position",
"PolyPhen",
"SIFT",
"CLIN_SIG",
"AF",
"gnomAD_AF",
"SpliceAI",
"CADD_PHRED",
]
) + "\n"
content += "1\t100\tA\tT\tGENE1\tmissense_variant\tp.X\t.\t.\tPathogenic\t0.0001\t0.0002\t0.05\t20.1\n"
path = tmp_path / "v.tsv"
path.write_text(content)
store = GenomicStore.from_tsv(path)
assert len(store.variants) == 1
v = store.variants[0]
assert v.annotations["splice_ai_delta_score"] == 0.05
assert v.annotations["cadd_phred"] == 20.1