Initial commit
This commit is contained in:
31
src/genomic_consultant/panels/aggregate.py
Normal file
31
src/genomic_consultant/panels/aggregate.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Set
|
||||
|
||||
|
||||
def merge_mappings(inputs: Iterable[Path], output: Path, version: str = "merged", sources: List[str] | None = None) -> Path:
    """
    Merge multiple phenotype→gene mapping JSON files into one.

    Input schema: {"phenotype_to_genes": {"HP:xxxx": ["GENE1", ...]}, "version": "...", "source": "..."}

    Args:
        inputs: Paths of the mapping files to merge. Any iterable is accepted,
            including one-shot generators; items may be ``Path`` objects or
            plain strings.
        output: Destination file for the merged mapping. Missing parent
            directories are created.
        version: Value stored under the ``"version"`` key of the result.
        sources: Optional labels placed ahead of the per-file labels in the
            comma-joined ``"source"`` field. The list is not modified.

    Returns:
        The path the merged mapping was written to.

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # Materialise once: the inputs are walked twice (merge loop + metadata),
    # which would silently yield nothing the second time for a generator.
    input_items = list(inputs)
    # Copy so the caller's list is never mutated by the appends below.
    source_list: List[str] = list(sources) if sources else []
    merged: Dict[str, Set[str]] = {}

    for item in input_items:
        path = Path(item)  # tolerate str inputs for both reading and labelling
        data = json.loads(path.read_text(encoding="utf-8"))
        for pid, genes in data.get("phenotype_to_genes", {}).items():
            merged.setdefault(pid, set()).update(genes)
        # Fall back to the file name when the file declares no (or an empty) source.
        source_list.append(data.get("source") or path.name)

    out = {
        "version": version,
        "source": ",".join(source_list),
        # Sorted gene lists keep the output deterministic despite set ordering.
        "phenotype_to_genes": {pid: sorted(genes) for pid, genes in merged.items()},
        "metadata": {"merged_from": [str(item) for item in input_items]},
    }

    out_path = Path(output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(out, indent=2), encoding="utf-8")
    return out_path
|
||||
Reference in New Issue
Block a user