Initial commit

This commit is contained in:
2025-11-28 11:52:04 +08:00
commit f74dc351f7
51 changed files with 2402 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Dict, Iterable, List, Set
def merge_mappings(inputs: Iterable[Path], output: Path, version: str = "merged", sources: List[str] | None = None) -> Path:
    """
    Merge multiple phenotype→gene mapping JSON files into one.

    Input schema: {"phenotype_to_genes": {"HP:xxxx": ["GENE1", ...]}, "version": "...", "source": "..."}

    Gene lists for the same phenotype ID are unioned across all inputs and
    written out sorted. Phenotype IDs keep first-seen order.

    Args:
        inputs: Paths of the mapping files to merge. Any iterable is accepted,
            including a one-shot generator.
        output: Destination file; missing parent directories are created.
        version: Value stored under the "version" key of the merged file.
        sources: Optional source labels placed ahead of the per-file labels.
            The caller's list is never modified.

    Returns:
        The path the merged JSON was written to.

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # Materialise once: `inputs` is consumed twice (merge loop + metadata),
    # which would silently yield an empty "merged_from" for a generator.
    input_paths = [Path(p) for p in inputs]
    output = Path(output)

    merged: Dict[str, Set[str]] = {}
    # Copy so that appending per-file labels does not mutate the caller's list.
    source_list: List[str] = list(sources) if sources else []

    for path in input_paths:
        data = json.loads(path.read_text(encoding="utf-8"))
        # Tolerate an explicit null mapping as well as a missing key.
        phenos = data.get("phenotype_to_genes") or {}
        for pid, genes in phenos.items():
            # `genes or ()` keeps the phenotype key even if its list is null/empty.
            merged.setdefault(pid, set()).update(genes or ())
        # Fall back to the file name when the file does not declare a source.
        source_list.append(data.get("source") or path.name)

    out = {
        "version": version,
        "source": ",".join(source_list),
        "phenotype_to_genes": {pid: sorted(genes) for pid, genes in merged.items()},
        "metadata": {"merged_from": [str(p) for p in input_paths]},
    }
    output.parent.mkdir(parents=True, exist_ok=True)
    output.write_text(json.dumps(out, indent=2), encoding="utf-8")
    return output