feat(01-03): create DuckDB persistence layer with checkpoint-restart

- PipelineStore class for DuckDB-based storage
- save_dataframe/load_dataframe for polars and pandas
- Checkpoint system with has_checkpoint and metadata tracking
- Parquet export capability
- Context manager support
This commit is contained in:
2026-02-11 16:30:25 +08:00
parent 9ee3ec2e84
commit d51141f7d5
5 changed files with 557 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
"""Gene ID mapping module.
Provides gene universe definition, batch ID mapping via mygene,
and validation gates for quality control.
"""
# Re-export the names defined in the `mapper` submodule so callers can
# import them from this module directly.
from usher_pipeline.gene_mapping.mapper import (
GeneMapper,
MappingResult,
MappingReport,
)
# Re-export the names defined in the `universe` submodule.
from usher_pipeline.gene_mapping.universe import (
fetch_protein_coding_genes,
GeneUniverse,
)
# Public API of this module: the names exposed by a wildcard import.
# Keep this list in sync with the imports above -- every re-exported name
# is listed here, and nothing else.
__all__ = [
"GeneMapper",
"MappingResult",
"MappingReport",
"fetch_protein_coding_genes",
"GeneUniverse",
]