- Add MappingValidator with configurable success rate thresholds (min_success_rate, warn_threshold)
- Add validate_gene_universe for gene count, format, and duplicate checks
- Add save_unmapped_report for manual review output
- Implement 15 comprehensive tests with mocked mygene responses (no real API calls)
- Tests cover: successful mapping, notfound handling, UniProt list parsing, batching, validation gates, universe validation
32 lines
680 B
Python
32 lines
680 B
Python
"""Gene ID mapping module.

Provides gene universe definition, batch ID mapping via mygene,
and validation gates for quality control.
"""

# Re-export the names below from the mapper, universe and validator
# submodules so they are importable from this module directly.
# NOTE(review): presumably this file is the ``usher_pipeline.gene_mapping``
# package ``__init__`` -- confirm against the repository layout.
from usher_pipeline.gene_mapping.mapper import (
    GeneMapper,
    MappingResult,
    MappingReport,
)
from usher_pipeline.gene_mapping.universe import (
    fetch_protein_coding_genes,
    GeneUniverse,
)
from usher_pipeline.gene_mapping.validator import (
    MappingValidator,
    ValidationResult,
    validate_gene_universe,
)

# Explicit public API: exactly the names imported above, grouped by the
# submodule they come from.
__all__ = [
    # from .mapper
    "GeneMapper",
    "MappingResult",
    "MappingReport",
    # from .universe
    "fetch_protein_coding_genes",
    "GeneUniverse",
    # from .validator
    "MappingValidator",
    "ValidationResult",
    "validate_gene_universe",
]