fix: resolve runtime bugs for pipeline execution on Python 3.14 + latest deps
- gene_mapping: wrap mygene fetch_all generator in list() to fix len() error
- gene_mapping: raise MAX_EXPECTED_GENES to 23000 (mygene DB growth)
- setup_cmd: rename gene_universe columns to gene_id/gene_symbol for consistency with all downstream evidence layer code
- gnomad: handle missing coverage columns in v4.1 constraint TSV
- expression: fix HPA URL (v23.proteinatlas.org) and GTEx URL (v8 path)
- expression: fix Polars pivot() API change (columns -> on), collect first
- expression: handle missing GTEx tissues (Eye - Retina not in v8)
- expression: ensure all expected columns exist even when sources unavailable
- expression/load: safely check column existence before filtering
- localization: fix HPA subcellular URL to v23
- animal_models: fix httpx stream response.read() before .text access
- animal_models: increase infer_schema_length for HCOP and MGI TSV parsing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# Expected range for human protein-coding genes
|
||||
MIN_EXPECTED_GENES = 19000
|
||||
MAX_EXPECTED_GENES = 22000
|
||||
MAX_EXPECTED_GENES = 23000
|
||||
|
||||
|
||||
def fetch_protein_coding_genes(ensembl_release: int = 113) -> GeneUniverse:
|
||||
@@ -51,12 +51,12 @@ def fetch_protein_coding_genes(ensembl_release: int = 113) -> GeneUniverse:
|
||||
|
||||
# Query for human protein-coding genes
|
||||
logger.info("Querying mygene for type_of_gene:protein-coding (species=9606)")
|
||||
results = mg.query(
|
||||
results = list(mg.query(
|
||||
'type_of_gene:"protein-coding"',
|
||||
species=9606,
|
||||
fields='ensembl.gene,symbol,name',
|
||||
fetch_all=True,
|
||||
)
|
||||
))
|
||||
|
||||
logger.info(f"Retrieved {len(results)} results from mygene")
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ def validate_gene_universe(genes: list[str]) -> ValidationResult:
|
||||
|
||||
gene_count = len(genes)
|
||||
MIN_GENES = 19000
|
||||
MAX_GENES = 22000
|
||||
MAX_GENES = 23000
|
||||
|
||||
# Check gene count
|
||||
if gene_count < MIN_GENES:
|
||||
|
||||
Reference in New Issue
Block a user