"""Unit tests for literature evidence layer.""" import polars as pl import pytest from unittest.mock import Mock, patch from usher_pipeline.evidence.literature import ( classify_evidence_tier, compute_literature_score, SEARCH_CONTEXTS, DIRECT_EVIDENCE_TERMS, ) @pytest.fixture def synthetic_literature_data(): """Create synthetic literature data for testing tier classification and scoring.""" return pl.DataFrame({ "gene_id": [ "ENSG00000001", # Direct experimental: knockout + cilia context "ENSG00000002", # Functional mention: cilia context, multiple pubs "ENSG00000003", # HTS hit: screen hit + cilia context "ENSG00000004", # Incidental: publications but no context "ENSG00000005", # None: zero publications "ENSG00000006", # Well-studied (TP53-like): many total, few cilia "ENSG00000007", # Focused novel: few total, many cilia (should score high) ], "gene_symbol": [ "GENE1", "GENE2", "GENE3", "GENE4", "GENE5", "TP53LIKE", "NOVELGENE", ], "total_pubmed_count": [ 100, # Gene1: moderate total 50, # Gene2: moderate total 30, # Gene3: moderate total 1000, # Gene4: many total, but no cilia context 0, # Gene5: zero 100000, # TP53-like: very many 10, # Novel: very few ], "cilia_context_count": [ 10, # Gene1: good cilia evidence 5, # Gene2: some cilia evidence 3, # Gene3: some cilia evidence 0, # Gene4: no context 0, # Gene5: zero 5, # TP53-like: same as Gene2, but huge total 5, # Novel: same as Gene2, but tiny total ], "sensory_context_count": [ 5, # Gene1 3, # Gene2 2, # Gene3 0, # Gene4 0, # Gene5 2, # TP53-like 2, # Novel ], "cytoskeleton_context_count": [ 8, # Gene1 4, # Gene2 2, # Gene3 0, # Gene4 0, # Gene5 10, # TP53-like 3, # Novel ], "cell_polarity_context_count": [ 3, # Gene1 2, # Gene2 1, # Gene3 0, # Gene4 0, # Gene5 4, # TP53-like 1, # Novel ], "direct_experimental_count": [ 3, # Gene1: knockout evidence 0, # Gene2: no knockout 0, # Gene3: no knockout 0, # Gene4: no knockout 0, # Gene5: zero 1, # TP53-like: has knockout but incidental 0, # Novel: no knockout ], "hts_screen_count": [ 0, # Gene1: not from screen 0, # Gene2: not from screen 2, # Gene3: from HTS screen 0, # Gene4: not from screen 0, # Gene5: zero 5, # TP53-like: many screens 0, # Novel: not from screen ], }) def test_direct_experimental_classification(synthetic_literature_data): """Gene with knockout paper in cilia context should be classified as direct_experimental.""" df = classify_evidence_tier(synthetic_literature_data) gene1 = df.filter(pl.col("gene_symbol") == "GENE1") assert gene1["evidence_tier"][0] == "direct_experimental" def test_functional_mention_classification(synthetic_literature_data): """Gene with cilia context but no knockout should be functional_mention.""" df = classify_evidence_tier(synthetic_literature_data) gene2 = df.filter(pl.col("gene_symbol") == "GENE2") assert gene2["evidence_tier"][0] == "functional_mention" def test_hts_hit_classification(synthetic_literature_data): """Gene from proteomics screen in cilia context should be hts_hit.""" df = classify_evidence_tier(synthetic_literature_data) gene3 = df.filter(pl.col("gene_symbol") == "GENE3") assert gene3["evidence_tier"][0] == "hts_hit" def test_incidental_classification(synthetic_literature_data): """Gene with publications but no cilia/sensory context should be incidental.""" df = classify_evidence_tier(synthetic_literature_data) gene4 = df.filter(pl.col("gene_symbol") == "GENE4") assert gene4["evidence_tier"][0] == "incidental" def test_no_evidence_classification(synthetic_literature_data): """Gene with zero publications should be classified as none.""" df = classify_evidence_tier(synthetic_literature_data) gene5 = df.filter(pl.col("gene_symbol") == "GENE5") assert gene5["evidence_tier"][0] == "none" def test_bias_mitigation(synthetic_literature_data): """TP53-like gene (100K total, 5 cilia) should score LOWER than novel gene (10 total, 5 cilia). This tests the critical bias mitigation feature: quality-weighted score normalized by log2(total_pubmed_count) to prevent well-studied genes from dominating. """ df = classify_evidence_tier(synthetic_literature_data) df = compute_literature_score(df) tp53_like = df.filter(pl.col("gene_symbol") == "TP53LIKE") novel = df.filter(pl.col("gene_symbol") == "NOVELGENE") tp53_score = tp53_like["literature_score_normalized"][0] novel_score = novel["literature_score_normalized"][0] # Novel gene should score higher despite having same cilia context count assert novel_score > tp53_score, ( f"Novel gene (10 total/5 cilia) should score higher than TP53-like (100K total/5 cilia). " f"Got novel={novel_score:.4f}, TP53-like={tp53_score:.4f}" ) def test_quality_weighting(synthetic_literature_data): """Direct experimental evidence should score higher than incidental mention.""" df = classify_evidence_tier(synthetic_literature_data) df = compute_literature_score(df) direct = df.filter(pl.col("gene_symbol") == "GENE1") incidental = df.filter(pl.col("gene_symbol") == "GENE4") direct_score = direct["literature_score_normalized"][0] incidental_score = incidental["literature_score_normalized"][0] # Direct experimental should always score higher than incidental assert direct_score > incidental_score def test_null_preservation(): """Failed PubMed query should result in NULL counts, not zero.""" # Simulate failed query with NULL values df = pl.DataFrame({ "gene_id": ["ENSG00000001"], "gene_symbol": ["GENE1"], "total_pubmed_count": [None], "cilia_context_count": [None], "sensory_context_count": [None], "cytoskeleton_context_count": [None], "cell_polarity_context_count": [None], "direct_experimental_count": [None], "hts_screen_count": [None], }) df = classify_evidence_tier(df) df = compute_literature_score(df) # Evidence tier should be "none" for NULL counts assert df["evidence_tier"][0] == "none" # Score should be NULL (not zero) assert df["literature_score_normalized"][0] is None def test_context_weighting(synthetic_literature_data): """Cilia/sensory contexts should be weighted higher than cytoskeleton.""" # Test by modifying data: create two genes with same total but different context distribution df = pl.DataFrame({ "gene_id": ["ENSG00000001", "ENSG00000002"], "gene_symbol": ["CILIA_FOCUSED", "CYTO_FOCUSED"], "total_pubmed_count": [50, 50], # Same total "cilia_context_count": [10, 0], # Cilia-focused has cilia context "sensory_context_count": [5, 0], # Cilia-focused has sensory context "cytoskeleton_context_count": [0, 20], # Cyto-focused has cytoskeleton context "cell_polarity_context_count": [0, 0], "direct_experimental_count": [1, 1], # Same experimental evidence "hts_screen_count": [0, 0], }) df = classify_evidence_tier(df) df = compute_literature_score(df) cilia_score = df.filter(pl.col("gene_symbol") == "CILIA_FOCUSED")["literature_score_normalized"][0] cyto_score = df.filter(pl.col("gene_symbol") == "CYTO_FOCUSED")["literature_score_normalized"][0] # Cilia-focused should score higher due to context weights (cilia=2.0, cyto=1.0) # CILIA_FOCUSED context_score = 10*2.0 + 5*2.0 = 30 # CYTO_FOCUSED context_score = 20*1.0 = 20 assert cilia_score > cyto_score def test_score_normalization(synthetic_literature_data): """Final literature_score_normalized should be in [0, 1] range.""" df = classify_evidence_tier(synthetic_literature_data) df = compute_literature_score(df) # Filter to non-NULL scores scores = df.filter(pl.col("literature_score_normalized").is_not_null())["literature_score_normalized"] assert scores.min() >= 0.0 assert scores.max() <= 1.0 @patch('usher_pipeline.evidence.literature.fetch.Entrez') def test_query_pubmed_gene_mock(mock_entrez): """Test query_pubmed_gene with mocked Biopython Entrez.""" from usher_pipeline.evidence.literature.fetch import query_pubmed_gene # Mock esearch responses def mock_esearch(db, term, retmax): """Return different counts based on query term.""" count_map = { "GENE1": 100, # Total "GENE1 cilia": 10, "GENE1 sensory": 5, "GENE1 knockout": 3, "GENE1 screen": 0, } # Simple matching on term content for key, count in count_map.items(): if key.replace(" ", ") AND (") in term or key in term: mock_handle = Mock() mock_handle.__enter__ = Mock(return_value=mock_handle) mock_handle.__exit__ = Mock(return_value=False) return mock_handle # Default mock_handle = Mock() mock_handle.__enter__ = Mock(return_value=mock_handle) mock_handle.__exit__ = Mock(return_value=False) return mock_handle # Set up mock mock_entrez.esearch = mock_esearch mock_entrez.read = Mock(return_value={"Count": "10"}) # Test query result = query_pubmed_gene( gene_symbol="GENE1", contexts=SEARCH_CONTEXTS, email="test@example.com", api_key=None, ) # Verify result structure assert "gene_symbol" in result assert "total_pubmed_count" in result assert "cilia_context_count" in result assert "sensory_context_count" in result assert "direct_experimental_count" in result assert "hts_screen_count" in result