feat(05-02): implement visualization module with matplotlib/seaborn plots

- Add matplotlib>=3.8.0 and seaborn>=0.13.0 to dependencies
- Create visualizations.py with 3 plot functions and orchestrator
- plot_score_distribution: histogram colored by confidence tier
- plot_layer_contributions: bar chart of evidence layer coverage
- plot_tier_breakdown: pie chart of tier distribution
- Use Agg backend for headless/CLI safety
- All plots saved at 300 DPI with proper figure cleanup
- 6 tests covering file creation, edge cases, and return values
2026-02-12 03:57:50 +08:00
parent d2ef3a2b84
commit 150417ffcc
3 changed files with 358 additions and 0 deletions
--- a/tests/test_visualizations.py
+++ b/tests/test_visualizations.py
@@ -0,0 +1,112 @@
+"""Tests for visualization generation."""
+
+from pathlib import Path
+
+import polars as pl
+import pytest
+
+from usher_pipeline.output.visualizations import (
+    generate_all_plots,
+    plot_layer_contributions,
+    plot_score_distribution,
+    plot_tier_breakdown,
+)
+
+
+@pytest.fixture
+def synthetic_results_df():
+    """Build a 30-gene scored frame; layer scores are null off their stride."""
+    layer_fill = {  # column -> (value, stride): value where i % stride == 0
+        "gnomad_score": (0.5, 2), "expression_score": (0.6, 3),
+        "annotation_score": (0.7, 4), "localization_score": (0.8, 5),
+        "animal_model_score": (0.9, 6), "literature_score": (0.85, 7),
+    }
+    columns = {
+        "gene_symbol": [f"GENE{i}" for i in range(30)],
+        "composite_score": [0.1 + i * 0.03 for i in range(30)],
+        "confidence_tier": ["HIGH"] * 10 + ["MEDIUM"] * 10 + ["LOW"] * 10,
+    }
+    for col, (val, step) in layer_fill.items():
+        columns[col] = [val if i % step == 0 else None for i in range(30)]
+    return pl.DataFrame(columns)
+
+
+def test_plot_score_distribution_creates_file(synthetic_results_df, tmp_path):
+    """Test that the score distribution plot is written as a real PNG file."""
+    output_path = tmp_path / "score_dist.png"
+
+    result = plot_score_distribution(synthetic_results_df, output_path)
+
+    assert result == output_path
+    assert output_path.exists()
+    assert output_path.read_bytes()[:8] == b"\x89PNG\r\n\x1a\n"  # PNG signature
+
+
+def test_plot_layer_contributions_creates_file(synthetic_results_df, tmp_path):
+    """Test that the layer contributions plot is written as a real PNG file."""
+    output_path = tmp_path / "layer_contrib.png"
+
+    result = plot_layer_contributions(synthetic_results_df, output_path)
+
+    assert result == output_path
+    assert output_path.exists()
+    assert output_path.read_bytes()[:8] == b"\x89PNG\r\n\x1a\n"  # PNG signature
+
+
+def test_plot_tier_breakdown_creates_file(synthetic_results_df, tmp_path):
+    """Test that the tier breakdown plot is written as a real PNG file."""
+    output_path = tmp_path / "tier_breakdown.png"
+
+    result = plot_tier_breakdown(synthetic_results_df, output_path)
+
+    assert result == output_path
+    assert output_path.exists()
+    assert output_path.read_bytes()[:8] == b"\x89PNG\r\n\x1a\n"  # PNG signature
+
+
+def test_generate_all_plots_creates_all_files(synthetic_results_df, tmp_path):
+    """Test that generate_all_plots creates all 3 PNG files."""
+    output_dir = tmp_path / "plots"
+
+    # Only the files on disk matter here, so the return value is not bound.
+    generate_all_plots(synthetic_results_df, output_dir)
+
+    assert (output_dir / "score_distribution.png").exists()
+    assert (output_dir / "layer_contributions.png").exists()
+    assert (output_dir / "tier_breakdown.png").exists()
+
+
+def test_generate_all_plots_returns_paths(synthetic_results_df, tmp_path):
+    """Check the mapping from generate_all_plots holds the 3 plot keys."""
+    plot_dir = tmp_path / "plots"
+
+    generated = generate_all_plots(synthetic_results_df, plot_dir)
+
+    assert len(generated) == 3
+    expected_keys = ("score_distribution", "layer_contributions", "tier_breakdown")
+    for key in expected_keys:
+        assert key in generated
+
+
+def test_plots_handle_empty_dataframe(tmp_path):
+    """Check generate_all_plots returns a dict when given a zero-row frame."""
+    column_names = (
+        "gene_symbol",
+        "composite_score",
+        "confidence_tier",
+        "gnomad_score",
+        "expression_score",
+        "annotation_score",
+        "localization_score",
+        "animal_model_score",
+        "literature_score",
+    )
+    # NOTE(review): empty lists carry no dtype information - confirm schema.
+    empty_df = pl.DataFrame({name: [] for name in column_names})
+    output_dir = tmp_path / "empty_plots"
+
+    # The call itself must not raise on a frame with no rows.
+    plots = generate_all_plots(empty_df, output_dir)
+
+    # Only the return type is pinned; which plots are produced is left open.
+    assert isinstance(plots, dict)