Files
usher-exploring/tests/test_reproducibility.py
gbanyan 5af63eab46 feat(05-02): implement reproducibility report module with JSON and Markdown output
- Create ReproducibilityReport dataclass with all metadata fields
- Implement generate_reproducibility_report function
- Extract parameters from PipelineConfig (scoring weights, data versions)
- Capture software environment (Python, polars, duckdb versions)
- Build filtering steps from ProvenanceTracker
- Compute tier statistics from tiered DataFrame
- Support optional validation metrics
- to_json: write as indented JSON for machine-readable format
- to_markdown: write with tables and headers for human-readable format
- 7 tests covering all report fields, formats, and edge cases
2026-02-12 04:00:21 +08:00

246 lines
7.0 KiB
Python

"""Tests for reproducibility report generation."""
import json
from pathlib import Path
import polars as pl
import pytest
from usher_pipeline.config.schema import (
APIConfig,
DataSourceVersions,
PipelineConfig,
ScoringWeights,
)
from usher_pipeline.output.reproducibility import generate_reproducibility_report
from usher_pipeline.persistence.provenance import ProvenanceTracker
@pytest.fixture
def mock_config(tmp_path):
    """Build a PipelineConfig rooted in pytest's per-test temporary directory.

    The data directory, cache directory and DuckDB file all live under
    ``tmp_path``. Data-source versions and scoring weights are pinned to
    fixed literals so the resulting configuration is the same on every run.
    """
    source_versions = DataSourceVersions(
        ensembl_release=113,
        gnomad_version="v4.1",
        gtex_version="v8",
        hpa_version="23.0",
    )
    # Six scoring weights; the literals nominally add up to 1.0.
    weights = ScoringWeights(
        gnomad=0.20,
        expression=0.20,
        annotation=0.15,
        localization=0.15,
        animal_model=0.15,
        literature=0.15,
    )
    return PipelineConfig(
        data_dir=tmp_path / "data",
        cache_dir=tmp_path / "cache",
        duckdb_path=tmp_path / "pipeline.db",
        versions=source_versions,
        api=APIConfig(),
        scoring=weights,
    )
@pytest.fixture
def mock_provenance(mock_config):
    """Return a ProvenanceTracker pre-loaded with two recorded steps.

    The tracker is created for pipeline version "0.1.0" with the mock
    configuration. It then records a gene-universe fetch (0 -> 20000)
    followed by a gnomAD filtering step (20000 -> 19500).
    """
    tracker = ProvenanceTracker(pipeline_version="0.1.0", config=mock_config)

    # (step name, details) pairs, recorded in the order listed.
    recorded_steps = [
        (
            "gene_universe_fetch",
            {
                "input_count": 0,
                "output_count": 20000,
                "criteria": "Human protein-coding genes from Ensembl",
            },
        ),
        (
            "gnomad_filtering",
            {
                "input_count": 20000,
                "output_count": 19500,
                "criteria": "Remove genes with quality flags",
            },
        ),
    ]
    for step_name, step_details in recorded_steps:
        tracker.record_step(step_name, details=step_details)

    return tracker
@pytest.fixture
def synthetic_tiered_df():
    """Return a 100-row polars DataFrame of made-up tiered genes.

    Columns:
        gene_id: "ENSG" followed by the row index zero-padded to 11 digits.
        gene_symbol: "GENE" followed by the row index.
        composite_score: starts at 0.1 and rises by 0.008 per row.
        confidence_tier: 30 "HIGH", then 40 "MEDIUM", then 30 "LOW".
    """
    row_indices = range(100)

    gene_ids = [f"ENSG{n:011d}" for n in row_indices]
    gene_symbols = [f"GENE{n}" for n in row_indices]
    scores = [0.1 + n * 0.008 for n in row_indices]

    # NOTE(review): scores rise with the row index while tiers run
    # HIGH -> MEDIUM -> LOW, so tier and score are not aligned in this
    # synthetic data. The tests here only count tiers; confirm this is
    # intentional.
    tiers = ["HIGH"] * 30 + ["MEDIUM"] * 40 + ["LOW"] * 30

    return pl.DataFrame({
        "gene_id": gene_ids,
        "gene_symbol": gene_symbols,
        "composite_score": scores,
        "confidence_tier": tiers,
    })
def test_generate_report_has_all_fields(
    mock_config, mock_provenance, synthetic_tiered_df
):
    """Check that every top-level report field is populated.

    The report is generated without validation results. Besides the
    identity, parameter, environment and tier fields, this also covers
    ``filtering_steps`` and ``validation_metrics``, which the JSON and
    validation tests in this module rely on, so a missing field is
    reported here by name.
    """
    report = generate_reproducibility_report(
        config=mock_config,
        tiered_df=synthetic_tiered_df,
        provenance=mock_provenance,
        validation_result=None,
    )

    # The version must be the one the provenance fixture was created with.
    assert report.pipeline_version == "0.1.0"

    required_fields = (
        "run_id",
        "timestamp",
        "parameters",
        "data_versions",
        "software_environment",
        "filtering_steps",
        "tier_statistics",
        "validation_metrics",
    )
    for field_name in required_fields:
        # getattr raises AttributeError if the field is absent altogether.
        assert getattr(report, field_name) is not None, (
            f"report.{field_name} is None"
        )
def test_report_to_json_parseable(
    mock_config, mock_provenance, synthetic_tiered_df, tmp_path
):
    """Write the report as JSON and confirm it loads back with expected keys."""
    out_file = tmp_path / "report.json"
    generate_reproducibility_report(
        config=mock_config,
        tiered_df=synthetic_tiered_df,
        provenance=mock_provenance,
    ).to_json(out_file)

    # json.loads raises if the written file is not valid JSON.
    loaded = json.loads(out_file.read_text())

    expected_keys = (
        "run_id",
        "timestamp",
        "pipeline_version",
        "parameters",
        "data_versions",
        "software_environment",
        "filtering_steps",
        "tier_statistics",
    )
    for key in expected_keys:
        assert key in loaded
def test_report_to_markdown_has_headers(
    mock_config, mock_provenance, synthetic_tiered_df, tmp_path
):
    """Write the report as Markdown and confirm each section header is present."""
    out_file = tmp_path / "report.md"
    generate_reproducibility_report(
        config=mock_config,
        tiered_df=synthetic_tiered_df,
        provenance=mock_provenance,
    ).to_markdown(out_file)

    with open(out_file) as handle:
        rendered = handle.read()

    # Title first, then the section headings, checked in this order.
    expected_headers = (
        "# Pipeline Reproducibility Report",
        "## Parameters",
        "## Data Versions",
        "## Filtering Steps",
        "## Tier Statistics",
        "## Software Environment",
    )
    for header in expected_headers:
        assert header in rendered
def test_report_tier_statistics_match(
    mock_config, mock_provenance, synthetic_tiered_df
):
    """Check that the reported tier counts agree with the input DataFrame."""
    stats = generate_reproducibility_report(
        config=mock_config,
        tiered_df=synthetic_tiered_df,
        provenance=mock_provenance,
    ).tier_statistics

    # The total must equal the number of rows passed in.
    assert stats["total"] == synthetic_tiered_df.height

    # Per-tier counts as laid out by the synthetic_tiered_df fixture.
    expected_counts = {"high": 30, "medium": 40, "low": 30}
    for tier, count in expected_counts.items():
        assert stats[tier] == count

    # The three tiers together must account for every row.
    assert sum(stats[tier] for tier in ("high", "medium", "low")) == stats["total"]
def test_report_includes_validation_when_provided(
    mock_config, mock_provenance, synthetic_tiered_df
):
    """Check that supplied validation results show up in the report."""
    supplied = {
        "median_percentile": 0.85,
        "top_quartile_fraction": 0.92,
        "validation_passed": True,
    }

    report = generate_reproducibility_report(
        config=mock_config,
        tiered_df=synthetic_tiered_df,
        provenance=mock_provenance,
        validation_result=supplied,
    )
    metrics = report.validation_metrics

    # Each supplied metric must come back with the value that was passed in.
    assert "median_percentile" in metrics
    assert metrics["median_percentile"] == 0.85
    assert metrics["top_quartile_fraction"] == 0.92
    assert metrics["validation_passed"] is True
def test_report_without_validation(
    mock_config, mock_provenance, synthetic_tiered_df
):
    """Check that passing ``validation_result=None`` yields empty metrics."""
    report_inputs = {
        "config": mock_config,
        "tiered_df": synthetic_tiered_df,
        "provenance": mock_provenance,
        "validation_result": None,
    }
    report = generate_reproducibility_report(**report_inputs)

    # With no validation input, the metrics mapping must be empty.
    no_metrics = {}
    assert report.validation_metrics == no_metrics
def test_report_software_versions(
    mock_config, mock_provenance, synthetic_tiered_df
):
    """Check that python, polars and duckdb versions are recorded and non-empty."""
    environment = generate_reproducibility_report(
        config=mock_config,
        tiered_df=synthetic_tiered_df,
        provenance=mock_provenance,
    ).software_environment

    tracked_packages = ("python", "polars", "duckdb")

    # First pass: every package must have an entry.
    for package in tracked_packages:
        assert package in environment

    # Second pass: no entry may be the empty string.
    for package in tracked_packages:
        assert environment[package] != ""