feat(05-02): implement reproducibility report module with JSON and Markdown output
- Create ReproducibilityReport dataclass with all metadata fields - Implement generate_reproducibility_report function - Extract parameters from PipelineConfig (scoring weights, data versions) - Capture software environment (Python, polars, duckdb versions) - Build filtering steps from ProvenanceTracker - Compute tier statistics from tiered DataFrame - Support optional validation metrics - to_json: write as indented JSON for machine-readable format - to_markdown: write with tables and headers for human-readable format - 7 tests covering all report fields, formats, and edge cases
This commit is contained in:
245
tests/test_reproducibility.py
Normal file
245
tests/test_reproducibility.py
Normal file
@@ -0,0 +1,245 @@
|
||||
"""Tests for reproducibility report generation."""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
import pytest
|
||||
|
||||
from usher_pipeline.config.schema import (
|
||||
APIConfig,
|
||||
DataSourceVersions,
|
||||
PipelineConfig,
|
||||
ScoringWeights,
|
||||
)
|
||||
from usher_pipeline.output.reproducibility import generate_reproducibility_report
|
||||
from usher_pipeline.persistence.provenance import ProvenanceTracker
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_config(tmp_path):
|
||||
"""Create mock pipeline configuration."""
|
||||
return PipelineConfig(
|
||||
data_dir=tmp_path / "data",
|
||||
cache_dir=tmp_path / "cache",
|
||||
duckdb_path=tmp_path / "pipeline.db",
|
||||
versions=DataSourceVersions(
|
||||
ensembl_release=113,
|
||||
gnomad_version="v4.1",
|
||||
gtex_version="v8",
|
||||
hpa_version="23.0",
|
||||
),
|
||||
api=APIConfig(),
|
||||
scoring=ScoringWeights(
|
||||
gnomad=0.20,
|
||||
expression=0.20,
|
||||
annotation=0.15,
|
||||
localization=0.15,
|
||||
animal_model=0.15,
|
||||
literature=0.15,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_provenance(mock_config):
|
||||
"""Create mock provenance tracker."""
|
||||
provenance = ProvenanceTracker(
|
||||
pipeline_version="0.1.0",
|
||||
config=mock_config,
|
||||
)
|
||||
|
||||
# Record some processing steps
|
||||
provenance.record_step(
|
||||
"gene_universe_fetch",
|
||||
details={
|
||||
"input_count": 0,
|
||||
"output_count": 20000,
|
||||
"criteria": "Human protein-coding genes from Ensembl",
|
||||
},
|
||||
)
|
||||
|
||||
provenance.record_step(
|
||||
"gnomad_filtering",
|
||||
details={
|
||||
"input_count": 20000,
|
||||
"output_count": 19500,
|
||||
"criteria": "Remove genes with quality flags",
|
||||
},
|
||||
)
|
||||
|
||||
return provenance
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def synthetic_tiered_df():
|
||||
"""Create synthetic tiered DataFrame."""
|
||||
return pl.DataFrame({
|
||||
"gene_id": [f"ENSG{i:011d}" for i in range(100)],
|
||||
"gene_symbol": [f"GENE{i}" for i in range(100)],
|
||||
"composite_score": [0.1 + i * 0.008 for i in range(100)],
|
||||
"confidence_tier": (
|
||||
["HIGH"] * 30 + ["MEDIUM"] * 40 + ["LOW"] * 30
|
||||
),
|
||||
})
|
||||
|
||||
|
||||
def test_generate_report_has_all_fields(
|
||||
mock_config, mock_provenance, synthetic_tiered_df
|
||||
):
|
||||
"""Test that report contains all required fields."""
|
||||
report = generate_reproducibility_report(
|
||||
config=mock_config,
|
||||
tiered_df=synthetic_tiered_df,
|
||||
provenance=mock_provenance,
|
||||
validation_result=None,
|
||||
)
|
||||
|
||||
# Check all required fields exist
|
||||
assert report.run_id is not None
|
||||
assert report.timestamp is not None
|
||||
assert report.pipeline_version == "0.1.0"
|
||||
assert report.parameters is not None
|
||||
assert report.data_versions is not None
|
||||
assert report.software_environment is not None
|
||||
assert report.tier_statistics is not None
|
||||
|
||||
|
||||
def test_report_to_json_parseable(
|
||||
mock_config, mock_provenance, synthetic_tiered_df, tmp_path
|
||||
):
|
||||
"""Test that JSON output is valid and parseable."""
|
||||
report = generate_reproducibility_report(
|
||||
config=mock_config,
|
||||
tiered_df=synthetic_tiered_df,
|
||||
provenance=mock_provenance,
|
||||
)
|
||||
|
||||
json_path = tmp_path / "report.json"
|
||||
report.to_json(json_path)
|
||||
|
||||
# Read back and verify it's valid JSON
|
||||
with open(json_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Verify expected keys
|
||||
assert "run_id" in data
|
||||
assert "timestamp" in data
|
||||
assert "pipeline_version" in data
|
||||
assert "parameters" in data
|
||||
assert "data_versions" in data
|
||||
assert "software_environment" in data
|
||||
assert "filtering_steps" in data
|
||||
assert "tier_statistics" in data
|
||||
|
||||
|
||||
def test_report_to_markdown_has_headers(
|
||||
mock_config, mock_provenance, synthetic_tiered_df, tmp_path
|
||||
):
|
||||
"""Test that Markdown output contains required sections."""
|
||||
report = generate_reproducibility_report(
|
||||
config=mock_config,
|
||||
tiered_df=synthetic_tiered_df,
|
||||
provenance=mock_provenance,
|
||||
)
|
||||
|
||||
md_path = tmp_path / "report.md"
|
||||
report.to_markdown(md_path)
|
||||
|
||||
# Read content
|
||||
content = md_path.read_text()
|
||||
|
||||
# Verify headers
|
||||
assert "# Pipeline Reproducibility Report" in content
|
||||
assert "## Parameters" in content
|
||||
assert "## Data Versions" in content
|
||||
assert "## Filtering Steps" in content
|
||||
assert "## Tier Statistics" in content
|
||||
assert "## Software Environment" in content
|
||||
|
||||
|
||||
def test_report_tier_statistics_match(
|
||||
mock_config, mock_provenance, synthetic_tiered_df
|
||||
):
|
||||
"""Test that tier statistics match DataFrame counts."""
|
||||
report = generate_reproducibility_report(
|
||||
config=mock_config,
|
||||
tiered_df=synthetic_tiered_df,
|
||||
provenance=mock_provenance,
|
||||
)
|
||||
|
||||
# Verify total matches
|
||||
assert report.tier_statistics["total"] == synthetic_tiered_df.height
|
||||
|
||||
# Verify tier counts
|
||||
assert report.tier_statistics["high"] == 30
|
||||
assert report.tier_statistics["medium"] == 40
|
||||
assert report.tier_statistics["low"] == 30
|
||||
|
||||
# Verify sum
|
||||
tier_sum = (
|
||||
report.tier_statistics["high"]
|
||||
+ report.tier_statistics["medium"]
|
||||
+ report.tier_statistics["low"]
|
||||
)
|
||||
assert tier_sum == report.tier_statistics["total"]
|
||||
|
||||
|
||||
def test_report_includes_validation_when_provided(
|
||||
mock_config, mock_provenance, synthetic_tiered_df
|
||||
):
|
||||
"""Test that validation metrics are included when provided."""
|
||||
validation_result = {
|
||||
"median_percentile": 0.85,
|
||||
"top_quartile_fraction": 0.92,
|
||||
"validation_passed": True,
|
||||
}
|
||||
|
||||
report = generate_reproducibility_report(
|
||||
config=mock_config,
|
||||
tiered_df=synthetic_tiered_df,
|
||||
provenance=mock_provenance,
|
||||
validation_result=validation_result,
|
||||
)
|
||||
|
||||
# Verify validation metrics are present
|
||||
assert "median_percentile" in report.validation_metrics
|
||||
assert report.validation_metrics["median_percentile"] == 0.85
|
||||
assert report.validation_metrics["top_quartile_fraction"] == 0.92
|
||||
assert report.validation_metrics["validation_passed"] is True
|
||||
|
||||
|
||||
def test_report_without_validation(
|
||||
mock_config, mock_provenance, synthetic_tiered_df
|
||||
):
|
||||
"""Test that report generates without error when validation_result is None."""
|
||||
report = generate_reproducibility_report(
|
||||
config=mock_config,
|
||||
tiered_df=synthetic_tiered_df,
|
||||
provenance=mock_provenance,
|
||||
validation_result=None,
|
||||
)
|
||||
|
||||
# Should have empty validation metrics
|
||||
assert report.validation_metrics == {}
|
||||
|
||||
|
||||
def test_report_software_versions(
|
||||
mock_config, mock_provenance, synthetic_tiered_df
|
||||
):
|
||||
"""Test that software environment contains expected keys."""
|
||||
report = generate_reproducibility_report(
|
||||
config=mock_config,
|
||||
tiered_df=synthetic_tiered_df,
|
||||
provenance=mock_provenance,
|
||||
)
|
||||
|
||||
# Verify software versions are captured
|
||||
assert "python" in report.software_environment
|
||||
assert "polars" in report.software_environment
|
||||
assert "duckdb" in report.software_environment
|
||||
|
||||
# Verify they're not empty
|
||||
assert report.software_environment["python"] != ""
|
||||
assert report.software_environment["polars"] != ""
|
||||
assert report.software_environment["duckdb"] != ""
|
||||
Reference in New Issue
Block a user