- Create ReproducibilityReport dataclass with all metadata fields (see the sketch after this list)
- Implement generate_reproducibility_report function
- Extract parameters from PipelineConfig (scoring weights, data versions)
- Capture software environment (Python, polars, duckdb versions)
- Build filtering steps from ProvenanceTracker
- Compute tier statistics from tiered DataFrame
- Support optional validation metrics
- to_json: write as indented JSON for machine-readable format
- to_markdown: write with tables and headers for human-readable format
- 7 tests covering all report fields, formats, and edge cases
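The tests in this change pin down the report's public surface. Roughly, the module under test (usher_pipeline/output/reproducibility.py, implied by the import path) could look like the sketch below. Field and key names are taken from the test assertions; the helper names (_as_dict, _tier_count), the provenance attributes (pipeline_version, steps), and the exact Markdown layout are illustrative assumptions, not the actual implementation.

import json
import platform
import uuid
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path

import duckdb
import polars as pl


def _as_dict(obj):
    # The real config types (pydantic models vs. dataclasses) are not shown
    # here; grabbing the public attributes is enough for the sketch.
    return {k: v for k, v in vars(obj).items() if not k.startswith("_")}


def _tier_count(df: pl.DataFrame, tier: str) -> int:
    # Count rows assigned to one confidence tier.
    return df.filter(pl.col("confidence_tier") == tier).height


@dataclass
class ReproducibilityReport:
    run_id: str
    timestamp: str
    pipeline_version: str
    parameters: dict
    data_versions: dict
    software_environment: dict
    filtering_steps: list
    tier_statistics: dict
    validation_metrics: dict = field(default_factory=dict)

    def to_json(self, path: Path) -> None:
        # Machine-readable: dump every field as indented JSON.
        path.write_text(json.dumps(asdict(self), indent=2, default=str))

    def to_markdown(self, path: Path) -> None:
        # Human-readable: one "## ..." section per field group; simple bullet
        # lines stand in for the real table rendering.
        def section(title, items):
            return [f"## {title}", "", *items, ""]

        lines = ["# Pipeline Reproducibility Report", ""]
        lines += section("Parameters", [f"- {k}: {v}" for k, v in self.parameters.items()])
        lines += section("Data Versions", [f"- {k}: {v}" for k, v in self.data_versions.items()])
        lines += section("Filtering Steps", [f"- {s}" for s in self.filtering_steps])
        lines += section("Tier Statistics", [f"- {k}: {v}" for k, v in self.tier_statistics.items()])
        lines += section("Software Environment", [f"- {k}: {v}" for k, v in self.software_environment.items()])
        path.write_text("\n".join(lines))


def generate_reproducibility_report(config, tiered_df, provenance, validation_result=None):
    # Assemble the report from the pipeline config, the tiered results, and
    # the provenance tracker; validation metrics are optional.
    return ReproducibilityReport(
        run_id=str(uuid.uuid4()),
        timestamp=datetime.now(timezone.utc).isoformat(),
        pipeline_version=provenance.pipeline_version,  # attribute name assumed
        parameters={"scoring_weights": _as_dict(config.scoring)},
        data_versions=_as_dict(config.versions),
        software_environment={
            "python": platform.python_version(),
            "polars": pl.__version__,
            "duckdb": duckdb.__version__,
        },
        filtering_steps=list(provenance.steps),  # shape of recorded steps assumed
        tier_statistics={
            "total": tiered_df.height,
            "high": _tier_count(tiered_df, "HIGH"),
            "medium": _tier_count(tiered_df, "MEDIUM"),
            "low": _tier_count(tiered_df, "LOW"),
        },
        validation_metrics=dict(validation_result) if validation_result else {},
    )

The test module below exercises this surface end to end.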
"""Tests for reproducibility report generation."""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import polars as pl
|
|
import pytest
|
|
|
|
from usher_pipeline.config.schema import (
|
|
APIConfig,
|
|
DataSourceVersions,
|
|
PipelineConfig,
|
|
ScoringWeights,
|
|
)
|
|
from usher_pipeline.output.reproducibility import generate_reproducibility_report
|
|
from usher_pipeline.persistence.provenance import ProvenanceTracker
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_config(tmp_path):
|
|
"""Create mock pipeline configuration."""
|
|
return PipelineConfig(
|
|
data_dir=tmp_path / "data",
|
|
cache_dir=tmp_path / "cache",
|
|
duckdb_path=tmp_path / "pipeline.db",
|
|
versions=DataSourceVersions(
|
|
ensembl_release=113,
|
|
gnomad_version="v4.1",
|
|
gtex_version="v8",
|
|
hpa_version="23.0",
|
|
),
|
|
api=APIConfig(),
|
|
scoring=ScoringWeights(
|
|
gnomad=0.20,
|
|
expression=0.20,
|
|
annotation=0.15,
|
|
localization=0.15,
|
|
animal_model=0.15,
|
|
literature=0.15,
|
|
),
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_provenance(mock_config):
|
|
"""Create mock provenance tracker."""
|
|
provenance = ProvenanceTracker(
|
|
pipeline_version="0.1.0",
|
|
config=mock_config,
|
|
)
|
|
|
|
# Record some processing steps
|
|
provenance.record_step(
|
|
"gene_universe_fetch",
|
|
details={
|
|
"input_count": 0,
|
|
"output_count": 20000,
|
|
"criteria": "Human protein-coding genes from Ensembl",
|
|
},
|
|
)
|
|
|
|
provenance.record_step(
|
|
"gnomad_filtering",
|
|
details={
|
|
"input_count": 20000,
|
|
"output_count": 19500,
|
|
"criteria": "Remove genes with quality flags",
|
|
},
|
|
)
|
|
|
|
return provenance
|
|
|
|
|
|
@pytest.fixture
|
|
def synthetic_tiered_df():
|
|
"""Create synthetic tiered DataFrame."""
|
|
return pl.DataFrame({
|
|
"gene_id": [f"ENSG{i:011d}" for i in range(100)],
|
|
"gene_symbol": [f"GENE{i}" for i in range(100)],
|
|
"composite_score": [0.1 + i * 0.008 for i in range(100)],
|
|
"confidence_tier": (
|
|
["HIGH"] * 30 + ["MEDIUM"] * 40 + ["LOW"] * 30
|
|
),
|
|
})
|
|
|
|
|
|
def test_generate_report_has_all_fields(
|
|
mock_config, mock_provenance, synthetic_tiered_df
|
|
):
|
|
"""Test that report contains all required fields."""
|
|
report = generate_reproducibility_report(
|
|
config=mock_config,
|
|
tiered_df=synthetic_tiered_df,
|
|
provenance=mock_provenance,
|
|
validation_result=None,
|
|
)
|
|
|
|
# Check all required fields exist
|
|
assert report.run_id is not None
|
|
assert report.timestamp is not None
|
|
assert report.pipeline_version == "0.1.0"
|
|
assert report.parameters is not None
|
|
assert report.data_versions is not None
|
|
assert report.software_environment is not None
|
|
assert report.tier_statistics is not None
|
|
|
|
|
|
def test_report_to_json_parseable(
|
|
mock_config, mock_provenance, synthetic_tiered_df, tmp_path
|
|
):
|
|
"""Test that JSON output is valid and parseable."""
|
|
report = generate_reproducibility_report(
|
|
config=mock_config,
|
|
tiered_df=synthetic_tiered_df,
|
|
provenance=mock_provenance,
|
|
)
|
|
|
|
json_path = tmp_path / "report.json"
|
|
report.to_json(json_path)
|
|
|
|
# Read back and verify it's valid JSON
|
|
with open(json_path) as f:
|
|
data = json.load(f)
|
|
|
|
# Verify expected keys
|
|
assert "run_id" in data
|
|
assert "timestamp" in data
|
|
assert "pipeline_version" in data
|
|
assert "parameters" in data
|
|
assert "data_versions" in data
|
|
assert "software_environment" in data
|
|
assert "filtering_steps" in data
|
|
assert "tier_statistics" in data
|
|
|
|
|
|
def test_report_to_markdown_has_headers(
|
|
mock_config, mock_provenance, synthetic_tiered_df, tmp_path
|
|
):
|
|
"""Test that Markdown output contains required sections."""
|
|
report = generate_reproducibility_report(
|
|
config=mock_config,
|
|
tiered_df=synthetic_tiered_df,
|
|
provenance=mock_provenance,
|
|
)
|
|
|
|
md_path = tmp_path / "report.md"
|
|
report.to_markdown(md_path)
|
|
|
|
# Read content
|
|
content = md_path.read_text()
|
|
|
|
# Verify headers
|
|
assert "# Pipeline Reproducibility Report" in content
|
|
assert "## Parameters" in content
|
|
assert "## Data Versions" in content
|
|
assert "## Filtering Steps" in content
|
|
assert "## Tier Statistics" in content
|
|
assert "## Software Environment" in content
|
|
|
|
|
|
def test_report_tier_statistics_match(
|
|
mock_config, mock_provenance, synthetic_tiered_df
|
|
):
|
|
"""Test that tier statistics match DataFrame counts."""
|
|
report = generate_reproducibility_report(
|
|
config=mock_config,
|
|
tiered_df=synthetic_tiered_df,
|
|
provenance=mock_provenance,
|
|
)
|
|
|
|
# Verify total matches
|
|
assert report.tier_statistics["total"] == synthetic_tiered_df.height
|
|
|
|
# Verify tier counts
|
|
assert report.tier_statistics["high"] == 30
|
|
assert report.tier_statistics["medium"] == 40
|
|
assert report.tier_statistics["low"] == 30
|
|
|
|
# Verify sum
|
|
tier_sum = (
|
|
report.tier_statistics["high"]
|
|
+ report.tier_statistics["medium"]
|
|
+ report.tier_statistics["low"]
|
|
)
|
|
assert tier_sum == report.tier_statistics["total"]
|
|
|
|
|
|
def test_report_includes_validation_when_provided(
|
|
mock_config, mock_provenance, synthetic_tiered_df
|
|
):
|
|
"""Test that validation metrics are included when provided."""
|
|
validation_result = {
|
|
"median_percentile": 0.85,
|
|
"top_quartile_fraction": 0.92,
|
|
"validation_passed": True,
|
|
}
|
|
|
|
report = generate_reproducibility_report(
|
|
config=mock_config,
|
|
tiered_df=synthetic_tiered_df,
|
|
provenance=mock_provenance,
|
|
validation_result=validation_result,
|
|
)
|
|
|
|
# Verify validation metrics are present
|
|
assert "median_percentile" in report.validation_metrics
|
|
assert report.validation_metrics["median_percentile"] == 0.85
|
|
assert report.validation_metrics["top_quartile_fraction"] == 0.92
|
|
assert report.validation_metrics["validation_passed"] is True
|
|
|
|
|
|
def test_report_without_validation(
|
|
mock_config, mock_provenance, synthetic_tiered_df
|
|
):
|
|
"""Test that report generates without error when validation_result is None."""
|
|
report = generate_reproducibility_report(
|
|
config=mock_config,
|
|
tiered_df=synthetic_tiered_df,
|
|
provenance=mock_provenance,
|
|
validation_result=None,
|
|
)
|
|
|
|
# Should have empty validation metrics
|
|
assert report.validation_metrics == {}
|
|
|
|
|
|
def test_report_software_versions(
|
|
mock_config, mock_provenance, synthetic_tiered_df
|
|
):
|
|
"""Test that software environment contains expected keys."""
|
|
report = generate_reproducibility_report(
|
|
config=mock_config,
|
|
tiered_df=synthetic_tiered_df,
|
|
provenance=mock_provenance,
|
|
)
|
|
|
|
# Verify software versions are captured
|
|
assert "python" in report.software_environment
|
|
assert "polars" in report.software_environment
|
|
assert "duckdb" in report.software_environment
|
|
|
|
# Verify they're not empty
|
|
assert report.software_environment["python"] != ""
|
|
assert report.software_environment["polars"] != ""
|
|
assert report.software_environment["duckdb"] != ""
|