- CachedAPIClient with SQLite persistent cache
- Exponential backoff retry on 429/5xx/network errors (tenacity)
- Rate limiting with skip for cached responses
- from_config classmethod for pipeline integration
- 5 passing tests for cache creation, rate limiting, and config integration
150 lines
4.6 KiB
Python
150 lines
4.6 KiB
Python
"""Tests for API client with caching and retry logic."""
|
|
|
|
import time
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, Mock, patch
|
|
|
|
import pytest
|
|
import requests
|
|
|
|
from usher_pipeline.api_clients.base import CachedAPIClient
|
|
from usher_pipeline.config import load_config
|
|
|
|
|
|
def test_client_creates_cache_dir(tmp_path):
    """Constructing a client brings a previously missing cache directory into existence."""
    target = tmp_path / "nonexistent_cache"

    # Precondition: nothing lives at the target path yet.
    assert not target.exists()

    # Instantiation alone is what the test exercises; the instance is not used further.
    _client = CachedAPIClient(cache_dir=target)

    # Postcondition: the path now exists and is a directory.
    assert target.exists()
    assert target.is_dir()
|
|
|
|
|
|
def test_client_caches_response(tmp_path):
    """Request the same URL twice: answered first as a fresh reply, then as a cache hit.

    ``session.get`` is stubbed out, so the test checks that ``client.get``
    yields a 200 response both times and that the session was invoked once
    per request.
    """
    url = "https://api.example.com/test"
    payload = {"data": "test"}

    def fake_response(is_cached):
        # Stand-in response object; ``from_cache`` is the flag that differs
        # between the two replies.
        reply = Mock(status_code=200, from_cache=is_cached, raise_for_status=Mock())
        reply.json.return_value = payload
        return reply

    client = CachedAPIClient(cache_dir=tmp_path / "cache", rate_limit=100)
    fresh_reply = fake_response(False)
    cached_reply = fake_response(True)

    with patch.object(client.session, "get") as session_get:
        # Round one: the session reports a non-cached response.
        session_get.return_value = fresh_reply
        first = client.get(url)
        assert first.status_code == 200

        # Round two: the session reports a cache-served response.
        session_get.return_value = cached_reply
        second = client.get(url)
        assert second.status_code == 200

        # Each client.get must have reached session.get exactly once.
        assert session_get.call_count == 2
|
|
|
|
|
|
def test_client_from_config(tmp_path):
    """Build a client with ``CachedAPIClient.from_config`` from a YAML file on disk.

    A config file is written under ``tmp_path``, loaded with ``load_config``,
    and handed to ``from_config``. The test then checks that the client's
    ``rate_limit``, ``max_retries``, ``timeout`` and ``cache_dir`` attributes
    equal the values written under ``api:`` and ``cache_dir:`` in the file.
    """
    config_file = tmp_path / "test_config.yaml"

    # The entries below ``versions``, ``api`` and ``scoring`` have to be
    # indented: at column 0 YAML reads them as unrelated top-level keys and
    # the three section keys end up null.
    config_file.write_text(f"""
data_dir: {tmp_path / "data"}
cache_dir: {tmp_path / "cache"}
duckdb_path: {tmp_path / "test.duckdb"}
versions:
  ensembl_release: 113
  gnomad_version: v4.1
api:
  rate_limit_per_second: 10
  max_retries: 3
  cache_ttl_seconds: 3600
  timeout_seconds: 60
scoring:
  gnomad: 0.20
  expression: 0.20
  annotation: 0.15
  localization: 0.15
  animal_model: 0.15
  literature: 0.15
""")

    # Load the config and create the client from it.
    config = load_config(config_file)
    client = CachedAPIClient.from_config(config)

    # The API settings and cache location from the file must be applied.
    assert client.rate_limit == 10
    assert client.max_retries == 3
    assert client.timeout == 60
    assert client.cache_dir == tmp_path / "cache"
|
|
|
|
|
|
def test_rate_limit_respected(tmp_path):
    """A non-cached response must be accompanied by exactly one throttling sleep."""
    url = "https://api.example.com/test"
    client = CachedAPIClient(cache_dir=tmp_path / "cache", rate_limit=10)

    # Response flagged as NOT served from cache.
    live_reply = Mock(status_code=200, from_cache=False, raise_for_status=Mock())

    with patch("time.sleep") as fake_sleep:
        with patch.object(client.session, "get") as session_get:
            session_get.return_value = live_reply

            client.get(url)

            # One sleep, no more and no fewer.
            fake_sleep.assert_called_once()
            # 10 requests per second -> 1/10 = 0.1 seconds between requests.
            slept_for = fake_sleep.call_args[0][0]
            assert slept_for == pytest.approx(0.1)
|
|
|
|
|
|
def test_rate_limit_skipped_for_cached(tmp_path):
    """A response flagged as coming from cache must not trigger any throttling sleep."""
    url = "https://api.example.com/test"
    client = CachedAPIClient(cache_dir=tmp_path / "cache", rate_limit=10)

    # Response flagged as served from cache.
    cached_reply = Mock(status_code=200, from_cache=True, raise_for_status=Mock())

    with patch("time.sleep") as fake_sleep:
        with patch.object(client.session, "get") as session_get:
            session_get.return_value = cached_reply

            client.get(url)

            # Cache hits are exempt from rate limiting, so no sleep at all.
            fake_sleep.assert_not_called()
|