Files
usher-exploring/tests/test_api_client.py
gbanyan 4204116772 feat(01-01): create base API client with retry and caching
- CachedAPIClient with SQLite persistent cache
- Exponential backoff retry on 429/5xx/network errors (tenacity)
- Rate limiting with skip for cached responses
- from_config classmethod for pipeline integration
- 5 passing tests for cache creation, rate limiting, and config integration
2026-02-11 16:25:46 +08:00

150 lines
4.6 KiB
Python

"""Tests for API client with caching and retry logic."""
import time
from pathlib import Path
from unittest.mock import MagicMock, Mock, patch
import pytest
import requests
from usher_pipeline.api_clients.base import CachedAPIClient
from usher_pipeline.config import load_config
def test_client_creates_cache_dir(tmp_path):
    """Constructing a client must create a cache directory that is missing.

    Uses pytest's ``tmp_path`` fixture so the directory is guaranteed not to
    exist before the client is built.
    """
    target = tmp_path / "nonexistent_cache"

    # Precondition: nothing exists at the target path yet.
    assert not target.exists()

    # Only the constructor's side effect is under test; the instance stays
    # bound until the test returns.
    client = CachedAPIClient(cache_dir=target)

    # Postcondition: the path now exists and is a directory.
    assert target.exists()
    assert target.is_dir()
def test_client_caches_response(tmp_path):
    """Issue the same GET twice through the client with ``session.get`` mocked.

    The first mocked response is flagged ``from_cache=False`` and the second
    ``from_cache=True``. Both calls must yield a response whose status code is
    200, and ``session.get`` must have been invoked exactly twice. Because
    ``session.get`` itself is patched, the session's own cache lookup is not
    exercised here.
    """
    url = "https://api.example.com/test"
    payload = {"data": "test"}
    client = CachedAPIClient(cache_dir=tmp_path / "cache", rate_limit=100)

    def fake_response(is_cached):
        # Stand-in for a session response carrying the given cache flag.
        reply = Mock()
        reply.status_code = 200
        reply.json.return_value = payload
        reply.from_cache = is_cached
        reply.raise_for_status = Mock()
        return reply

    fresh_reply = fake_response(False)
    cached_reply = fake_response(True)

    with patch.object(client.session, "get") as mock_get:
        # First pass returns the "fresh" reply, second pass the "cached" one.
        for reply in (fresh_reply, cached_reply):
            mock_get.return_value = reply
            result = client.get(url)
            assert result.status_code == 200

        # Each client.get must have reached session.get exactly once.
        assert mock_get.call_count == 2
def test_client_from_config(tmp_path):
    """Build a client with ``CachedAPIClient.from_config`` from a YAML file.

    A config file is written under ``tmp_path``, loaded with ``load_config``,
    and passed to ``from_config``. The test then checks that the client's
    ``rate_limit``, ``max_retries``, ``timeout`` and ``cache_dir`` match the
    values written to the file.

    NOTE(review): the keys under ``versions``, ``api`` and ``scoring`` are
    nested here as children of those sections. The nesting is reconstructed
    from the key names and the assertions below -- confirm against the schema
    that ``load_config`` expects.
    """
    data_dir = tmp_path / "data"
    cache_dir = tmp_path / "cache"
    duckdb_path = tmp_path / "test.duckdb"

    # Every line carries its own indentation explicitly so the YAML nesting
    # cannot be lost when a multi-line literal is re-indented or reformatted.
    config_lines = [
        f"data_dir: {data_dir}",
        f"cache_dir: {cache_dir}",
        f"duckdb_path: {duckdb_path}",
        "versions:",
        "  ensembl_release: 113",
        "  gnomad_version: v4.1",
        "api:",
        "  rate_limit_per_second: 10",
        "  max_retries: 3",
        "  cache_ttl_seconds: 3600",
        "  timeout_seconds: 60",
        "scoring:",
        "  gnomad: 0.20",
        "  expression: 0.20",
        "  annotation: 0.15",
        "  localization: 0.15",
        "  animal_model: 0.15",
        "  literature: 0.15",
    ]
    config_file = tmp_path / "test_config.yaml"
    config_file.write_text("\n".join(config_lines) + "\n")

    # Load the config and derive a client from it.
    config = load_config(config_file)
    client = CachedAPIClient.from_config(config)

    # The api section and cache_dir must be reflected on the client.
    assert client.rate_limit == 10
    assert client.max_retries == 3
    assert client.timeout == 60
    assert client.cache_dir == cache_dir
def test_rate_limit_respected(tmp_path):
    """A non-cached response must trigger exactly one rate-limit sleep.

    With ``rate_limit=10`` the expected pause is 1/10 of a second. Both
    ``time.sleep`` and ``session.get`` are patched, so no real waiting or
    network traffic happens.
    """
    client = CachedAPIClient(cache_dir=tmp_path / "cache", rate_limit=10)
    url = "https://api.example.com/test"

    # Response that reports it did NOT come from the cache.
    uncached = Mock()
    uncached.status_code = 200
    uncached.from_cache = False
    uncached.raise_for_status = Mock()

    with patch("time.sleep") as mock_sleep:
        with patch.object(client.session, "get") as mock_get:
            mock_get.return_value = uncached

            client.get(url)

            # One sleep, whose first positional argument is the interval.
            mock_sleep.assert_called_once()
            positional = mock_sleep.call_args[0]
            # 10 requests per second -> 0.1 s between requests.
            assert positional[0] == pytest.approx(0.1)
def test_rate_limit_skipped_for_cached(tmp_path):
    """A response flagged ``from_cache=True`` must not trigger any sleep.

    Both ``time.sleep`` and ``session.get`` are patched; the test only checks
    that the patched ``time.sleep`` is never called.
    """
    client = CachedAPIClient(cache_dir=tmp_path / "cache", rate_limit=10)
    url = "https://api.example.com/test"

    # Response that reports it WAS served from the cache.
    cache_hit = Mock()
    cache_hit.status_code = 200
    cache_hit.from_cache = True
    cache_hit.raise_for_status = Mock()

    with patch("time.sleep") as mock_sleep:
        with patch.object(client.session, "get") as mock_get:
            mock_get.return_value = cache_hit

            client.get(url)

            # Cached responses are exempt from rate limiting.
            mock_sleep.assert_not_called()