Files
usher-exploring/src/usher_pipeline/cli/main.py
gbanyan ee27f3ad2f feat(02-02): add DuckDB loader and CLI evidence command for gnomAD
- load_to_duckdb: Saves constraint DataFrame to gnomad_constraint table with provenance tracking
- query_constrained_genes: Queries constrained genes by LOEUF threshold (validates GCON-03 interpretation)
- evidence_cmd.py: CLI command group with gnomad subcommand (fetch->transform->load orchestration)
- Checkpoint-restart: Skips processing if gnomad_constraint table exists (--force to override)
- Full CLI: usher-pipeline evidence gnomad [--force] [--url URL] [--min-depth N] [--min-cds-pct N]
2026-02-11 18:19:07 +08:00

106 lines
3.1 KiB
Python

"""Main CLI entry point for usher-pipeline.
Provides command group with global options and subcommands for pipeline operations.
"""
import logging
from pathlib import Path
import click
from usher_pipeline import __version__
from usher_pipeline.config.loader import load_config
from usher_pipeline.cli.setup_cmd import setup
from usher_pipeline.cli.evidence_cmd import evidence
# Configure logging once at import time: INFO level by default, with a
# timestamp / logger name / level prefix on every record. The --verbose
# flag on the CLI group raises the root logger to DEBUG later.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
@click.group()
@click.option(
    '--config',
    default='config/default.yaml',
    type=click.Path(exists=True, path_type=Path),
    help='Path to pipeline configuration YAML file',
)
@click.option(
    '--verbose',
    help='Enable verbose logging (DEBUG level)',
    is_flag=True,
)
@click.pass_context
def cli(ctx, config, verbose):
    """Usher-pipeline: Reproducible pipeline for discovering under-studied cilia/Usher candidate genes.
    Provides data infrastructure, gene ID mapping, evidence layer aggregation,
    and scoring for candidate gene prioritization.
    """
    # NOTE: the docstring above is what click shows as --help text, so it is
    # user-facing output rather than internal documentation.

    # Stash the global options on the click context so subcommands can read
    # them back (e.g. ``info`` looks up ctx.obj['config_path']).
    ctx.ensure_object(dict)
    shared = ctx.obj
    shared['config_path'] = config
    shared['verbose'] = verbose

    # Nothing else to do unless DEBUG output was requested.
    if not verbose:
        return

    # Raise the root logger to DEBUG so every module logger inherits it.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    logging.debug("Verbose logging enabled")
@cli.command()
@click.pass_context
def info(ctx):
    """Display pipeline information and configuration summary."""
    say = click.echo

    def heading(title):
        # Section titles are rendered in bold.
        say(click.style(title, bold=True))

    # The path was stored on the context by the top-level group callback.
    cfg_path = ctx.obj['config_path']

    say(f"Usher Pipeline v{__version__}")
    say(f"Config: {cfg_path}")
    say()

    try:
        cfg = load_config(cfg_path)

        # Only the first 16 characters of the config hash are shown.
        digest = cfg.config_hash()
        say(f"Config Hash: {digest[:16]}...")
        say()

        heading("Data Source Versions:")
        say(f" Ensembl Release: {cfg.versions.ensembl_release}")
        say(f" gnomAD Version: {cfg.versions.gnomad_version}")
        say(f" GTEx Version: {cfg.versions.gtex_version}")
        say(f" HPA Version: {cfg.versions.hpa_version}")
        say()

        heading("Paths:")
        say(f" Data Directory: {cfg.data_dir}")
        say(f" Cache Directory: {cfg.cache_dir}")
        say(f" DuckDB Path: {cfg.duckdb_path}")
        say()

        heading("API Configuration:")
        say(f" Rate Limit: {cfg.api.rate_limit_per_second} req/s")
        say(f" Max Retries: {cfg.api.max_retries}")
        say(f" Cache TTL: {cfg.api.cache_ttl_seconds}s")
        say(f" Timeout: {cfg.api.timeout_seconds}s")
    except Exception as exc:
        # CLI boundary: any failure while loading or printing the config is
        # reported in red on stderr and turned into exit status 1.
        say(click.style(f"Error loading config: {exc}", fg='red'), err=True)
        ctx.exit(1)
# Attach the imported commands to the top-level group, in the same order
# as before (setup first, then evidence).
for _command in (setup, evidence):
    cli.add_command(_command)

# Allow running this module directly as a script.
if __name__ == '__main__':
    cli()