codex
This commit is contained in:
@@ -13,20 +13,28 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import redis
|
||||
import typer
|
||||
from rq import Connection, Worker
|
||||
from alembic import command as alembic_command
|
||||
from alembic.config import Config as AlembicConfig
|
||||
from rich import print as rprint
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from pricewatch.app.core import logging as app_logging
|
||||
from pricewatch.app.core.config import get_config
|
||||
from pricewatch.app.core.io import read_yaml_config, write_json_results
|
||||
from pricewatch.app.core.logging import get_logger, set_level
|
||||
from pricewatch.app.core.registry import get_registry, register_store
|
||||
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod
|
||||
from pricewatch.app.db.connection import init_db
|
||||
from pricewatch.app.scraping.http_fetch import fetch_http
|
||||
from pricewatch.app.scraping.pipeline import ScrapingPipeline
|
||||
from pricewatch.app.scraping.pw_fetch import fetch_playwright
|
||||
from pricewatch.app.stores.amazon.store import AmazonStore
|
||||
from pricewatch.app.stores.cdiscount.store import CdiscountStore
|
||||
from pricewatch.app.tasks.scheduler import ScrapingScheduler
|
||||
|
||||
# Créer l'application Typer
|
||||
app = typer.Typer(
|
||||
@@ -46,6 +54,75 @@ def setup_stores():
|
||||
registry.register(CdiscountStore())
|
||||
|
||||
|
||||
def get_alembic_config() -> AlembicConfig:
    """Build the Alembic configuration from the repository layout.

    Resolves ``alembic.ini`` at the repository root, points the script
    location at the project's migrations package, and injects the
    database URL taken from the application configuration.

    Raises:
        typer.Exit: with code 1 when ``alembic.ini`` cannot be found.
    """
    # Three levels up from this module: <repo>/pricewatch/app/cli.py -> <repo>
    repo_root = Path(__file__).resolve().parents[3]
    config_path = repo_root / "alembic.ini"
    migrations_dir = repo_root / "pricewatch" / "app" / "db" / "migrations"

    if not config_path.exists():
        logger.error(f"alembic.ini introuvable: {config_path}")
        raise typer.Exit(code=1)

    cfg = AlembicConfig(str(config_path))
    cfg.set_main_option("script_location", str(migrations_dir))
    cfg.set_main_option("sqlalchemy.url", get_config().db.url)
    return cfg
|
||||
|
||||
|
||||
@app.command("init-db")
def init_db_command():
    """
    Initialize the database (create all tables).
    """
    try:
        init_db(get_config())
    except Exception as e:
        logger.error(f"Init DB echoue: {e}")
        # Chain the cause so the original traceback is preserved for debugging.
        raise typer.Exit(code=1) from e
|
||||
|
||||
|
||||
@app.command()
def migrate(
    message: str = typer.Argument(..., help="Message de migration"),
    autogenerate: bool = typer.Option(True, "--autogenerate/--no-autogenerate"),
):
    """
    Generate an Alembic migration revision.
    """
    try:
        alembic_cfg = get_alembic_config()
        alembic_command.revision(alembic_cfg, message=message, autogenerate=autogenerate)
    except typer.Exit:
        # get_alembic_config() already logged its error and requested an exit;
        # do not re-log it as a migration failure.
        raise
    except Exception as e:
        logger.error(f"Migration echouee: {e}")
        # Chain the cause so the original traceback is preserved for debugging.
        raise typer.Exit(code=1) from e
|
||||
|
||||
|
||||
@app.command()
def upgrade(revision: str = typer.Argument("head", help="Revision cible")):
    """
    Apply Alembic migrations up to the target revision (default: head).
    """
    try:
        alembic_cfg = get_alembic_config()
        alembic_command.upgrade(alembic_cfg, revision)
    except typer.Exit:
        # get_alembic_config() already logged its error and requested an exit;
        # do not re-log it as an upgrade failure.
        raise
    except Exception as e:
        logger.error(f"Upgrade echoue: {e}")
        # Chain the cause so the original traceback is preserved for debugging.
        raise typer.Exit(code=1) from e
|
||||
|
||||
|
||||
@app.command()
def downgrade(revision: str = typer.Argument("-1", help="Revision cible")):
    """
    Roll back Alembic migrations down to the target revision (default: one step).
    """
    try:
        alembic_cfg = get_alembic_config()
        alembic_command.downgrade(alembic_cfg, revision)
    except typer.Exit:
        # get_alembic_config() already logged its error and requested an exit;
        # do not re-log it as a downgrade failure.
        raise
    except Exception as e:
        logger.error(f"Downgrade echoue: {e}")
        # Chain the cause so the original traceback is preserved for debugging.
        raise typer.Exit(code=1) from e
|
||||
|
||||
@app.command()
|
||||
def run(
|
||||
yaml: Path = typer.Option(
|
||||
@@ -67,6 +144,11 @@ def run(
|
||||
"-d",
|
||||
help="Activer le mode debug",
|
||||
),
|
||||
save_db: Optional[bool] = typer.Option(
|
||||
None,
|
||||
"--save-db/--no-db",
|
||||
help="Activer la persistence en base de donnees",
|
||||
),
|
||||
):
|
||||
"""
|
||||
Pipeline complet: scrape toutes les URLs du YAML et génère le JSON.
|
||||
@@ -88,6 +170,12 @@ def run(
|
||||
logger.error(f"Erreur lecture YAML: {e}")
|
||||
raise typer.Exit(code=1)
|
||||
|
||||
app_config = get_config()
|
||||
if save_db is None:
|
||||
save_db = app_config.enable_db
|
||||
|
||||
pipeline = ScrapingPipeline(config=app_config)
|
||||
|
||||
logger.info(f"{len(config.urls)} URL(s) à scraper")
|
||||
|
||||
# Scraper chaque URL
|
||||
@@ -158,6 +246,11 @@ def run(
|
||||
|
||||
snapshot = store.parse(html, canonical_url)
|
||||
snapshot.debug.method = fetch_method
|
||||
if save_db:
|
||||
product_id = pipeline.process_snapshot(snapshot, save_to_db=True)
|
||||
if product_id:
|
||||
logger.info(f"DB: produit id={product_id}")
|
||||
|
||||
snapshots.append(snapshot)
|
||||
|
||||
status_emoji = "✓" if snapshot.is_complete() else "⚠"
|
||||
@@ -180,6 +273,8 @@ def run(
|
||||
errors=[f"Parsing failed: {str(e)}"],
|
||||
),
|
||||
)
|
||||
if save_db:
|
||||
pipeline.process_snapshot(snapshot, save_to_db=True)
|
||||
snapshots.append(snapshot)
|
||||
else:
|
||||
# Pas de HTML récupéré
|
||||
@@ -194,6 +289,8 @@ def run(
|
||||
errors=[f"Fetch failed: {fetch_error or 'Unknown error'}"],
|
||||
),
|
||||
)
|
||||
if save_db:
|
||||
pipeline.process_snapshot(snapshot, save_to_db=True)
|
||||
snapshots.append(snapshot)
|
||||
|
||||
# Écrire les résultats
|
||||
@@ -359,5 +456,65 @@ def doctor():
|
||||
rprint("\n[green]✓ PriceWatch est prêt![/green]")
|
||||
|
||||
|
||||
@app.command()
def worker(
    queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"),
    with_scheduler: bool = typer.Option(
        True, "--with-scheduler/--no-scheduler", help="Activer le scheduler RQ"
    ),
):
    """
    Run an RQ worker consuming the given queue (blocks until stopped).
    """
    config = get_config()
    connection = redis.from_url(config.redis.url)

    # Pass the connection explicitly instead of the deprecated
    # `with Connection(...)` context manager (removed in RQ 2.0);
    # behavior is identical on RQ 1.x.
    worker_instance = Worker([queue], connection=connection)
    worker_instance.work(with_scheduler=with_scheduler)
|
||||
|
||||
|
||||
@app.command()
def enqueue(
    url: str = typer.Argument(..., help="URL du produit a scraper"),
    queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"),
    save_db: bool = typer.Option(True, "--save-db/--no-db", help="Activer la DB"),
    use_playwright: Optional[bool] = typer.Option(
        None, "--playwright/--no-playwright", help="Forcer Playwright"
    ),
):
    """
    Enqueue an immediate scraping job on the given RQ queue.
    """
    # Build the scheduler against the current app config and push the job.
    job = ScrapingScheduler(get_config(), queue_name=queue).enqueue_immediate(
        url, use_playwright=use_playwright, save_db=save_db
    )
    rprint(f"[green]✓ Job enqueued: {job.id}[/green]")
|
||||
|
||||
|
||||
@app.command()
def schedule(
    url: str = typer.Argument(..., help="URL du produit a planifier"),
    interval: int = typer.Option(24, "--interval", help="Intervalle en heures"),
    queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"),
    save_db: bool = typer.Option(True, "--save-db/--no-db", help="Activer la DB"),
    use_playwright: Optional[bool] = typer.Option(
        None, "--playwright/--no-playwright", help="Forcer Playwright"
    ),
):
    """
    Schedule a recurring scraping job for a product URL.
    """
    task_scheduler = ScrapingScheduler(get_config(), queue_name=queue)
    job_info = task_scheduler.schedule_product(
        url,
        interval_hours=interval,
        use_playwright=use_playwright,
        save_db=save_db,
    )
    # Report the scheduled job id and its next run time.
    rprint(
        f"[green]✓ Job planifie: {job_info.job_id} (next={job_info.next_run.isoformat()})[/green]"
    )
|
||||
|
||||
|
||||
# Allow running the CLI directly as a script (e.g. `python cli.py`).
if __name__ == "__main__":
    app()
|
||||
|
||||
Reference in New Issue
Block a user