This commit is contained in:
2026-01-14 07:03:38 +01:00
parent ecda149a4b
commit c91c0f1fc9
61 changed files with 4388 additions and 38 deletions

View File

@@ -13,20 +13,28 @@ import sys
from pathlib import Path
from typing import Optional
import redis
import typer
from rq import Connection, Worker
from alembic import command as alembic_command
from alembic.config import Config as AlembicConfig
from rich import print as rprint
from rich.console import Console
from rich.table import Table
from pricewatch.app.core import logging as app_logging
from pricewatch.app.core.config import get_config
from pricewatch.app.core.io import read_yaml_config, write_json_results
from pricewatch.app.core.logging import get_logger, set_level
from pricewatch.app.core.registry import get_registry, register_store
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod
from pricewatch.app.db.connection import init_db
from pricewatch.app.scraping.http_fetch import fetch_http
from pricewatch.app.scraping.pipeline import ScrapingPipeline
from pricewatch.app.scraping.pw_fetch import fetch_playwright
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.stores.cdiscount.store import CdiscountStore
from pricewatch.app.tasks.scheduler import ScrapingScheduler
# Créer l'application Typer
app = typer.Typer(
@@ -46,6 +54,75 @@ def setup_stores():
registry.register(CdiscountStore())
def get_alembic_config() -> AlembicConfig:
    """Build the Alembic configuration from the repository layout.

    Returns:
        An ``AlembicConfig`` pointing at the repo-root ``alembic.ini`` and
        the project's migrations directory, with ``sqlalchemy.url`` taken
        from the application config.

    Raises:
        typer.Exit: if ``alembic.ini`` is missing at the repository root.
    """
    repo_root = Path(__file__).resolve().parents[3]
    ini_file = repo_root / "alembic.ini"
    if not ini_file.exists():
        logger.error(f"alembic.ini introuvable: {ini_file}")
        raise typer.Exit(code=1)
    cfg = AlembicConfig(str(ini_file))
    cfg.set_main_option(
        "script_location",
        str(repo_root / "pricewatch" / "app" / "db" / "migrations"),
    )
    cfg.set_main_option("sqlalchemy.url", get_config().db.url)
    return cfg
@app.command("init-db")
def init_db_command():
    """Initialize the database (create every table).

    Exits with code 1 when initialization fails for any reason.
    """
    try:
        init_db(get_config())
    except Exception as exc:
        logger.error(f"Init DB echoue: {exc}")
        raise typer.Exit(code=1)
@app.command()
def migrate(
    message: str = typer.Argument(..., help="Message de migration"),
    autogenerate: bool = typer.Option(True, "--autogenerate/--no-autogenerate"),
):
    """Generate an Alembic migration revision.

    Exits with code 1 when revision generation fails.
    """
    try:
        alembic_command.revision(
            get_alembic_config(), message=message, autogenerate=autogenerate
        )
    except Exception as exc:
        logger.error(f"Migration echouee: {exc}")
        raise typer.Exit(code=1)
@app.command()
def upgrade(revision: str = typer.Argument("head", help="Revision cible")):
    """Apply Alembic migrations up to the target revision.

    Exits with code 1 when the upgrade fails.
    """
    try:
        alembic_command.upgrade(get_alembic_config(), revision)
    except Exception as exc:
        logger.error(f"Upgrade echoue: {exc}")
        raise typer.Exit(code=1)
@app.command()
def downgrade(revision: str = typer.Argument("-1", help="Revision cible")):
    """Roll back Alembic migrations down to the target revision.

    Exits with code 1 when the downgrade fails.
    """
    try:
        alembic_command.downgrade(get_alembic_config(), revision)
    except Exception as exc:
        logger.error(f"Downgrade echoue: {exc}")
        raise typer.Exit(code=1)
@app.command()
def run(
yaml: Path = typer.Option(
@@ -67,6 +144,11 @@ def run(
"-d",
help="Activer le mode debug",
),
save_db: Optional[bool] = typer.Option(
None,
"--save-db/--no-db",
help="Activer la persistence en base de donnees",
),
):
"""
Pipeline complet: scrape toutes les URLs du YAML et génère le JSON.
@@ -88,6 +170,12 @@ def run(
logger.error(f"Erreur lecture YAML: {e}")
raise typer.Exit(code=1)
app_config = get_config()
if save_db is None:
save_db = app_config.enable_db
pipeline = ScrapingPipeline(config=app_config)
logger.info(f"{len(config.urls)} URL(s) à scraper")
# Scraper chaque URL
@@ -158,6 +246,11 @@ def run(
snapshot = store.parse(html, canonical_url)
snapshot.debug.method = fetch_method
if save_db:
product_id = pipeline.process_snapshot(snapshot, save_to_db=True)
if product_id:
logger.info(f"DB: produit id={product_id}")
snapshots.append(snapshot)
status_emoji = "" if snapshot.is_complete() else ""
@@ -180,6 +273,8 @@ def run(
errors=[f"Parsing failed: {str(e)}"],
),
)
if save_db:
pipeline.process_snapshot(snapshot, save_to_db=True)
snapshots.append(snapshot)
else:
# Pas de HTML récupéré
@@ -194,6 +289,8 @@ def run(
errors=[f"Fetch failed: {fetch_error or 'Unknown error'}"],
),
)
if save_db:
pipeline.process_snapshot(snapshot, save_to_db=True)
snapshots.append(snapshot)
# Écrire les résultats
@@ -359,5 +456,65 @@ def doctor():
rprint("\n[green]✓ PriceWatch est prêt![/green]")
@app.command()
def worker(
    queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"),
    with_scheduler: bool = typer.Option(
        True, "--with-scheduler/--no-scheduler", help="Activer le scheduler RQ"
    ),
):
    """Run an RQ worker consuming jobs from the given queue.

    Connects to Redis using the URL from the application config and blocks
    until the worker process is stopped.
    """
    config = get_config()
    connection = redis.from_url(config.redis.url)
    # Pass the connection explicitly instead of using the rq.Connection
    # context manager, which is deprecated and removed in RQ >= 2.0.
    worker_instance = Worker([queue], connection=connection)
    worker_instance.work(with_scheduler=with_scheduler)
@app.command()
def enqueue(
    url: str = typer.Argument(..., help="URL du produit a scraper"),
    queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"),
    save_db: bool = typer.Option(True, "--save-db/--no-db", help="Activer la DB"),
    use_playwright: Optional[bool] = typer.Option(
        None, "--playwright/--no-playwright", help="Forcer Playwright"
    ),
):
    """Enqueue an immediate scraping job for the given product URL."""
    job = ScrapingScheduler(get_config(), queue_name=queue).enqueue_immediate(
        url, use_playwright=use_playwright, save_db=save_db
    )
    rprint(f"[green]✓ Job enqueued: {job.id}[/green]")
@app.command()
def schedule(
    url: str = typer.Argument(..., help="URL du produit a planifier"),
    interval: int = typer.Option(24, "--interval", help="Intervalle en heures"),
    queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"),
    save_db: bool = typer.Option(True, "--save-db/--no-db", help="Activer la DB"),
    use_playwright: Optional[bool] = typer.Option(
        None, "--playwright/--no-playwright", help="Forcer Playwright"
    ),
):
    """Schedule a recurring scraping job for the given product URL."""
    scheduler = ScrapingScheduler(get_config(), queue_name=queue)
    info = scheduler.schedule_product(
        url,
        interval_hours=interval,
        use_playwright=use_playwright,
        save_db=save_db,
    )
    rprint(
        f"[green]✓ Job planifie: {info.job_id} (next={info.next_run.isoformat()})[/green]"
    )
# Standard script entry point: delegate to the Typer application.
if __name__ == "__main__":
    app()