This commit is contained in:
Gilles Soulier
2026-01-14 21:54:55 +01:00
parent c91c0f1fc9
commit d0b73b9319
140 changed files with 5822 additions and 161 deletions

View File

@@ -15,7 +15,7 @@ from typing import Optional
import redis
import typer
from rq import Connection, Worker
from rq import Worker
from alembic import command as alembic_command
from alembic.config import Config as AlembicConfig
from rich import print as rprint
@@ -34,7 +34,7 @@ from pricewatch.app.scraping.pipeline import ScrapingPipeline
from pricewatch.app.scraping.pw_fetch import fetch_playwright
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.stores.cdiscount.store import CdiscountStore
from pricewatch.app.tasks.scheduler import ScrapingScheduler
from pricewatch.app.tasks.scheduler import RedisUnavailableError, ScrapingScheduler
# Créer l'application Typer
app = typer.Typer(
@@ -197,18 +197,21 @@ def run(
html = None
fetch_method = FetchMethod.HTTP
fetch_error = None
http_result = None
# Try HTTP first, unless Playwright is forced via config.options.force_playwright
logger.info("Tentative HTTP...")
http_result = fetch_http(canonical_url)
if config.options.force_playwright:
logger.info("Playwright force, skip HTTP")
else:
logger.info("Tentative HTTP...")
http_result = fetch_http(canonical_url)
if http_result.success:
if http_result and http_result.success:
html = http_result.html
fetch_method = FetchMethod.HTTP
logger.info("✓ HTTP réussi")
elif config.options.use_playwright:
# Fallback Playwright
logger.warning(f"HTTP échoué: {http_result.error}, fallback Playwright")
fallback_reason = http_result.error if http_result else "force_playwright"
logger.warning(f"HTTP échoué: {fallback_reason}, fallback Playwright")
pw_result = fetch_playwright(
canonical_url,
headless=not config.options.headful,
@@ -231,7 +234,7 @@ def run(
fetch_error = pw_result.error
logger.error(f"✗ Playwright échoué: {fetch_error}")
else:
fetch_error = http_result.error
fetch_error = http_result.error if http_result else "skip_http"
logger.error(f"✗ HTTP échoué: {fetch_error}")
# Parser si on a du HTML
@@ -467,11 +470,25 @@ def worker(
Lance un worker RQ.
"""
config = get_config()
connection = redis.from_url(config.redis.url)
try:
connection = redis.from_url(config.redis.url)
# Check that Redis is reachable before starting the worker
connection.ping()
except redis.exceptions.ConnectionError as e:
rprint(f"[red]✗ Impossible de se connecter a Redis ({config.redis.url})[/red]")
rprint(f"[red] Erreur: {e}[/red]")
rprint("\n[yellow]Verifiez que Redis est demarre:[/yellow]")
rprint(" docker compose up -d redis")
rprint(" # ou")
rprint(" redis-server")
raise typer.Exit(code=1)
except redis.exceptions.RedisError as e:
rprint(f"[red]✗ Erreur Redis: {e}[/red]")
raise typer.Exit(code=1)
with Connection(connection):
worker_instance = Worker([queue])
worker_instance.work(with_scheduler=with_scheduler)
# RQ 2.x: the connection is passed directly to the Worker instead of wrapping it in `with Connection(...)`
worker_instance = Worker([queue], connection=connection)
worker_instance.work(with_scheduler=with_scheduler)
@app.command()
@@ -486,9 +503,15 @@ def enqueue(
"""
Enqueue un scraping immediat.
"""
scheduler = ScrapingScheduler(get_config(), queue_name=queue)
job = scheduler.enqueue_immediate(url, use_playwright=use_playwright, save_db=save_db)
rprint(f"[green]✓ Job enqueued: {job.id}[/green]")
try:
scheduler = ScrapingScheduler(get_config(), queue_name=queue)
job = scheduler.enqueue_immediate(url, use_playwright=use_playwright, save_db=save_db)
rprint(f"[green]✓ Job enqueued: {job.id}[/green]")
except RedisUnavailableError as e:
rprint(f"[red]✗ {e.message}[/red]")
rprint("\n[yellow]Verifiez que Redis est demarre:[/yellow]")
rprint(" docker compose up -d redis")
raise typer.Exit(code=1)
@app.command()
@@ -504,16 +527,22 @@ def schedule(
"""
Planifie un scraping recurrent.
"""
scheduler = ScrapingScheduler(get_config(), queue_name=queue)
job_info = scheduler.schedule_product(
url,
interval_hours=interval,
use_playwright=use_playwright,
save_db=save_db,
)
rprint(
f"[green]✓ Job planifie: {job_info.job_id} (next={job_info.next_run.isoformat()})[/green]"
)
try:
scheduler = ScrapingScheduler(get_config(), queue_name=queue)
job_info = scheduler.schedule_product(
url,
interval_hours=interval,
use_playwright=use_playwright,
save_db=save_db,
)
rprint(
f"[green]✓ Job planifie: {job_info.job_id} (next={job_info.next_run.isoformat()})[/green]"
)
except RedisUnavailableError as e:
rprint(f"[red]✗ {e.message}[/red]")
rprint("\n[yellow]Verifiez que Redis est demarre:[/yellow]")
rprint(" docker compose up -d redis")
raise typer.Exit(code=1)
if __name__ == "__main__":