This commit is contained in:
2026-01-14 07:03:38 +01:00
parent ecda149a4b
commit c91c0f1fc9
61 changed files with 4388 additions and 38 deletions

View File

@@ -0,0 +1,75 @@
"""
Planification des jobs de scraping via RQ Scheduler.
"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Optional
import redis
from rq import Queue
from rq_scheduler import Scheduler
from pricewatch.app.core.config import AppConfig, get_config
from pricewatch.app.core.logging import get_logger
from pricewatch.app.tasks.scrape import scrape_product
# Module-level logger used by the scheduler code in this module.
logger = get_logger("tasks.scheduler")
@dataclass
class ScheduledJobInfo:
    """Summary handed back to the caller once a job has been scheduled."""

    # Identifier of the scheduled job.
    job_id: str
    # Moment at which the job is first due to run.
    next_run: datetime
class ScrapingScheduler:
    """Enqueue and schedule product-scraping jobs through RQ.

    Holds a Redis client, an RQ ``Queue`` and an RQ Scheduler ``Scheduler``
    that share that client.
    """

    def __init__(
        self,
        config: Optional[AppConfig] = None,
        queue_name: str = "default",
    ) -> None:
        """Build the Redis client, queue and scheduler from the configuration.

        Args:
            config: Application configuration; when omitted (or falsy) the
                value returned by ``get_config()`` is used instead.
            queue_name: Name of the RQ queue that receives the jobs.
        """
        self.config = config or get_config()
        self.redis = redis.from_url(self.config.redis.url)
        self.queue = Queue(queue_name, connection=self.redis)
        self.scheduler = Scheduler(queue=self.queue, connection=self.redis)

    def enqueue_immediate(
        self,
        url: str,
        use_playwright: Optional[bool] = None,
        save_db: bool = True,
    ):
        """Enqueue a single ``scrape_product`` job on the queue.

        Args:
            url: Product URL passed positionally to ``scrape_product``.
            use_playwright: Forwarded unchanged to ``scrape_product``.
            save_db: Forwarded unchanged to ``scrape_product``.

        Returns:
            The job object returned by ``Queue.enqueue``.
        """
        job = self.queue.enqueue(
            scrape_product,
            url,
            use_playwright=use_playwright,
            save_db=save_db,
        )
        logger.info(f"Job enqueued: {job.id}")
        return job

    def schedule_product(
        self,
        url: str,
        interval_hours: int = 24,
        use_playwright: Optional[bool] = None,
        save_db: bool = True,
    ) -> ScheduledJobInfo:
        """Schedule a recurring scrape of ``url`` every ``interval_hours`` hours.

        The first run is placed one full interval after the current UTC time.

        Args:
            url: Product URL passed to ``scrape_product``.
            interval_hours: Delay between runs, in hours. Must amount to at
                least one whole second.
            use_playwright: Forwarded unchanged to ``scrape_product``.
            save_db: Forwarded unchanged to ``scrape_product``.

        Returns:
            The scheduled job's id together with its first run time.

        Raises:
            ValueError: If ``interval_hours`` does not yield a positive
                number of whole seconds.
        """
        interval_seconds = int(timedelta(hours=interval_hours).total_seconds())
        # A zero or negative interval cannot describe a recurring schedule and
        # would place the first run at or before "now"; reject it before
        # anything is handed to the scheduler.
        if interval_seconds <= 0:
            raise ValueError(
                "interval_hours must amount to at least one second, "
                f"got {interval_hours!r}"
            )

        next_run = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)
        job = self.scheduler.schedule(
            scheduled_time=next_run,
            func=scrape_product,
            args=[url],
            kwargs={"use_playwright": use_playwright, "save_db": save_db},
            interval=interval_seconds,
            repeat=None,
        )
        logger.info(f"Job planifie: {job.id}, prochaine execution: {next_run.isoformat()}")
        return ScheduledJobInfo(job_id=job.id, next_run=next_run)