codex
This commit is contained in:
75
pricewatch/app/tasks/scheduler.py
Executable file
75
pricewatch/app/tasks/scheduler.py
Executable file
@@ -0,0 +1,75 @@
|
||||
"""
|
||||
Scheduling of scraping jobs via RQ Scheduler.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
|
||||
import redis
|
||||
from rq import Queue
|
||||
from rq_scheduler import Scheduler
|
||||
|
||||
from pricewatch.app.core.config import AppConfig, get_config
|
||||
from pricewatch.app.core.logging import get_logger
|
||||
from pricewatch.app.tasks.scrape import scrape_product
|
||||
|
||||
# Module-level logger used by the scheduling helpers in this module.
logger = get_logger("tasks.scheduler")
|
||||
|
||||
|
||||
@dataclass
class ScheduledJobInfo:
    """Information returned to the caller about a scheduled job."""

    # Identifier of the scheduled job.
    job_id: str
    # Date and time at which the job is next due to run.
    next_run: datetime
|
||||
|
||||
|
||||
class ScrapingScheduler:
    """Enqueue scraping jobs on an RQ queue, immediately or on a recurring schedule.

    Instance attributes (all set in ``__init__``):
        config: Application configuration in use.
        redis: Redis client created from ``config.redis.url``.
        queue: RQ queue that receives the jobs.
        scheduler: RQ Scheduler bound to ``queue`` and the same Redis connection.
    """

    def __init__(self, config: Optional[AppConfig] = None, queue_name: str = "default") -> None:
        """Create the Redis connection, the RQ queue and the RQ scheduler.

        Args:
            config: Application configuration. When omitted (or falsy), the
                result of ``get_config()`` is used instead.
            queue_name: Name of the RQ queue the jobs are pushed to.
        """
        self.config = config or get_config()
        self.redis = redis.from_url(self.config.redis.url)
        self.queue = Queue(queue_name, connection=self.redis)
        self.scheduler = Scheduler(queue=self.queue, connection=self.redis)

    def enqueue_immediate(
        self,
        url: str,
        use_playwright: Optional[bool] = None,
        save_db: bool = True,
    ):
        """Enqueue a single ``scrape_product`` job on the queue right away.

        Args:
            url: Product URL, passed to ``scrape_product`` as its positional
                argument.
            use_playwright: Forwarded unchanged to ``scrape_product``.
            save_db: Forwarded unchanged to ``scrape_product``.

        Returns:
            The job object returned by ``Queue.enqueue``.
        """
        job = self.queue.enqueue(
            scrape_product,
            url,
            use_playwright=use_playwright,
            save_db=save_db,
        )
        logger.info(f"Job enqueued: {job.id}")
        return job

    def schedule_product(
        self,
        url: str,
        interval_hours: int = 24,
        use_playwright: Optional[bool] = None,
        save_db: bool = True,
    ) -> ScheduledJobInfo:
        """Schedule a recurring ``scrape_product`` job (interval given in hours).

        The first run is planned one full interval from now (UTC), and the
        same interval is handed to the scheduler for the following runs.

        Args:
            url: Product URL, passed to ``scrape_product`` as its positional
                argument.
            interval_hours: Delay between two runs, in hours.
            use_playwright: Forwarded unchanged to ``scrape_product``.
            save_db: Forwarded unchanged to ``scrape_product``.

        Returns:
            A ``ScheduledJobInfo`` holding the job id and the first run time.

        Raises:
            ValueError: If ``interval_hours`` does not amount to at least one
                whole second.
        """
        interval_seconds = int(timedelta(hours=interval_hours).total_seconds())
        # Reject the request before talking to Redis: a zero or negative
        # interval would make the first run non-future and is not a usable
        # recurrence period.
        if interval_seconds <= 0:
            raise ValueError(
                f"interval_hours must yield a positive interval, got {interval_hours!r}"
            )
        next_run = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)

        job = self.scheduler.schedule(
            scheduled_time=next_run,
            func=scrape_product,
            args=[url],
            kwargs={"use_playwright": use_playwright, "save_db": save_db},
            interval=interval_seconds,
            repeat=None,  # no repeat count is set for the job
        )
        logger.info(f"Job planifie: {job.id}, prochaine execution: {next_run.isoformat()}")
        return ScheduledJobInfo(job_id=job.id, next_run=next_run)
|
||||
Reference in New Issue
Block a user