2026-01-18 12:23:01 +01:00
parent ef3d0ed970
commit bb1263edb8
86 changed files with 90289 additions and 0 deletions

Binary file not shown.


@@ -0,0 +1,47 @@
from __future__ import annotations

from functools import lru_cache
from pathlib import Path

from pydantic import BaseModel

# Path to the JSON config shared by the backend components.
CONFIG_PATH = Path(__file__).resolve().parent.parent.parent / "config_backend.json"


class AppConfig(BaseModel):
    env: str
    version: str
    base_url: str
    log_level: str


class ScrapeConfig(BaseModel):
    interval_minutes: int
    headless: bool
    timeout_ms: int
    retries: int
    delay_range_ms: tuple[int, int]
    user_agent: str
    viewport: dict[str, int]
    locale: str
    timezone: str
    proxy: str | None


class TaxonomyConfig(BaseModel):
    categories: list[str]
    types_by_category: dict[str, list[str]]


class BackendConfig(BaseModel):
    app: AppConfig
    scrape: ScrapeConfig
    stores_enabled: list[str]
    taxonomy: TaxonomyConfig


@lru_cache(maxsize=1)
def load_config() -> BackendConfig:
    # Cached so the file is not re-read on every request.
    # parse_file is the Pydantic v1-style API (deprecated in v2 in favour of
    # model_validate_json on the file contents).
    return BackendConfig.parse_file(CONFIG_PATH)
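For reference, a minimal sketch of the shape config_backend.json is expected to have, expressed against the models above. All values here are illustrative assumptions, not taken from the actual config file:

# Illustrative only: builds the same structure the JSON file must encode.
sample = BackendConfig(
    app=AppConfig(env="dev", version="0.1.0",
                  base_url="http://localhost:8000", log_level="INFO"),
    scrape=ScrapeConfig(
        interval_minutes=30,
        headless=True,
        timeout_ms=15000,
        retries=2,
        delay_range_ms=(500, 1500),
        user_agent="Mozilla/5.0",
        viewport={"width": 1280, "height": 800},
        locale="fr-FR",
        timezone="Europe/Paris",
        proxy=None,
    ),
    stores_enabled=["example_store"],
    taxonomy=TaxonomyConfig(
        categories=["example_category"],
        types_by_category={"example_category": ["example_type"]},
    ),
)
print(sample.json(indent=2))  # v1-style serialization, matching parse_file above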


@@ -0,0 +1,20 @@
from __future__ import annotations

from pathlib import Path

from loguru import logger

# Log directory used to trace scrape runs and errors.
LOG_DIR = Path(__file__).resolve().parent.parent.parent / "logs"
LOG_DIR.mkdir(parents=True, exist_ok=True)

LOG_FILE = LOG_DIR / "scrap.log"

logger.add(
    LOG_FILE,
    rotation="10 MB",
    retention="7 days",
    level="INFO",
    enqueue=True,
    backtrace=True,
    diagnose=True,
)  # simple rotation so the log artifacts do not grow unbounded
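A quick sketch of how another backend module would pick this up; the import path backend.app.core.logging is an assumption inferred from the config module above. Importing the module applies the file sink as a side effect, after which the shared loguru logger can be used directly:

# Hypothetical consumer module; the logging module's import path is assumed.
import backend.app.core.logging  # noqa: F401  # applies the file sink on import

from loguru import logger

logger.info("scrape run started for store {}", "example_store")
logger.error("scrape failed after {} retries", 3)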


@@ -0,0 +1,28 @@
from __future__ import annotations

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from loguru import logger

from backend.app.core.config import load_config
from backend.app.scraper.runner import scrape_all

scheduler = BackgroundScheduler(timezone=load_config().scrape.timezone)
"""Internal APScheduler scheduler, ready to trigger scrape_all according to the config."""


def start_scheduler() -> None:
    if scheduler.running:
        return
    config = load_config()
    interval = config.scrape.interval_minutes
    scheduler.add_job(
        scrape_all,
        trigger=IntervalTrigger(minutes=interval),
        id="scheduled-scrape-all",
        replace_existing=True,
        # next_run_time=None adds the job in a paused state; it has to be
        # resumed (or rescheduled) before it will fire.
        next_run_time=None,
    )
    scheduler.start()
    # loguru uses {}-style placeholders, not printf-style %s.
    logger.info("Scheduler started with an interval of {} minutes", interval)
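Because the job is added paused (next_run_time=None), something still has to call start_scheduler() and resume the job. A minimal sketch of that wiring, assuming a FastAPI app that is not part of this excerpt and an assumed module path for the scheduler:

from contextlib import asynccontextmanager

from fastapi import FastAPI

from backend.app.core.scheduler import scheduler, start_scheduler  # assumed path


@asynccontextmanager
async def lifespan(app: FastAPI):
    start_scheduler()
    scheduler.get_job("scheduled-scrape-all").resume()  # un-pause the scrape job
    yield
    scheduler.shutdown(wait=False)


app = FastAPI(lifespan=lifespan)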