This commit is contained in:
2026-01-19 06:16:38 +01:00
parent 4ff5d3ee79
commit dcb25e0163
74 changed files with 232377 additions and 177 deletions

View File

@@ -1,11 +1,17 @@
from __future__ import annotations
import json
from pathlib import Path
from fastapi import APIRouter, Body, HTTPException
from backend.app.core.config import BackendConfig, CONFIG_PATH, load_config
router = APIRouter(prefix="/config", tags=["config"])
# Path to the frontend config file
FRONTEND_CONFIG_PATH = Path(__file__).resolve().parent.parent.parent.parent / "frontend" / "config_frontend.json"
@router.get("/backend", response_model=BackendConfig)
def read_backend_config() -> BackendConfig:
@@ -18,9 +24,55 @@ def update_backend_config(payload: dict = Body(...)) -> BackendConfig:
current = load_config()
try:
# validate via Pydantic before writing
updated = current.copy(update=payload)
CONFIG_PATH.write_text(updated.json(indent=2, ensure_ascii=False))
updated = current.model_copy(update=payload)
CONFIG_PATH.write_text(updated.model_dump_json(indent=2), encoding="utf-8")
load_config.cache_clear()
return load_config()
except Exception as exc: # pragma: no cover
raise HTTPException(status_code=400, detail=str(exc))
@router.get("/frontend")
def read_frontend_config() -> dict:
"""Retourne la configuration frontend."""
if not FRONTEND_CONFIG_PATH.exists():
raise HTTPException(status_code=404, detail="Config frontend introuvable")
return json.loads(FRONTEND_CONFIG_PATH.read_text(encoding="utf-8"))
@router.put("/frontend")
def update_frontend_config(payload: dict = Body(...)) -> dict:
"""Met à jour la configuration frontend."""
try:
# Load the current config
current = {}
if FRONTEND_CONFIG_PATH.exists():
current = json.loads(FRONTEND_CONFIG_PATH.read_text(encoding="utf-8"))
# Deep-merge the configs
def deep_merge(base: dict, update: dict) -> dict:
result = base.copy()
for key, value in update.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
result[key] = deep_merge(result[key], value)
else:
result[key] = value
return result
updated = deep_merge(current, payload)
FRONTEND_CONFIG_PATH.write_text(
json.dumps(updated, indent=2, ensure_ascii=False),
encoding="utf-8"
)
# Also update the copy in public/ for the frontend dev server
public_config = FRONTEND_CONFIG_PATH.parent / "public" / "config_frontend.json"
if public_config.parent.exists():
public_config.write_text(
json.dumps(updated, indent=2, ensure_ascii=False),
encoding="utf-8"
)
return updated
except Exception as exc:
raise HTTPException(status_code=400, detail=str(exc))
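A quick way to exercise the new deep-merge endpoint from a script (an illustrative sketch only; the base URL and the "theme" key are assumptions, not part of this commit):

import httpx

resp = httpx.put(
    "http://localhost:8000/config/frontend",
    json={"theme": {"mode": "dark"}},  # nested keys are merged into the existing config, not replaced
)
resp.raise_for_status()
print(resp.json())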

View File

@@ -10,10 +10,10 @@ from backend.app.scraper.runner import scrape_product
router = APIRouter(prefix="/products", tags=["products"])
@router.get("", response_model=list[schemas.ProductRead])
def list_products(skip: int = 0, limit: int = 50, db: Session = Depends(get_db)) -> list[schemas.ProductRead]:
# return the paginated product list
return crud.list_products(db, skip=skip, limit=limit)
@router.get("", response_model=list[schemas.ProductWithSnapshot])
def list_products(skip: int = 0, limit: int = 50, db: Session = Depends(get_db)) -> list[schemas.ProductWithSnapshot]:
# return the paginated product list enriched with the latest snapshots
return crud.list_products_with_snapshots(db, skip=skip, limit=limit)
@router.post("", response_model=schemas.ProductRead, status_code=status.HTTP_201_CREATED)
@@ -28,9 +28,9 @@ def create_product(
return product
@router.get("/{product_id}", response_model=schemas.ProductRead)
def read_product(product_id: int, db: Session = Depends(get_db)) -> schemas.ProductRead:
product = crud.get_product(db, product_id)
@router.get("/{product_id}", response_model=schemas.ProductWithSnapshot)
def read_product(product_id: int, db: Session = Depends(get_db)) -> schemas.ProductWithSnapshot:
product = crud.get_product_with_snapshot(db, product_id)
if not product:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Produit introuvable")
return product
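For reference, a minimal client-side sketch of the enriched listing (assumes a local dev server on port 8000; not part of the commit):

import httpx

products = httpx.get("http://localhost:8000/products", params={"skip": 0, "limit": 20}).json()
for p in products:
    print(p["titre"], p.get("prix_actuel"), p.get("reduction_pourcent"))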

View File

@@ -20,7 +20,14 @@ def list_products(db: Session, skip: int = 0, limit: int = 100) -> list[models.P
def create_product(db: Session, data: schemas.ProductCreate) -> models.Product:
product = models.Product(**data.dict())
# Convert HttpUrl values to plain strings for SQLite
data_dict = data.model_dump()
if data_dict.get("url"):
data_dict["url"] = str(data_dict["url"])
if data_dict.get("url_image"):
data_dict["url_image"] = str(data_dict["url_image"])
product = models.Product(**data_dict)
db.add(product)
try:
db.commit()
@@ -62,3 +69,63 @@ def get_latest_snapshot(db: Session, product_id: int) -> models.ProductSnapshot
.order_by(models.ProductSnapshot.scrape_le.desc())
.first()
)
def get_product_with_snapshot(db: Session, product_id: int) -> dict | None:
"""Retourne un produit enrichi avec les données du dernier snapshot."""
product = get_product(db, product_id)
if not product:
return None
return _enrich_product_with_snapshot(db, product)
def list_products_with_snapshots(db: Session, skip: int = 0, limit: int = 100) -> list[dict]:
"""Retourne la liste des produits enrichis avec leurs derniers snapshots."""
products = list_products(db, skip=skip, limit=limit)
return [_enrich_product_with_snapshot(db, p) for p in products]
def _enrich_product_with_snapshot(db: Session, product: models.Product) -> dict:
"""Ajoute les données du dernier snapshot au produit."""
snapshot = get_latest_snapshot(db, product.id)
result = {
"id": product.id,
"boutique": product.boutique,
"url": str(product.url),
"asin": product.asin,
"titre": product.titre,
"url_image": str(product.url_image) if product.url_image else None,
"categorie": product.categorie,
"type": product.type,
"actif": product.actif,
"cree_le": product.cree_le,
"modifie_le": product.modifie_le,
}
if snapshot:
# Compute the discount percentage
reduction = None
if snapshot.prix_actuel and snapshot.prix_conseille:
reduction = round((1 - snapshot.prix_actuel / snapshot.prix_conseille) * 100)
result.update(
{
"prix_actuel": snapshot.prix_actuel,
"prix_conseille": snapshot.prix_conseille,
"prix_min_30j": snapshot.prix_min_30j,
"reduction_pourcent": reduction,
"etat_stock": snapshot.etat_stock,
"en_stock": snapshot.en_stock,
"note": snapshot.note,
"nombre_avis": snapshot.nombre_avis,
"prime": snapshot.prime,
"choix_amazon": snapshot.choix_amazon,
"offre_limitee": snapshot.offre_limitee,
"exclusivite_amazon": snapshot.exclusivite_amazon,
"dernier_scrape": snapshot.scrape_le,
"statut_scrap": snapshot.statut_scrap,
}
)
return result
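A worked example of the discount rounding above, with hypothetical prices:

# Hypothetical values, only to illustrate the computation in _enrich_product_with_snapshot
prix_actuel, prix_conseille = 79.99, 99.99
reduction = round((1 - prix_actuel / prix_conseille) * 100)
print(reduction)  # 20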

View File

@@ -61,3 +61,29 @@ class ProductSnapshotRead(ProductSnapshotBase):
class Config:
orm_mode = True
class ProductWithSnapshot(ProductBase):
"""Produit enrichi avec les données du dernier snapshot."""
id: int
cree_le: datetime
modifie_le: datetime
# Latest snapshot data
prix_actuel: Optional[float] = None
prix_conseille: Optional[float] = None
prix_min_30j: Optional[float] = None
reduction_pourcent: Optional[int] = None
etat_stock: Optional[str] = None
en_stock: Optional[bool] = None
note: Optional[float] = None
nombre_avis: Optional[int] = None
prime: Optional[bool] = None
choix_amazon: Optional[bool] = None
offre_limitee: Optional[bool] = None
exclusivite_amazon: Optional[bool] = None
dernier_scrape: Optional[datetime] = None
statut_scrap: Optional[str] = None
class Config:
orm_mode = True

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
from os import getenv
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from dotenv import load_dotenv
from backend.app.api import routes_config, routes_debug, routes_products, routes_scrape
@@ -14,6 +15,15 @@ load_dotenv()
app = FastAPI(title="suivi_produit")
# CORS for the frontend
app.add_middleware(
CORSMiddleware,
allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
app.include_router(routes_products.router)
app.include_router(routes_scrape.router)
app.include_router(routes_config.router)
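To sanity-check the CORS setup from a script (a sketch, assuming the API is served locally on port 8000):

import httpx

r = httpx.options(
    "http://localhost:8000/products",
    headers={
        "Origin": "http://localhost:5173",
        "Access-Control-Request-Method": "GET",
    },
)
print(r.headers.get("access-control-allow-origin"))  # expected: http://localhost:5173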

[25 oversized file diffs suppressed; 24 newly added binary images (913 KiB to 3.8 MiB) not shown]

View File

@@ -13,7 +13,13 @@ from sqlalchemy.orm import Session
from backend.app.core.config import load_config
from backend.app.db import database, models
from backend.app.scraper.amazon.parser import detect_blocked, extract_product_data
from backend.app.scraper.amazon.parser import extract_product_data
# Storage directories
SAMPLES_DIR = Path(__file__).resolve().parent.parent / "samples"
DEBUG_DIR = SAMPLES_DIR / "debug"
STORAGE_STATE_PATH = SAMPLES_DIR / "storage_state.json"
RAW_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data" / "raw"
def _create_run(session: Session) -> models.ScrapeRun:
@@ -32,9 +38,8 @@ def _finalize_run(run: models.ScrapeRun, session: Session, status: str) -> None:
def _save_raw_json(payload: dict, product_id: int) -> Path:
base_dir = Path(__file__).resolve().parent.parent.parent / "data" / "raw"
timestamp = datetime.utcnow().strftime("%Y-%m-%d")
folder = base_dir / timestamp
folder = RAW_DATA_DIR / timestamp
folder.mkdir(parents=True, exist_ok=True)
filename = f"{product_id}_{datetime.utcnow().strftime('%H%M%S')}.json"
path = folder / filename
@@ -42,15 +47,24 @@ def _save_raw_json(payload: dict, product_id: int) -> Path:
return path
def _save_debug_artifacts(page, product_id: int) -> tuple[Path, Path]:
base_dir = Path(__file__).resolve().parent.parent.parent / "data" / "screenshots"
base_dir.mkdir(parents=True, exist_ok=True)
def _save_debug_artifacts(page, product_id: int, suffix: str = "capture") -> dict:
"""Sauvegarde screenshot et HTML dans le répertoire debug."""
DEBUG_DIR.mkdir(parents=True, exist_ok=True)
stamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
screenshot_path = base_dir / f"{product_id}_{stamp}.png"
html_path = base_dir / f"{product_id}_{stamp}.html"
page.screenshot(path=str(screenshot_path), full_page=True)
html_path.write_text(page.content())
return screenshot_path, html_path
debug_files = {}
try:
screenshot_path = DEBUG_DIR / f"{product_id}_{stamp}_{suffix}.png"
html_path = DEBUG_DIR / f"{product_id}_{stamp}_{suffix}.html"
page.screenshot(path=str(screenshot_path), full_page=True)
html_path.write_text(page.content(), encoding="utf-8")
debug_files = {
"screenshot": str(screenshot_path),
"html": str(html_path),
}
logger.info("Artifacts debug sauvegardés: screenshot={}, html={}", screenshot_path.name, html_path.name)
except Exception as e:
logger.warning("Impossible de générer les artifacts de debug: {}", e)
return debug_files
def _update_product_from_scrape(
@@ -101,77 +115,130 @@ def _create_snapshot(
session.commit()
def _create_browser_context(playwright, config):
"""Crée un contexte navigateur avec storage_state si disponible."""
browser = playwright.chromium.launch(headless=config.scrape.headless)
context_kwargs = {
"locale": config.scrape.locale,
"timezone_id": config.scrape.timezone,
"user_agent": config.scrape.user_agent,
"viewport": config.scrape.viewport,
}
# Load the persisted session if available
if STORAGE_STATE_PATH.exists():
context_kwargs["storage_state"] = str(STORAGE_STATE_PATH)
logger.info("Session persistée chargée: {}", STORAGE_STATE_PATH)
context = browser.new_context(**context_kwargs)
return browser, context
def _save_storage_state(context) -> None:
"""Sauvegarde l'état de session pour réutilisation."""
try:
context.storage_state(path=str(STORAGE_STATE_PATH))
logger.info("Session persistée sauvegardée: {}", STORAGE_STATE_PATH)
except Exception as e:
logger.warning("Impossible de sauvegarder la session: {}", e)
def _process_product(
page,
session: Session,
product: models.Product,
run: models.ScrapeRun,
config,
) -> tuple[bool, dict]:
"""Scrape un produit et retourne (success, data)."""
logger.info("Scraping produit {} ({})", product.id, product.url)
page.goto(product.url, wait_until="domcontentloaded", timeout=config.scrape.timeout_ms)
# Always save the debug artifacts
debug_files = _save_debug_artifacts(page, product.id, "capture")
# Extract the data
data = extract_product_data(page, product.url)
# Check for a block (no title = likely Amazon block)
if not data.get("titre"):
logger.warning("Titre absent pour produit {}, probable blocage Amazon", product.id)
data["bloque"] = True
data["debug_files"] = debug_files
raw_path = _save_raw_json(data, product.id)
_create_snapshot(
session,
product,
run,
data,
status="bloque",
raw_json_path=raw_path,
error_message=f"Blocage détecté - debug: {debug_files.get('screenshot', 'N/A')}",
)
return False, data
# Success or partial
data["debug_files"] = debug_files
raw_path = _save_raw_json(data, product.id)
required = ["titre", "prix_actuel"]
missing = [field for field in required if not data.get(field)]
status = "champs_manquants" if missing else "ok"
_create_snapshot(
session,
product,
run,
data,
status=status,
raw_json_path=raw_path,
error_message=", ".join(missing) if missing else None,
)
if missing:
logger.warning("Champs manquants pour {}: {}", product.id, missing)
return False, data
logger.info("Scraping OK pour {} (titre={})", product.id, data.get("titre", "")[:50])
return True, data
def scrape_product(product_id: int) -> None:
logger.info("Déclenchement du scraping pour le produit %s", product_id)
logger.info("Déclenchement du scraping pour le produit {}", product_id)
session = database.SessionLocal()
run = _create_run(session)
try:
product = session.get(models.Product, product_id)
if not product:
logger.warning("Produit %s introuvable", product_id)
logger.warning("Produit {} introuvable", product_id)
_finalize_run(run, session, "echec")
return
config = load_config()
run.nb_total = 1
session.commit()
with sync_playwright() as playwright:
browser = playwright.chromium.launch(headless=config.scrape.headless)
context = browser.new_context(
locale=config.scrape.locale,
timezone_id=config.scrape.timezone,
user_agent=config.scrape.user_agent,
viewport=config.scrape.viewport,
)
browser, context = _create_browser_context(playwright, config)
page = context.new_page()
page.set_default_timeout(config.scrape.timeout_ms)
try:
page.goto(product.url, wait_until="domcontentloaded", timeout=config.scrape.timeout_ms)
success, _ = _process_product(page, session, product, run, config)
run.nb_ok = 1 if success else 0
run.nb_echec = 0 if success else 1
_finalize_run(run, session, "succes" if success else "partiel")
html = page.content()
if detect_blocked(html):
screenshot_path, html_path = _save_debug_artifacts(page, product.id)
data = {"url": product.url, "asin": product.asin, "bloque": True}
raw_path = _save_raw_json(data, product.id)
_create_snapshot(
session,
product,
run,
data,
status="bloque",
raw_json_path=raw_path,
error_message=f"Bloque: {screenshot_path.name} / {html_path.name}",
)
run.nb_echec = 1
_finalize_run(run, session, "partiel")
return
data = extract_product_data(page, product.url)
raw_path = _save_raw_json(data, product.id)
required = ["titre", "prix_actuel", "note"]
missing = [field for field in required if not data.get(field)]
status = "champs_manquants" if missing else "ok"
_create_snapshot(
session,
product,
run,
data,
status=status,
raw_json_path=raw_path,
error_message=", ".join(missing) if missing else None,
)
run.nb_ok = 1 if not missing else 0
run.nb_echec = 0 if not missing else 1
_finalize_run(run, session, "succes" if not missing else "partiel")
# Save the session for reuse
_save_storage_state(context)
# Anti-blocking delay
delay_min, delay_max = config.scrape.delay_range_ms
time.sleep(random.uniform(delay_min, delay_max) / 1000.0)
finally:
# clean shutdown of the browser
context.close()
browser.close()
except Exception: # pragma: no cover
logger.exception("Erreur pendant le scraping de %s", product_id)
except Exception as e:
logger.exception("Erreur pendant le scraping de {}: {}", product_id, e)
_finalize_run(run, session, "erreur")
finally:
session.close()
@@ -183,20 +250,19 @@ def scrape_all(product_ids: Iterable[int] | None = None) -> None:
run = _create_run(session)
try:
config = load_config()
products = session.query(models.Product).all()
products = session.query(models.Product).filter(models.Product.actif == True).all()
if product_ids:
products = [product for product in products if product.id in product_ids]
run.nb_total = len(products)
session.commit()
if not products:
logger.info("Aucun produit actif à scraper")
_finalize_run(run, session, "succes")
return
with sync_playwright() as playwright:
browser = playwright.chromium.launch(headless=config.scrape.headless)
context = browser.new_context(
locale=config.scrape.locale,
timezone_id=config.scrape.timezone,
user_agent=config.scrape.user_agent,
viewport=config.scrape.viewport,
)
browser, context = _create_browser_context(playwright, config)
page = context.new_page()
page.set_default_timeout(config.scrape.timeout_ms)
@@ -205,55 +271,31 @@ def scrape_all(product_ids: Iterable[int] | None = None) -> None:
try:
for product in products:
page.goto(product.url, wait_until="domcontentloaded", timeout=config.scrape.timeout_ms)
html = page.content()
if detect_blocked(html):
screenshot_path, html_path = _save_debug_artifacts(page, product.id)
data = {"url": product.url, "asin": product.asin, "bloque": True}
raw_path = _save_raw_json(data, product.id)
_create_snapshot(
session,
product,
run,
data,
status="bloque",
raw_json_path=raw_path,
error_message=f"Bloque: {screenshot_path.name} / {html_path.name}",
)
try:
success, _ = _process_product(page, session, product, run, config)
if success:
nb_ok += 1
else:
nb_echec += 1
except Exception as e:
logger.error("Erreur scraping produit {}: {}", product.id, e)
nb_echec += 1
continue
data = extract_product_data(page, product.url)
raw_path = _save_raw_json(data, product.id)
required = ["titre", "prix_actuel", "note"]
missing = [field for field in required if not data.get(field)]
status = "champs_manquants" if missing else "ok"
_create_snapshot(
session,
product,
run,
data,
status=status,
raw_json_path=raw_path,
error_message=", ".join(missing) if missing else None,
)
if missing:
nb_echec += 1
else:
nb_ok += 1
# Anti-blocking delay between products
delay_min, delay_max = config.scrape.delay_range_ms
time.sleep(random.uniform(delay_min, delay_max) / 1000.0)
run.nb_ok = nb_ok
run.nb_echec = nb_echec
_finalize_run(run, session, "succes" if nb_echec == 0 else "partiel")
# Save the session for reuse
_save_storage_state(context)
finally:
# clean shutdown of the browser
context.close()
browser.close()
except Exception: # pragma: no cover
logger.exception("Erreur du scraping global")
except Exception as e:
logger.exception("Erreur du scraping global: {}", e)
_finalize_run(run, session, "erreur")
finally:
session.close()
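For manual testing, the two entry points in this module can be driven directly (a sketch, assuming the database is initialised and Playwright's Chromium is installed via "playwright install chromium"):

from backend.app.scraper.runner import scrape_all, scrape_product

scrape_product(1)   # scrape a single product by id
scrape_all()        # scrape every product flagged as actif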

View File

@@ -10,21 +10,47 @@
"headless": true,
"timeout_ms": 30000,
"retries": 1,
"delay_range_ms": [1000, 3000],
"delay_range_ms": [
1000,
3000
],
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
"viewport": { "width": 1366, "height": 768 },
"viewport": {
"width": 1366,
"height": 768
},
"locale": "fr-FR",
"timezone": "Europe/Paris",
"proxy": null
},
"stores_enabled": ["amazon_fr"],
"stores_enabled": [
"amazon_fr"
],
"taxonomy": {
"categories": ["SSD", "CPU", "GPU", "RAM"],
"categories": [
"SSD",
"CPU",
"GPU",
"RAM",
"Laptop"
],
"types_by_category": {
"SSD": ["NVMe", "SATA"],
"CPU": ["Desktop", "Mobile"],
"GPU": ["Gaming", "Workstation"],
"RAM": ["DDR4", "DDR5"]
"SSD": [
"NVMe",
"SATA"
],
"CPU": [
"Desktop",
"Mobile"
],
"GPU": [
"Gaming",
"Workstation"
],
"RAM": [
"DDR4",
"DDR5"
]
}
}
}
}
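The delay_range_ms pair above is read as a millisecond interval; a sketch of how the runner turns it into a pause between requests (the config file path here is hypothetical):

import json
import random
from pathlib import Path

cfg = json.loads(Path("backend/app/config_backend.json").read_text(encoding="utf-8"))  # path assumed
delay_min, delay_max = cfg["scrape"]["delay_range_ms"]
pause_seconds = random.uniform(delay_min, delay_max) / 1000.0  # e.g. somewhere between 1.0 s and 3.0 s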