feat: auto-scrape on product creation and update product data (Step 4)

- Add automatic scraping when creating a new product
- Update product title and image from scraped data
- Add GET /products/{id}/snapshots endpoint for price history
- Add list_snapshots and get_latest_snapshot to CRUD

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-18 19:33:04 +01:00
parent 744d16c2c5
commit 4ff5d3ee79
3 changed files with 59 additions and 4 deletions

View File

@@ -1,10 +1,11 @@
from __future__ import annotations from __future__ import annotations
from fastapi import APIRouter, Depends, HTTPException, status from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from backend.app.api.deps import get_db from backend.app.api.deps import get_db
from backend.app.db import crud, schemas from backend.app.db import crud, schemas
from backend.app.scraper.runner import scrape_product
router = APIRouter(prefix="/products", tags=["products"]) router = APIRouter(prefix="/products", tags=["products"])
@@ -16,9 +17,15 @@ def list_products(skip: int = 0, limit: int = 50, db: Session = Depends(get_db))
@router.post("", response_model=schemas.ProductRead, status_code=status.HTTP_201_CREATED) @router.post("", response_model=schemas.ProductRead, status_code=status.HTTP_201_CREATED)
def create_product(payload: schemas.ProductCreate, db: Session = Depends(get_db)) -> schemas.ProductRead: def create_product(
# création de produit rigoureuse via Pydantic payload: schemas.ProductCreate,
return crud.create_product(db, payload) background_tasks: BackgroundTasks,
db: Session = Depends(get_db),
) -> schemas.ProductRead:
product = crud.create_product(db, payload)
# Déclenche automatiquement le scraping après création
background_tasks.add_task(scrape_product, product.id)
return product
@router.get("/{product_id}", response_model=schemas.ProductRead) @router.get("/{product_id}", response_model=schemas.ProductRead)
@@ -45,3 +52,15 @@ def delete_product(product_id: int, db: Session = Depends(get_db)) -> None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Produit introuvable") raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Produit introuvable")
# suppression définitive en base # suppression définitive en base
crud.remove_product(db, product) crud.remove_product(db, product)
@router.get("/{product_id}/snapshots", response_model=list[schemas.ProductSnapshotRead])
def list_snapshots(
    product_id: int,
    limit: int = 30,
    db: Session = Depends(get_db),
) -> list[schemas.ProductSnapshotRead]:
    """Return the price-history snapshots for a product, newest first.

    Raises a 404 when the product id does not exist; `limit` caps the
    number of snapshots returned (default 30).
    """
    if crud.get_product(db, product_id) is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Produit introuvable")
    return crud.list_snapshots(db, product_id, limit=limit)

View File

@@ -43,3 +43,22 @@ def update_product(db: Session, product: models.Product, changes: schemas.Produc
def remove_product(db: Session, product: models.Product) -> None: def remove_product(db: Session, product: models.Product) -> None:
db.delete(product) db.delete(product)
db.commit() db.commit()
def list_snapshots(db: Session, product_id: int, limit: int = 30) -> list[models.ProductSnapshot]:
    """Return up to `limit` snapshots for a product, most recent first."""
    query = db.query(models.ProductSnapshot)
    query = query.filter(models.ProductSnapshot.produit_id == product_id)
    # Newest first so the API serves the most recent history by default.
    query = query.order_by(models.ProductSnapshot.scrape_le.desc())
    return query.limit(limit).all()
def get_latest_snapshot(db: Session, product_id: int) -> models.ProductSnapshot | None:
    """Return the most recent snapshot for a product, or None if it has none."""
    query = (
        db.query(models.ProductSnapshot)
        .filter(models.ProductSnapshot.produit_id == product_id)
        .order_by(models.ProductSnapshot.scrape_le.desc())
    )
    return query.first()

View File

@@ -53,6 +53,20 @@ def _save_debug_artifacts(page, product_id: int) -> tuple[Path, Path]:
return screenshot_path, html_path return screenshot_path, html_path
def _update_product_from_scrape(
session: Session,
product: models.Product,
data: dict,
) -> None:
"""Met à jour le produit avec les données scrappées (titre, image)."""
if data.get("titre") and not product.titre:
product.titre = data["titre"]
if data.get("url_image_principale") and not product.url_image:
product.url_image = data["url_image_principale"]
session.add(product)
session.commit()
def _create_snapshot( def _create_snapshot(
session: Session, session: Session,
product: models.Product, product: models.Product,
@@ -62,6 +76,9 @@ def _create_snapshot(
raw_json_path: Path | None, raw_json_path: Path | None,
error_message: str | None = None, error_message: str | None = None,
) -> None: ) -> None:
# Mettre à jour le produit avec titre/image si manquants
_update_product_from_scrape(session, product, data)
snapshot = models.ProductSnapshot( snapshot = models.ProductSnapshot(
produit_id=product.id, produit_id=product.id,
run_scrap_id=run.id, run_scrap_id=run.id,