feat: auto-scrape on product creation and update product data (Step 4)
- Add automatic scraping when creating a new product
- Update product title and image from scraped data
- Add GET /products/{id}/snapshots endpoint for price history
- Add list_snapshots and get_latest_snapshot to CRUD
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,10 +1,11 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, status
|
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from backend.app.api.deps import get_db
|
from backend.app.api.deps import get_db
|
||||||
from backend.app.db import crud, schemas
|
from backend.app.db import crud, schemas
|
||||||
|
from backend.app.scraper.runner import scrape_product
|
||||||
|
|
||||||
router = APIRouter(prefix="/products", tags=["products"])
|
router = APIRouter(prefix="/products", tags=["products"])
|
||||||
|
|
||||||
@@ -16,9 +17,15 @@ def list_products(skip: int = 0, limit: int = 50, db: Session = Depends(get_db))
|
|||||||
|
|
||||||
|
|
||||||
@router.post("", response_model=schemas.ProductRead, status_code=status.HTTP_201_CREATED)
def create_product(
    payload: schemas.ProductCreate,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
) -> schemas.ProductRead:
    # Persist the product first: the scraper is handed the id of the stored row.
    created = crud.create_product(db, payload)

    # Automatically trigger a first scrape for the new product. It is queued
    # as a background task instead of being run inline in this handler.
    background_tasks.add_task(scrape_product, created.id)

    return created
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{product_id}", response_model=schemas.ProductRead)
|
@router.get("/{product_id}", response_model=schemas.ProductRead)
|
||||||
@@ -45,3 +52,15 @@ def delete_product(product_id: int, db: Session = Depends(get_db)) -> None:
|
|||||||
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Produit introuvable")
|
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Produit introuvable")
|
||||||
# suppression définitive en base
|
# suppression définitive en base
|
||||||
crud.remove_product(db, product)
|
crud.remove_product(db, product)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{product_id}/snapshots", response_model=list[schemas.ProductSnapshotRead])
def list_snapshots(
    product_id: int,
    limit: int = 30,
    db: Session = Depends(get_db),
) -> list[schemas.ProductSnapshotRead]:
    # Price history for one product: at most `limit` snapshots, as returned
    # by crud.list_snapshots.
    if crud.get_product(db, product_id):
        return crud.list_snapshots(db, product_id, limit=limit)

    # Unknown product id: answer 404 rather than an empty history.
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Produit introuvable")
|
||||||
|
|||||||
@@ -43,3 +43,22 @@ def update_product(db: Session, product: models.Product, changes: schemas.Produc
|
|||||||
def remove_product(db: Session, product: models.Product) -> None:
    """Delete ``product`` through ``db`` and commit the session."""
    db.delete(product)
    db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def list_snapshots(db: Session, product_id: int, limit: int = 30) -> list[models.ProductSnapshot]:
    """Return up to ``limit`` snapshots of a product, most recently scraped first.

    Args:
        db: Session used to run the query.
        product_id: Value matched against ``ProductSnapshot.produit_id``.
        limit: Maximum number of rows requested from the database.

    Returns:
        The matching snapshots ordered by ``scrape_le`` descending; an empty
        list when the product has no snapshot.
    """
    snapshot_model = models.ProductSnapshot

    for_product = db.query(snapshot_model).filter(snapshot_model.produit_id == product_id)
    newest_first = for_product.order_by(snapshot_model.scrape_le.desc())
    return newest_first.limit(limit).all()
|
||||||
|
|
||||||
|
|
||||||
|
def get_latest_snapshot(db: Session, product_id: int) -> models.ProductSnapshot | None:
    """Return the most recently scraped snapshot of a product, if any.

    Args:
        db: Session used to run the query.
        product_id: Value matched against ``ProductSnapshot.produit_id``.

    Returns:
        The snapshot with the greatest ``scrape_le`` for that product, or
        ``None`` when no snapshot matches.
    """
    snapshot_model = models.ProductSnapshot

    for_product = db.query(snapshot_model).filter(snapshot_model.produit_id == product_id)
    newest_first = for_product.order_by(snapshot_model.scrape_le.desc())
    return newest_first.first()
|
||||||
|
|||||||
@@ -53,6 +53,20 @@ def _save_debug_artifacts(page, product_id: int) -> tuple[Path, Path]:
|
|||||||
return screenshot_path, html_path
|
return screenshot_path, html_path
|
||||||
|
|
||||||
|
|
||||||
|
def _update_product_from_scrape(
    session: Session,
    product: models.Product,
    data: dict,
) -> None:
    """Fill in the product's missing title and image from scraped data.

    A field is only written when the scraped value is truthy and the product's
    current value is falsy, so values already present are never overwritten.
    The product is added to the session and the session is committed on every
    call, whether or not a field changed.

    Args:
        session: Session that receives the product and is committed.
        product: Product whose ``titre`` / ``url_image`` may be filled in.
        data: Scraped values; the keys read are ``"titre"`` and
            ``"url_image_principale"``.
    """
    # (key in the scraped data, attribute on the product)
    field_map = (
        ("titre", "titre"),
        ("url_image_principale", "url_image"),
    )

    for scraped_key, attribute in field_map:
        scraped_value = data.get(scraped_key)
        if scraped_value and not getattr(product, attribute):
            setattr(product, attribute, scraped_value)

    session.add(product)
    session.commit()
|
||||||
|
|
||||||
|
|
||||||
def _create_snapshot(
|
def _create_snapshot(
|
||||||
session: Session,
|
session: Session,
|
||||||
product: models.Product,
|
product: models.Product,
|
||||||
@@ -62,6 +76,9 @@ def _create_snapshot(
|
|||||||
raw_json_path: Path | None,
|
raw_json_path: Path | None,
|
||||||
error_message: str | None = None,
|
error_message: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
# Mettre à jour le produit avec titre/image si manquants
|
||||||
|
_update_product_from_scrape(session, product, data)
|
||||||
|
|
||||||
snapshot = models.ProductSnapshot(
|
snapshot = models.ProductSnapshot(
|
||||||
produit_id=product.id,
|
produit_id=product.id,
|
||||||
run_scrap_id=run.id,
|
run_scrap_id=run.id,
|
||||||
|
|||||||
Reference in New Issue
Block a user