"""
FastAPI REST API for PriceWatch (Phase 3).
"""
from __future__ import annotations
import csv
from collections import deque
from datetime import datetime, timezone
import os
from pathlib import Path
from io import StringIO
from typing import Generator, Optional
import httpx
from fastapi import Depends, FastAPI, Header, HTTPException, Response
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from sqlalchemy.exc import IntegrityError, SQLAlchemyError
from sqlalchemy import and_, desc, func
from sqlalchemy.orm import Session
from pricewatch.app.api.schemas import (
    BackendLogEntry,
    ClassificationOptionsOut,
    ClassificationRuleCreate,
    ClassificationRuleOut,
    ClassificationRuleUpdate,
    EnqueueRequest,
    EnqueueResponse,
    HealthStatus,
    PriceHistoryCreate,
    PriceHistoryOut,
    PriceHistoryUpdate,
    ProductCreate,
    ProductHistoryPoint,
    ProductOut,
    ProductUpdate,
    ScheduleRequest,
    ScheduleResponse,
    ScrapeCommitRequest,
    ScrapeCommitResponse,
    ScrapePreviewRequest,
    ScrapePreviewResponse,
    ScrapingLogCreate,
    ScrapingLogOut,
    ScrapingLogUpdate,
    UvicornLogEntry,
    VersionResponse,
    WebhookCreate,
    WebhookOut,
    WebhookTestResponse,
    WebhookUpdate,
)
from pricewatch.app.core.config import get_config
from pricewatch.app.core.logging import get_logger
from pricewatch.app.core.schema import ProductSnapshot
from pricewatch.app.db.connection import check_db_connection, get_session
from pricewatch.app.db.models import ClassificationRule, PriceHistory, Product, ScrapingLog, Webhook
from pricewatch.app.db.repository import ProductRepository
from pricewatch.app.scraping.pipeline import ScrapingPipeline
from pricewatch.app.tasks.scrape import scrape_product
from pricewatch.app.tasks.scheduler import RedisUnavailableError, check_redis_connection, ScrapingScheduler
logger = get_logger("api")
app = FastAPI(title="PriceWatch API", version="0.4.0")

# In-memory backend log buffer for UI debugging.
BACKEND_LOGS: deque[BackendLogEntry] = deque(maxlen=200)
UVICORN_LOG_PATH = Path(
    os.environ.get("PW_UVICORN_LOG_PATH", "/app/logs/uvicorn.log")
)

def get_db_session() -> Generator[Session, None, None]:
    """Dependency: SQLAlchemy session."""
    with get_session(get_config()) as session:
        yield session

def require_token(authorization: Optional[str] = Header(default=None)) -> None:
    """Simple Bearer-token authentication."""
    config = get_config()
    token = config.api_token
    if not token:
        raise HTTPException(status_code=500, detail="API token non configure")
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Token manquant")
    provided = authorization.split("Bearer ")[-1].strip()
    if provided != token:
        raise HTTPException(status_code=403, detail="Token invalide")
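
# Example call (hypothetical host and token; adjust to your deployment):
#   curl -H "Authorization: Bearer $PW_API_TOKEN" http://localhost:8000/products
# A missing header yields 401, a mismatched token 403, per require_token above.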

@app.get("/health", response_model=HealthStatus)
def health_check() -> HealthStatus:
    """Health check DB + Redis."""
    config = get_config()
    return HealthStatus(
        db=check_db_connection(config),
        redis=check_redis_connection(config.redis.url),
    )
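
# Illustrative response, assuming both HealthStatus fields serialize as
# booleans: {"db": true, "redis": true}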

@app.get("/version", response_model=VersionResponse)
def version_info() -> VersionResponse:
    """Expose the API version."""
    return VersionResponse(api_version=app.version)

@app.get("/logs/backend", response_model=list[BackendLogEntry], dependencies=[Depends(require_token)])
def list_backend_logs() -> list[BackendLogEntry]:
    """Expose the in-memory backend log buffer."""
    return list(BACKEND_LOGS)

@app.get("/logs/uvicorn", response_model=list[UvicornLogEntry], dependencies=[Depends(require_token)])
def list_uvicorn_logs(limit: int = 200) -> list[UvicornLogEntry]:
    """Expose the most recent Uvicorn log lines."""
    lines = _read_uvicorn_lines(limit=limit)
    return [UvicornLogEntry(line=line) for line in lines]

@app.get("/products", response_model=list[ProductOut], dependencies=[Depends(require_token)])
def list_products(
    source: Optional[str] = None,
    reference: Optional[str] = None,
    updated_after: Optional[datetime] = None,
    price_min: Optional[float] = None,
    price_max: Optional[float] = None,
    fetched_after: Optional[datetime] = None,
    fetched_before: Optional[datetime] = None,
    stock_status: Optional[str] = None,
    limit: int = 50,
    offset: int = 0,
    session: Session = Depends(get_db_session),
) -> list[ProductOut]:
    """List products with optional filters."""
    latest_price_subquery = (
        session.query(
            PriceHistory.product_id.label("product_id"),
            func.max(PriceHistory.fetched_at).label("latest_fetched_at"),
        )
        .group_by(PriceHistory.product_id)
        .subquery()
    )
    latest_price = (
        session.query(PriceHistory)
        .join(
            latest_price_subquery,
            and_(
                PriceHistory.product_id == latest_price_subquery.c.product_id,
                PriceHistory.fetched_at == latest_price_subquery.c.latest_fetched_at,
            ),
        )
        .subquery()
    )
    query = session.query(Product).outerjoin(latest_price, Product.id == latest_price.c.product_id)
    if source:
        query = query.filter(Product.source == source)
    if reference:
        query = query.filter(Product.reference == reference)
    if updated_after:
        query = query.filter(Product.last_updated_at >= updated_after)
    if price_min is not None:
        query = query.filter(latest_price.c.price >= price_min)
    if price_max is not None:
        query = query.filter(latest_price.c.price <= price_max)
    if fetched_after:
        query = query.filter(latest_price.c.fetched_at >= fetched_after)
    if fetched_before:
        query = query.filter(latest_price.c.fetched_at <= fetched_before)
    if stock_status:
        query = query.filter(latest_price.c.stock_status == stock_status)
    products = query.order_by(desc(Product.last_updated_at)).offset(offset).limit(limit).all()
    return [_product_to_out(session, product) for product in products]
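
# Example filtered listing (illustrative query values):
#   GET /products?source=amazon&price_max=100&stock_status=in_stock&limit=20
# Price/stock filters apply to each product's latest PriceHistory row, as
# selected by the max-fetched_at subquery above.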

@app.post("/products", response_model=ProductOut, dependencies=[Depends(require_token)])
def create_product(
    payload: ProductCreate,
    session: Session = Depends(get_db_session),
) -> ProductOut:
    """Create a product."""
    product = Product(
        source=payload.source,
        reference=payload.reference,
        url=payload.url,
        title=payload.title,
        category=payload.category,
        type=payload.type,
        description=payload.description,
        currency=payload.currency,
        msrp=payload.msrp,
    )
    session.add(product)
    try:
        session.commit()
        session.refresh(product)
    except IntegrityError as exc:
        session.rollback()
        raise HTTPException(status_code=409, detail="Produit deja existant") from exc
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return _product_to_out(session, product)

@app.get("/products/export", dependencies=[Depends(require_token)])
def export_products(
    source: Optional[str] = None,
    reference: Optional[str] = None,
    updated_after: Optional[datetime] = None,
    price_min: Optional[float] = None,
    price_max: Optional[float] = None,
    fetched_after: Optional[datetime] = None,
    fetched_before: Optional[datetime] = None,
    stock_status: Optional[str] = None,
    format: str = "csv",
    limit: int = 500,
    offset: int = 0,
    session: Session = Depends(get_db_session),
) -> Response:
    """Export products as CSV/JSON.

    Registered before GET /products/{product_id}: routes match in declaration
    order, so the static path must come first or "export" would be captured as
    a product_id and rejected with a 422.
    """
    products = list_products(
        source=source,
        reference=reference,
        updated_after=updated_after,
        price_min=price_min,
        price_max=price_max,
        fetched_after=fetched_after,
        fetched_before=fetched_before,
        stock_status=stock_status,
        limit=limit,
        offset=offset,
        session=session,
    )
    rows = [product.model_dump() for product in products]
    fieldnames = list(ProductOut.model_fields.keys())
    return _export_response(rows, fieldnames, "products", format)

@app.get("/products/{product_id}", response_model=ProductOut, dependencies=[Depends(require_token)])
def get_product(
    product_id: int,
    session: Session = Depends(get_db_session),
) -> ProductOut:
    """Product detail plus latest price."""
    product = session.query(Product).filter(Product.id == product_id).one_or_none()
    if not product:
        raise HTTPException(status_code=404, detail="Produit non trouve")
    return _product_to_out(session, product)

@app.patch("/products/{product_id}", response_model=ProductOut, dependencies=[Depends(require_token)])
def update_product(
    product_id: int,
    payload: ProductUpdate,
    session: Session = Depends(get_db_session),
) -> ProductOut:
    """Partially update a product."""
    product = session.query(Product).filter(Product.id == product_id).one_or_none()
    if not product:
        raise HTTPException(status_code=404, detail="Produit non trouve")
    updates = payload.model_dump(exclude_unset=True)
    for key, value in updates.items():
        setattr(product, key, value)
    try:
        session.commit()
        session.refresh(product)
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return _product_to_out(session, product)

@app.get(
    "/classification/rules",
    response_model=list[ClassificationRuleOut],
    dependencies=[Depends(require_token)],
)
def list_classification_rules(
    session: Session = Depends(get_db_session),
) -> list[ClassificationRuleOut]:
    """List classification rules."""
    rules = (
        session.query(ClassificationRule)
        .order_by(ClassificationRule.sort_order, ClassificationRule.id)
        .all()
    )
    return [
        ClassificationRuleOut(
            id=rule.id,
            category=rule.category,
            type=rule.type,
            keywords=rule.keywords or [],
            sort_order=rule.sort_order,
            is_active=rule.is_active,
        )
        for rule in rules
    ]

@app.post(
    "/classification/rules",
    response_model=ClassificationRuleOut,
    dependencies=[Depends(require_token)],
)
def create_classification_rule(
    payload: ClassificationRuleCreate,
    session: Session = Depends(get_db_session),
) -> ClassificationRuleOut:
    """Create a classification rule."""
    rule = ClassificationRule(
        category=payload.category,
        type=payload.type,
        keywords=payload.keywords,
        sort_order=payload.sort_order or 0,
        is_active=True if payload.is_active is None else payload.is_active,
    )
    session.add(rule)
    session.commit()
    session.refresh(rule)
    return ClassificationRuleOut(
        id=rule.id,
        category=rule.category,
        type=rule.type,
        keywords=rule.keywords or [],
        sort_order=rule.sort_order,
        is_active=rule.is_active,
    )
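
# Example rule payload (illustrative values; fields per ClassificationRuleCreate):
#   POST /classification/rules
#   {"category": "gpu", "type": "hardware", "keywords": ["rtx", "radeon"],
#    "sort_order": 10, "is_active": true}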

@app.patch(
    "/classification/rules/{rule_id}",
    response_model=ClassificationRuleOut,
    dependencies=[Depends(require_token)],
)
def update_classification_rule(
    rule_id: int,
    payload: ClassificationRuleUpdate,
    session: Session = Depends(get_db_session),
) -> ClassificationRuleOut:
    """Update a classification rule."""
    rule = session.query(ClassificationRule).filter(ClassificationRule.id == rule_id).one_or_none()
    if not rule:
        raise HTTPException(status_code=404, detail="Regle non trouvee")
    updates = payload.model_dump(exclude_unset=True)
    for key, value in updates.items():
        setattr(rule, key, value)
    session.commit()
    session.refresh(rule)
    return ClassificationRuleOut(
        id=rule.id,
        category=rule.category,
        type=rule.type,
        keywords=rule.keywords or [],
        sort_order=rule.sort_order,
        is_active=rule.is_active,
    )

@app.delete(
    "/classification/rules/{rule_id}",
    dependencies=[Depends(require_token)],
)
def delete_classification_rule(
    rule_id: int,
    session: Session = Depends(get_db_session),
) -> dict[str, str]:
    """Delete a classification rule."""
    rule = session.query(ClassificationRule).filter(ClassificationRule.id == rule_id).one_or_none()
    if not rule:
        raise HTTPException(status_code=404, detail="Regle non trouvee")
    session.delete(rule)
    session.commit()
    return {"status": "deleted"}

@app.get(
    "/classification/options",
    response_model=ClassificationOptionsOut,
    dependencies=[Depends(require_token)],
)
def get_classification_options(
    session: Session = Depends(get_db_session),
) -> ClassificationOptionsOut:
    """Expose the categories and types derived from active rules."""
    rules = (
        session.query(ClassificationRule)
        .filter(ClassificationRule.is_active.is_(True))
        .order_by(ClassificationRule.sort_order, ClassificationRule.id)
        .all()
    )
    categories = sorted({rule.category for rule in rules if rule.category})
    types = sorted({rule.type for rule in rules if rule.type})
    return ClassificationOptionsOut(categories=categories, types=types)

@app.delete("/products/{product_id}", dependencies=[Depends(require_token)])
def delete_product(
    product_id: int,
    session: Session = Depends(get_db_session),
) -> dict[str, str]:
    """Delete a product (cascades to related rows)."""
    product = session.query(Product).filter(Product.id == product_id).one_or_none()
    if not product:
        raise HTTPException(status_code=404, detail="Produit non trouve")
    session.delete(product)
    try:
        session.commit()
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return {"status": "deleted"}

@app.get(
    "/products/{product_id}/prices",
    response_model=list[PriceHistoryOut],
    dependencies=[Depends(require_token)],
)
def list_prices(
    product_id: int,
    price_min: Optional[float] = None,
    price_max: Optional[float] = None,
    fetched_after: Optional[datetime] = None,
    fetched_before: Optional[datetime] = None,
    fetch_status: Optional[str] = None,
    limit: int = 50,
    offset: int = 0,
    session: Session = Depends(get_db_session),
) -> list[PriceHistoryOut]:
    """Price history for a product."""
    query = session.query(PriceHistory).filter(PriceHistory.product_id == product_id)
    if price_min is not None:
        query = query.filter(PriceHistory.price >= price_min)
    if price_max is not None:
        query = query.filter(PriceHistory.price <= price_max)
    if fetched_after:
        query = query.filter(PriceHistory.fetched_at >= fetched_after)
    if fetched_before:
        query = query.filter(PriceHistory.fetched_at <= fetched_before)
    if fetch_status:
        query = query.filter(PriceHistory.fetch_status == fetch_status)
    prices = query.order_by(desc(PriceHistory.fetched_at)).offset(offset).limit(limit).all()
    return [_price_to_out(price) for price in prices]

@app.post("/prices", response_model=PriceHistoryOut, dependencies=[Depends(require_token)])
def create_price(
    payload: PriceHistoryCreate,
    session: Session = Depends(get_db_session),
) -> PriceHistoryOut:
    """Add a price-history entry."""
    price = PriceHistory(
        product_id=payload.product_id,
        price=payload.price,
        shipping_cost=payload.shipping_cost,
        stock_status=payload.stock_status,
        fetch_method=payload.fetch_method,
        fetch_status=payload.fetch_status,
        fetched_at=payload.fetched_at,
    )
    session.add(price)
    try:
        session.commit()
        session.refresh(price)
    except IntegrityError as exc:
        session.rollback()
        raise HTTPException(status_code=409, detail="Entree prix deja existante") from exc
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return _price_to_out(price)

@app.patch("/prices/{price_id}", response_model=PriceHistoryOut, dependencies=[Depends(require_token)])
def update_price(
    price_id: int,
    payload: PriceHistoryUpdate,
    session: Session = Depends(get_db_session),
) -> PriceHistoryOut:
    """Update a price entry."""
    price = session.query(PriceHistory).filter(PriceHistory.id == price_id).one_or_none()
    if not price:
        raise HTTPException(status_code=404, detail="Entree prix non trouvee")
    updates = payload.model_dump(exclude_unset=True)
    for key, value in updates.items():
        setattr(price, key, value)
    try:
        session.commit()
        session.refresh(price)
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return _price_to_out(price)

@app.delete("/prices/{price_id}", dependencies=[Depends(require_token)])
def delete_price(
    price_id: int,
    session: Session = Depends(get_db_session),
) -> dict[str, str]:
    """Delete a price entry."""
    price = session.query(PriceHistory).filter(PriceHistory.id == price_id).one_or_none()
    if not price:
        raise HTTPException(status_code=404, detail="Entree prix non trouvee")
    session.delete(price)
    try:
        session.commit()
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return {"status": "deleted"}

@app.get("/logs", response_model=list[ScrapingLogOut], dependencies=[Depends(require_token)])
def list_logs(
    source: Optional[str] = None,
    fetch_status: Optional[str] = None,
    fetched_after: Optional[datetime] = None,
    fetched_before: Optional[datetime] = None,
    limit: int = 50,
    offset: int = 0,
    session: Session = Depends(get_db_session),
) -> list[ScrapingLogOut]:
    """List scraping logs."""
    query = session.query(ScrapingLog)
    if source:
        query = query.filter(ScrapingLog.source == source)
    if fetch_status:
        query = query.filter(ScrapingLog.fetch_status == fetch_status)
    if fetched_after:
        query = query.filter(ScrapingLog.fetched_at >= fetched_after)
    if fetched_before:
        query = query.filter(ScrapingLog.fetched_at <= fetched_before)
    logs = query.order_by(desc(ScrapingLog.fetched_at)).offset(offset).limit(limit).all()
    return [_log_to_out(log) for log in logs]

@app.post("/logs", response_model=ScrapingLogOut, dependencies=[Depends(require_token)])
def create_log(
    payload: ScrapingLogCreate,
    session: Session = Depends(get_db_session),
) -> ScrapingLogOut:
    """Create a scraping log."""
    log_entry = ScrapingLog(
        product_id=payload.product_id,
        url=payload.url,
        source=payload.source,
        reference=payload.reference,
        fetch_method=payload.fetch_method,
        fetch_status=payload.fetch_status,
        fetched_at=payload.fetched_at,
        duration_ms=payload.duration_ms,
        html_size_bytes=payload.html_size_bytes,
        errors=payload.errors,
        notes=payload.notes,
    )
    session.add(log_entry)
    try:
        session.commit()
        session.refresh(log_entry)
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return _log_to_out(log_entry)

@app.patch("/logs/{log_id}", response_model=ScrapingLogOut, dependencies=[Depends(require_token)])
def update_log(
    log_id: int,
    payload: ScrapingLogUpdate,
    session: Session = Depends(get_db_session),
) -> ScrapingLogOut:
    """Update a log."""
    log_entry = session.query(ScrapingLog).filter(ScrapingLog.id == log_id).one_or_none()
    if not log_entry:
        raise HTTPException(status_code=404, detail="Log non trouve")
    updates = payload.model_dump(exclude_unset=True)
    for key, value in updates.items():
        setattr(log_entry, key, value)
    try:
        session.commit()
        session.refresh(log_entry)
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return _log_to_out(log_entry)

@app.delete("/logs/{log_id}", dependencies=[Depends(require_token)])
def delete_log(
    log_id: int,
    session: Session = Depends(get_db_session),
) -> dict[str, str]:
    """Delete a log."""
    log_entry = session.query(ScrapingLog).filter(ScrapingLog.id == log_id).one_or_none()
    if not log_entry:
        raise HTTPException(status_code=404, detail="Log non trouve")
    session.delete(log_entry)
    try:
        session.commit()
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return {"status": "deleted"}

@app.get("/prices/export", dependencies=[Depends(require_token)])
def export_prices(
    product_id: Optional[int] = None,
    price_min: Optional[float] = None,
    price_max: Optional[float] = None,
    fetched_after: Optional[datetime] = None,
    fetched_before: Optional[datetime] = None,
    fetch_status: Optional[str] = None,
    format: str = "csv",
    limit: int = 500,
    offset: int = 0,
    session: Session = Depends(get_db_session),
) -> Response:
    """Export price history as CSV/JSON."""
    query = session.query(PriceHistory)
    if product_id is not None:
        query = query.filter(PriceHistory.product_id == product_id)
    if price_min is not None:
        query = query.filter(PriceHistory.price >= price_min)
    if price_max is not None:
        query = query.filter(PriceHistory.price <= price_max)
    if fetched_after:
        query = query.filter(PriceHistory.fetched_at >= fetched_after)
    if fetched_before:
        query = query.filter(PriceHistory.fetched_at <= fetched_before)
    if fetch_status:
        query = query.filter(PriceHistory.fetch_status == fetch_status)
    prices = query.order_by(desc(PriceHistory.fetched_at)).offset(offset).limit(limit).all()
    rows = [_price_to_out(price).model_dump() for price in prices]
    fieldnames = list(PriceHistoryOut.model_fields.keys())
    return _export_response(rows, fieldnames, "prices", format)

@app.get("/logs/export", dependencies=[Depends(require_token)])
def export_logs(
    source: Optional[str] = None,
    fetch_status: Optional[str] = None,
    fetched_after: Optional[datetime] = None,
    fetched_before: Optional[datetime] = None,
    format: str = "csv",
    limit: int = 500,
    offset: int = 0,
    session: Session = Depends(get_db_session),
) -> Response:
    """Export scraping logs as CSV/JSON."""
    logs = list_logs(
        source=source,
        fetch_status=fetch_status,
        fetched_after=fetched_after,
        fetched_before=fetched_before,
        limit=limit,
        offset=offset,
        session=session,
    )
    rows = [log.model_dump() for log in logs]
    fieldnames = list(ScrapingLogOut.model_fields.keys())
    return _export_response(rows, fieldnames, "logs", format)
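
# Example exports (illustrative): the format parameter only selects the
# serializer; filters mirror the matching list endpoints.
#   GET /logs/export?format=json&limit=100  -> "logs.json" attachment
#   GET /prices/export?product_id=42        -> "prices.csv" attachment
# The Content-Disposition header is set by _export_response below.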

@app.get("/webhooks", response_model=list[WebhookOut], dependencies=[Depends(require_token)])
def list_webhooks(
    event: Optional[str] = None,
    enabled: Optional[bool] = None,
    limit: int = 50,
    offset: int = 0,
    session: Session = Depends(get_db_session),
) -> list[WebhookOut]:
    """List webhooks."""
    query = session.query(Webhook)
    if event:
        query = query.filter(Webhook.event == event)
    if enabled is not None:
        query = query.filter(Webhook.enabled == enabled)
    webhooks = query.order_by(desc(Webhook.created_at)).offset(offset).limit(limit).all()
    return [_webhook_to_out(webhook) for webhook in webhooks]

@app.post("/webhooks", response_model=WebhookOut, dependencies=[Depends(require_token)])
def create_webhook(
    payload: WebhookCreate,
    session: Session = Depends(get_db_session),
) -> WebhookOut:
    """Create a webhook."""
    webhook = Webhook(
        event=payload.event,
        url=payload.url,
        enabled=payload.enabled,
        secret=payload.secret,
    )
    session.add(webhook)
    try:
        session.commit()
        session.refresh(webhook)
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return _webhook_to_out(webhook)

@app.patch("/webhooks/{webhook_id}", response_model=WebhookOut, dependencies=[Depends(require_token)])
def update_webhook(
    webhook_id: int,
    payload: WebhookUpdate,
    session: Session = Depends(get_db_session),
) -> WebhookOut:
    """Update a webhook."""
    webhook = session.query(Webhook).filter(Webhook.id == webhook_id).one_or_none()
    if not webhook:
        raise HTTPException(status_code=404, detail="Webhook non trouve")
    updates = payload.model_dump(exclude_unset=True)
    for key, value in updates.items():
        setattr(webhook, key, value)
    try:
        session.commit()
        session.refresh(webhook)
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return _webhook_to_out(webhook)

@app.delete("/webhooks/{webhook_id}", dependencies=[Depends(require_token)])
def delete_webhook(
    webhook_id: int,
    session: Session = Depends(get_db_session),
) -> dict[str, str]:
    """Delete a webhook."""
    webhook = session.query(Webhook).filter(Webhook.id == webhook_id).one_or_none()
    if not webhook:
        raise HTTPException(status_code=404, detail="Webhook non trouve")
    session.delete(webhook)
    try:
        session.commit()
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return {"status": "deleted"}

@app.post(
    "/webhooks/{webhook_id}/test",
    response_model=WebhookTestResponse,
    dependencies=[Depends(require_token)],
)
def send_webhook_test(
    webhook_id: int,
    session: Session = Depends(get_db_session),
) -> WebhookTestResponse:
    """Send a test event."""
    webhook = session.query(Webhook).filter(Webhook.id == webhook_id).one_or_none()
    if not webhook:
        raise HTTPException(status_code=404, detail="Webhook non trouve")
    if not webhook.enabled:
        raise HTTPException(status_code=409, detail="Webhook desactive")
    payload = {"message": "test webhook", "webhook_id": webhook.id}
    _send_webhook(webhook, "test", payload)
    return WebhookTestResponse(status="sent")

@app.post("/enqueue", response_model=EnqueueResponse, dependencies=[Depends(require_token)])
def enqueue_job(payload: EnqueueRequest) -> EnqueueResponse:
    """Enqueue an immediate job."""
    try:
        scheduler = ScrapingScheduler(get_config())
        job = scheduler.enqueue_immediate(
            payload.url,
            use_playwright=payload.use_playwright,
            save_db=payload.save_db,
        )
        return EnqueueResponse(job_id=job.id)
    except RedisUnavailableError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc

@app.post("/schedule", response_model=ScheduleResponse, dependencies=[Depends(require_token)])
def schedule_job(payload: ScheduleRequest) -> ScheduleResponse:
    """Schedule a recurring job."""
    try:
        scheduler = ScrapingScheduler(get_config())
        job_info = scheduler.schedule_product(
            payload.url,
            interval_hours=payload.interval_hours,
            use_playwright=payload.use_playwright,
            save_db=payload.save_db,
        )
        return ScheduleResponse(job_id=job_info.job_id, next_run=job_info.next_run)
    except RedisUnavailableError as exc:
        raise HTTPException(status_code=503, detail=str(exc)) from exc
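
# Example job payloads (illustrative values; fields per EnqueueRequest and
# ScheduleRequest):
#   POST /enqueue  {"url": "https://example.com/p/123", "use_playwright": false, "save_db": true}
#   POST /schedule {"url": "https://example.com/p/123", "interval_hours": 6, "use_playwright": false, "save_db": true}
# Both endpoints answer 503 when Redis is unreachable.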

@app.post("/scrape/preview", response_model=ScrapePreviewResponse, dependencies=[Depends(require_token)])
def preview_scrape(payload: ScrapePreviewRequest) -> ScrapePreviewResponse:
    """Scrape a product without persisting, for preview."""
    _add_backend_log("INFO", f"Preview scraping: {payload.url}")
    result = scrape_product(
        payload.url,
        use_playwright=payload.use_playwright,
        save_db=False,
    )
    snapshot = result.get("snapshot")
    if snapshot is None:
        _add_backend_log("ERROR", f"Preview scraping KO: {payload.url}")
        return ScrapePreviewResponse(success=False, snapshot=None, error=result.get("error"))
    config = get_config()
    if config.enable_db:
        try:
            with get_session(config) as session:
                ProductRepository(session).apply_classification(snapshot)
        except Exception as exc:
            snapshot.add_note(f"Classification ignoree: {exc}")
    return ScrapePreviewResponse(
        success=bool(result.get("success")),
        snapshot=snapshot.model_dump(mode="json"),
        error=result.get("error"),
    )

@app.post("/scrape/commit", response_model=ScrapeCommitResponse, dependencies=[Depends(require_token)])
def commit_scrape(payload: ScrapeCommitRequest) -> ScrapeCommitResponse:
    """Persist a previewed snapshot."""
    try:
        snapshot = ProductSnapshot.model_validate(payload.snapshot)
    except Exception as exc:
        _add_backend_log("ERROR", "Commit scraping KO: snapshot invalide")
        raise HTTPException(status_code=400, detail="Snapshot invalide") from exc
    product_id = ScrapingPipeline(config=get_config()).process_snapshot(
        snapshot, save_to_db=True, apply_classification=False
    )
    _add_backend_log("INFO", f"Commit scraping OK: product_id={product_id}")
    return ScrapeCommitResponse(success=True, product_id=product_id)
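
# Typical two-step flow (illustrative): POST /scrape/preview with
#   {"url": "https://example.com/p/123", "use_playwright": true}
# returns a snapshot without touching the DB; the client then POSTs the
# (possibly edited) snapshot to /scrape/commit, which persists it and returns
# the product_id.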

def _export_response(
    rows: list[dict[str, object]],
    fieldnames: list[str],
    filename_prefix: str,
    format: str,
) -> Response:
    """Build a CSV/JSON response with a stable attachment filename."""
    if format not in {"csv", "json"}:
        raise HTTPException(status_code=400, detail="Format invalide (csv ou json)")
    headers = {"Content-Disposition": f'attachment; filename="{filename_prefix}.{format}"'}
    if format == "json":
        return JSONResponse(content=jsonable_encoder(rows), headers=headers)
    return _to_csv_response(rows, fieldnames, headers)

def _to_csv_response(
    rows: list[dict[str, object]],
    fieldnames: list[str],
    headers: dict[str, str],
) -> Response:
    buffer = StringIO()
    writer = csv.DictWriter(buffer, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)
    return Response(content=buffer.getvalue(), media_type="text/csv", headers=headers)

def _send_webhook(webhook: Webhook, event: str, payload: dict[str, object]) -> None:
    """Send a webhook with explicit error handling."""
    headers = {"Content-Type": "application/json"}
    if webhook.secret:
        headers["X-Webhook-Secret"] = webhook.secret
    try:
        response = httpx.post(
            webhook.url,
            json={"event": event, "payload": payload},
            headers=headers,
            timeout=5.0,
        )
        response.raise_for_status()
    except httpx.HTTPError as exc:
        logger.error("Erreur webhook", extra={"url": webhook.url, "event": event, "error": str(exc)})
        raise HTTPException(status_code=502, detail="Echec webhook") from exc
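
# Delivered body, as built above: {"event": "<event>", "payload": {...}},
# plus an X-Webhook-Secret header when the webhook has a secret. A receiver
# would typically compare that header against its stored secret (assumption:
# plain shared-secret comparison, not an HMAC signature).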

def _add_backend_log(level: str, message: str) -> None:
    BACKEND_LOGS.append(
        BackendLogEntry(
            time=datetime.now(timezone.utc),
            level=level,
            message=message,
        )
    )

def _read_uvicorn_lines(limit: int = 200) -> list[str]:
    """Read the last lines of the Uvicorn log, if available."""
    if limit <= 0:
        return []
    try:
        if not UVICORN_LOG_PATH.exists():
            return []
        with UVICORN_LOG_PATH.open("r", encoding="utf-8", errors="ignore") as handle:
            lines = handle.readlines()
        return [line.rstrip("\n") for line in lines[-limit:]]
    except Exception:
        return []

PRODUCT_HISTORY_LIMIT = 12

def _product_to_out(session: Session, product: Product) -> ProductOut:
    """Map a Product together with its latest price."""
    latest = (
        session.query(PriceHistory)
        .filter(PriceHistory.product_id == product.id)
        .order_by(desc(PriceHistory.fetched_at))
        .first()
    )
    images = [image.image_url for image in product.images]
    specs = {spec.spec_key: spec.spec_value for spec in product.specs}
    main_image = images[0] if images else None
    gallery_images = images[1:] if len(images) > 1 else []
    asin = product.reference if product.source == "amazon" else None
    history_rows = (
        session.query(PriceHistory)
        .filter(PriceHistory.product_id == product.id, PriceHistory.price.isnot(None))
        .order_by(desc(PriceHistory.fetched_at))
        .limit(PRODUCT_HISTORY_LIMIT)
        .all()
    )
    history_points = [
        ProductHistoryPoint(price=float(row.price), fetched_at=row.fetched_at)
        for row in reversed(history_rows)
        if row.price is not None
    ]
    return ProductOut(
        id=product.id,
        source=product.source,
        reference=product.reference,
        asin=asin,
        url=product.url,
        title=product.title,
        category=product.category,
        type=product.type,
        description=product.description,
        currency=product.currency,
        msrp=float(product.msrp) if product.msrp is not None else None,
        rating_value=float(product.rating_value) if product.rating_value is not None else None,
        rating_count=product.rating_count,
        amazon_choice=product.amazon_choice,
        amazon_choice_label=product.amazon_choice_label,
        discount_text=product.discount_text,
        stock_text=product.stock_text,
        in_stock=product.in_stock,
        model_number=product.model_number,
        model_name=product.model_name,
        first_seen_at=product.first_seen_at,
        last_updated_at=product.last_updated_at,
        latest_price=float(latest.price) if latest and latest.price is not None else None,
        latest_shipping_cost=(
            float(latest.shipping_cost) if latest and latest.shipping_cost is not None else None
        ),
        latest_stock_status=latest.stock_status if latest else None,
        latest_fetched_at=latest.fetched_at if latest else None,
        images=images,
        main_image=main_image,
        gallery_images=gallery_images,
        specs=specs,
        discount_amount=None,
        discount_percent=None,
        history=history_points,
    )

def _price_to_out(price: PriceHistory) -> PriceHistoryOut:
    return PriceHistoryOut(
        id=price.id,
        product_id=price.product_id,
        price=float(price.price) if price.price is not None else None,
        shipping_cost=float(price.shipping_cost) if price.shipping_cost is not None else None,
        stock_status=price.stock_status,
        fetch_method=price.fetch_method,
        fetch_status=price.fetch_status,
        fetched_at=price.fetched_at,
    )

def _log_to_out(log: ScrapingLog) -> ScrapingLogOut:
    return ScrapingLogOut(
        id=log.id,
        product_id=log.product_id,
        url=log.url,
        source=log.source,
        reference=log.reference,
        fetch_method=log.fetch_method,
        fetch_status=log.fetch_status,
        fetched_at=log.fetched_at,
        duration_ms=log.duration_ms,
        html_size_bytes=log.html_size_bytes,
        errors=log.errors,
        notes=log.notes,
    )

def _webhook_to_out(webhook: Webhook) -> WebhookOut:
    return WebhookOut(
        id=webhook.id,
        event=webhook.event,
        url=webhook.url,
        enabled=webhook.enabled,
        secret=webhook.secret,
        created_at=webhook.created_at,
    )