"""
|
|
API REST FastAPI pour PriceWatch (Phase 3).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
from collections import deque
|
|
from datetime import datetime, timezone
|
|
import os
|
|
from pathlib import Path
|
|
from io import StringIO
|
|
from typing import Generator, Optional
|
|
|
|
import httpx
|
|
from fastapi import Depends, FastAPI, Header, HTTPException, Response
|
|
from fastapi.encoders import jsonable_encoder
|
|
from fastapi.responses import JSONResponse
|
|
from sqlalchemy.exc import IntegrityError, SQLAlchemyError
|
|
from sqlalchemy import and_, desc, func
|
|
from sqlalchemy.orm import Session
|
|
|
|
from pricewatch.app.api.schemas import (
    BackendLogEntry,
    ClassificationOptionsOut,
    ClassificationRuleCreate,
    ClassificationRuleOut,
    ClassificationRuleUpdate,
    EnqueueRequest,
    EnqueueResponse,
    HealthStatus,
    PriceHistoryCreate,
    PriceHistoryOut,
    PriceHistoryUpdate,
    ProductCreate,
    ProductHistoryPoint,
    ProductOut,
    ProductUpdate,
    ScheduleRequest,
    ScheduleResponse,
    ScrapeCommitRequest,
    ScrapeCommitResponse,
    ScrapePreviewRequest,
    ScrapePreviewResponse,
    ScrapingLogCreate,
    ScrapingLogOut,
    ScrapingLogUpdate,
    UvicornLogEntry,
    VersionResponse,
    WebhookCreate,
    WebhookOut,
    WebhookTestResponse,
    WebhookUpdate,
)
from pricewatch.app.core.config import get_config
from pricewatch.app.core.logging import get_logger
from pricewatch.app.core.schema import ProductSnapshot
from pricewatch.app.db.connection import check_db_connection, get_session
from pricewatch.app.db.models import ClassificationRule, PriceHistory, Product, ScrapingLog, Webhook
from pricewatch.app.db.repository import ProductRepository
from pricewatch.app.scraping.pipeline import ScrapingPipeline
from pricewatch.app.tasks.scrape import scrape_product
from pricewatch.app.tasks.scheduler import RedisUnavailableError, check_redis_connection, ScrapingScheduler
logger = get_logger("api")
|
|
|
|
app = FastAPI(title="PriceWatch API", version="0.4.0")
|
|
|
|
# Buffer de logs backend en memoire pour debug UI.
|
|
BACKEND_LOGS = deque(maxlen=200)
|
|
|
|
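# Uvicorn log file served by GET /logs/uvicorn; the location can be overridden
# with the PW_UVICORN_LOG_PATH environment variable.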
UVICORN_LOG_PATH = Path(
    os.environ.get("PW_UVICORN_LOG_PATH", "/app/logs/uvicorn.log")
)


def get_db_session() -> Generator[Session, None, None]:
    """Dependency: yield a SQLAlchemy session."""
    with get_session(get_config()) as session:
        yield session


def require_token(authorization: Optional[str] = Header(default=None)) -> None:
    """Simple Bearer-token authentication."""
    config = get_config()
    token = config.api_token
    if not token:
        raise HTTPException(status_code=500, detail="API token non configure")

    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Token manquant")
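
    # Everything after the "Bearer " prefix is treated as the token; surrounding
    # whitespace is stripped before comparison.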
    provided = authorization.split("Bearer ")[-1].strip()
    if provided != token:
        raise HTTPException(status_code=403, detail="Token invalide")
@app.get("/health", response_model=HealthStatus)
|
|
def health_check() -> HealthStatus:
|
|
"""Health check DB + Redis."""
|
|
config = get_config()
|
|
return HealthStatus(
|
|
db=check_db_connection(config),
|
|
redis=check_redis_connection(config.redis.url),
|
|
)
|
|
|
|
|
|
@app.get("/version", response_model=VersionResponse)
|
|
def version_info() -> VersionResponse:
|
|
"""Expose la version API."""
|
|
return VersionResponse(api_version=app.version)
|
|
|
|
|
|
@app.get("/logs/backend", response_model=list[BackendLogEntry], dependencies=[Depends(require_token)])
|
|
def list_backend_logs() -> list[BackendLogEntry]:
|
|
"""Expose un buffer de logs backend."""
|
|
return list(BACKEND_LOGS)
|
|
|
|
|
|
@app.get("/logs/uvicorn", response_model=list[UvicornLogEntry], dependencies=[Depends(require_token)])
|
|
def list_uvicorn_logs(limit: int = 200) -> list[UvicornLogEntry]:
|
|
"""Expose les dernieres lignes du log Uvicorn."""
|
|
lines = _read_uvicorn_lines(limit=limit)
|
|
return [UvicornLogEntry(line=line) for line in lines]
|
|
|
|
|
|
@app.get("/products", response_model=list[ProductOut], dependencies=[Depends(require_token)])
|
|
def list_products(
|
|
source: Optional[str] = None,
|
|
reference: Optional[str] = None,
|
|
updated_after: Optional[datetime] = None,
|
|
price_min: Optional[float] = None,
|
|
price_max: Optional[float] = None,
|
|
fetched_after: Optional[datetime] = None,
|
|
fetched_before: Optional[datetime] = None,
|
|
stock_status: Optional[str] = None,
|
|
limit: int = 50,
|
|
offset: int = 0,
|
|
session: Session = Depends(get_db_session),
|
|
) -> list[ProductOut]:
|
|
"""Liste des produits avec filtres optionnels."""
    latest_price_subquery = (
        session.query(
            PriceHistory.product_id.label("product_id"),
            func.max(PriceHistory.fetched_at).label("latest_fetched_at"),
        )
        .group_by(PriceHistory.product_id)
        .subquery()
    )
    latest_price = (
        session.query(PriceHistory)
        .join(
            latest_price_subquery,
            and_(
                PriceHistory.product_id == latest_price_subquery.c.product_id,
                PriceHistory.fetched_at == latest_price_subquery.c.latest_fetched_at,
            ),
        )
        .subquery()
    )

    query = session.query(Product).outerjoin(latest_price, Product.id == latest_price.c.product_id)
    if source:
        query = query.filter(Product.source == source)
    if reference:
        query = query.filter(Product.reference == reference)
    if updated_after:
        query = query.filter(Product.last_updated_at >= updated_after)
    if price_min is not None:
        query = query.filter(latest_price.c.price >= price_min)
    if price_max is not None:
        query = query.filter(latest_price.c.price <= price_max)
    if fetched_after:
        query = query.filter(latest_price.c.fetched_at >= fetched_after)
    if fetched_before:
        query = query.filter(latest_price.c.fetched_at <= fetched_before)
    if stock_status:
        query = query.filter(latest_price.c.stock_status == stock_status)

    products = query.order_by(desc(Product.last_updated_at)).offset(offset).limit(limit).all()
    return [_product_to_out(session, product) for product in products]


# Registered before GET /products/{product_id} so the literal "export" segment
# is not captured by the dynamic route.
@app.get("/products/export", dependencies=[Depends(require_token)])
def export_products(
    source: Optional[str] = None,
    reference: Optional[str] = None,
    updated_after: Optional[datetime] = None,
    price_min: Optional[float] = None,
    price_max: Optional[float] = None,
    fetched_after: Optional[datetime] = None,
    fetched_before: Optional[datetime] = None,
    stock_status: Optional[str] = None,
    format: str = "csv",
    limit: int = 500,
    offset: int = 0,
    session: Session = Depends(get_db_session),
) -> Response:
    """Export products as CSV/JSON."""
    products = list_products(
        source=source,
        reference=reference,
        updated_after=updated_after,
        price_min=price_min,
        price_max=price_max,
        fetched_after=fetched_after,
        fetched_before=fetched_before,
        stock_status=stock_status,
        limit=limit,
        offset=offset,
        session=session,
    )
    rows = [product.model_dump() for product in products]
    fieldnames = list(ProductOut.model_fields.keys())
    return _export_response(rows, fieldnames, "products", format)
@app.post("/products", response_model=ProductOut, dependencies=[Depends(require_token)])
|
|
def create_product(
|
|
payload: ProductCreate,
|
|
session: Session = Depends(get_db_session),
|
|
) -> ProductOut:
|
|
"""Cree un produit."""
|
|
product = Product(
|
|
source=payload.source,
|
|
reference=payload.reference,
|
|
url=payload.url,
|
|
title=payload.title,
|
|
category=payload.category,
|
|
type=payload.type,
|
|
description=payload.description,
|
|
currency=payload.currency,
|
|
msrp=payload.msrp,
|
|
)
|
|
session.add(product)
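    # IntegrityError below is assumed to come from the products table's
    # uniqueness constraint (duplicate source/reference), hence the 409.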
    try:
        session.commit()
        session.refresh(product)
    except IntegrityError as exc:
        session.rollback()
        raise HTTPException(status_code=409, detail="Produit deja existant") from exc
    except SQLAlchemyError as exc:
        session.rollback()
        raise HTTPException(status_code=500, detail="Erreur DB") from exc
    return _product_to_out(session, product)
@app.get("/products/{product_id}", response_model=ProductOut, dependencies=[Depends(require_token)])
|
|
def get_product(
|
|
product_id: int,
|
|
session: Session = Depends(get_db_session),
|
|
) -> ProductOut:
|
|
"""Detail produit + dernier prix."""
|
|
product = session.query(Product).filter(Product.id == product_id).one_or_none()
|
|
if not product:
|
|
raise HTTPException(status_code=404, detail="Produit non trouve")
|
|
return _product_to_out(session, product)
|
|
|
|
|
|
@app.patch("/products/{product_id}", response_model=ProductOut, dependencies=[Depends(require_token)])
|
|
def update_product(
|
|
product_id: int,
|
|
payload: ProductUpdate,
|
|
session: Session = Depends(get_db_session),
|
|
) -> ProductOut:
|
|
"""Met a jour un produit (partial)."""
|
|
product = session.query(Product).filter(Product.id == product_id).one_or_none()
|
|
if not product:
|
|
raise HTTPException(status_code=404, detail="Produit non trouve")
|
|
|
|
updates = payload.model_dump(exclude_unset=True)
|
|
for key, value in updates.items():
|
|
setattr(product, key, value)
|
|
|
|
try:
|
|
session.commit()
|
|
session.refresh(product)
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return _product_to_out(session, product)
|
|
|
|
|
|


@app.get(
    "/classification/rules",
    response_model=list[ClassificationRuleOut],
    dependencies=[Depends(require_token)],
)
def list_classification_rules(
    session: Session = Depends(get_db_session),
) -> list[ClassificationRuleOut]:
    """List classification rules."""
    rules = (
        session.query(ClassificationRule)
        .order_by(ClassificationRule.sort_order, ClassificationRule.id)
        .all()
    )
    return [
        ClassificationRuleOut(
            id=rule.id,
            category=rule.category,
            type=rule.type,
            keywords=rule.keywords or [],
            sort_order=rule.sort_order,
            is_active=rule.is_active,
        )
        for rule in rules
    ]


@app.post(
    "/classification/rules",
    response_model=ClassificationRuleOut,
    dependencies=[Depends(require_token)],
)
def create_classification_rule(
    payload: ClassificationRuleCreate,
    session: Session = Depends(get_db_session),
) -> ClassificationRuleOut:
    """Create a classification rule."""
    rule = ClassificationRule(
        category=payload.category,
        type=payload.type,
        keywords=payload.keywords,
        sort_order=payload.sort_order or 0,
        is_active=True if payload.is_active is None else payload.is_active,
    )
    session.add(rule)
    session.commit()
    session.refresh(rule)
    return ClassificationRuleOut(
        id=rule.id,
        category=rule.category,
        type=rule.type,
        keywords=rule.keywords or [],
        sort_order=rule.sort_order,
        is_active=rule.is_active,
    )


@app.patch(
    "/classification/rules/{rule_id}",
    response_model=ClassificationRuleOut,
    dependencies=[Depends(require_token)],
)
def update_classification_rule(
    rule_id: int,
    payload: ClassificationRuleUpdate,
    session: Session = Depends(get_db_session),
) -> ClassificationRuleOut:
    """Update a classification rule."""
    rule = session.query(ClassificationRule).filter(ClassificationRule.id == rule_id).one_or_none()
    if not rule:
        raise HTTPException(status_code=404, detail="Regle non trouvee")
    updates = payload.model_dump(exclude_unset=True)
    for key, value in updates.items():
        setattr(rule, key, value)
    session.commit()
    session.refresh(rule)
    return ClassificationRuleOut(
        id=rule.id,
        category=rule.category,
        type=rule.type,
        keywords=rule.keywords or [],
        sort_order=rule.sort_order,
        is_active=rule.is_active,
    )


@app.delete(
    "/classification/rules/{rule_id}",
    dependencies=[Depends(require_token)],
)
def delete_classification_rule(
    rule_id: int,
    session: Session = Depends(get_db_session),
) -> dict[str, str]:
    """Delete a classification rule."""
    rule = session.query(ClassificationRule).filter(ClassificationRule.id == rule_id).one_or_none()
    if not rule:
        raise HTTPException(status_code=404, detail="Regle non trouvee")
    session.delete(rule)
    session.commit()
    return {"status": "deleted"}


@app.get(
    "/classification/options",
    response_model=ClassificationOptionsOut,
    dependencies=[Depends(require_token)],
)
def get_classification_options(
    session: Session = Depends(get_db_session),
) -> ClassificationOptionsOut:
    """Expose the categories and types derived from the active rules."""
    rules = (
        session.query(ClassificationRule)
        .filter(ClassificationRule.is_active.is_(True))
        .order_by(ClassificationRule.sort_order, ClassificationRule.id)
        .all()
    )
    categories = sorted({rule.category for rule in rules if rule.category})
    types = sorted({rule.type for rule in rules if rule.type})
    return ClassificationOptionsOut(categories=categories, types=types)
@app.delete("/products/{product_id}", dependencies=[Depends(require_token)])
|
|
def delete_product(
|
|
product_id: int,
|
|
session: Session = Depends(get_db_session),
|
|
) -> dict[str, str]:
|
|
"""Supprime un produit (cascade)."""
|
|
product = session.query(Product).filter(Product.id == product_id).one_or_none()
|
|
if not product:
|
|
raise HTTPException(status_code=404, detail="Produit non trouve")
|
|
|
|
session.delete(product)
|
|
try:
|
|
session.commit()
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return {"status": "deleted"}
|
|
|
|
|
|


@app.get(
    "/products/{product_id}/prices",
    response_model=list[PriceHistoryOut],
    dependencies=[Depends(require_token)],
)
def list_prices(
    product_id: int,
    price_min: Optional[float] = None,
    price_max: Optional[float] = None,
    fetched_after: Optional[datetime] = None,
    fetched_before: Optional[datetime] = None,
    fetch_status: Optional[str] = None,
    limit: int = 50,
    offset: int = 0,
    session: Session = Depends(get_db_session),
) -> list[PriceHistoryOut]:
    """Price history for a product."""
    query = session.query(PriceHistory).filter(PriceHistory.product_id == product_id)
    if price_min is not None:
        query = query.filter(PriceHistory.price >= price_min)
    if price_max is not None:
        query = query.filter(PriceHistory.price <= price_max)
    if fetched_after:
        query = query.filter(PriceHistory.fetched_at >= fetched_after)
    if fetched_before:
        query = query.filter(PriceHistory.fetched_at <= fetched_before)
    if fetch_status:
        query = query.filter(PriceHistory.fetch_status == fetch_status)

    prices = query.order_by(desc(PriceHistory.fetched_at)).offset(offset).limit(limit).all()
    return [_price_to_out(price) for price in prices]
@app.post("/prices", response_model=PriceHistoryOut, dependencies=[Depends(require_token)])
|
|
def create_price(
|
|
payload: PriceHistoryCreate,
|
|
session: Session = Depends(get_db_session),
|
|
) -> PriceHistoryOut:
|
|
"""Ajoute une entree d'historique de prix."""
|
|
price = PriceHistory(
|
|
product_id=payload.product_id,
|
|
price=payload.price,
|
|
shipping_cost=payload.shipping_cost,
|
|
stock_status=payload.stock_status,
|
|
fetch_method=payload.fetch_method,
|
|
fetch_status=payload.fetch_status,
|
|
fetched_at=payload.fetched_at,
|
|
)
|
|
session.add(price)
|
|
try:
|
|
session.commit()
|
|
session.refresh(price)
|
|
except IntegrityError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=409, detail="Entree prix deja existante") from exc
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return _price_to_out(price)
|
|
|
|
|
|
@app.patch("/prices/{price_id}", response_model=PriceHistoryOut, dependencies=[Depends(require_token)])
|
|
def update_price(
|
|
price_id: int,
|
|
payload: PriceHistoryUpdate,
|
|
session: Session = Depends(get_db_session),
|
|
) -> PriceHistoryOut:
|
|
"""Met a jour une entree de prix."""
|
|
price = session.query(PriceHistory).filter(PriceHistory.id == price_id).one_or_none()
|
|
if not price:
|
|
raise HTTPException(status_code=404, detail="Entree prix non trouvee")
|
|
|
|
updates = payload.model_dump(exclude_unset=True)
|
|
for key, value in updates.items():
|
|
setattr(price, key, value)
|
|
|
|
try:
|
|
session.commit()
|
|
session.refresh(price)
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return _price_to_out(price)
|
|
|
|
|
|
@app.delete("/prices/{price_id}", dependencies=[Depends(require_token)])
|
|
def delete_price(
|
|
price_id: int,
|
|
session: Session = Depends(get_db_session),
|
|
) -> dict[str, str]:
|
|
"""Supprime une entree de prix."""
|
|
price = session.query(PriceHistory).filter(PriceHistory.id == price_id).one_or_none()
|
|
if not price:
|
|
raise HTTPException(status_code=404, detail="Entree prix non trouvee")
|
|
|
|
session.delete(price)
|
|
try:
|
|
session.commit()
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return {"status": "deleted"}
|
|
|
|
|
|
@app.get("/logs", response_model=list[ScrapingLogOut], dependencies=[Depends(require_token)])
|
|
def list_logs(
|
|
source: Optional[str] = None,
|
|
fetch_status: Optional[str] = None,
|
|
fetched_after: Optional[datetime] = None,
|
|
fetched_before: Optional[datetime] = None,
|
|
limit: int = 50,
|
|
offset: int = 0,
|
|
session: Session = Depends(get_db_session),
|
|
) -> list[ScrapingLogOut]:
|
|
"""Liste des logs de scraping."""
|
|
query = session.query(ScrapingLog)
|
|
if source:
|
|
query = query.filter(ScrapingLog.source == source)
|
|
if fetch_status:
|
|
query = query.filter(ScrapingLog.fetch_status == fetch_status)
|
|
if fetched_after:
|
|
query = query.filter(ScrapingLog.fetched_at >= fetched_after)
|
|
if fetched_before:
|
|
query = query.filter(ScrapingLog.fetched_at <= fetched_before)
|
|
|
|
logs = query.order_by(desc(ScrapingLog.fetched_at)).offset(offset).limit(limit).all()
|
|
return [_log_to_out(log) for log in logs]
|
|
|
|
|
|
@app.post("/logs", response_model=ScrapingLogOut, dependencies=[Depends(require_token)])
|
|
def create_log(
|
|
payload: ScrapingLogCreate,
|
|
session: Session = Depends(get_db_session),
|
|
) -> ScrapingLogOut:
|
|
"""Cree un log de scraping."""
|
|
log_entry = ScrapingLog(
|
|
product_id=payload.product_id,
|
|
url=payload.url,
|
|
source=payload.source,
|
|
reference=payload.reference,
|
|
fetch_method=payload.fetch_method,
|
|
fetch_status=payload.fetch_status,
|
|
fetched_at=payload.fetched_at,
|
|
duration_ms=payload.duration_ms,
|
|
html_size_bytes=payload.html_size_bytes,
|
|
errors=payload.errors,
|
|
notes=payload.notes,
|
|
)
|
|
session.add(log_entry)
|
|
try:
|
|
session.commit()
|
|
session.refresh(log_entry)
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return _log_to_out(log_entry)
|
|
|
|
|
|
@app.patch("/logs/{log_id}", response_model=ScrapingLogOut, dependencies=[Depends(require_token)])
|
|
def update_log(
|
|
log_id: int,
|
|
payload: ScrapingLogUpdate,
|
|
session: Session = Depends(get_db_session),
|
|
) -> ScrapingLogOut:
|
|
"""Met a jour un log."""
|
|
log_entry = session.query(ScrapingLog).filter(ScrapingLog.id == log_id).one_or_none()
|
|
if not log_entry:
|
|
raise HTTPException(status_code=404, detail="Log non trouve")
|
|
|
|
updates = payload.model_dump(exclude_unset=True)
|
|
for key, value in updates.items():
|
|
setattr(log_entry, key, value)
|
|
|
|
try:
|
|
session.commit()
|
|
session.refresh(log_entry)
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return _log_to_out(log_entry)
|
|
|
|
|
|
@app.delete("/logs/{log_id}", dependencies=[Depends(require_token)])
|
|
def delete_log(
|
|
log_id: int,
|
|
session: Session = Depends(get_db_session),
|
|
) -> dict[str, str]:
|
|
"""Supprime un log."""
|
|
log_entry = session.query(ScrapingLog).filter(ScrapingLog.id == log_id).one_or_none()
|
|
if not log_entry:
|
|
raise HTTPException(status_code=404, detail="Log non trouve")
|
|
|
|
session.delete(log_entry)
|
|
try:
|
|
session.commit()
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return {"status": "deleted"}
|
|
|
|
|
|
@app.get("/products/export", dependencies=[Depends(require_token)])
|
|
def export_products(
|
|
source: Optional[str] = None,
|
|
reference: Optional[str] = None,
|
|
updated_after: Optional[datetime] = None,
|
|
price_min: Optional[float] = None,
|
|
price_max: Optional[float] = None,
|
|
fetched_after: Optional[datetime] = None,
|
|
fetched_before: Optional[datetime] = None,
|
|
stock_status: Optional[str] = None,
|
|
format: str = "csv",
|
|
limit: int = 500,
|
|
offset: int = 0,
|
|
session: Session = Depends(get_db_session),
|
|
) -> Response:
|
|
"""Export produits en CSV/JSON."""
|
|
products = list_products(
|
|
source=source,
|
|
reference=reference,
|
|
updated_after=updated_after,
|
|
price_min=price_min,
|
|
price_max=price_max,
|
|
fetched_after=fetched_after,
|
|
fetched_before=fetched_before,
|
|
stock_status=stock_status,
|
|
limit=limit,
|
|
offset=offset,
|
|
session=session,
|
|
)
|
|
rows = [product.model_dump() for product in products]
|
|
fieldnames = list(ProductOut.model_fields.keys())
|
|
return _export_response(rows, fieldnames, "products", format)
|
|
|
|
|
|
@app.get("/prices/export", dependencies=[Depends(require_token)])
|
|
def export_prices(
|
|
product_id: Optional[int] = None,
|
|
price_min: Optional[float] = None,
|
|
price_max: Optional[float] = None,
|
|
fetched_after: Optional[datetime] = None,
|
|
fetched_before: Optional[datetime] = None,
|
|
fetch_status: Optional[str] = None,
|
|
format: str = "csv",
|
|
limit: int = 500,
|
|
offset: int = 0,
|
|
session: Session = Depends(get_db_session),
|
|
) -> Response:
|
|
"""Export historique de prix en CSV/JSON."""
    query = session.query(PriceHistory)
    if product_id is not None:
        query = query.filter(PriceHistory.product_id == product_id)
    if price_min is not None:
        query = query.filter(PriceHistory.price >= price_min)
    if price_max is not None:
        query = query.filter(PriceHistory.price <= price_max)
    if fetched_after:
        query = query.filter(PriceHistory.fetched_at >= fetched_after)
    if fetched_before:
        query = query.filter(PriceHistory.fetched_at <= fetched_before)
    if fetch_status:
        query = query.filter(PriceHistory.fetch_status == fetch_status)

    prices = query.order_by(desc(PriceHistory.fetched_at)).offset(offset).limit(limit).all()
    rows = [_price_to_out(price).model_dump() for price in prices]
    fieldnames = list(PriceHistoryOut.model_fields.keys())
    return _export_response(rows, fieldnames, "prices", format)
@app.get("/logs/export", dependencies=[Depends(require_token)])
|
|
def export_logs(
|
|
source: Optional[str] = None,
|
|
fetch_status: Optional[str] = None,
|
|
fetched_after: Optional[datetime] = None,
|
|
fetched_before: Optional[datetime] = None,
|
|
format: str = "csv",
|
|
limit: int = 500,
|
|
offset: int = 0,
|
|
session: Session = Depends(get_db_session),
|
|
) -> Response:
|
|
"""Export logs de scraping en CSV/JSON."""
    logs = list_logs(
        source=source,
        fetch_status=fetch_status,
        fetched_after=fetched_after,
        fetched_before=fetched_before,
        limit=limit,
        offset=offset,
        session=session,
    )
    rows = [log.model_dump() for log in logs]
    fieldnames = list(ScrapingLogOut.model_fields.keys())
    return _export_response(rows, fieldnames, "logs", format)
@app.get("/webhooks", response_model=list[WebhookOut], dependencies=[Depends(require_token)])
|
|
def list_webhooks(
|
|
event: Optional[str] = None,
|
|
enabled: Optional[bool] = None,
|
|
limit: int = 50,
|
|
offset: int = 0,
|
|
session: Session = Depends(get_db_session),
|
|
) -> list[WebhookOut]:
|
|
"""Liste des webhooks."""
|
|
query = session.query(Webhook)
|
|
if event:
|
|
query = query.filter(Webhook.event == event)
|
|
if enabled is not None:
|
|
query = query.filter(Webhook.enabled == enabled)
|
|
|
|
webhooks = query.order_by(desc(Webhook.created_at)).offset(offset).limit(limit).all()
|
|
return [_webhook_to_out(webhook) for webhook in webhooks]
|
|
|
|
|
|
@app.post("/webhooks", response_model=WebhookOut, dependencies=[Depends(require_token)])
|
|
def create_webhook(
|
|
payload: WebhookCreate,
|
|
session: Session = Depends(get_db_session),
|
|
) -> WebhookOut:
|
|
"""Cree un webhook."""
|
|
webhook = Webhook(
|
|
event=payload.event,
|
|
url=payload.url,
|
|
enabled=payload.enabled,
|
|
secret=payload.secret,
|
|
)
|
|
session.add(webhook)
|
|
try:
|
|
session.commit()
|
|
session.refresh(webhook)
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return _webhook_to_out(webhook)
|
|
|
|
|
|
@app.patch("/webhooks/{webhook_id}", response_model=WebhookOut, dependencies=[Depends(require_token)])
|
|
def update_webhook(
|
|
webhook_id: int,
|
|
payload: WebhookUpdate,
|
|
session: Session = Depends(get_db_session),
|
|
) -> WebhookOut:
|
|
"""Met a jour un webhook."""
|
|
webhook = session.query(Webhook).filter(Webhook.id == webhook_id).one_or_none()
|
|
if not webhook:
|
|
raise HTTPException(status_code=404, detail="Webhook non trouve")
|
|
|
|
updates = payload.model_dump(exclude_unset=True)
|
|
for key, value in updates.items():
|
|
setattr(webhook, key, value)
|
|
|
|
try:
|
|
session.commit()
|
|
session.refresh(webhook)
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return _webhook_to_out(webhook)
|
|
|
|
|
|
@app.delete("/webhooks/{webhook_id}", dependencies=[Depends(require_token)])
|
|
def delete_webhook(
|
|
webhook_id: int,
|
|
session: Session = Depends(get_db_session),
|
|
) -> dict[str, str]:
|
|
"""Supprime un webhook."""
|
|
webhook = session.query(Webhook).filter(Webhook.id == webhook_id).one_or_none()
|
|
if not webhook:
|
|
raise HTTPException(status_code=404, detail="Webhook non trouve")
|
|
|
|
session.delete(webhook)
|
|
try:
|
|
session.commit()
|
|
except SQLAlchemyError as exc:
|
|
session.rollback()
|
|
raise HTTPException(status_code=500, detail="Erreur DB") from exc
|
|
return {"status": "deleted"}
|
|
|
|
|
|


@app.post(
    "/webhooks/{webhook_id}/test",
    response_model=WebhookTestResponse,
    dependencies=[Depends(require_token)],
)
def send_webhook_test(
    webhook_id: int,
    session: Session = Depends(get_db_session),
) -> WebhookTestResponse:
    """Send a test event to the webhook."""
    webhook = session.query(Webhook).filter(Webhook.id == webhook_id).one_or_none()
    if not webhook:
        raise HTTPException(status_code=404, detail="Webhook non trouve")
    if not webhook.enabled:
        raise HTTPException(status_code=409, detail="Webhook desactive")

    payload = {"message": "test webhook", "webhook_id": webhook.id}
    _send_webhook(webhook, "test", payload)
    return WebhookTestResponse(status="sent")
@app.post("/enqueue", response_model=EnqueueResponse, dependencies=[Depends(require_token)])
|
|
def enqueue_job(payload: EnqueueRequest) -> EnqueueResponse:
|
|
"""Enqueue un job immediat."""
|
|
try:
|
|
scheduler = ScrapingScheduler(get_config())
|
|
job = scheduler.enqueue_immediate(
|
|
payload.url,
|
|
use_playwright=payload.use_playwright,
|
|
save_db=payload.save_db,
|
|
)
|
|
return EnqueueResponse(job_id=job.id)
|
|
except RedisUnavailableError as exc:
|
|
raise HTTPException(status_code=503, detail=str(exc)) from exc
|
|
|
|
|
|
@app.post("/schedule", response_model=ScheduleResponse, dependencies=[Depends(require_token)])
|
|
def schedule_job(payload: ScheduleRequest) -> ScheduleResponse:
|
|
"""Planifie un job recurrent."""
|
|
try:
|
|
scheduler = ScrapingScheduler(get_config())
|
|
job_info = scheduler.schedule_product(
|
|
payload.url,
|
|
interval_hours=payload.interval_hours,
|
|
use_playwright=payload.use_playwright,
|
|
save_db=payload.save_db,
|
|
)
|
|
return ScheduleResponse(job_id=job_info.job_id, next_run=job_info.next_run)
|
|
except RedisUnavailableError as exc:
|
|
raise HTTPException(status_code=503, detail=str(exc)) from exc
|
|
|
|
|
|
@app.post("/scrape/preview", response_model=ScrapePreviewResponse, dependencies=[Depends(require_token)])
|
|
def preview_scrape(payload: ScrapePreviewRequest) -> ScrapePreviewResponse:
|
|
"""Scrape un produit sans persistence pour previsualisation."""
|
|
_add_backend_log("INFO", f"Preview scraping: {payload.url}")
|
|
result = scrape_product(
|
|
payload.url,
|
|
use_playwright=payload.use_playwright,
|
|
save_db=False,
|
|
)
|
|
snapshot = result.get("snapshot")
|
|
if snapshot is None:
|
|
_add_backend_log("ERROR", f"Preview scraping KO: {payload.url}")
|
|
return ScrapePreviewResponse(success=False, snapshot=None, error=result.get("error"))
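    # Classification is best-effort at preview time: failures are recorded as a
    # note on the snapshot instead of failing the request.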
    config = get_config()
    if config.enable_db:
        try:
            with get_session(config) as session:
                ProductRepository(session).apply_classification(snapshot)
        except Exception as exc:
            snapshot.add_note(f"Classification ignoree: {exc}")
    return ScrapePreviewResponse(
        success=bool(result.get("success")),
        snapshot=snapshot.model_dump(mode="json"),
        error=result.get("error"),
    )
@app.post("/scrape/commit", response_model=ScrapeCommitResponse, dependencies=[Depends(require_token)])
|
|
def commit_scrape(payload: ScrapeCommitRequest) -> ScrapeCommitResponse:
|
|
"""Persiste un snapshot previsualise."""
|
|
try:
|
|
snapshot = ProductSnapshot.model_validate(payload.snapshot)
|
|
except Exception as exc:
|
|
_add_backend_log("ERROR", "Commit scraping KO: snapshot invalide")
|
|
raise HTTPException(status_code=400, detail="Snapshot invalide") from exc
|
|
|
|
product_id = ScrapingPipeline(config=get_config()).process_snapshot(
|
|
snapshot, save_to_db=True, apply_classification=False
|
|
)
|
|
_add_backend_log("INFO", f"Commit scraping OK: product_id={product_id}")
|
|
return ScrapeCommitResponse(success=True, product_id=product_id)
|
|
|
|
|
|


def _export_response(
    rows: list[dict[str, object]],
    fieldnames: list[str],
    filename_prefix: str,
    format: str,
) -> Response:
    """Build a CSV or JSON response with a stable attachment filename."""
    if format not in {"csv", "json"}:
        raise HTTPException(status_code=400, detail="Format invalide (csv ou json)")

    headers = {"Content-Disposition": f'attachment; filename="{filename_prefix}.{format}"'}
    if format == "json":
        return JSONResponse(content=jsonable_encoder(rows), headers=headers)
    return _to_csv_response(rows, fieldnames, headers)


def _to_csv_response(
    rows: list[dict[str, object]],
    fieldnames: list[str],
    headers: dict[str, str],
) -> Response:
    """Serialize rows to CSV in memory and wrap them in a text/csv response."""
    buffer = StringIO()
    writer = csv.DictWriter(buffer, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)
    return Response(content=buffer.getvalue(), media_type="text/csv", headers=headers)


def _send_webhook(webhook: Webhook, event: str, payload: dict[str, object]) -> None:
    """Send a webhook with explicit error handling."""
    headers = {"Content-Type": "application/json"}
    if webhook.secret:
        headers["X-Webhook-Secret"] = webhook.secret

    try:
        response = httpx.post(
            webhook.url,
            json={"event": event, "payload": payload},
            headers=headers,
            timeout=5.0,
        )
        response.raise_for_status()
    except httpx.HTTPError as exc:
        logger.error("Erreur webhook", extra={"url": webhook.url, "event": event, "error": str(exc)})
        raise HTTPException(status_code=502, detail="Echec webhook") from exc


def _add_backend_log(level: str, message: str) -> None:
    """Append an entry to the in-memory backend log buffer."""
    BACKEND_LOGS.append(
        BackendLogEntry(
            time=datetime.now(timezone.utc),
            level=level,
            message=message,
        )
    )


def _read_uvicorn_lines(limit: int = 200) -> list[str]:
    """Read the last lines of the Uvicorn log, if available."""
    if limit <= 0:
        return []
    try:
        if not UVICORN_LOG_PATH.exists():
            return []
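        # The whole file is read into memory; this assumes the Uvicorn log stays
        # small enough (or is rotated) for that to be acceptable.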
        with UVICORN_LOG_PATH.open("r", encoding="utf-8", errors="ignore") as handle:
            lines = handle.readlines()
        return [line.rstrip("\n") for line in lines[-limit:]]
    except Exception:
        return []
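

# Number of recent price points embedded in each ProductOut.history.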
PRODUCT_HISTORY_LIMIT = 12


def _product_to_out(session: Session, product: Product) -> ProductOut:
    """Map a Product and its latest price to a ProductOut."""
    latest = (
        session.query(PriceHistory)
        .filter(PriceHistory.product_id == product.id)
        .order_by(desc(PriceHistory.fetched_at))
        .first()
    )
    images = [image.image_url for image in product.images]
    specs = {spec.spec_key: spec.spec_value for spec in product.specs}
    main_image = images[0] if images else None
    gallery_images = images[1:] if len(images) > 1 else []
    asin = product.reference if product.source == "amazon" else None
    history_rows = (
        session.query(PriceHistory)
        .filter(PriceHistory.product_id == product.id, PriceHistory.price.is_not(None))
        .order_by(desc(PriceHistory.fetched_at))
        .limit(PRODUCT_HISTORY_LIMIT)
        .all()
    )
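    # history_rows come back newest-first; reverse them so the embedded history
    # is chronological (oldest to newest) for charting.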
    history_points = [
        ProductHistoryPoint(price=float(row.price), fetched_at=row.fetched_at)
        for row in reversed(history_rows)
        if row.price is not None
    ]
    return ProductOut(
        id=product.id,
        source=product.source,
        reference=product.reference,
        asin=asin,
        url=product.url,
        title=product.title,
        category=product.category,
        type=product.type,
        description=product.description,
        currency=product.currency,
        msrp=float(product.msrp) if product.msrp is not None else None,
        rating_value=float(product.rating_value) if product.rating_value is not None else None,
        rating_count=product.rating_count,
        amazon_choice=product.amazon_choice,
        amazon_choice_label=product.amazon_choice_label,
        discount_text=product.discount_text,
        stock_text=product.stock_text,
        in_stock=product.in_stock,
        model_number=product.model_number,
        model_name=product.model_name,
        first_seen_at=product.first_seen_at,
        last_updated_at=product.last_updated_at,
        latest_price=float(latest.price) if latest and latest.price is not None else None,
        latest_shipping_cost=(
            float(latest.shipping_cost) if latest and latest.shipping_cost is not None else None
        ),
        latest_stock_status=latest.stock_status if latest else None,
        latest_fetched_at=latest.fetched_at if latest else None,
        images=images,
        main_image=main_image,
        gallery_images=gallery_images,
        specs=specs,
        discount_amount=None,
        discount_percent=None,
        history=history_points,
    )


def _price_to_out(price: PriceHistory) -> PriceHistoryOut:
    """Map a PriceHistory row to its API schema."""
    return PriceHistoryOut(
        id=price.id,
        product_id=price.product_id,
        price=float(price.price) if price.price is not None else None,
        shipping_cost=float(price.shipping_cost) if price.shipping_cost is not None else None,
        stock_status=price.stock_status,
        fetch_method=price.fetch_method,
        fetch_status=price.fetch_status,
        fetched_at=price.fetched_at,
    )


def _log_to_out(log: ScrapingLog) -> ScrapingLogOut:
    """Map a ScrapingLog row to its API schema."""
    return ScrapingLogOut(
        id=log.id,
        product_id=log.product_id,
        url=log.url,
        source=log.source,
        reference=log.reference,
        fetch_method=log.fetch_method,
        fetch_status=log.fetch_status,
        fetched_at=log.fetched_at,
        duration_ms=log.duration_ms,
        html_size_bytes=log.html_size_bytes,
        errors=log.errors,
        notes=log.notes,
    )


def _webhook_to_out(webhook: Webhook) -> WebhookOut:
    """Map a Webhook row to its API schema."""
    return WebhookOut(
        id=webhook.id,
        event=webhook.event,
        url=webhook.url,
        enabled=webhook.enabled,
        secret=webhook.secret,
        created_at=webhook.created_at,
    )