Files
scrap/pricewatch/app/db/repository.py
Gilles Soulier d0b73b9319 codex2
2026-01-14 21:54:55 +01:00

145 lines
5.4 KiB
Python

"""
Repository pattern pour la persistence SQLAlchemy.
Centralise les operations CRUD sur les modeles DB a partir d'un ProductSnapshot.
"""
from __future__ import annotations
from typing import Optional
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session
from pricewatch.app.core.logging import get_logger
from pricewatch.app.core.schema import ProductSnapshot
from pricewatch.app.db.models import PriceHistory, Product, ProductImage, ProductSpec, ScrapingLog
# Module-level logger obtained from get_logger under the name "db.repository".
logger = get_logger("db.repository")
class ProductRepository:
    """Persist ProductSnapshot data through a SQLAlchemy session.

    The methods of this class only add objects to the session (plus one
    flush right after a product is created); none of them commits or rolls
    back, so transaction boundaries are left to the caller.
    """

    def __init__(self, session: Session) -> None:
        """Store the session used by every repository operation."""
        self.session = session

    def get_or_create(self, source: str, reference: str, url: str) -> Product:
        """Return the product for the natural key (source, reference).

        Args:
            source: Source identifier, first half of the natural key.
            reference: Product reference, second half of the natural key.
            url: URL given to the product; only used when a new product
                has to be created.

        Returns:
            The matching product, or a freshly created one that has been
            added to the session and flushed.

        Raises:
            sqlalchemy.exc.MultipleResultsFound: raised by ``one_or_none()``
                if several rows match the natural key.
        """
        product = (
            self.session.query(Product)
            .filter(Product.source == source, Product.reference == reference)
            .one_or_none()
        )
        if product:
            return product

        product = Product(source=source, reference=reference, url=url)
        self.session.add(product)
        # Flush right away; presumably so that product.id is populated for
        # the child rows created by the other methods.
        self.session.flush()
        return product

    def update_product_metadata(self, product: Product, snapshot: ProductSnapshot) -> None:
        """Copy the metadata present in ``snapshot`` onto ``product``.

        Falsy text fields (``None`` or empty) leave the current product
        value untouched. ``msrp`` is only compared against ``None``, so a
        value of 0 is still written.
        """
        if snapshot.url:
            product.url = snapshot.url
        if snapshot.title:
            product.title = snapshot.title
        if snapshot.category:
            product.category = snapshot.category
        if snapshot.description:
            product.description = snapshot.description
        if snapshot.currency:
            product.currency = snapshot.currency
        if snapshot.msrp is not None:
            product.msrp = snapshot.msrp

    def add_price_history(self, product: Product, snapshot: ProductSnapshot) -> Optional[PriceHistory]:
        """Return the price entry for (product, snapshot.fetched_at).

        An entry already stored for the same product and fetch timestamp
        is returned unchanged; otherwise a new one is built from the
        snapshot and added to the session (without flushing).
        """
        existing = (
            self.session.query(PriceHistory)
            .filter(
                PriceHistory.product_id == product.id,
                PriceHistory.fetched_at == snapshot.fetched_at,
            )
            .one_or_none()
        )
        if existing:
            return existing

        price_entry = PriceHistory(
            product_id=product.id,
            price=snapshot.price,
            shipping_cost=snapshot.shipping_cost,
            stock_status=snapshot.stock_status,
            fetch_method=snapshot.debug.method,
            fetch_status=snapshot.debug.status,
            fetched_at=snapshot.fetched_at,
        )
        self.session.add(price_entry)
        return price_entry

    @staticmethod
    def _missing_images(existing_urls: set[str], images: list[str]) -> list[tuple[int, str]]:
        """Return ``(position, url)`` for each URL not in ``existing_urls``.

        ``position`` is the index of the URL's first occurrence in
        ``images``. A URL repeated inside ``images`` is reported once, and
        ``existing_urls`` itself is not modified.
        """
        seen = set(existing_urls)
        missing: list[tuple[int, str]] = []
        for position, url in enumerate(images):
            if url in seen:
                continue
            # Remember the URL so a later duplicate in the same list is skipped.
            seen.add(url)
            missing.append((position, url))
        return missing

    def sync_images(self, product: Product, images: list[str]) -> None:
        """Add the images of ``images`` that ``product`` does not have yet.

        Images already attached to the product are neither updated nor
        removed. Each new URL is added once, even when it appears several
        times in ``images``.
        """
        existing_urls = {image.image_url for image in product.images}
        for position, url in self._missing_images(existing_urls, images):
            self.session.add(ProductImage(product_id=product.id, image_url=url, position=position))

    def sync_specs(self, product: Product, specs: dict[str, str]) -> None:
        """Upsert the product specs by key.

        A spec whose key already exists on the product gets its value
        overwritten; unknown keys produce new ProductSpec rows. Specs that
        are absent from ``specs`` are left as they are.
        """
        existing_specs = {spec.spec_key: spec for spec in product.specs}
        for key, value in specs.items():
            if key in existing_specs:
                existing_specs[key].spec_value = value
            else:
                self.session.add(ProductSpec(product_id=product.id, spec_key=key, spec_value=value))

    def add_scraping_log(self, snapshot: ProductSnapshot, product_id: Optional[int]) -> ScrapingLog:
        """Add a scraping log entry built from ``snapshot`` to the session.

        Args:
            snapshot: Snapshot whose identity and debug data are logged.
            product_id: Id of the related product, or ``None`` when no
                product is associated with the log.

        Returns:
            The ScrapingLog instance that was added to the session.
        """
        log_entry = ScrapingLog(
            product_id=product_id,
            url=snapshot.url,
            source=snapshot.source,
            reference=snapshot.reference,
            fetch_method=snapshot.debug.method,
            fetch_status=snapshot.debug.status,
            fetched_at=snapshot.fetched_at,
            duration_ms=snapshot.debug.duration_ms,
            html_size_bytes=snapshot.debug.html_size_bytes,
            # Falsy (e.g. empty) error/note values are stored as None.
            errors=snapshot.debug.errors or None,
            notes=snapshot.debug.notes or None,
        )
        self.session.add(log_entry)
        return log_entry

    def save_snapshot(self, snapshot: ProductSnapshot) -> Optional[int]:
        """Persist a complete ProductSnapshot.

        Without a reference the product cannot be keyed: only a scraping
        log (with ``product_id=None``) is added and ``None`` is returned.
        Otherwise the product is fetched or created, then its metadata,
        price history, images, specs and scraping log are synchronised.

        Returns:
            The product id, or ``None`` when the snapshot has no reference.
        """
        if not snapshot.reference:
            logger.warning("Reference absente: persistence ignoree")
            self.add_scraping_log(snapshot, product_id=None)
            return None

        product = self.get_or_create(snapshot.source, snapshot.reference, snapshot.url)
        self.update_product_metadata(product, snapshot)
        self.add_price_history(product, snapshot)
        self.sync_images(product, snapshot.images)
        self.sync_specs(product, snapshot.specs)
        self.add_scraping_log(snapshot, product_id=product.id)
        return product.id

    def safe_save_snapshot(self, snapshot: ProductSnapshot) -> Optional[int]:
        """Run ``save_snapshot`` and log any SQLAlchemyError before re-raising.

        The exception is not swallowed and no rollback is performed here;
        the caller remains responsible for the session state.
        """
        try:
            return self.save_snapshot(snapshot)
        except SQLAlchemyError as exc:
            logger.error(f"Erreur SQLAlchemy: {exc}")
            raise