""" Modèles SQLAlchemy pour PriceWatch Phase 2. Schéma normalisé pour persistence PostgreSQL: - products: Catalogue produits (déduplication sur source + reference) - price_history: Historique prix time-series - product_images: Images produit (N par produit) - product_specs: Caractéristiques produit (key-value) - scraping_logs: Logs observabilité pour debugging Justification technique: - Normalisation: products séparée de price_history (catalogue vs time-series) - Clé naturelle: (source, reference) comme unique constraint (ASIN Amazon, etc.) - Pas de JSONB pour données structurées: tables séparées pour images/specs - JSONB uniquement pour données variables: errors, notes dans logs """ from datetime import datetime, timezone from decimal import Decimal from typing import List, Optional from sqlalchemy import ( TIMESTAMP, CheckConstraint, Column, ForeignKey, Index, Integer, JSON, Numeric, Boolean, String, Text, UniqueConstraint, ) from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship class Base(DeclarativeBase): """Base class pour tous les modèles SQLAlchemy.""" pass def utcnow() -> datetime: return datetime.now(timezone.utc) class Product(Base): """ Catalogue produits (1 ligne par produit unique). Clé naturelle: (source, reference) - Ex: (amazon, B08N5WRWNW) Mise à jour: title, category, url à chaque scraping Historique prix: relation 1-N vers PriceHistory """ __tablename__ = "products" # Primary key id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # Natural key (unique) source: Mapped[str] = mapped_column( String(50), nullable=False, comment="Store ID (amazon, cdiscount, etc.)" ) reference: Mapped[str] = mapped_column( String(100), nullable=False, comment="Product reference (ASIN, SKU, etc.)" ) # Product metadata url: Mapped[str] = mapped_column(Text, nullable=False, comment="Canonical product URL") title: Mapped[Optional[str]] = mapped_column(Text, nullable=True, comment="Product title") category: Mapped[Optional[str]] = mapped_column( Text, nullable=True, comment="Product category (breadcrumb)" ) description: Mapped[Optional[str]] = mapped_column( Text, nullable=True, comment="Product description" ) currency: Mapped[Optional[str]] = mapped_column( String(3), nullable=True, comment="Currency code (EUR, USD, GBP)" ) msrp: Mapped[Optional[Decimal]] = mapped_column( Numeric(10, 2), nullable=True, comment="Recommended price" ) # Timestamps first_seen_at: Mapped[datetime] = mapped_column( TIMESTAMP, nullable=False, default=utcnow, comment="First scraping timestamp" ) last_updated_at: Mapped[datetime] = mapped_column( TIMESTAMP, nullable=False, default=utcnow, onupdate=utcnow, comment="Last metadata update", ) # Relationships price_history: Mapped[List["PriceHistory"]] = relationship( "PriceHistory", back_populates="product", cascade="all, delete-orphan" ) images: Mapped[List["ProductImage"]] = relationship( "ProductImage", back_populates="product", cascade="all, delete-orphan" ) specs: Mapped[List["ProductSpec"]] = relationship( "ProductSpec", back_populates="product", cascade="all, delete-orphan" ) logs: Mapped[List["ScrapingLog"]] = relationship( "ScrapingLog", back_populates="product", cascade="all, delete-orphan" ) # Constraints __table_args__ = ( UniqueConstraint("source", "reference", name="uq_product_source_reference"), Index("ix_product_source", "source"), Index("ix_product_reference", "reference"), Index("ix_product_last_updated", "last_updated_at"), ) def __repr__(self) -> str: return f"" class PriceHistory(Base): """ Historique prix (time-series). Une ligne par scraping réussi avec extraction prix. Unique constraint sur (product_id, fetched_at) évite doublons. """ __tablename__ = "price_history" # Primary key id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # Foreign key product_id: Mapped[int] = mapped_column( Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False ) # Price data price: Mapped[Optional[Decimal]] = mapped_column( Numeric(10, 2), nullable=True, comment="Product price" ) shipping_cost: Mapped[Optional[Decimal]] = mapped_column( Numeric(10, 2), nullable=True, comment="Shipping cost" ) stock_status: Mapped[Optional[str]] = mapped_column( String(20), nullable=True, comment="Stock status (in_stock, out_of_stock, unknown)" ) # Fetch metadata fetch_method: Mapped[str] = mapped_column( String(20), nullable=False, comment="Fetch method (http, playwright)" ) fetch_status: Mapped[str] = mapped_column( String(20), nullable=False, comment="Fetch status (success, partial, failed)" ) fetched_at: Mapped[datetime] = mapped_column( TIMESTAMP, nullable=False, comment="Scraping timestamp" ) # Relationship product: Mapped["Product"] = relationship("Product", back_populates="price_history") # Constraints __table_args__ = ( UniqueConstraint("product_id", "fetched_at", name="uq_price_history_product_time"), Index("ix_price_history_product_id", "product_id"), Index("ix_price_history_fetched_at", "fetched_at"), CheckConstraint("stock_status IN ('in_stock', 'out_of_stock', 'unknown')"), CheckConstraint("fetch_method IN ('http', 'playwright')"), CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"), ) def __repr__(self) -> str: return f"" class ProductImage(Base): """ Images produit (N images par produit). Unique constraint sur (product_id, image_url) évite doublons. Position permet de garder l'ordre des images. """ __tablename__ = "product_images" # Primary key id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # Foreign key product_id: Mapped[int] = mapped_column( Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False ) # Image data image_url: Mapped[str] = mapped_column(Text, nullable=False, comment="Image URL") position: Mapped[int] = mapped_column( Integer, nullable=False, default=0, comment="Image position (0=main)" ) # Relationship product: Mapped["Product"] = relationship("Product", back_populates="images") # Constraints __table_args__ = ( UniqueConstraint("product_id", "image_url", name="uq_product_image_url"), Index("ix_product_image_product_id", "product_id"), ) def __repr__(self) -> str: return f"" class ProductSpec(Base): """ Caractéristiques produit (key-value). Unique constraint sur (product_id, spec_key) évite doublons. Permet queries efficaces par clé. """ __tablename__ = "product_specs" # Primary key id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # Foreign key product_id: Mapped[int] = mapped_column( Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False ) # Spec data spec_key: Mapped[str] = mapped_column( String(200), nullable=False, comment="Specification key (e.g., 'Brand', 'Color')" ) spec_value: Mapped[str] = mapped_column(Text, nullable=False, comment="Specification value") # Relationship product: Mapped["Product"] = relationship("Product", back_populates="specs") # Constraints __table_args__ = ( UniqueConstraint("product_id", "spec_key", name="uq_product_spec_key"), Index("ix_product_spec_product_id", "product_id"), Index("ix_product_spec_key", "spec_key"), ) def __repr__(self) -> str: return f"" class ScrapingLog(Base): """ Logs observabilité pour debugging. FK optionnelle vers products (permet logs même si produit non créé). JSONB pour errors/notes car structure variable. Permet analytics: taux succès, durée moyenne, etc. """ __tablename__ = "scraping_logs" # Primary key id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # Foreign key (optional) product_id: Mapped[Optional[int]] = mapped_column( Integer, ForeignKey("products.id", ondelete="SET NULL"), nullable=True ) # Scraping metadata url: Mapped[str] = mapped_column(Text, nullable=False, comment="Scraped URL") source: Mapped[str] = mapped_column( String(50), nullable=False, comment="Store ID (amazon, cdiscount, etc.)" ) reference: Mapped[Optional[str]] = mapped_column( String(100), nullable=True, comment="Product reference (if extracted)" ) # Fetch metadata fetch_method: Mapped[str] = mapped_column( String(20), nullable=False, comment="Fetch method (http, playwright)" ) fetch_status: Mapped[str] = mapped_column( String(20), nullable=False, comment="Fetch status (success, partial, failed)" ) fetched_at: Mapped[datetime] = mapped_column( TIMESTAMP, nullable=False, default=utcnow, comment="Scraping timestamp" ) # Performance metrics duration_ms: Mapped[Optional[int]] = mapped_column( Integer, nullable=True, comment="Fetch duration in milliseconds" ) html_size_bytes: Mapped[Optional[int]] = mapped_column( Integer, nullable=True, comment="HTML response size in bytes" ) # Debug data (JSONB) errors: Mapped[Optional[list[str]]] = mapped_column( JSON().with_variant(JSONB, "postgresql"), nullable=True, comment="Error messages (list of strings)", ) notes: Mapped[Optional[list[str]]] = mapped_column( JSON().with_variant(JSONB, "postgresql"), nullable=True, comment="Debug notes (list of strings)", ) # Relationship product: Mapped[Optional["Product"]] = relationship("Product", back_populates="logs") # Constraints __table_args__ = ( Index("ix_scraping_log_product_id", "product_id"), Index("ix_scraping_log_source", "source"), Index("ix_scraping_log_fetched_at", "fetched_at"), Index("ix_scraping_log_fetch_status", "fetch_status"), CheckConstraint("fetch_method IN ('http', 'playwright')"), CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"), ) def __repr__(self) -> str: return f"" class Webhook(Base): """ Webhooks pour notifications externes. """ __tablename__ = "webhooks" id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) event: Mapped[str] = mapped_column(String(50), nullable=False, comment="Event name") url: Mapped[str] = mapped_column(Text, nullable=False, comment="Webhook URL") enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) secret: Mapped[Optional[str]] = mapped_column( String(200), nullable=True, comment="Secret optionnel" ) created_at: Mapped[datetime] = mapped_column( TIMESTAMP, nullable=False, default=utcnow, comment="Creation timestamp" ) __table_args__ = ( Index("ix_webhook_event", "event"), Index("ix_webhook_enabled", "enabled"), ) def __repr__(self) -> str: return f""