359 lines
12 KiB
Python
359 lines
12 KiB
Python
"""
|
|
Modèles SQLAlchemy pour PriceWatch Phase 2.
|
|
|
|
Schéma normalisé pour persistence PostgreSQL:
|
|
- products: Catalogue produits (déduplication sur source + reference)
|
|
- price_history: Historique prix time-series
|
|
- product_images: Images produit (N par produit)
|
|
- product_specs: Caractéristiques produit (key-value)
|
|
- scraping_logs: Logs observabilité pour debugging
|
|
|
|
Justification technique:
|
|
- Normalisation: products séparée de price_history (catalogue vs time-series)
|
|
- Clé naturelle: (source, reference) comme unique constraint (ASIN Amazon, etc.)
|
|
- Pas de JSONB pour données structurées: tables séparées pour images/specs
|
|
- JSONB uniquement pour données variables: errors, notes dans logs
|
|
"""
|
|
|
|
from datetime import datetime, timezone
|
|
from decimal import Decimal
|
|
from typing import List, Optional
|
|
|
|
from sqlalchemy import (
|
|
TIMESTAMP,
|
|
CheckConstraint,
|
|
Column,
|
|
ForeignKey,
|
|
Index,
|
|
Integer,
|
|
JSON,
|
|
Numeric,
|
|
Boolean,
|
|
String,
|
|
Text,
|
|
UniqueConstraint,
|
|
)
|
|
from sqlalchemy.dialects.postgresql import JSONB
|
|
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
|
|
|
|
|
class Base(DeclarativeBase):
    """Declarative base class that every SQLAlchemy model in this module inherits from."""
|
|
|
|
|
|
def utcnow() -> datetime:
    """Return the current moment as a timezone-aware ``datetime`` expressed in UTC."""
    current = datetime.now(tz=timezone.utc)
    return current
|
|
|
|
|
|
class Product(Base):
    """
    Product catalogue entry (one row per unique product).

    The natural key is the pair (source, reference), e.g. (amazon, B08N5WRWNW);
    it is enforced by the ``uq_product_source_reference`` unique constraint.
    Price history, images, specs and scraping logs hang off this row through
    the one-to-many relationships declared at the bottom of the class.

    Per the original notes, title, category and url are meant to be refreshed
    on every scrape; that refresh is performed by callers, not by this class.
    """

    __tablename__ = "products"

    # Table-level constraints and secondary indexes.
    __table_args__ = (
        UniqueConstraint("source", "reference", name="uq_product_source_reference"),
        Index("ix_product_source", "source"),
        Index("ix_product_reference", "reference"),
        Index("ix_product_last_updated", "last_updated_at"),
    )

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Natural key columns (unique together).
    source: Mapped[str] = mapped_column(
        String(50),
        comment="Store ID (amazon, cdiscount, etc.)",
        nullable=False,
    )
    reference: Mapped[str] = mapped_column(
        String(100),
        comment="Product reference (ASIN, SKU, etc.)",
        nullable=False,
    )

    # Descriptive metadata; everything except the URL is optional.
    url: Mapped[str] = mapped_column(
        Text,
        comment="Canonical product URL",
        nullable=False,
    )
    title: Mapped[Optional[str]] = mapped_column(
        Text,
        comment="Product title",
        nullable=True,
    )
    category: Mapped[Optional[str]] = mapped_column(
        Text,
        comment="Product category (breadcrumb)",
        nullable=True,
    )
    description: Mapped[Optional[str]] = mapped_column(
        Text,
        comment="Product description",
        nullable=True,
    )
    currency: Mapped[Optional[str]] = mapped_column(
        String(3),
        comment="Currency code (EUR, USD, GBP)",
        nullable=True,
    )
    msrp: Mapped[Optional[Decimal]] = mapped_column(
        Numeric(10, 2),
        comment="Recommended price",
        nullable=True,
    )

    # Timestamps, both defaulting to the module-level ``utcnow`` callable.
    # NOTE(review): the columns use TIMESTAMP without ``timezone=True`` while
    # ``utcnow`` returns an aware datetime - confirm how the DB driver in use
    # handles the offset.
    first_seen_at: Mapped[datetime] = mapped_column(
        TIMESTAMP,
        default=utcnow,
        comment="First scraping timestamp",
        nullable=False,
    )
    last_updated_at: Mapped[datetime] = mapped_column(
        TIMESTAMP,
        default=utcnow,
        onupdate=utcnow,  # refreshed by SQLAlchemy on every ORM UPDATE of the row
        comment="Last metadata update",
        nullable=False,
    )

    # One-to-many relationships; children are deleted together with the product
    # when the deletion goes through the ORM.
    price_history: Mapped[List["PriceHistory"]] = relationship(
        "PriceHistory",
        cascade="all, delete-orphan",
        back_populates="product",
    )
    images: Mapped[List["ProductImage"]] = relationship(
        "ProductImage",
        cascade="all, delete-orphan",
        back_populates="product",
    )
    specs: Mapped[List["ProductSpec"]] = relationship(
        "ProductSpec",
        cascade="all, delete-orphan",
        back_populates="product",
    )
    # NOTE(review): ScrapingLog.product_id is declared with ondelete="SET NULL",
    # yet this relationship cascades deletes to the logs at the ORM level -
    # confirm which of the two behaviours is intended.
    logs: Mapped[List["ScrapingLog"]] = relationship(
        "ScrapingLog",
        cascade="all, delete-orphan",
        back_populates="product",
    )

    def __repr__(self) -> str:
        """Return a short debug representation showing the id and natural key."""
        return (
            f"<Product(id={self.id}, source={self.source}, "
            f"reference={self.reference})>"
        )
|
|
|
|
|
|
class PriceHistory(Base):
    """
    Price history (time-series), one row per recorded scrape of a product.

    Per the original notes, a row is meant to be written for each scrape that
    extracted price data; note that ``price`` itself is nullable. The
    ``uq_price_history_product_time`` unique constraint on
    (product_id, fetched_at) prevents duplicate rows for the same instant.

    The CHECK constraints carry explicit names, like the unique constraint and
    indexes, so migrations can reference them deterministically instead of
    relying on backend-generated names.
    """

    __tablename__ = "price_history"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Owning product; the FK is declared with ON DELETE CASCADE.
    product_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("products.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Price data (all optional).
    price: Mapped[Optional[Decimal]] = mapped_column(
        Numeric(10, 2), nullable=True, comment="Product price"
    )
    shipping_cost: Mapped[Optional[Decimal]] = mapped_column(
        Numeric(10, 2), nullable=True, comment="Shipping cost"
    )
    stock_status: Mapped[Optional[str]] = mapped_column(
        String(20),
        nullable=True,
        comment="Stock status (in_stock, out_of_stock, unknown)",
    )

    # Fetch metadata. ``fetched_at`` has no default here: the caller must
    # supply the scrape timestamp explicitly.
    fetch_method: Mapped[str] = mapped_column(
        String(20), nullable=False, comment="Fetch method (http, playwright)"
    )
    fetch_status: Mapped[str] = mapped_column(
        String(20), nullable=False, comment="Fetch status (success, partial, failed)"
    )
    fetched_at: Mapped[datetime] = mapped_column(
        TIMESTAMP, nullable=False, comment="Scraping timestamp"
    )

    # Many-to-one back-reference to the owning product.
    product: Mapped["Product"] = relationship("Product", back_populates="price_history")

    # Constraints and indexes. Every constraint is named explicitly.
    __table_args__ = (
        UniqueConstraint("product_id", "fetched_at", name="uq_price_history_product_time"),
        Index("ix_price_history_product_id", "product_id"),
        Index("ix_price_history_fetched_at", "fetched_at"),
        CheckConstraint(
            "stock_status IN ('in_stock', 'out_of_stock', 'unknown')",
            name="ck_price_history_stock_status",
        ),
        CheckConstraint(
            "fetch_method IN ('http', 'playwright')",
            name="ck_price_history_fetch_method",
        ),
        CheckConstraint(
            "fetch_status IN ('success', 'partial', 'failed')",
            name="ck_price_history_fetch_status",
        ),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, product, price and timestamp."""
        return (
            f"<PriceHistory(id={self.id}, product_id={self.product_id}, "
            f"price={self.price}, fetched_at={self.fetched_at})>"
        )
|
|
|
|
|
|
class ProductImage(Base):
    """
    Product image (a product may have any number of images).

    The ``uq_product_image_url`` unique constraint on (product_id, image_url)
    prevents storing the same URL twice for one product, and ``position``
    records the ordering of the images (0 is the main image, per the column
    comment).
    """

    __tablename__ = "product_images"

    # Table-level constraints and indexes.
    __table_args__ = (
        UniqueConstraint("product_id", "image_url", name="uq_product_image_url"),
        Index("ix_product_image_product_id", "product_id"),
    )

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Owning product; the FK is declared with ON DELETE CASCADE.
    product_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("products.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Image payload.
    image_url: Mapped[str] = mapped_column(
        Text,
        comment="Image URL",
        nullable=False,
    )
    position: Mapped[int] = mapped_column(
        Integer,
        default=0,
        comment="Image position (0=main)",
        nullable=False,
    )

    # Many-to-one back-reference to the owning product.
    product: Mapped["Product"] = relationship(
        "Product",
        back_populates="images",
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, product and position."""
        return (
            f"<ProductImage(id={self.id}, product_id={self.product_id}, "
            f"position={self.position})>"
        )
|
|
|
|
|
|
class ProductSpec(Base):
    """
    Product specification stored as a key/value pair.

    The ``uq_product_spec_key`` unique constraint on (product_id, spec_key)
    allows at most one value per key for a given product; ``spec_key`` is
    also indexed on its own so rows can be looked up by key.
    """

    __tablename__ = "product_specs"

    # Table-level constraints and indexes.
    __table_args__ = (
        UniqueConstraint("product_id", "spec_key", name="uq_product_spec_key"),
        Index("ix_product_spec_product_id", "product_id"),
        Index("ix_product_spec_key", "spec_key"),
    )

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Owning product; the FK is declared with ON DELETE CASCADE.
    product_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("products.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Specification payload (both parts mandatory).
    spec_key: Mapped[str] = mapped_column(
        String(200),
        comment="Specification key (e.g., 'Brand', 'Color')",
        nullable=False,
    )
    spec_value: Mapped[str] = mapped_column(
        Text,
        comment="Specification value",
        nullable=False,
    )

    # Many-to-one back-reference to the owning product.
    product: Mapped["Product"] = relationship(
        "Product",
        back_populates="specs",
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, product and spec key."""
        return (
            f"<ProductSpec(id={self.id}, product_id={self.product_id}, "
            f"key={self.spec_key})>"
        )
|
|
|
|
|
|
class ScrapingLog(Base):
    """
    Observability log of a scraping attempt, kept for debugging.

    The foreign key to ``products`` is nullable, so a log row can exist even
    when no product row was created. ``errors`` and ``notes`` are stored as
    JSON (JSONB on PostgreSQL) because their content is variable. Per the
    original notes, the table is meant to support analytics such as success
    rate and average duration.

    The CHECK constraints carry explicit names, like the indexes, so
    migrations can reference them deterministically instead of relying on
    backend-generated names.
    """

    __tablename__ = "scraping_logs"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Optional link to a product; the FK is declared with ON DELETE SET NULL.
    product_id: Mapped[Optional[int]] = mapped_column(
        Integer,
        ForeignKey("products.id", ondelete="SET NULL"),
        nullable=True,
    )

    # Scraping metadata, stored on the log row itself so it stays readable
    # without a linked product.
    url: Mapped[str] = mapped_column(Text, nullable=False, comment="Scraped URL")
    source: Mapped[str] = mapped_column(
        String(50), nullable=False, comment="Store ID (amazon, cdiscount, etc.)"
    )
    reference: Mapped[Optional[str]] = mapped_column(
        String(100), nullable=True, comment="Product reference (if extracted)"
    )

    # Fetch metadata; ``fetched_at`` defaults to the module-level ``utcnow``.
    # NOTE(review): the column uses TIMESTAMP without ``timezone=True`` while
    # ``utcnow`` returns an aware datetime - confirm how the DB driver in use
    # handles the offset.
    fetch_method: Mapped[str] = mapped_column(
        String(20), nullable=False, comment="Fetch method (http, playwright)"
    )
    fetch_status: Mapped[str] = mapped_column(
        String(20), nullable=False, comment="Fetch status (success, partial, failed)"
    )
    fetched_at: Mapped[datetime] = mapped_column(
        TIMESTAMP, nullable=False, default=utcnow, comment="Scraping timestamp"
    )

    # Performance metrics (optional).
    duration_ms: Mapped[Optional[int]] = mapped_column(
        Integer, nullable=True, comment="Fetch duration in milliseconds"
    )
    html_size_bytes: Mapped[Optional[int]] = mapped_column(
        Integer, nullable=True, comment="HTML response size in bytes"
    )

    # Debug data: generic JSON, switched to JSONB on the PostgreSQL dialect.
    errors: Mapped[Optional[list[str]]] = mapped_column(
        JSON().with_variant(JSONB, "postgresql"),
        nullable=True,
        comment="Error messages (list of strings)",
    )
    notes: Mapped[Optional[list[str]]] = mapped_column(
        JSON().with_variant(JSONB, "postgresql"),
        nullable=True,
        comment="Debug notes (list of strings)",
    )

    # Optional many-to-one back-reference to the product.
    product: Mapped[Optional["Product"]] = relationship("Product", back_populates="logs")

    # Indexes and constraints. Every constraint is named explicitly.
    __table_args__ = (
        Index("ix_scraping_log_product_id", "product_id"),
        Index("ix_scraping_log_source", "source"),
        Index("ix_scraping_log_fetched_at", "fetched_at"),
        Index("ix_scraping_log_fetch_status", "fetch_status"),
        CheckConstraint(
            "fetch_method IN ('http', 'playwright')",
            name="ck_scraping_log_fetch_method",
        ),
        CheckConstraint(
            "fetch_status IN ('success', 'partial', 'failed')",
            name="ck_scraping_log_fetch_status",
        ),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, url, status and timestamp."""
        return (
            f"<ScrapingLog(id={self.id}, url={self.url}, "
            f"status={self.fetch_status}, fetched_at={self.fetched_at})>"
        )
|
|
|
|
|
|
class Webhook(Base):
    """
    Webhook registration used for external notifications.

    Each row ties an event name to a target URL, with an ``enabled`` flag and
    an optional secret. ``event`` and ``enabled`` are both indexed.
    """

    __tablename__ = "webhooks"

    # Secondary indexes.
    __table_args__ = (
        Index("ix_webhook_event", "event"),
        Index("ix_webhook_enabled", "enabled"),
    )

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    event: Mapped[str] = mapped_column(
        String(50),
        comment="Event name",
        nullable=False,
    )
    url: Mapped[str] = mapped_column(
        Text,
        comment="Webhook URL",
        nullable=False,
    )
    # New webhooks are enabled unless stated otherwise.
    enabled: Mapped[bool] = mapped_column(
        Boolean,
        default=True,
        nullable=False,
    )
    secret: Mapped[Optional[str]] = mapped_column(
        String(200),
        comment="Secret optionnel",
        nullable=True,
    )
    # Creation time defaults to the module-level ``utcnow`` callable.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP,
        default=utcnow,
        comment="Creation timestamp",
        nullable=False,
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, event and url."""
        return (
            f"<Webhook(id={self.id}, event={self.event}, "
            f"url={self.url})>"
        )
|