codex
This commit is contained in:
320
pricewatch/app/db/models.py
Executable file
320
pricewatch/app/db/models.py
Executable file
@@ -0,0 +1,320 @@
|
||||
"""
|
||||
Modèles SQLAlchemy pour PriceWatch Phase 2.
|
||||
|
||||
Schéma normalisé pour persistence PostgreSQL:
|
||||
- products: Catalogue produits (déduplication sur source + reference)
|
||||
- price_history: Historique prix time-series
|
||||
- product_images: Images produit (N par produit)
|
||||
- product_specs: Caractéristiques produit (key-value)
|
||||
- scraping_logs: Logs observabilité pour debugging
|
||||
|
||||
Justification technique:
|
||||
- Normalisation: products séparée de price_history (catalogue vs time-series)
|
||||
- Clé naturelle: (source, reference) comme unique constraint (ASIN Amazon, etc.)
|
||||
- Pas de JSONB pour données structurées: tables séparées pour images/specs
|
||||
- JSONB uniquement pour données variables: errors, notes dans logs
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
from decimal import Decimal
from typing import List, Optional

from sqlalchemy import (
    TIMESTAMP,
    CheckConstraint,
    Column,
    ForeignKey,
    Index,
    Integer,
    JSON,
    Numeric,
    String,
    Text,
    UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Declarative base class that every SQLAlchemy model in this module inherits from."""
|
||||
|
||||
|
||||
class Product(Base):
    """
    Product catalogue (one row per unique product).

    Natural key: ``(source, reference)``, e.g. ``("amazon", "B08N5WRWNW")``,
    enforced by the ``uq_product_source_reference`` unique constraint.
    Metadata (``title``, ``category``, ``url``) is meant to be refreshed on each
    scraping run; ``last_updated_at`` is bumped automatically on ORM updates.
    Price history is a one-to-many relationship to ``PriceHistory``.
    """

    __tablename__ = "products"

    # Primary key
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Natural key (unique together, see __table_args__)
    source: Mapped[str] = mapped_column(
        String(50), nullable=False, comment="Store ID (amazon, cdiscount, etc.)"
    )
    reference: Mapped[str] = mapped_column(
        String(100), nullable=False, comment="Product reference (ASIN, SKU, etc.)"
    )

    # Product metadata
    url: Mapped[str] = mapped_column(Text, nullable=False, comment="Canonical product URL")
    title: Mapped[Optional[str]] = mapped_column(Text, nullable=True, comment="Product title")
    category: Mapped[Optional[str]] = mapped_column(
        Text, nullable=True, comment="Product category (breadcrumb)"
    )
    currency: Mapped[Optional[str]] = mapped_column(
        String(3), nullable=True, comment="Currency code (EUR, USD, GBP)"
    )

    # Timestamps.
    # The columns are TIMESTAMP without time zone, so the defaults produce naive
    # UTC datetimes. ``datetime.utcnow`` is deprecated since Python 3.12; the
    # lambdas below yield the same naive-UTC value without the deprecation.
    first_seen_at: Mapped[datetime] = mapped_column(
        TIMESTAMP,
        nullable=False,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        comment="First scraping timestamp",
    )
    last_updated_at: Mapped[datetime] = mapped_column(
        TIMESTAMP,
        nullable=False,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        onupdate=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        comment="Last metadata update",
    )

    # Relationships
    price_history: Mapped[List["PriceHistory"]] = relationship(
        "PriceHistory", back_populates="product", cascade="all, delete-orphan"
    )
    # Ordered by position so the main image (position 0) comes first when loaded.
    images: Mapped[List["ProductImage"]] = relationship(
        "ProductImage",
        back_populates="product",
        cascade="all, delete-orphan",
        order_by="ProductImage.position",
    )
    specs: Mapped[List["ProductSpec"]] = relationship(
        "ProductSpec", back_populates="product", cascade="all, delete-orphan"
    )
    # No delete / delete-orphan cascade here: the scraping_logs foreign key is
    # nullable with ON DELETE SET NULL, so logs must outlive their product.
    # With the default cascade the ORM detaches logs (product_id -> NULL)
    # instead of deleting them, matching the database-level behaviour.
    logs: Mapped[List["ScrapingLog"]] = relationship(
        "ScrapingLog", back_populates="product"
    )

    # Constraints and indexes
    __table_args__ = (
        UniqueConstraint("source", "reference", name="uq_product_source_reference"),
        Index("ix_product_source", "source"),
        Index("ix_product_reference", "reference"),
        Index("ix_product_last_updated", "last_updated_at"),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with the id and natural key."""
        return f"<Product(id={self.id}, source={self.source}, reference={self.reference})>"
|
||||
|
||||
|
||||
class PriceHistory(Base):
    """
    Price history (time-series).

    One row per successful scraping run with price extraction.
    The unique constraint on ``(product_id, fetched_at)`` prevents duplicates.
    """

    __tablename__ = "price_history"

    # Primary key
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Foreign key: rows are removed by the database when the product is deleted.
    product_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False
    )

    # Price data
    price: Mapped[Optional[Decimal]] = mapped_column(
        Numeric(10, 2), nullable=True, comment="Product price"
    )
    shipping_cost: Mapped[Optional[Decimal]] = mapped_column(
        Numeric(10, 2), nullable=True, comment="Shipping cost"
    )
    stock_status: Mapped[Optional[str]] = mapped_column(
        String(20), nullable=True, comment="Stock status (in_stock, out_of_stock, unknown)"
    )

    # Fetch metadata
    fetch_method: Mapped[str] = mapped_column(
        String(20), nullable=False, comment="Fetch method (http, playwright)"
    )
    fetch_status: Mapped[str] = mapped_column(
        String(20), nullable=False, comment="Fetch status (success, partial, failed)"
    )
    # No default: the caller must supply the scraping timestamp explicitly.
    fetched_at: Mapped[datetime] = mapped_column(
        TIMESTAMP, nullable=False, comment="Scraping timestamp"
    )

    # Relationship
    product: Mapped["Product"] = relationship("Product", back_populates="price_history")

    # Constraints and indexes.
    # CHECK constraints are named explicitly, like the unique constraint and the
    # indexes, so migrations can reference them without relying on
    # backend-generated names.
    __table_args__ = (
        UniqueConstraint("product_id", "fetched_at", name="uq_price_history_product_time"),
        Index("ix_price_history_product_id", "product_id"),
        Index("ix_price_history_fetched_at", "fetched_at"),
        CheckConstraint(
            "stock_status IN ('in_stock', 'out_of_stock', 'unknown')",
            name="ck_price_history_stock_status",
        ),
        CheckConstraint(
            "fetch_method IN ('http', 'playwright')",
            name="ck_price_history_fetch_method",
        ),
        CheckConstraint(
            "fetch_status IN ('success', 'partial', 'failed')",
            name="ck_price_history_fetch_status",
        ),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, product, price and timestamp."""
        return f"<PriceHistory(id={self.id}, product_id={self.product_id}, price={self.price}, fetched_at={self.fetched_at})>"
|
||||
|
||||
|
||||
class ProductImage(Base):
    """
    Product images (N images per product).

    The unique constraint on ``(product_id, image_url)`` prevents duplicates.
    ``position`` records the display order of the images.
    """

    __tablename__ = "product_images"
    __table_args__ = (
        UniqueConstraint("product_id", "image_url", name="uq_product_image_url"),
        Index("ix_product_image_product_id", "product_id"),
    )

    # Surrogate primary key; the Integer type is inferred from ``Mapped[int]``.
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)

    # Owning product; rows are removed by the database when the product is deleted.
    product_id: Mapped[int] = mapped_column(
        ForeignKey("products.id", ondelete="CASCADE"),
    )

    # Image payload; NOT NULL is inferred from the non-Optional annotations.
    image_url: Mapped[str] = mapped_column(Text, comment="Image URL")
    position: Mapped[int] = mapped_column(default=0, comment="Image position (0=main)")

    # Many-to-one side of ``Product.images``; target class comes from the annotation.
    product: Mapped["Product"] = relationship(back_populates="images")

    def __repr__(self) -> str:
        """Return a short debug representation with id, product and position."""
        fields = f"id={self.id}, product_id={self.product_id}, position={self.position}"
        return f"<ProductImage({fields})>"
|
||||
|
||||
|
||||
class ProductSpec(Base):
    """
    Product specifications stored as key-value pairs.

    The unique constraint on ``(product_id, spec_key)`` prevents duplicates,
    and ``spec_key`` is indexed to allow lookups by key.
    """

    __tablename__ = "product_specs"
    __table_args__ = (
        UniqueConstraint("product_id", "spec_key", name="uq_product_spec_key"),
        Index("ix_product_spec_product_id", "product_id"),
        Index("ix_product_spec_key", "spec_key"),
    )

    # Surrogate primary key; the Integer type is inferred from ``Mapped[int]``.
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)

    # Owning product; rows are removed by the database when the product is deleted.
    product_id: Mapped[int] = mapped_column(
        ForeignKey("products.id", ondelete="CASCADE"),
    )

    # Specification payload; NOT NULL is inferred from the non-Optional annotations.
    spec_key: Mapped[str] = mapped_column(
        String(200),
        comment="Specification key (e.g., 'Brand', 'Color')",
    )
    spec_value: Mapped[str] = mapped_column(Text, comment="Specification value")

    # Many-to-one side of ``Product.specs``; target class comes from the annotation.
    product: Mapped["Product"] = relationship(back_populates="specs")

    def __repr__(self) -> str:
        """Return a short debug representation with id, product and spec key."""
        fields = f"id={self.id}, product_id={self.product_id}, key={self.spec_key}"
        return f"<ProductSpec({fields})>"
|
||||
|
||||
|
||||
class ScrapingLog(Base):
    """
    Observability logs for debugging scraping runs.

    The foreign key to ``products`` is optional, so a log can be stored even
    when no product was created. ``errors`` and ``notes`` use JSON (JSONB on
    PostgreSQL) because their structure varies. The table is meant to support
    analytics such as success rate or average duration.
    """

    __tablename__ = "scraping_logs"

    # Primary key
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Foreign key (optional): set to NULL by the database when the product is deleted.
    product_id: Mapped[Optional[int]] = mapped_column(
        Integer, ForeignKey("products.id", ondelete="SET NULL"), nullable=True
    )

    # Scraping metadata
    url: Mapped[str] = mapped_column(Text, nullable=False, comment="Scraped URL")
    source: Mapped[str] = mapped_column(
        String(50), nullable=False, comment="Store ID (amazon, cdiscount, etc.)"
    )
    reference: Mapped[Optional[str]] = mapped_column(
        String(100), nullable=True, comment="Product reference (if extracted)"
    )

    # Fetch metadata
    fetch_method: Mapped[str] = mapped_column(
        String(20), nullable=False, comment="Fetch method (http, playwright)"
    )
    fetch_status: Mapped[str] = mapped_column(
        String(20), nullable=False, comment="Fetch status (success, partial, failed)"
    )
    # The column is TIMESTAMP without time zone, so the default is a naive UTC
    # datetime. ``datetime.utcnow`` is deprecated since Python 3.12; this lambda
    # yields the same naive-UTC value without the deprecation.
    fetched_at: Mapped[datetime] = mapped_column(
        TIMESTAMP,
        nullable=False,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        comment="Scraping timestamp",
    )

    # Performance metrics
    duration_ms: Mapped[Optional[int]] = mapped_column(
        Integer, nullable=True, comment="Fetch duration in milliseconds"
    )
    html_size_bytes: Mapped[Optional[int]] = mapped_column(
        Integer, nullable=True, comment="HTML response size in bytes"
    )

    # Debug data: generic JSON, switched to JSONB on the PostgreSQL dialect.
    errors: Mapped[Optional[list[str]]] = mapped_column(
        JSON().with_variant(JSONB, "postgresql"),
        nullable=True,
        comment="Error messages (list of strings)",
    )
    notes: Mapped[Optional[list[str]]] = mapped_column(
        JSON().with_variant(JSONB, "postgresql"),
        nullable=True,
        comment="Debug notes (list of strings)",
    )

    # Relationship
    product: Mapped[Optional["Product"]] = relationship("Product", back_populates="logs")

    # Constraints and indexes.
    # CHECK constraints are named explicitly, like the indexes, so migrations can
    # reference them without relying on backend-generated names.
    __table_args__ = (
        Index("ix_scraping_log_product_id", "product_id"),
        Index("ix_scraping_log_source", "source"),
        Index("ix_scraping_log_fetched_at", "fetched_at"),
        Index("ix_scraping_log_fetch_status", "fetch_status"),
        CheckConstraint(
            "fetch_method IN ('http', 'playwright')",
            name="ck_scraping_log_fetch_method",
        ),
        CheckConstraint(
            "fetch_status IN ('success', 'partial', 'failed')",
            name="ck_scraping_log_fetch_status",
        ),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, URL, status and timestamp."""
        return f"<ScrapingLog(id={self.id}, url={self.url}, status={self.fetch_status}, fetched_at={self.fetched_at})>"
|
||||
Reference in New Issue
Block a user