before claude

This commit is contained in:
Gilles Soulier
2026-01-18 06:26:17 +01:00
parent dc19315e5d
commit 740c3d7516
60 changed files with 3815 additions and 354 deletions

View File

@@ -0,0 +1,350 @@
"""Ajout champs Amazon produit
Revision ID: 0014e51c4927
Revises: 20260115_02_product_details
Create Date: 2026-01-17 19:23:01.866891
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers, used by Alembic.
# `down_revision` names the parent revision this script is applied on top of.
revision = '0014e51c4927'
down_revision = '20260115_02_product_details'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add column comments to existing tables and new Amazon fields to ``products``.

    The operations run table by table in this order: ``price_history``,
    ``product_images``, ``product_specs``, ``products`` (new columns first,
    then comments), ``scraping_logs`` and ``webhooks``.
    """

    def set_comments(table, columns):
        # Each entry is (column, existing_type, comment, existing_nullable).
        for column, existing_type, comment, existing_nullable in columns:
            op.alter_column(
                table,
                column,
                existing_type=existing_type,
                comment=comment,
                existing_nullable=existing_nullable,
            )

    set_comments('price_history', [
        ('price', sa.NUMERIC(precision=10, scale=2), 'Product price', True),
        ('shipping_cost', sa.NUMERIC(precision=10, scale=2), 'Shipping cost', True),
        ('stock_status', sa.VARCHAR(length=20),
         'Stock status (in_stock, out_of_stock, unknown)', True),
        ('fetch_method', sa.VARCHAR(length=20), 'Fetch method (http, playwright)', False),
        ('fetch_status', sa.VARCHAR(length=20),
         'Fetch status (success, partial, failed)', False),
        ('fetched_at', postgresql.TIMESTAMP(), 'Scraping timestamp', False),
    ])

    set_comments('product_images', [
        ('image_url', sa.TEXT(), 'Image URL', False),
        ('position', sa.INTEGER(), 'Image position (0=main)', False),
    ])

    set_comments('product_specs', [
        ('spec_key', sa.VARCHAR(length=200),
         "Specification key (e.g., 'Brand', 'Color')", False),
        ('spec_value', sa.TEXT(), 'Specification value', False),
    ])

    # New nullable product columns, added before the comment updates on ``products``.
    new_product_columns = [
        sa.Column('rating_value', sa.Numeric(precision=3, scale=2), nullable=True,
                  comment='Note moyenne'),
        sa.Column('rating_count', sa.Integer(), nullable=True,
                  comment="Nombre d'evaluations"),
        sa.Column('amazon_choice', sa.Boolean(), nullable=True,
                  comment="Badge Choix d'Amazon"),
        sa.Column('amazon_choice_label', sa.Text(), nullable=True,
                  comment="Libelle Choix d'Amazon"),
        sa.Column('discount_text', sa.Text(), nullable=True,
                  comment='Texte de reduction affiche'),
        sa.Column('stock_text', sa.Text(), nullable=True,
                  comment='Texte brut du stock'),
        sa.Column('in_stock', sa.Boolean(), nullable=True,
                  comment='Disponibilite derivee'),
        sa.Column('model_number', sa.Text(), nullable=True,
                  comment='Numero du modele'),
        sa.Column('model_name', sa.Text(), nullable=True,
                  comment='Nom du modele'),
    ]
    for new_column in new_product_columns:
        op.add_column('products', new_column)

    set_comments('products', [
        ('source', sa.VARCHAR(length=50), 'Store ID (amazon, cdiscount, etc.)', False),
        ('reference', sa.VARCHAR(length=100), 'Product reference (ASIN, SKU, etc.)', False),
        ('url', sa.TEXT(), 'Canonical product URL', False),
        ('title', sa.TEXT(), 'Product title', True),
        ('category', sa.TEXT(), 'Product category (breadcrumb)', True),
        ('description', sa.TEXT(), 'Product description', True),
        ('currency', sa.VARCHAR(length=3), 'Currency code (EUR, USD, GBP)', True),
        ('msrp', sa.NUMERIC(precision=10, scale=2), 'Recommended price', True),
        ('first_seen_at', postgresql.TIMESTAMP(), 'First scraping timestamp', False),
        ('last_updated_at', postgresql.TIMESTAMP(), 'Last metadata update', False),
    ])

    set_comments('scraping_logs', [
        ('url', sa.TEXT(), 'Scraped URL', False),
        ('source', sa.VARCHAR(length=50), 'Store ID (amazon, cdiscount, etc.)', False),
        ('reference', sa.VARCHAR(length=100), 'Product reference (if extracted)', True),
        ('fetch_method', sa.VARCHAR(length=20), 'Fetch method (http, playwright)', False),
        ('fetch_status', sa.VARCHAR(length=20),
         'Fetch status (success, partial, failed)', False),
        ('fetched_at', postgresql.TIMESTAMP(), 'Scraping timestamp', False),
        ('duration_ms', sa.INTEGER(), 'Fetch duration in milliseconds', True),
        ('html_size_bytes', sa.INTEGER(), 'HTML response size in bytes', True),
        ('errors', postgresql.JSONB(astext_type=sa.Text()),
         'Error messages (list of strings)', True),
        ('notes', postgresql.JSONB(astext_type=sa.Text()),
         'Debug notes (list of strings)', True),
    ])

    set_comments('webhooks', [
        ('event', sa.VARCHAR(length=50), 'Event name', False),
        ('url', sa.TEXT(), 'Webhook URL', False),
        ('secret', sa.VARCHAR(length=200), 'Secret optionnel', True),
        ('created_at', postgresql.TIMESTAMP(), 'Creation timestamp', False),
    ])
def downgrade() -> None:
    """Remove the column comments and drop the Amazon fields from ``products``.

    This mirrors ``upgrade`` in exact reverse order: ``webhooks``,
    ``scraping_logs``, ``products`` (comments first, then the column drops),
    ``product_specs``, ``product_images`` and ``price_history``.
    """

    def clear_comments(table, columns):
        # Each entry is (column, existing_type, existing_comment, existing_nullable).
        for column, existing_type, existing_comment, existing_nullable in columns:
            op.alter_column(
                table,
                column,
                existing_type=existing_type,
                comment=None,
                existing_comment=existing_comment,
                existing_nullable=existing_nullable,
            )

    clear_comments('webhooks', [
        ('created_at', postgresql.TIMESTAMP(), 'Creation timestamp', False),
        ('secret', sa.VARCHAR(length=200), 'Secret optionnel', True),
        ('url', sa.TEXT(), 'Webhook URL', False),
        ('event', sa.VARCHAR(length=50), 'Event name', False),
    ])

    clear_comments('scraping_logs', [
        ('notes', postgresql.JSONB(astext_type=sa.Text()),
         'Debug notes (list of strings)', True),
        ('errors', postgresql.JSONB(astext_type=sa.Text()),
         'Error messages (list of strings)', True),
        ('html_size_bytes', sa.INTEGER(), 'HTML response size in bytes', True),
        ('duration_ms', sa.INTEGER(), 'Fetch duration in milliseconds', True),
        ('fetched_at', postgresql.TIMESTAMP(), 'Scraping timestamp', False),
        ('fetch_status', sa.VARCHAR(length=20),
         'Fetch status (success, partial, failed)', False),
        ('fetch_method', sa.VARCHAR(length=20), 'Fetch method (http, playwright)', False),
        ('reference', sa.VARCHAR(length=100), 'Product reference (if extracted)', True),
        ('source', sa.VARCHAR(length=50), 'Store ID (amazon, cdiscount, etc.)', False),
        ('url', sa.TEXT(), 'Scraped URL', False),
    ])

    clear_comments('products', [
        ('last_updated_at', postgresql.TIMESTAMP(), 'Last metadata update', False),
        ('first_seen_at', postgresql.TIMESTAMP(), 'First scraping timestamp', False),
        ('msrp', sa.NUMERIC(precision=10, scale=2), 'Recommended price', True),
        ('currency', sa.VARCHAR(length=3), 'Currency code (EUR, USD, GBP)', True),
        ('description', sa.TEXT(), 'Product description', True),
        ('category', sa.TEXT(), 'Product category (breadcrumb)', True),
        ('title', sa.TEXT(), 'Product title', True),
        ('url', sa.TEXT(), 'Canonical product URL', False),
        ('reference', sa.VARCHAR(length=100), 'Product reference (ASIN, SKU, etc.)', False),
        ('source', sa.VARCHAR(length=50), 'Store ID (amazon, cdiscount, etc.)', False),
    ])

    # Drop the columns added by upgrade(), last-added first.
    dropped_product_columns = (
        'model_name',
        'model_number',
        'in_stock',
        'stock_text',
        'discount_text',
        'amazon_choice_label',
        'amazon_choice',
        'rating_count',
        'rating_value',
    )
    for column_name in dropped_product_columns:
        op.drop_column('products', column_name)

    clear_comments('product_specs', [
        ('spec_value', sa.TEXT(), 'Specification value', False),
        ('spec_key', sa.VARCHAR(length=200),
         "Specification key (e.g., 'Brand', 'Color')", False),
    ])

    clear_comments('product_images', [
        ('position', sa.INTEGER(), 'Image position (0=main)', False),
        ('image_url', sa.TEXT(), 'Image URL', False),
    ])

    clear_comments('price_history', [
        ('fetched_at', postgresql.TIMESTAMP(), 'Scraping timestamp', False),
        ('fetch_status', sa.VARCHAR(length=20),
         'Fetch status (success, partial, failed)', False),
        ('fetch_method', sa.VARCHAR(length=20), 'Fetch method (http, playwright)', False),
        ('stock_status', sa.VARCHAR(length=20),
         'Stock status (in_stock, out_of_stock, unknown)', True),
        ('shipping_cost', sa.NUMERIC(precision=10, scale=2), 'Shipping cost', True),
        ('price', sa.NUMERIC(precision=10, scale=2), 'Product price', True),
    ])

View File

@@ -0,0 +1,28 @@
"""Ajout champs Amazon produit
Revision ID: 1467e98fcbea
Revises: 3e68b0f0c9e4
Create Date: 2026-01-17 20:08:32.991650
"""
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
# NOTE(review): revision 20260117_03_classification_rules in this same commit
# also declares down_revision '3e68b0f0c9e4'. If both scripts live in the same
# versions directory this creates two heads — confirm a merge revision exists
# or re-parent one of them.
revision = '1467e98fcbea'
down_revision = '3e68b0f0c9e4'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Apply this revision; it carries no schema operations."""
    return None
def downgrade() -> None:
    """Revert this revision; there is nothing to undo."""
    return None

View File

@@ -0,0 +1,114 @@
"""Ajout classification rules et type produit
Revision ID: 20260117_03_classification_rules
Revises: 3e68b0f0c9e4
Create Date: 2026-01-17 20:05:00.000000
"""
from datetime import datetime, timezone
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers, used by Alembic.
# NOTE(review): revision 1467e98fcbea in this same commit also declares
# down_revision "3e68b0f0c9e4". If both scripts live in the same versions
# directory this creates two heads — confirm a merge revision exists or
# re-parent one of them.
revision = "20260117_03_classification_rules"
down_revision = "3e68b0f0c9e4"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add ``products.type`` and create and seed the ``classification_rules`` table.

    Steps, in order:

    1. Add the nullable ``type`` text column to ``products``.
    2. Create ``classification_rules`` with its two indexes.
    3. Seed five default "Informatique" rules, with ``sort_order`` following
       their position in the seed list.

    The ``sort_order`` and ``is_active`` columns carry the same comments as the
    ``ClassificationRule`` ORM model, so a later autogenerate run does not
    report comment drift for them.
    """
    op.add_column(
        "products",
        sa.Column("type", sa.Text(), nullable=True, comment="Product type"),
    )

    op.create_table(
        "classification_rules",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("category", sa.String(length=80), nullable=True, comment="Categorie cible"),
        sa.Column("type", sa.String(length=80), nullable=True, comment="Type cible"),
        sa.Column(
            "keywords",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            comment="Mots-cles de matching",
        ),
        sa.Column(
            "sort_order",
            sa.Integer(),
            nullable=False,
            server_default="0",
            comment="Ordre de priorite (0=haut)",
        ),
        sa.Column(
            "is_active",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("true"),
            comment="Regle active",
        ),
        sa.Column(
            "created_at",
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text("CURRENT_TIMESTAMP"),
            comment="Creation timestamp",
        ),
    )
    op.create_index("ix_classification_rule_order", "classification_rules", ["sort_order"])
    op.create_index("ix_classification_rule_active", "classification_rules", ["is_active"])

    # Lightweight table construct used only for the seed insert below.
    rules_table = sa.table(
        "classification_rules",
        sa.column("category", sa.String),
        sa.column("type", sa.String),
        sa.column("keywords", postgresql.JSONB),
        sa.column("sort_order", sa.Integer),
        sa.column("is_active", sa.Boolean),
        sa.column("created_at", sa.TIMESTAMP),
    )

    # NOTE(review): this is a timezone-aware value written to a TIMESTAMP column
    # declared without a time zone; the stored wall-clock value may depend on
    # the database session time zone — confirm this matches what the
    # application writes for created_at.
    now = datetime.now(timezone.utc)

    # (type, keywords) pairs; the list position becomes the rule's sort_order.
    seed_rules = [
        ("Ecran", ["ecran", "moniteur", "display"]),
        ("PC portable", ["pc portable", "ordinateur portable", "laptop", "notebook"]),
        ("Unite centrale", ["unite centrale", "tour", "desktop", "pc fixe"]),
        ("Clavier", ["clavier", "keyboard"]),
        ("Souris", ["souris", "mouse"]),
    ]
    op.bulk_insert(
        rules_table,
        [
            {
                "category": "Informatique",
                "type": rule_type,
                "keywords": keywords,
                "sort_order": position,
                "is_active": True,
                "created_at": now,
            }
            for position, (rule_type, keywords) in enumerate(seed_rules)
        ],
    )
def downgrade() -> None:
    """Drop the ``classification_rules`` table and indexes, then ``products.type``."""
    rules_table = "classification_rules"
    # Indexes are removed before the table, in reverse order of creation.
    for index_name in ("ix_classification_rule_active", "ix_classification_rule_order"):
        op.drop_index(index_name, table_name=rules_table)
    op.drop_table(rules_table)
    op.drop_column("products", "type")

View File

@@ -0,0 +1,28 @@
"""Ajout champs Amazon produit
Revision ID: 3e68b0f0c9e4
Revises: 0014e51c4927
Create Date: 2026-01-17 19:45:03.730218
"""
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
# `down_revision` names the parent revision this script is applied on top of.
revision = '3e68b0f0c9e4'
down_revision = '0014e51c4927'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Apply this revision; it carries no schema operations."""
    return None
def downgrade() -> None:
    """Revert this revision; there is nothing to undo."""
    return None

View File

@@ -84,6 +84,36 @@ class Product(Base):
msrp: Mapped[Optional[Decimal]] = mapped_column(
Numeric(10, 2), nullable=True, comment="Recommended price"
)
type: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Product type"
)
rating_value: Mapped[Optional[Decimal]] = mapped_column(
Numeric(3, 2), nullable=True, comment="Note moyenne"
)
rating_count: Mapped[Optional[int]] = mapped_column(
Integer, nullable=True, comment="Nombre d'evaluations"
)
amazon_choice: Mapped[Optional[bool]] = mapped_column(
Boolean, nullable=True, comment="Badge Choix d'Amazon"
)
amazon_choice_label: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Libelle Choix d'Amazon"
)
discount_text: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Texte de reduction affiche"
)
stock_text: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Texte brut du stock"
)
in_stock: Mapped[Optional[bool]] = mapped_column(
Boolean, nullable=True, comment="Disponibilite derivee"
)
model_number: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Numero du modele"
)
model_name: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Nom du modele"
)
# Timestamps
first_seen_at: Mapped[datetime] = mapped_column(
@@ -331,6 +361,45 @@ class ScrapingLog(Base):
return f"<ScrapingLog(id={self.id}, url={self.url}, status={self.fetch_status}, fetched_at={self.fetched_at})>"
class ClassificationRule(Base):
    """Keyword-based rule that assigns a category and/or type.

    Each row holds a list of keywords plus the category/type values it targets,
    an ordering value and an active flag.
    """

    __tablename__ = "classification_rules"
    __table_args__ = (
        Index("ix_classification_rule_order", "sort_order"),
        Index("ix_classification_rule_active", "is_active"),
    )

    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )

    # Target values; both are optional.
    category: Mapped[Optional[str]] = mapped_column(
        String(80),
        nullable=True,
        comment="Categorie cible",
    )
    type: Mapped[Optional[str]] = mapped_column(
        String(80),
        nullable=True,
        comment="Type cible",
    )

    # Stored as JSONB on PostgreSQL and generic JSON on other dialects.
    keywords: Mapped[list[str]] = mapped_column(
        JSON().with_variant(JSONB, "postgresql"),
        nullable=False,
        default=list,
        comment="Mots-cles de matching",
    )

    sort_order: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
        comment="Ordre de priorite (0=haut)",
    )
    is_active: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=True,
        comment="Regle active",
    )
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP,
        nullable=False,
        default=utcnow,
        comment="Creation timestamp",
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, category and type."""
        details = f"id={self.id}, category={self.category}, type={self.type}"
        return f"<ClassificationRule({details})>"
class Webhook(Base):
"""
Webhooks pour notifications externes.

View File

@@ -13,7 +13,14 @@ from sqlalchemy.orm import Session
from pricewatch.app.core.logging import get_logger
from pricewatch.app.core.schema import ProductSnapshot
from pricewatch.app.db.models import PriceHistory, Product, ProductImage, ProductSpec, ScrapingLog
from pricewatch.app.db.models import (
ClassificationRule,
PriceHistory,
Product,
ProductImage,
ProductSpec,
ScrapingLog,
)
logger = get_logger("db.repository")
@@ -49,12 +56,58 @@ class ProductRepository:
product.title = snapshot.title
if snapshot.category:
product.category = snapshot.category
if snapshot.type:
product.type = snapshot.type
if snapshot.description:
product.description = snapshot.description
if snapshot.currency:
product.currency = snapshot.currency
if snapshot.msrp is not None:
product.msrp = snapshot.msrp
if snapshot.rating_value is not None:
product.rating_value = snapshot.rating_value
if snapshot.rating_count is not None:
product.rating_count = snapshot.rating_count
if snapshot.amazon_choice is not None:
product.amazon_choice = snapshot.amazon_choice
if snapshot.amazon_choice_label:
product.amazon_choice_label = snapshot.amazon_choice_label
if snapshot.discount_text:
product.discount_text = snapshot.discount_text
if snapshot.stock_text:
product.stock_text = snapshot.stock_text
if snapshot.in_stock is not None:
product.in_stock = snapshot.in_stock
if snapshot.model_number:
product.model_number = snapshot.model_number
if snapshot.model_name:
product.model_name = snapshot.model_name
def apply_classification(self, snapshot: ProductSnapshot) -> None:
    """Apply the first matching active classification rule to the snapshot.

    Active rules are loaded ordered by ``sort_order`` then ``id``. A rule
    matches when any of its keywords occurs, case-insensitively, as a
    substring of the snapshot title. Only the first matching rule is applied:
    its non-empty ``category`` and ``type`` overwrite the snapshot's values,
    and later rules are not evaluated.

    Args:
        snapshot: Snapshot to classify; modified in place. Nothing happens
            when it has no title.
    """
    if not snapshot.title:
        return

    active_rules = (
        self.session.query(ClassificationRule)
        .filter(ClassificationRule.is_active.is_(True))
        .order_by(ClassificationRule.sort_order, ClassificationRule.id)
        .all()
    )

    title = snapshot.title.lower()
    for rule in active_rules:
        keywords = rule.keywords or []
        # A bare string is treated as a single keyword rather than iterated
        # character by character.
        if isinstance(keywords, str):
            keywords = [keywords]

        # Matching is a plain substring test, so a short keyword can also
        # match inside a longer word. Empty and non-string entries are skipped
        # instead of raising on `.lower()`.
        matched = any(
            isinstance(keyword, str) and keyword and keyword.lower() in title
            for keyword in keywords
        )
        if not matched:
            continue

        if rule.category:
            snapshot.category = rule.category
        if rule.type:
            snapshot.type = rule.type
        # First match wins, even when the rule sets neither field.
        return
def add_price_history(self, product: Product, snapshot: ProductSnapshot) -> Optional[PriceHistory]:
"""Ajoute une entree d'historique de prix si inexistante."""