before claude

2026-01-18 06:26:17 +01:00
parent dc19315e5d
commit 740c3d7516
60 changed files with 3815 additions and 354 deletions
@@ -22,6 +22,10 @@ from sqlalchemy.orm import Session

 from pricewatch.app.api.schemas import (
    BackendLogEntry,
+    ClassificationOptionsOut,
+    ClassificationRuleCreate,
+    ClassificationRuleOut,
+    ClassificationRuleUpdate,
    EnqueueRequest,
    EnqueueResponse,
    HealthStatus,
@@ -52,7 +56,8 @@ from pricewatch.app.core.config import get_config
 from pricewatch.app.core.logging import get_logger
 from pricewatch.app.core.schema import ProductSnapshot
 from pricewatch.app.db.connection import check_db_connection, get_session
-from pricewatch.app.db.models import PriceHistory, Product, ScrapingLog, Webhook
+from pricewatch.app.db.models import ClassificationRule, PriceHistory, Product, ScrapingLog, Webhook
+from pricewatch.app.db.repository import ProductRepository
 from pricewatch.app.scraping.pipeline import ScrapingPipeline
 from pricewatch.app.tasks.scrape import scrape_product
 from pricewatch.app.tasks.scheduler import RedisUnavailableError, check_redis_connection, ScrapingScheduler
@@ -188,6 +193,7 @@ def create_product(
        url=payload.url,
        title=payload.title,
        category=payload.category,
+        type=payload.type,
        description=payload.description,
        currency=payload.currency,
        msrp=payload.msrp,
@@ -241,6 +247,129 @@ def update_product(
    return _product_to_out(session, product)


+@app.get(
+    "/classification/rules",
+    response_model=list[ClassificationRuleOut],
+    dependencies=[Depends(require_token)],
+)
+def list_classification_rules(
+    session: Session = Depends(get_db_session),
+) -> list[ClassificationRuleOut]:
+    """Return every classification rule, ordered by sort_order and then id."""
+    ordered_query = session.query(ClassificationRule).order_by(
+        ClassificationRule.sort_order, ClassificationRule.id
+    )
+    serialized: list[ClassificationRuleOut] = []
+    for row in ordered_query.all():
+        # Stored keywords may be null/empty; the API always exposes a list.
+        item = ClassificationRuleOut(
+            id=row.id,
+            category=row.category,
+            type=row.type,
+            keywords=row.keywords or [],
+            sort_order=row.sort_order,
+            is_active=row.is_active,
+        )
+        serialized.append(item)
+    return serialized
+
+
+@app.post(
+    "/classification/rules",
+    response_model=ClassificationRuleOut,
+    dependencies=[Depends(require_token)],
+)
+def create_classification_rule(
+    payload: ClassificationRuleCreate,
+    session: Session = Depends(get_db_session),
+) -> ClassificationRuleOut:
+    """Persist a new classification rule and return it as stored."""
+    # A null (or zero) sort_order falls back to 0; a null is_active means active.
+    position = payload.sort_order or 0
+    active = payload.is_active if payload.is_active is not None else True
+    created = ClassificationRule(
+        category=payload.category,
+        type=payload.type,
+        keywords=payload.keywords,
+        sort_order=position,
+        is_active=active,
+    )
+    session.add(created)
+    session.commit()
+    # Reload the row after the commit so the response reflects what was stored.
+    session.refresh(created)
+    stored_keywords = created.keywords or []
+    return ClassificationRuleOut(
+        id=created.id,
+        category=created.category,
+        type=created.type,
+        keywords=stored_keywords,
+        sort_order=created.sort_order,
+        is_active=created.is_active,
+    )
+
+
+@app.patch(
+    "/classification/rules/{rule_id}",
+    response_model=ClassificationRuleOut,
+    dependencies=[Depends(require_token)],
+)
+def update_classification_rule(
+    rule_id: int,
+    payload: ClassificationRuleUpdate,
+    session: Session = Depends(get_db_session),
+) -> ClassificationRuleOut:
+    """Partially update a classification rule.
+
+    Only fields explicitly present in the request body are applied
+    (``exclude_unset``). ``category`` and ``type`` may be sent as null to
+    clear them. ``keywords``, ``sort_order`` and ``is_active`` are declared
+    NOT NULL by the ``classification_rules`` migration, so an explicit null
+    for any of them is rejected before the row is touched.
+
+    Raises:
+        HTTPException: 404 when no rule has ``rule_id``; 422 when a
+            non-nullable field is sent as null.
+    """
+    rule = session.query(ClassificationRule).filter(ClassificationRule.id == rule_id).one_or_none()
+    if rule is None:
+        raise HTTPException(status_code=404, detail="Regle non trouvee")
+    updates = payload.model_dump(exclude_unset=True)
+    # Without this guard the null reaches the database and only surfaces as an
+    # integrity error (HTTP 500) at commit time.
+    null_fields = [
+        key
+        for key in ("keywords", "sort_order", "is_active")
+        if key in updates and updates[key] is None
+    ]
+    if null_fields:
+        raise HTTPException(
+            status_code=422,
+            detail=f"Valeur nulle interdite pour: {', '.join(null_fields)}",
+        )
+    for key, value in updates.items():
+        setattr(rule, key, value)
+    session.commit()
+    session.refresh(rule)
+    return ClassificationRuleOut(
+        id=rule.id,
+        category=rule.category,
+        type=rule.type,
+        keywords=rule.keywords or [],
+        sort_order=rule.sort_order,
+        is_active=rule.is_active,
+    )
+
+
+@app.delete(
+    "/classification/rules/{rule_id}",
+    dependencies=[Depends(require_token)],
+)
+def delete_classification_rule(
+    rule_id: int,
+    session: Session = Depends(get_db_session),
+) -> dict[str, str]:
+    """Delete the classification rule identified by ``rule_id``.
+
+    Raises:
+        HTTPException: 404 when no such rule exists.
+    """
+    matching = session.query(ClassificationRule).filter(ClassificationRule.id == rule_id)
+    target = matching.one_or_none()
+    if target:
+        session.delete(target)
+        session.commit()
+        return {"status": "deleted"}
+    raise HTTPException(status_code=404, detail="Regle non trouvee")
+
+
+@app.get(
+    "/classification/options",
+    response_model=ClassificationOptionsOut,
+    dependencies=[Depends(require_token)],
+)
+def get_classification_options(
+    session: Session = Depends(get_db_session),
+) -> ClassificationOptionsOut:
+    """Return the distinct categories and types used by active rules.
+
+    Both lists are de-duplicated and sorted alphabetically; rules whose
+    category (or type) is empty contribute nothing to that list.
+    """
+    # ``.is_(True)`` instead of ``== True``: same rows for a NOT NULL boolean,
+    # but safe from linters that rewrite ``== True`` into ``is True``.
+    # No ORDER BY: the values are collected into sets and re-sorted below,
+    # so the row order is irrelevant.
+    rules = (
+        session.query(ClassificationRule)
+        .filter(ClassificationRule.is_active.is_(True))
+        .all()
+    )
+    categories = sorted({rule.category for rule in rules if rule.category})
+    types = sorted({rule.type for rule in rules if rule.type})
+    return ClassificationOptionsOut(categories=categories, types=types)
+
+
@app.delete("/products/{product_id}", dependencies=[Depends(require_token)])
 def delete_product(
    product_id: int,
@@ -703,6 +832,13 @@ def preview_scrape(payload: ScrapePreviewRequest) -> ScrapePreviewResponse:
    if snapshot is None:
        _add_backend_log("ERROR", f"Preview scraping KO: {payload.url}")
        return ScrapePreviewResponse(success=False, snapshot=None, error=result.get("error"))
+    config = get_config()
+    if config.enable_db:
+        try:
+            with get_session(config) as session:
+                ProductRepository(session).apply_classification(snapshot)
+        except Exception as exc:
+            snapshot.add_note(f"Classification ignoree: {exc}")
    return ScrapePreviewResponse(
        success=bool(result.get("success")),
        snapshot=snapshot.model_dump(mode="json"),
@@ -719,7 +855,9 @@ def commit_scrape(payload: ScrapeCommitRequest) -> ScrapeCommitResponse:
        _add_backend_log("ERROR", "Commit scraping KO: snapshot invalide")
        raise HTTPException(status_code=400, detail="Snapshot invalide") from exc

-    product_id = ScrapingPipeline(config=get_config()).process_snapshot(snapshot, save_to_db=True)
+    product_id = ScrapingPipeline(config=get_config()).process_snapshot(
+        snapshot, save_to_db=True, apply_classification=False
+    )
    _add_backend_log("INFO", f"Commit scraping OK: product_id={product_id}")
    return ScrapeCommitResponse(success=True, product_id=product_id)

@@ -808,12 +946,9 @@ def _product_to_out(session: Session, product: Product) -> ProductOut:
    )
    images = [image.image_url for image in product.images]
    specs = {spec.spec_key: spec.spec_value for spec in product.specs}
-    discount_amount = None
-    discount_percent = None
-    if latest and latest.price is not None and product.msrp:
-        discount_amount = float(product.msrp) - float(latest.price)
-        if product.msrp > 0:
-            discount_percent = (discount_amount / float(product.msrp)) * 100
+    main_image = images[0] if images else None
+    gallery_images = images[1:] if len(images) > 1 else []
+    asin = product.reference if product.source == "amazon" else None
    history_rows = (
        session.query(PriceHistory)
        .filter(PriceHistory.product_id == product.id, PriceHistory.price != None)
@@ -830,12 +965,23 @@ def _product_to_out(session: Session, product: Product) -> ProductOut:
        id=product.id,
        source=product.source,
        reference=product.reference,
+        asin=asin,
        url=product.url,
        title=product.title,
        category=product.category,
+        type=product.type,
        description=product.description,
        currency=product.currency,
        msrp=float(product.msrp) if product.msrp is not None else None,
+        rating_value=float(product.rating_value) if product.rating_value is not None else None,
+        rating_count=product.rating_count,
+        amazon_choice=product.amazon_choice,
+        amazon_choice_label=product.amazon_choice_label,
+        discount_text=product.discount_text,
+        stock_text=product.stock_text,
+        in_stock=product.in_stock,
+        model_number=product.model_number,
+        model_name=product.model_name,
        first_seen_at=product.first_seen_at,
        last_updated_at=product.last_updated_at,
        latest_price=float(latest.price) if latest and latest.price is not None else None,
@@ -845,9 +991,11 @@ def _product_to_out(session: Session, product: Product) -> ProductOut:
        latest_stock_status=latest.stock_status if latest else None,
        latest_fetched_at=latest.fetched_at if latest else None,
        images=images,
+        main_image=main_image,
+        gallery_images=gallery_images,
        specs=specs,
-        discount_amount=discount_amount,
-        discount_percent=discount_percent,
+        discount_amount=None,
+        discount_percent=None,
        history=history_points,
    )

@@ -22,12 +22,23 @@ class ProductOut(BaseModel):
    id: int
    source: str
    reference: str
+    asin: Optional[str] = None
    url: str
    title: Optional[str] = None
    category: Optional[str] = None
+    type: Optional[str] = None
    description: Optional[str] = None
    currency: Optional[str] = None
    msrp: Optional[float] = None
+    rating_value: Optional[float] = None
+    rating_count: Optional[int] = None
+    amazon_choice: Optional[bool] = None
+    amazon_choice_label: Optional[str] = None
+    discount_text: Optional[str] = None
+    stock_text: Optional[str] = None
+    in_stock: Optional[bool] = None
+    model_number: Optional[str] = None
+    model_name: Optional[str] = None
    first_seen_at: datetime
    last_updated_at: datetime
    latest_price: Optional[float] = None
@@ -35,6 +46,8 @@ class ProductOut(BaseModel):
    latest_stock_status: Optional[str] = None
    latest_fetched_at: Optional[datetime] = None
    images: list[str] = []
+    main_image: Optional[str] = None
+    gallery_images: list[str] = []
    specs: dict[str, str] = {}
    discount_amount: Optional[float] = None
    discount_percent: Optional[float] = None
@@ -47,6 +60,7 @@ class ProductCreate(BaseModel):
    url: str
    title: Optional[str] = None
    category: Optional[str] = None
+    type: Optional[str] = None
    description: Optional[str] = None
    currency: Optional[str] = None
    msrp: Optional[float] = None
@@ -56,6 +70,7 @@ class ProductUpdate(BaseModel):
    url: Optional[str] = None
    title: Optional[str] = None
    category: Optional[str] = None
+    type: Optional[str] = None
    description: Optional[str] = None
    currency: Optional[str] = None
    msrp: Optional[float] = None
@@ -208,6 +223,36 @@ class VersionResponse(BaseModel):
    api_version: str


+class ClassificationRuleOut(BaseModel):
+    """Classification rule as serialized in API responses."""
+
+    id: int
+    # Category / type the rule targets; either may be absent.
+    category: Optional[str] = None
+    type: Optional[str] = None
+    # Matching keywords; an empty list when none are set.
+    keywords: list[str] = Field(default_factory=list)
+    # Rules are listed by sort_order, then id.
+    sort_order: int = 0
+    is_active: bool = True
+
+
+class ClassificationRuleCreate(BaseModel):
+    """Request body for creating a classification rule; every field is optional."""
+
+    category: Optional[str] = None
+    type: Optional[str] = None
+    keywords: list[str] = Field(default_factory=list)
+    # An explicit null is accepted for the two fields below; the create
+    # endpoint then falls back to 0 and True respectively.
+    sort_order: Optional[int] = 0
+    is_active: Optional[bool] = True
+
+
+class ClassificationRuleUpdate(BaseModel):
+    """Request body for partially updating a classification rule.
+
+    All fields default to None; the PATCH endpoint dumps this model with
+    ``exclude_unset=True``, so only fields actually sent by the client are
+    applied to the stored rule.
+    """
+
+    category: Optional[str] = None
+    type: Optional[str] = None
+    keywords: Optional[list[str]] = None
+    sort_order: Optional[int] = None
+    is_active: Optional[bool] = None
+
+
+class ClassificationOptionsOut(BaseModel):
+    """Category and type values offered as classification choices."""
+
+    # Both lists default to empty; the options endpoint fills them with the
+    # sorted, de-duplicated values found on active rules.
+    categories: list[str] = Field(default_factory=list)
+    types: list[str] = Field(default_factory=list)
+
+
 class BackendLogEntry(BaseModel):
    time: datetime
    level: str
@@ -93,13 +93,52 @@ class ProductSnapshot(BaseModel):
    reference: Optional[str] = Field(
        default=None, description="Référence produit (ASIN, SKU, etc.)"
    )
+    asin: Optional[str] = Field(
+        default=None, description="ASIN Amazon si disponible"
+    )
    category: Optional[str] = Field(default=None, description="Catégorie du produit")
+    type: Optional[str] = Field(default=None, description="Type du produit")
    description: Optional[str] = Field(default=None, description="Description produit")

+    # Données Amazon explicites (si disponibles)
+    rating_value: Optional[float] = Field(
+        default=None, description="Note moyenne affichée"
+    )
+    rating_count: Optional[int] = Field(
+        default=None, description="Nombre d'évaluations"
+    )
+    amazon_choice: Optional[bool] = Field(
+        default=None, description="Badge Choix d'Amazon présent"
+    )
+    amazon_choice_label: Optional[str] = Field(
+        default=None, description="Libellé du badge Choix d'Amazon"
+    )
+    discount_text: Optional[str] = Field(
+        default=None, description="Texte de réduction affiché"
+    )
+    stock_text: Optional[str] = Field(
+        default=None, description="Texte brut de stock"
+    )
+    in_stock: Optional[bool] = Field(
+        default=None, description="Disponibilité dérivée"
+    )
+    model_number: Optional[str] = Field(
+        default=None, description="Numéro du modèle de l'article"
+    )
+    model_name: Optional[str] = Field(
+        default=None, description="Nom du modèle explicite"
+    )
+
    # Médias
    images: list[str] = Field(
        default_factory=list, description="Liste des URLs d'images du produit"
    )
+    main_image: Optional[str] = Field(
+        default=None, description="Image principale du produit"
+    )
+    gallery_images: list[str] = Field(
+        default_factory=list, description="Images de galerie dédoublonnées"
+    )

    # Caractéristiques techniques
    specs: dict[str, str] = Field(
@@ -134,6 +173,12 @@ class ProductSnapshot(BaseModel):
        """Filtre les URLs d'images vides."""
        return [url.strip() for url in v if url and url.strip()]

+    @field_validator("gallery_images")
+    @classmethod
+    def validate_gallery_images(cls, v: list[str]) -> list[str]:
+        """Trim gallery URLs and drop the empty or whitespace-only ones."""
+        trimmed = (entry.strip() for entry in v if entry)
+        return [entry for entry in trimmed if entry]
+
    model_config = ConfigDict(
        use_enum_values=True,
        json_schema_extra={
@@ -0,0 +1,350 @@
+"""Ajout champs Amazon produit
+
+Revision ID: 0014e51c4927
+Revises: 20260115_02_product_details
+Create Date: 2026-01-17 19:23:01.866891
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# Revision identifiers, used by Alembic.
+revision = '0014e51c4927'
+down_revision = '20260115_02_product_details'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add the Amazon-specific product columns and attach column comments.
+
+    Operations run in the same order as the autogenerated script: comments on
+    price_history, product_images and product_specs, then the new nullable
+    columns on products, then comments on products, scraping_logs and webhooks.
+    """
+
+    def set_comments(table, rows):
+        # Each row: (column, existing type, comment to attach, existing nullability).
+        for column, existing_type, comment, nullable in rows:
+            op.alter_column(table, column,
+                       existing_type=existing_type,
+                       comment=comment,
+                       existing_nullable=nullable)
+
+    set_comments('price_history', [
+        ('price', sa.NUMERIC(precision=10, scale=2), 'Product price', True),
+        ('shipping_cost', sa.NUMERIC(precision=10, scale=2), 'Shipping cost', True),
+        ('stock_status', sa.VARCHAR(length=20), 'Stock status (in_stock, out_of_stock, unknown)', True),
+        ('fetch_method', sa.VARCHAR(length=20), 'Fetch method (http, playwright)', False),
+        ('fetch_status', sa.VARCHAR(length=20), 'Fetch status (success, partial, failed)', False),
+        ('fetched_at', postgresql.TIMESTAMP(), 'Scraping timestamp', False),
+    ])
+    set_comments('product_images', [
+        ('image_url', sa.TEXT(), 'Image URL', False),
+        ('position', sa.INTEGER(), 'Image position (0=main)', False),
+    ])
+    set_comments('product_specs', [
+        ('spec_key', sa.VARCHAR(length=200), "Specification key (e.g., 'Brand', 'Color')", False),
+        ('spec_value', sa.TEXT(), 'Specification value', False),
+    ])
+
+    # New Amazon-related columns, all nullable so existing rows stay valid.
+    new_product_columns = [
+        sa.Column('rating_value', sa.Numeric(precision=3, scale=2), nullable=True, comment='Note moyenne'),
+        sa.Column('rating_count', sa.Integer(), nullable=True, comment="Nombre d'evaluations"),
+        sa.Column('amazon_choice', sa.Boolean(), nullable=True, comment="Badge Choix d'Amazon"),
+        sa.Column('amazon_choice_label', sa.Text(), nullable=True, comment="Libelle Choix d'Amazon"),
+        sa.Column('discount_text', sa.Text(), nullable=True, comment='Texte de reduction affiche'),
+        sa.Column('stock_text', sa.Text(), nullable=True, comment='Texte brut du stock'),
+        sa.Column('in_stock', sa.Boolean(), nullable=True, comment='Disponibilite derivee'),
+        sa.Column('model_number', sa.Text(), nullable=True, comment='Numero du modele'),
+        sa.Column('model_name', sa.Text(), nullable=True, comment='Nom du modele'),
+    ]
+    for new_column in new_product_columns:
+        op.add_column('products', new_column)
+
+    set_comments('products', [
+        ('source', sa.VARCHAR(length=50), 'Store ID (amazon, cdiscount, etc.)', False),
+        ('reference', sa.VARCHAR(length=100), 'Product reference (ASIN, SKU, etc.)', False),
+        ('url', sa.TEXT(), 'Canonical product URL', False),
+        ('title', sa.TEXT(), 'Product title', True),
+        ('category', sa.TEXT(), 'Product category (breadcrumb)', True),
+        ('description', sa.TEXT(), 'Product description', True),
+        ('currency', sa.VARCHAR(length=3), 'Currency code (EUR, USD, GBP)', True),
+        ('msrp', sa.NUMERIC(precision=10, scale=2), 'Recommended price', True),
+        ('first_seen_at', postgresql.TIMESTAMP(), 'First scraping timestamp', False),
+        ('last_updated_at', postgresql.TIMESTAMP(), 'Last metadata update', False),
+    ])
+    set_comments('scraping_logs', [
+        ('url', sa.TEXT(), 'Scraped URL', False),
+        ('source', sa.VARCHAR(length=50), 'Store ID (amazon, cdiscount, etc.)', False),
+        ('reference', sa.VARCHAR(length=100), 'Product reference (if extracted)', True),
+        ('fetch_method', sa.VARCHAR(length=20), 'Fetch method (http, playwright)', False),
+        ('fetch_status', sa.VARCHAR(length=20), 'Fetch status (success, partial, failed)', False),
+        ('fetched_at', postgresql.TIMESTAMP(), 'Scraping timestamp', False),
+        ('duration_ms', sa.INTEGER(), 'Fetch duration in milliseconds', True),
+        ('html_size_bytes', sa.INTEGER(), 'HTML response size in bytes', True),
+        ('errors', postgresql.JSONB(astext_type=sa.Text()), 'Error messages (list of strings)', True),
+        ('notes', postgresql.JSONB(astext_type=sa.Text()), 'Debug notes (list of strings)', True),
+    ])
+    set_comments('webhooks', [
+        ('event', sa.VARCHAR(length=50), 'Event name', False),
+        ('url', sa.TEXT(), 'Webhook URL', False),
+        ('secret', sa.VARCHAR(length=200), 'Secret optionnel', True),
+        ('created_at', postgresql.TIMESTAMP(), 'Creation timestamp', False),
+    ])
+
+
+def downgrade() -> None:
+    """Remove the column comments and drop the Amazon-specific product columns.
+
+    Operations run in the same order as the autogenerated script, i.e. the
+    exact reverse of the upgrade: webhooks, scraping_logs and products
+    comments, then the products column drops, then product_specs,
+    product_images and price_history comments.
+    """
+
+    def clear_comments(table, rows):
+        # Each row: (column, existing type, comment being removed, existing nullability).
+        for column, existing_type, old_comment, nullable in rows:
+            op.alter_column(table, column,
+                       existing_type=existing_type,
+                       comment=None,
+                       existing_comment=old_comment,
+                       existing_nullable=nullable)
+
+    clear_comments('webhooks', [
+        ('created_at', postgresql.TIMESTAMP(), 'Creation timestamp', False),
+        ('secret', sa.VARCHAR(length=200), 'Secret optionnel', True),
+        ('url', sa.TEXT(), 'Webhook URL', False),
+        ('event', sa.VARCHAR(length=50), 'Event name', False),
+    ])
+    clear_comments('scraping_logs', [
+        ('notes', postgresql.JSONB(astext_type=sa.Text()), 'Debug notes (list of strings)', True),
+        ('errors', postgresql.JSONB(astext_type=sa.Text()), 'Error messages (list of strings)', True),
+        ('html_size_bytes', sa.INTEGER(), 'HTML response size in bytes', True),
+        ('duration_ms', sa.INTEGER(), 'Fetch duration in milliseconds', True),
+        ('fetched_at', postgresql.TIMESTAMP(), 'Scraping timestamp', False),
+        ('fetch_status', sa.VARCHAR(length=20), 'Fetch status (success, partial, failed)', False),
+        ('fetch_method', sa.VARCHAR(length=20), 'Fetch method (http, playwright)', False),
+        ('reference', sa.VARCHAR(length=100), 'Product reference (if extracted)', True),
+        ('source', sa.VARCHAR(length=50), 'Store ID (amazon, cdiscount, etc.)', False),
+        ('url', sa.TEXT(), 'Scraped URL', False),
+    ])
+    clear_comments('products', [
+        ('last_updated_at', postgresql.TIMESTAMP(), 'Last metadata update', False),
+        ('first_seen_at', postgresql.TIMESTAMP(), 'First scraping timestamp', False),
+        ('msrp', sa.NUMERIC(precision=10, scale=2), 'Recommended price', True),
+        ('currency', sa.VARCHAR(length=3), 'Currency code (EUR, USD, GBP)', True),
+        ('description', sa.TEXT(), 'Product description', True),
+        ('category', sa.TEXT(), 'Product category (breadcrumb)', True),
+        ('title', sa.TEXT(), 'Product title', True),
+        ('url', sa.TEXT(), 'Canonical product URL', False),
+        ('reference', sa.VARCHAR(length=100), 'Product reference (ASIN, SKU, etc.)', False),
+        ('source', sa.VARCHAR(length=50), 'Store ID (amazon, cdiscount, etc.)', False),
+    ])
+
+    # Drop the columns added by upgrade(), last-added first.
+    dropped_product_columns = (
+        'model_name',
+        'model_number',
+        'in_stock',
+        'stock_text',
+        'discount_text',
+        'amazon_choice_label',
+        'amazon_choice',
+        'rating_count',
+        'rating_value',
+    )
+    for column_name in dropped_product_columns:
+        op.drop_column('products', column_name)
+
+    clear_comments('product_specs', [
+        ('spec_value', sa.TEXT(), 'Specification value', False),
+        ('spec_key', sa.VARCHAR(length=200), "Specification key (e.g., 'Brand', 'Color')", False),
+    ])
+    clear_comments('product_images', [
+        ('position', sa.INTEGER(), 'Image position (0=main)', False),
+        ('image_url', sa.TEXT(), 'Image URL', False),
+    ])
+    clear_comments('price_history', [
+        ('fetched_at', postgresql.TIMESTAMP(), 'Scraping timestamp', False),
+        ('fetch_status', sa.VARCHAR(length=20), 'Fetch status (success, partial, failed)', False),
+        ('fetch_method', sa.VARCHAR(length=20), 'Fetch method (http, playwright)', False),
+        ('stock_status', sa.VARCHAR(length=20), 'Stock status (in_stock, out_of_stock, unknown)', True),
+        ('shipping_cost', sa.NUMERIC(precision=10, scale=2), 'Shipping cost', True),
+        ('price', sa.NUMERIC(precision=10, scale=2), 'Product price', True),
+    ])
@@ -0,0 +1,28 @@
+"""Ajout champs Amazon produit
+
+Revision ID: 1467e98fcbea
+Revises: 3e68b0f0c9e4
+Create Date: 2026-01-17 20:08:32.991650
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# Revision identifiers, used by Alembic.
+revision = '1467e98fcbea'
+down_revision = '3e68b0f0c9e4'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """No-op: the autogenerated command list for this revision is empty."""
+    # NOTE(review): this revision declares down_revision '3e68b0f0c9e4', and so
+    # does revision 20260117_03_classification_rules in the same commit. Unless
+    # a merge revision exists elsewhere, Alembic would end up with two heads.
+    # TODO confirm, and consider deleting this empty revision.
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """No-op: upgrade() changes nothing, so there is nothing to revert."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
@@ -0,0 +1,114 @@
+"""Ajout classification rules et type produit
+
+Revision ID: 20260117_03_classification_rules
+Revises: 1467e98fcbea
+Create Date: 2026-01-17 20:05:00.000000
+"""
+
+from datetime import datetime, timezone
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+
+# Revision identifiers, used by Alembic.
+# This revision is chained after 1467e98fcbea (an empty revision) rather than
+# directly after 3e68b0f0c9e4: 1467e98fcbea already revises 3e68b0f0c9e4, and
+# two revisions sharing one parent would leave the history with two heads.
+revision = "20260117_03_classification_rules"
+down_revision = "1467e98fcbea"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add ``products.type`` and create the seeded ``classification_rules`` table.
+
+    Steps, in order:
+      1. add the nullable ``type`` text column to ``products``;
+      2. create ``classification_rules`` together with its two indexes;
+      3. insert the five default "Informatique" rules.
+    """
+    op.add_column(
+        "products",
+        sa.Column("type", sa.Text(), nullable=True, comment="Product type"),
+    )
+
+    op.create_table(
+        "classification_rules",
+        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
+        sa.Column("category", sa.String(length=80), nullable=True, comment="Categorie cible"),
+        sa.Column("type", sa.String(length=80), nullable=True, comment="Type cible"),
+        sa.Column(
+            "keywords",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=False,
+            comment="Mots-cles de matching",
+        ),
+        # The comments on the next two columns repeat the ones declared on the
+        # ClassificationRule model, so the created table matches its metadata.
+        sa.Column(
+            "sort_order",
+            sa.Integer(),
+            nullable=False,
+            server_default="0",
+            comment="Ordre de priorite (0=haut)",
+        ),
+        sa.Column(
+            "is_active",
+            sa.Boolean(),
+            nullable=False,
+            server_default=sa.text("true"),
+            comment="Regle active",
+        ),
+        sa.Column(
+            "created_at",
+            sa.TIMESTAMP(),
+            nullable=False,
+            server_default=sa.text("CURRENT_TIMESTAMP"),
+            comment="Creation timestamp",
+        ),
+    )
+    op.create_index("ix_classification_rule_order", "classification_rules", ["sort_order"])
+    op.create_index("ix_classification_rule_active", "classification_rules", ["is_active"])
+
+    # Lightweight table construct: only what bulk_insert needs to bind values.
+    rules_table = sa.table(
+        "classification_rules",
+        sa.column("category", sa.String),
+        sa.column("type", sa.String),
+        sa.column("keywords", postgresql.JSONB),
+        sa.column("sort_order", sa.Integer),
+        sa.column("is_active", sa.Boolean),
+        sa.column("created_at", sa.TIMESTAMP),
+    )
+
+    # (type, keywords) pairs; the position in this tuple becomes sort_order.
+    default_rules = (
+        ("Ecran", ["ecran", "moniteur", "display"]),
+        ("PC portable", ["pc portable", "ordinateur portable", "laptop", "notebook"]),
+        ("Unite centrale", ["unite centrale", "tour", "desktop", "pc fixe"]),
+        ("Clavier", ["clavier", "keyboard"]),
+        ("Souris", ["souris", "mouse"]),
+    )
+
+    # One timestamp shared by every seeded row.
+    now = datetime.now(timezone.utc)
+    op.bulk_insert(
+        rules_table,
+        [
+            {
+                "category": "Informatique",
+                "type": rule_type,
+                "keywords": keywords,
+                "sort_order": position,
+                "is_active": True,
+                "created_at": now,
+            }
+            for position, (rule_type, keywords) in enumerate(default_rules)
+        ],
+    )
+
+
+def downgrade() -> None:
+    """Revert upgrade(): drop the rule indexes and table, then ``products.type``."""
+    # Objects are removed in the reverse order of their creation; the seeded
+    # rules disappear together with the table.
+    op.drop_index("ix_classification_rule_active", table_name="classification_rules")
+    op.drop_index("ix_classification_rule_order", table_name="classification_rules")
+    op.drop_table("classification_rules")
+    op.drop_column("products", "type")
@@ -0,0 +1,28 @@
+"""Ajout champs Amazon produit
+
+Revision ID: 3e68b0f0c9e4
+Revises: 0014e51c4927
+Create Date: 2026-01-17 19:45:03.730218
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# Revision identifiers, used by Alembic.
+revision = '3e68b0f0c9e4'
+down_revision = '0014e51c4927'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Apply revision 3e68b0f0c9e4; currently performs no schema operation.
+
+    NOTE(review): the revision message announces Amazon product fields, yet
+    nothing is issued here -- confirm that the corresponding columns are
+    created by another revision.
+    """
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Revert revision 3e68b0f0c9e4; a no-op, mirroring the empty upgrade()."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
@@ -84,6 +84,36 @@ class Product(Base):
    msrp: Mapped[Optional[Decimal]] = mapped_column(
        Numeric(10, 2), nullable=True, comment="Recommended price"
    )
+    type: Mapped[Optional[str]] = mapped_column(
+        Text, nullable=True, comment="Product type"
+    )
+    rating_value: Mapped[Optional[Decimal]] = mapped_column(
+        Numeric(3, 2), nullable=True, comment="Note moyenne"
+    )
+    rating_count: Mapped[Optional[int]] = mapped_column(
+        Integer, nullable=True, comment="Nombre d'evaluations"
+    )
+    amazon_choice: Mapped[Optional[bool]] = mapped_column(
+        Boolean, nullable=True, comment="Badge Choix d'Amazon"
+    )
+    amazon_choice_label: Mapped[Optional[str]] = mapped_column(
+        Text, nullable=True, comment="Libelle Choix d'Amazon"
+    )
+    discount_text: Mapped[Optional[str]] = mapped_column(
+        Text, nullable=True, comment="Texte de reduction affiche"
+    )
+    stock_text: Mapped[Optional[str]] = mapped_column(
+        Text, nullable=True, comment="Texte brut du stock"
+    )
+    in_stock: Mapped[Optional[bool]] = mapped_column(
+        Boolean, nullable=True, comment="Disponibilite derivee"
+    )
+    model_number: Mapped[Optional[str]] = mapped_column(
+        Text, nullable=True, comment="Numero du modele"
+    )
+    model_name: Mapped[Optional[str]] = mapped_column(
+        Text, nullable=True, comment="Nom du modele"
+    )

    # Timestamps
    first_seen_at: Mapped[datetime] = mapped_column(
@@ -331,6 +361,45 @@ class ScrapingLog(Base):
        return f"<ScrapingLog(id={self.id}, url={self.url}, status={self.fetch_status}, fetched_at={self.fetched_at})>"


+class ClassificationRule(Base):
+    """
+    Keyword-based rule that assigns a category and/or a type.
+
+    Rows are stored in the ``classification_rules`` table. Each rule holds a
+    list of keywords, the target category/type, a ``sort_order`` priority and
+    an ``is_active`` flag.
+    """
+
+    __tablename__ = "classification_rules"
+
+    # Auto-incremented surrogate primary key.
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    # Target category (nullable, at most 80 characters).
+    category: Mapped[Optional[str]] = mapped_column(
+        String(80), nullable=True, comment="Categorie cible"
+    )
+    # Target type (nullable, at most 80 characters).
+    type: Mapped[Optional[str]] = mapped_column(
+        String(80), nullable=True, comment="Type cible"
+    )
+    # Matching keywords; JSONB on PostgreSQL, generic JSON on other dialects.
+    # A new instance starts with an empty list.
+    keywords: Mapped[list[str]] = mapped_column(
+        JSON().with_variant(JSONB, "postgresql"),
+        nullable=False,
+        default=list,
+        comment="Mots-cles de matching",
+    )
+    # Priority of the rule; per the column comment, 0 is the highest.
+    sort_order: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, comment="Ordre de priorite (0=haut)"
+    )
+    # Whether the rule is enabled; defaults to True.
+    is_active: Mapped[bool] = mapped_column(
+        Boolean, nullable=False, default=True, comment="Regle active"
+    )
+    # Creation timestamp, filled by the utcnow callable (defined outside this class).
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP, nullable=False, default=utcnow, comment="Creation timestamp"
+    )
+
+    # Single-column indexes on the priority and on the active flag.
+    __table_args__ = (
+        Index("ix_classification_rule_order", "sort_order"),
+        Index("ix_classification_rule_active", "is_active"),
+    )
+
+    def __repr__(self) -> str:
+        """Return a short debug representation (id, category, type)."""
+        return f"<ClassificationRule(id={self.id}, category={self.category}, type={self.type})>"
+
+
 class Webhook(Base):
    """
    Webhooks pour notifications externes.
@@ -13,7 +13,14 @@ from sqlalchemy.orm import Session

 from pricewatch.app.core.logging import get_logger
 from pricewatch.app.core.schema import ProductSnapshot
-from pricewatch.app.db.models import PriceHistory, Product, ProductImage, ProductSpec, ScrapingLog
+from pricewatch.app.db.models import (
+    ClassificationRule,
+    PriceHistory,
+    Product,
+    ProductImage,
+    ProductSpec,
+    ScrapingLog,
+)

 logger = get_logger("db.repository")

@@ -49,12 +56,58 @@ class ProductRepository:
            product.title = snapshot.title
        if snapshot.category:
            product.category = snapshot.category
+        if snapshot.type:
+            product.type = snapshot.type
        if snapshot.description:
            product.description = snapshot.description
        if snapshot.currency:
            product.currency = snapshot.currency
        if snapshot.msrp is not None:
            product.msrp = snapshot.msrp
+        if snapshot.rating_value is not None:
+            product.rating_value = snapshot.rating_value
+        if snapshot.rating_count is not None:
+            product.rating_count = snapshot.rating_count
+        if snapshot.amazon_choice is not None:
+            product.amazon_choice = snapshot.amazon_choice
+        if snapshot.amazon_choice_label:
+            product.amazon_choice_label = snapshot.amazon_choice_label
+        if snapshot.discount_text:
+            product.discount_text = snapshot.discount_text
+        if snapshot.stock_text:
+            product.stock_text = snapshot.stock_text
+        if snapshot.in_stock is not None:
+            product.in_stock = snapshot.in_stock
+        if snapshot.model_number:
+            product.model_number = snapshot.model_number
+        if snapshot.model_name:
+            product.model_name = snapshot.model_name
+
+    def apply_classification(self, snapshot: ProductSnapshot) -> None:
+        """Apply the first matching active classification rule to ``snapshot``.
+
+        Active rules are loaded ordered by ``sort_order`` then ``id``. A rule
+        matches when one of its keywords is a case-insensitive substring of
+        the snapshot title. Only the first matching rule is applied: its
+        non-empty ``category`` / ``type`` overwrite the snapshot values, and
+        the snapshot is mutated in place. Nothing happens when the snapshot
+        has no title, when no rule is active, or when no rule matches.
+
+        Keywords that are not strings, or that are blank, are ignored so that
+        a malformed JSON entry can neither raise nor match every title.
+
+        Args:
+            snapshot: Snapshot to classify.
+        """
+        if not snapshot.title:
+            return
+
+        rules = (
+            self.session.query(ClassificationRule)
+            # is_(True) is the SQLAlchemy operator for a boolean test.
+            .filter(ClassificationRule.is_active.is_(True))
+            .order_by(ClassificationRule.sort_order, ClassificationRule.id)
+            .all()
+        )
+
+        title = snapshot.title.lower()
+        for rule in rules:
+            keywords = rule.keywords or []
+            if isinstance(keywords, str):
+                # Tolerate a bare string stored instead of a list.
+                keywords = [keywords]
+            elif not isinstance(keywords, (list, tuple)):
+                # Unexpected JSON payload (number, object, ...): skip the rule.
+                continue
+
+            needles = [
+                keyword.lower()
+                for keyword in keywords
+                if isinstance(keyword, str) and keyword.strip()
+            ]
+            if not any(needle in title for needle in needles):
+                continue
+
+            if rule.category:
+                snapshot.category = rule.category
+            if rule.type:
+                snapshot.type = rule.type
+            # First matching rule wins.
+            return

    def add_price_history(self, product: Product, snapshot: ProductSnapshot) -> Optional[PriceHistory]:
        """Ajoute une entree d'historique de prix si inexistante."""
@@ -25,7 +25,12 @@ class ScrapingPipeline:
    def __init__(self, config: Optional[AppConfig] = None) -> None:
        self.config = config

-    def process_snapshot(self, snapshot: ProductSnapshot, save_to_db: bool = True) -> Optional[int]:
+    def process_snapshot(
+        self,
+        snapshot: ProductSnapshot,
+        save_to_db: bool = True,
+        apply_classification: bool = True,
+    ) -> Optional[int]:
        """
        Persiste un snapshot en base si active.

@@ -39,6 +44,8 @@ class ScrapingPipeline:
        try:
            with get_session(app_config) as session:
                repo = ProductRepository(session)
+                if apply_classification:
+                    repo.apply_classification(snapshot)
                product_id = repo.safe_save_snapshot(snapshot)
                session.commit()
                return product_id
@@ -15,6 +15,13 @@ price:
  - "#priceblock_dealprice"
  - ".a-price-range .a-price .a-offscreen"

+# Texte de réduction explicite
+discount_text:
+  - "#regularprice_savings"
+  - "#dealprice_savings"
+  - "#savingsPercentage"
+  - "span.savingsPercentage"
+
 # Devise (généralement dans le symbole)
 currency:
  - "span.a-price-symbol"
@@ -32,6 +39,24 @@ stock_status:
  - "#availability"
  - ".a-declarative .a-size-medium"

+# Note moyenne
+rating_value:
+  - "#acrPopover"
+  - "#averageCustomerReviews .a-icon-alt"
+  - "#averageCustomerReviews span.a-icon-alt"
+
+# Nombre d'évaluations
+rating_count:
+  - "#acrCustomerReviewText"
+  - "#acrCustomerReviewLink"
+
+# Badge Choix d'Amazon
+amazon_choice:
+  - "#acBadge_feature_div"
+  - "#acBadge_feature_div .ac-badge"
+  - "#acBadge_feature_div .ac-badge-rectangle"
+  - "#acBadge_feature_div .ac-badge-rectangle-icon"
+
 # Images produit
 images:
  - "#landingImage"
@@ -44,6 +69,13 @@ category:
  - "#wayfinding-breadcrumbs_feature_div"
  - ".a-breadcrumb"

+# Description (détails de l'article)
+description:
+  - "#detailBullets_feature_div"
+  - "#detailBulletsWrapper_feature_div"
+  - "#productDetails_detailBullets_sections1"
+  - "#feature-bullets"
+
 # Caractéristiques techniques (table specs)
 specs_table:
  - "#productDetails_techSpec_section_1"
@@ -130,13 +130,19 @@ class AmazonStore(BaseStore):
        title = self._extract_title(soup, debug_info)
        price = self._extract_price(soup, debug_info)
        currency = self._extract_currency(soup, debug_info)
-        stock_status = self._extract_stock(soup, debug_info)
-        images = self._extract_images(soup, debug_info)
+        stock_status, stock_text, in_stock = self._extract_stock_details(soup, debug_info)
+        main_image, gallery_images, images = self._extract_images(soup, debug_info)
        category = self._extract_category(soup, debug_info)
        specs = self._extract_specs(soup, debug_info)
        description = self._extract_description(soup, debug_info)
        msrp = self._extract_msrp(soup, debug_info)
        reference = self.extract_reference(url) or self._extract_asin_from_html(soup)
+        rating_value = self._extract_rating_value(soup, debug_info)
+        rating_count = self._extract_rating_count(soup, debug_info)
+        amazon_choice, amazon_choice_label = self._extract_amazon_choice(soup, debug_info)
+        discount_text = self._extract_discount_text(soup, debug_info)
+        model_number, model_name = self._extract_model_details(specs)
+        asin = reference

        # Déterminer le statut final (ne pas écraser FAILED)
        if debug_info.status != DebugStatus.FAILED:
@@ -153,12 +159,24 @@ class AmazonStore(BaseStore):
            currency=currency or "EUR",
            shipping_cost=None,  # Difficile à extraire
            stock_status=stock_status,
+            stock_text=stock_text,
+            in_stock=in_stock,
            reference=reference,
+            asin=asin,
            category=category,
            description=description,
            images=images,
+            main_image=main_image,
+            gallery_images=gallery_images,
            specs=specs,
            msrp=msrp,
+            rating_value=rating_value,
+            rating_count=rating_count,
+            amazon_choice=amazon_choice,
+            amazon_choice_label=amazon_choice_label,
+            discount_text=discount_text,
+            model_number=model_number,
+            model_name=model_name,
            debug=debug_info,
        )

@@ -203,14 +221,26 @@ class AmazonStore(BaseStore):
        return None

    def _extract_description(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
-        """Extrait la description (meta tags)."""
-        meta = soup.find("meta", property="og:description") or soup.find(
-            "meta", attrs={"name": "description"}
-        )
-        if meta:
-            description = meta.get("content", "").strip()
-            if description:
-                return description
+        """Extract the description from the item-details section.
+
+        Tries each configured ``description`` selector in order. For the first
+        matching element that yields content, returns its non-empty ``<li>``
+        texts joined by newlines or, when it has no such list items, all of
+        its text fragments joined by single spaces.
+
+        Returns:
+            The description text, or None when no selector yields any text.
+        """
+        selectors = self.get_selector("description", [])
+        if isinstance(selectors, str):
+            selectors = [selectors]
+
+        for selector in selectors:
+            element = soup.select_one(selector)
+            if not element:
+                continue
+            # Preferred form: one line per non-empty list item.
+            items = [
+                item.get_text(" ", strip=True)
+                for item in element.select("li")
+                if item.get_text(strip=True)
+            ]
+            if items:
+                return "\n".join(items)
+            # Fallback: flatten whatever text the element contains.
+            text = " ".join(element.stripped_strings)
+            if text:
+                return text
+
        return None

    def _extract_price(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]:
@@ -271,8 +301,10 @@ class AmazonStore(BaseStore):
        # Défaut basé sur le domaine
        return "EUR"

-    def _extract_stock(self, soup: BeautifulSoup, debug: DebugInfo) -> StockStatus:
-        """Extrait le statut de stock."""
+    def _extract_stock_details(
+        self, soup: BeautifulSoup, debug: DebugInfo
+    ) -> tuple[StockStatus, Optional[str], Optional[bool]]:
+        """Extrait le statut de stock avec texte brut."""
        selectors = self.get_selector("stock_status", [])
        if isinstance(selectors, str):
            selectors = [selectors]
@@ -280,22 +312,27 @@ class AmazonStore(BaseStore):
        for selector in selectors:
            element = soup.select_one(selector)
            if element:
-                text = element.get_text(strip=True).lower()
-                if "en stock" in text or "available" in text or "in stock" in text:
-                    return StockStatus.IN_STOCK
+                text = element.get_text(strip=True)
+                normalized = text.lower()
+                if "en stock" in normalized or "available" in normalized or "in stock" in normalized:
+                    return StockStatus.IN_STOCK, text, True
                elif (
-                    "rupture" in text
-                    or "indisponible" in text
-                    or "out of stock" in text
+                    "rupture" in normalized
+                    or "indisponible" in normalized
+                    or "out of stock" in normalized
                ):
-                    return StockStatus.OUT_OF_STOCK
+                    return StockStatus.OUT_OF_STOCK, text, False

-        return StockStatus.UNKNOWN
+        return StockStatus.UNKNOWN, None, None

-    def _extract_images(self, soup: BeautifulSoup, debug: DebugInfo) -> list[str]:
-        """Extrait les URLs d'images."""
-        images = []
-        seen = set()
+    def _extract_images(
+        self, soup: BeautifulSoup, debug: DebugInfo
+    ) -> tuple[Optional[str], list[str], list[str]]:
+        """Extrait l'image principale et la galerie."""
+        images: list[str] = []
+        seen: set[str] = set()
+        main_image: Optional[str] = None
+        max_gallery = 15
        selectors = self.get_selector("images", [])
        if isinstance(selectors, str):
            selectors = [selectors]
@@ -309,6 +346,8 @@ class AmazonStore(BaseStore):
                    if self._is_product_image(url) and url not in seen:
                        images.append(url)
                        seen.add(url)
+                        if main_image is None:
+                            main_image = url
                dynamic = element.get("data-a-dynamic-image")
                if dynamic:
                    urls = self._extract_dynamic_images(dynamic)
@@ -316,6 +355,8 @@ class AmazonStore(BaseStore):
                        if self._is_product_image(dyn_url) and dyn_url not in seen:
                            images.append(dyn_url)
                            seen.add(dyn_url)
+                            if main_image is None:
+                                main_image = dyn_url

        # Fallback: chercher tous les img tags si aucune image trouvée
        if not images:
@@ -326,8 +367,15 @@ class AmazonStore(BaseStore):
                    if url not in seen:
                        images.append(url)
                        seen.add(url)
+                        if main_image is None:
+                            main_image = url

-        return images
+        if main_image is None and images:
+            main_image = images[0]
+        gallery_images = [url for url in images if url != main_image]
+        gallery_images = gallery_images[:max_gallery]
+        final_images = [main_image] + gallery_images if main_image else gallery_images
+        return main_image, gallery_images, final_images

    def _extract_dynamic_images(self, raw: str) -> list[str]:
        """Extrait les URLs du JSON data-a-dynamic-image."""
@@ -393,8 +441,111 @@ class AmazonStore(BaseStore):
                        if key and value:
                            specs[key] = value

+        # Détails de l'article sous forme de liste
+        detail_list = soup.select("#detailBullets_feature_div li")
+        for item in detail_list:
+            text = item.get_text(" ", strip=True)
+            if ":" not in text:
+                continue
+            key, value = text.split(":", 1)
+            key = key.strip()
+            value = value.strip()
+            if key and value and key not in specs:
+                specs[key] = value
+
        return specs

+    def _extract_rating_value(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]:
+        """Extract the average customer rating.
+
+        Each configured ``rating_value`` selector is tried in order. The first
+        decimal number found in the element text (or, when the text is empty,
+        in its ``title`` attribute) is returned. Both ``.`` and ``,`` are
+        accepted as the decimal separator.
+
+        Args:
+            soup: Parsed product page.
+            debug: Debug collector (not used by this extractor).
+
+        Returns:
+            The rating as a float, or None when no selector yields a number.
+        """
+        selectors = self.get_selector("rating_value", [])
+        if isinstance(selectors, str):
+            selectors = [selectors]
+
+        for selector in selectors:
+            element = soup.select_one(selector)
+            if not element:
+                continue
+            text = element.get_text(" ", strip=True) or element.get("title", "").strip()
+            # Anchor on digits so that stray or trailing punctuation ("4.5.",
+            # ", 4,5") is never captured; the match always parses as a float.
+            match = re.search(r"\d+(?:[.,]\d+)?", text)
+            if match:
+                return float(match.group(0).replace(",", "."))
+        return None
+
+    def _extract_rating_count(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[int]:
+        """Extract the number of customer ratings.
+
+        Each configured ``rating_count`` selector is tried in order. The first
+        digit group found in the element text is returned as an integer.
+        Spaces (regular, non-breaking, narrow non-breaking), commas and dots
+        inside the group are treated as thousands separators, so
+        ``"1 234"``, ``"1,234"`` and ``"1.234"`` all give ``1234``.
+
+        Args:
+            soup: Parsed product page.
+            debug: Debug collector (not used by this extractor).
+
+        Returns:
+            The rating count, or None when no selector yields a number.
+        """
+        selectors = self.get_selector("rating_count", [])
+        if isinstance(selectors, str):
+            selectors = [selectors]
+
+        for selector in selectors:
+            element = soup.select_one(selector)
+            if not element:
+                continue
+            text = element.get_text(" ", strip=True)
+            # The match must start on a digit: a group made only of whitespace
+            # (e.g. the gap between two leading words) would hide the number.
+            match = re.search(r"\d[\d\s.,\u202f\u00a0]*", text)
+            if match:
+                return int(re.sub(r"[^\d]", "", match.group(0)))
+        return None
+
+    def _extract_amazon_choice(
+        self, soup: BeautifulSoup, debug: DebugInfo
+    ) -> tuple[Optional[bool], Optional[str]]:
+        """Detect the "Amazon's Choice" badge.
+
+        The first configured ``amazon_choice`` selector that matches an
+        element decides the result: the badge is reported as present, and its
+        label is the first non-empty value among the element text and its
+        ``aria-label``, ``title`` and ``data-a-badge-label`` attributes.
+
+        Returns:
+            ``(True, label)`` when the matched element provides a label,
+            ``(True, None)`` when it provides none, and ``(None, None)`` when
+            no selector matches.
+        """
+        configured = self.get_selector("amazon_choice", [])
+        selector_list = [configured] if isinstance(configured, str) else configured
+
+        for css in selector_list:
+            badge = soup.select_one(css)
+            if not badge:
+                continue
+            # All candidates are computed up front, in priority order.
+            candidates = (
+                badge.get_text(" ", strip=True),
+                badge.get("aria-label", "").strip(),
+                badge.get("title", "").strip(),
+                badge.get("data-a-badge-label", "").strip(),
+            )
+            for candidate in candidates:
+                if candidate:
+                    return True, candidate
+            # Element present but without any usable label.
+            return True, None
+        return None, None
+
+    def _extract_discount_text(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
+        """Return the explicit discount text shown on the page, if any.
+
+        The configured ``discount_text`` selectors are tried in order; the
+        text of the first matching element that is not empty is returned.
+        None is returned when no selector yields text.
+        """
+        configured = self.get_selector("discount_text", [])
+        selector_list = [configured] if isinstance(configured, str) else configured
+
+        # Lazy pipeline: evaluation stops at the first non-empty text.
+        matched = (soup.select_one(css) for css in selector_list)
+        texts = (node.get_text(" ", strip=True) for node in matched if node)
+        return next((text for text in texts if text), None)
+
+    def _extract_model_details(self, specs: dict[str, str]) -> tuple[Optional[str], Optional[str]]:
+        """Pick the model number and model name out of the spec table.
+
+        Spec keys are compared case-insensitively against the French labels,
+        with or without accents. When several keys match the same label, the
+        last one in iteration order wins.
+
+        Returns:
+            ``(model_number, model_name)``; each item is None when not found.
+        """
+        number_labels = ("numéro du modèle de l'article", "numero du modele de l'article")
+        name_labels = ("nom du modèle", "nom du modele")
+
+        found_number: Optional[str] = None
+        found_name: Optional[str] = None
+        for label, spec_value in specs.items():
+            lowered = label.lower()
+            if any(needle in lowered for needle in number_labels):
+                found_number = spec_value
+            if any(needle in lowered for needle in name_labels):
+                found_name = spec_value
+        return found_number, found_name
+
    def _extract_asin_from_html(self, soup: BeautifulSoup) -> Optional[str]:
        """Extrait l'ASIN depuis le HTML (fallback)."""
        selectors = self.get_selector("asin", [])
@@ -6,6 +6,7 @@ from __future__ import annotations

 from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
+import hashlib
 from typing import Optional

 import redis
@@ -127,11 +128,13 @@ class ScrapingScheduler:
        interval_hours: int = 24,
        use_playwright: Optional[bool] = None,
        save_db: bool = True,
+        job_id: Optional[str] = None,
    ) -> ScheduledJobInfo:
        """Planifie un scraping recurrent (intervalle en heures)."""
        interval_seconds = int(timedelta(hours=interval_hours).total_seconds())
        next_run = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)

+        resolved_job_id = job_id or self._job_id_for_url(url)
        job = self.scheduler.schedule(
            scheduled_time=next_run,
            func=scrape_product,
@@ -139,6 +142,13 @@ class ScrapingScheduler:
            kwargs={"use_playwright": use_playwright, "save_db": save_db},
            interval=interval_seconds,
            repeat=None,
+            id=resolved_job_id,
        )
        logger.info(f"Job planifie: {job.id}, prochaine execution: {next_run.isoformat()}")
        return ScheduledJobInfo(job_id=job.id, next_run=next_run)
+
+    @staticmethod
+    def _job_id_for_url(url: str) -> str:
+        """Build a stable job id from a URL so the same URL maps to one id.
+
+        The URL is stripped and lower-cased before hashing; the id is the
+        ``scrape_`` prefix followed by the hexadecimal SHA-1 digest.
+        """
+        normalized = url.strip().lower()
+        digest = hashlib.sha1(normalized.encode("utf-8"))
+        return "scrape_" + digest.hexdigest()
@@ -157,6 +157,36 @@ def scrape_product(
            )
            success = False
            fetch_error = str(exc)
+        # Si captcha detecte via HTTP, forcer une tentative Playwright.
+        if (
+            fetch_method == FetchMethod.HTTP
+            and use_playwright
+            and snapshot.debug.errors
+            and any("captcha" in error.lower() for error in snapshot.debug.errors)
+        ):
+            logger.info("[FETCH] Captcha detecte, tentative Playwright")
+            pw_result = fetch_playwright(
+                canonical_url,
+                headless=not headful,
+                timeout_ms=timeout_ms,
+                save_screenshot=save_screenshot,
+            )
+            if pw_result.success and pw_result.html:
+                try:
+                    snapshot = store.parse(pw_result.html, canonical_url)
+                    snapshot.debug.method = FetchMethod.PLAYWRIGHT
+                    snapshot.debug.duration_ms = pw_result.duration_ms
+                    snapshot.debug.html_size_bytes = len(pw_result.html.encode("utf-8"))
+                    snapshot.add_note("Captcha detecte via HTTP, fallback Playwright")
+                    success = snapshot.debug.status != DebugStatus.FAILED
+                except Exception as exc:
+                    snapshot.add_note(f"Fallback Playwright echoue: {exc}")
+                    logger.error(f"[PARSE] Exception fallback Playwright: {exc}")
+                    fetch_error = str(exc)
+            else:
+                error = pw_result.error or "Erreur Playwright"
+                snapshot.add_note(f"Fallback Playwright echoue: {error}")
+                fetch_error = error
    else:
        snapshot = ProductSnapshot(
            source=store.store_id,