before claude

This commit is contained in:
Gilles Soulier
2026-01-18 06:26:17 +01:00
parent dc19315e5d
commit 740c3d7516
60 changed files with 3815 additions and 354 deletions

View File

@@ -22,6 +22,10 @@ from sqlalchemy.orm import Session
from pricewatch.app.api.schemas import (
BackendLogEntry,
ClassificationOptionsOut,
ClassificationRuleCreate,
ClassificationRuleOut,
ClassificationRuleUpdate,
EnqueueRequest,
EnqueueResponse,
HealthStatus,
@@ -52,7 +56,8 @@ from pricewatch.app.core.config import get_config
from pricewatch.app.core.logging import get_logger
from pricewatch.app.core.schema import ProductSnapshot
from pricewatch.app.db.connection import check_db_connection, get_session
from pricewatch.app.db.models import PriceHistory, Product, ScrapingLog, Webhook
from pricewatch.app.db.models import ClassificationRule, PriceHistory, Product, ScrapingLog, Webhook
from pricewatch.app.db.repository import ProductRepository
from pricewatch.app.scraping.pipeline import ScrapingPipeline
from pricewatch.app.tasks.scrape import scrape_product
from pricewatch.app.tasks.scheduler import RedisUnavailableError, check_redis_connection, ScrapingScheduler
@@ -188,6 +193,7 @@ def create_product(
url=payload.url,
title=payload.title,
category=payload.category,
type=payload.type,
description=payload.description,
currency=payload.currency,
msrp=payload.msrp,
@@ -241,6 +247,129 @@ def update_product(
return _product_to_out(session, product)
@app.get(
    "/classification/rules",
    response_model=list[ClassificationRuleOut],
    dependencies=[Depends(require_token)],
)
def list_classification_rules(
    session: Session = Depends(get_db_session),
) -> list[ClassificationRuleOut]:
    """Return every classification rule, ordered by priority then id."""
    query = session.query(ClassificationRule).order_by(
        ClassificationRule.sort_order, ClassificationRule.id
    )
    results: list[ClassificationRuleOut] = []
    for rule in query.all():
        results.append(
            ClassificationRuleOut(
                id=rule.id,
                category=rule.category,
                type=rule.type,
                # keywords may be NULL in the DB; expose an empty list instead.
                keywords=rule.keywords or [],
                sort_order=rule.sort_order,
                is_active=rule.is_active,
            )
        )
    return results
@app.post(
    "/classification/rules",
    response_model=ClassificationRuleOut,
    dependencies=[Depends(require_token)],
)
def create_classification_rule(
    payload: ClassificationRuleCreate,
    session: Session = Depends(get_db_session),
) -> ClassificationRuleOut:
    """Create a classification rule and return its persisted representation."""
    # Defaults mirror the schema: sort_order falls back to 0, is_active to True
    # when the client omits them (None).
    active = payload.is_active if payload.is_active is not None else True
    new_rule = ClassificationRule(
        category=payload.category,
        type=payload.type,
        keywords=payload.keywords,
        sort_order=payload.sort_order or 0,
        is_active=active,
    )
    session.add(new_rule)
    session.commit()
    # Refresh to pick up DB-generated fields (id, server defaults).
    session.refresh(new_rule)
    return ClassificationRuleOut(
        id=new_rule.id,
        category=new_rule.category,
        type=new_rule.type,
        keywords=new_rule.keywords or [],
        sort_order=new_rule.sort_order,
        is_active=new_rule.is_active,
    )
@app.patch(
    "/classification/rules/{rule_id}",
    response_model=ClassificationRuleOut,
    dependencies=[Depends(require_token)],
)
def update_classification_rule(
    rule_id: int,
    payload: ClassificationRuleUpdate,
    session: Session = Depends(get_db_session),
) -> ClassificationRuleOut:
    """Partially update a classification rule; only fields the client sent change."""
    rule = (
        session.query(ClassificationRule)
        .filter(ClassificationRule.id == rule_id)
        .one_or_none()
    )
    if rule is None:
        raise HTTPException(status_code=404, detail="Regle non trouvee")
    # exclude_unset leaves untouched every field absent from the request body.
    for field_name, field_value in payload.model_dump(exclude_unset=True).items():
        setattr(rule, field_name, field_value)
    session.commit()
    session.refresh(rule)
    return ClassificationRuleOut(
        id=rule.id,
        category=rule.category,
        type=rule.type,
        keywords=rule.keywords or [],
        sort_order=rule.sort_order,
        is_active=rule.is_active,
    )
@app.delete(
    "/classification/rules/{rule_id}",
    dependencies=[Depends(require_token)],
)
def delete_classification_rule(
    rule_id: int,
    session: Session = Depends(get_db_session),
) -> dict[str, str]:
    """Delete a classification rule by id; 404 when it does not exist."""
    rule = (
        session.query(ClassificationRule)
        .filter(ClassificationRule.id == rule_id)
        .one_or_none()
    )
    if rule is None:
        raise HTTPException(status_code=404, detail="Regle non trouvee")
    session.delete(rule)
    session.commit()
    return {"status": "deleted"}
@app.get(
    "/classification/options",
    response_model=ClassificationOptionsOut,
    dependencies=[Depends(require_token)],
)
def get_classification_options(
    session: Session = Depends(get_db_session),
) -> ClassificationOptionsOut:
    """Expose the distinct categories and types defined by active rules.

    Returns sorted, de-duplicated lists; empty/None values are dropped.
    """
    rules = (
        session.query(ClassificationRule)
        # .is_(True) is the idiomatic SQLAlchemy boolean filter (avoids E712
        # `== True`); generated SQL is equivalent (`is_active IS true`).
        .filter(ClassificationRule.is_active.is_(True))
        .order_by(ClassificationRule.sort_order, ClassificationRule.id)
        .all()
    )
    categories = sorted({rule.category for rule in rules if rule.category})
    types = sorted({rule.type for rule in rules if rule.type})
    return ClassificationOptionsOut(categories=categories, types=types)
@app.delete("/products/{product_id}", dependencies=[Depends(require_token)])
def delete_product(
product_id: int,
@@ -703,6 +832,13 @@ def preview_scrape(payload: ScrapePreviewRequest) -> ScrapePreviewResponse:
if snapshot is None:
_add_backend_log("ERROR", f"Preview scraping KO: {payload.url}")
return ScrapePreviewResponse(success=False, snapshot=None, error=result.get("error"))
config = get_config()
if config.enable_db:
try:
with get_session(config) as session:
ProductRepository(session).apply_classification(snapshot)
except Exception as exc:
snapshot.add_note(f"Classification ignoree: {exc}")
return ScrapePreviewResponse(
success=bool(result.get("success")),
snapshot=snapshot.model_dump(mode="json"),
@@ -719,7 +855,9 @@ def commit_scrape(payload: ScrapeCommitRequest) -> ScrapeCommitResponse:
_add_backend_log("ERROR", "Commit scraping KO: snapshot invalide")
raise HTTPException(status_code=400, detail="Snapshot invalide") from exc
product_id = ScrapingPipeline(config=get_config()).process_snapshot(snapshot, save_to_db=True)
product_id = ScrapingPipeline(config=get_config()).process_snapshot(
snapshot, save_to_db=True, apply_classification=False
)
_add_backend_log("INFO", f"Commit scraping OK: product_id={product_id}")
return ScrapeCommitResponse(success=True, product_id=product_id)
@@ -808,12 +946,9 @@ def _product_to_out(session: Session, product: Product) -> ProductOut:
)
images = [image.image_url for image in product.images]
specs = {spec.spec_key: spec.spec_value for spec in product.specs}
discount_amount = None
discount_percent = None
if latest and latest.price is not None and product.msrp:
discount_amount = float(product.msrp) - float(latest.price)
if product.msrp > 0:
discount_percent = (discount_amount / float(product.msrp)) * 100
main_image = images[0] if images else None
gallery_images = images[1:] if len(images) > 1 else []
asin = product.reference if product.source == "amazon" else None
history_rows = (
session.query(PriceHistory)
.filter(PriceHistory.product_id == product.id, PriceHistory.price != None)
@@ -830,12 +965,23 @@ def _product_to_out(session: Session, product: Product) -> ProductOut:
id=product.id,
source=product.source,
reference=product.reference,
asin=asin,
url=product.url,
title=product.title,
category=product.category,
type=product.type,
description=product.description,
currency=product.currency,
msrp=float(product.msrp) if product.msrp is not None else None,
rating_value=float(product.rating_value) if product.rating_value is not None else None,
rating_count=product.rating_count,
amazon_choice=product.amazon_choice,
amazon_choice_label=product.amazon_choice_label,
discount_text=product.discount_text,
stock_text=product.stock_text,
in_stock=product.in_stock,
model_number=product.model_number,
model_name=product.model_name,
first_seen_at=product.first_seen_at,
last_updated_at=product.last_updated_at,
latest_price=float(latest.price) if latest and latest.price is not None else None,
@@ -845,9 +991,11 @@ def _product_to_out(session: Session, product: Product) -> ProductOut:
latest_stock_status=latest.stock_status if latest else None,
latest_fetched_at=latest.fetched_at if latest else None,
images=images,
main_image=main_image,
gallery_images=gallery_images,
specs=specs,
discount_amount=discount_amount,
discount_percent=discount_percent,
discount_amount=None,
discount_percent=None,
history=history_points,
)

View File

@@ -22,12 +22,23 @@ class ProductOut(BaseModel):
id: int
source: str
reference: str
asin: Optional[str] = None
url: str
title: Optional[str] = None
category: Optional[str] = None
type: Optional[str] = None
description: Optional[str] = None
currency: Optional[str] = None
msrp: Optional[float] = None
rating_value: Optional[float] = None
rating_count: Optional[int] = None
amazon_choice: Optional[bool] = None
amazon_choice_label: Optional[str] = None
discount_text: Optional[str] = None
stock_text: Optional[str] = None
in_stock: Optional[bool] = None
model_number: Optional[str] = None
model_name: Optional[str] = None
first_seen_at: datetime
last_updated_at: datetime
latest_price: Optional[float] = None
@@ -35,6 +46,8 @@ class ProductOut(BaseModel):
latest_stock_status: Optional[str] = None
latest_fetched_at: Optional[datetime] = None
images: list[str] = []
main_image: Optional[str] = None
gallery_images: list[str] = []
specs: dict[str, str] = {}
discount_amount: Optional[float] = None
discount_percent: Optional[float] = None
@@ -47,6 +60,7 @@ class ProductCreate(BaseModel):
url: str
title: Optional[str] = None
category: Optional[str] = None
type: Optional[str] = None
description: Optional[str] = None
currency: Optional[str] = None
msrp: Optional[float] = None
@@ -56,6 +70,7 @@ class ProductUpdate(BaseModel):
url: Optional[str] = None
title: Optional[str] = None
category: Optional[str] = None
type: Optional[str] = None
description: Optional[str] = None
currency: Optional[str] = None
msrp: Optional[float] = None
@@ -208,6 +223,36 @@ class VersionResponse(BaseModel):
api_version: str
class ClassificationRuleOut(BaseModel):
    """API response shape for a persisted classification rule."""

    id: int
    # Category assigned when the rule matches (None = rule sets no category).
    category: Optional[str] = None
    # Type assigned when the rule matches (None = rule sets no type).
    type: Optional[str] = None
    # Keywords matched case-insensitively against product titles.
    keywords: list[str] = Field(default_factory=list)
    # Evaluation priority; lower values are tried first.
    sort_order: int = 0
    is_active: bool = True
class ClassificationRuleCreate(BaseModel):
    """Request body for creating a classification rule.

    sort_order and is_active are optional; the endpoint substitutes 0 and
    True respectively when they are omitted or None.
    """

    category: Optional[str] = None
    type: Optional[str] = None
    keywords: list[str] = Field(default_factory=list)
    sort_order: Optional[int] = 0
    is_active: Optional[bool] = True
class ClassificationRuleUpdate(BaseModel):
    """Partial-update body for PATCH /classification/rules/{rule_id}.

    All fields default to None; the endpoint applies only the fields the
    client actually sent (model_dump(exclude_unset=True)).
    """

    category: Optional[str] = None
    type: Optional[str] = None
    keywords: Optional[list[str]] = None
    sort_order: Optional[int] = None
    is_active: Optional[bool] = None
class ClassificationOptionsOut(BaseModel):
    """Distinct category/type values derived from the active rules."""

    categories: list[str] = Field(default_factory=list)
    types: list[str] = Field(default_factory=list)
class BackendLogEntry(BaseModel):
time: datetime
level: str

View File

@@ -93,13 +93,52 @@ class ProductSnapshot(BaseModel):
reference: Optional[str] = Field(
default=None, description="Référence produit (ASIN, SKU, etc.)"
)
asin: Optional[str] = Field(
default=None, description="ASIN Amazon si disponible"
)
category: Optional[str] = Field(default=None, description="Catégorie du produit")
type: Optional[str] = Field(default=None, description="Type du produit")
description: Optional[str] = Field(default=None, description="Description produit")
# Données Amazon explicites (si disponibles)
rating_value: Optional[float] = Field(
default=None, description="Note moyenne affichée"
)
rating_count: Optional[int] = Field(
default=None, description="Nombre d'évaluations"
)
amazon_choice: Optional[bool] = Field(
default=None, description="Badge Choix d'Amazon présent"
)
amazon_choice_label: Optional[str] = Field(
default=None, description="Libellé du badge Choix d'Amazon"
)
discount_text: Optional[str] = Field(
default=None, description="Texte de réduction affiché"
)
stock_text: Optional[str] = Field(
default=None, description="Texte brut de stock"
)
in_stock: Optional[bool] = Field(
default=None, description="Disponibilité dérivée"
)
model_number: Optional[str] = Field(
default=None, description="Numéro du modèle de l'article"
)
model_name: Optional[str] = Field(
default=None, description="Nom du modèle explicite"
)
# Médias
images: list[str] = Field(
default_factory=list, description="Liste des URLs d'images du produit"
)
main_image: Optional[str] = Field(
default=None, description="Image principale du produit"
)
gallery_images: list[str] = Field(
default_factory=list, description="Images de galerie dédoublonnées"
)
# Caractéristiques techniques
specs: dict[str, str] = Field(
@@ -134,6 +173,12 @@ class ProductSnapshot(BaseModel):
"""Filtre les URLs d'images vides."""
return [url.strip() for url in v if url and url.strip()]
@field_validator("gallery_images")
@classmethod
def validate_gallery_images(cls, v: list[str]) -> list[str]:
    """Drop empty or whitespace-only gallery URLs and trim the rest."""
    cleaned: list[str] = []
    for url in v:
        if url and url.strip():
            cleaned.append(url.strip())
    return cleaned
model_config = ConfigDict(
use_enum_values=True,
json_schema_extra={

View File

@@ -0,0 +1,350 @@
"""Ajout champs Amazon produit
Revision ID: 0014e51c4927
Revises: 20260115_02_product_details
Create Date: 2026-01-17 19:23:01.866891
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers, used by Alembic.
revision = '0014e51c4927'
down_revision = '20260115_02_product_details'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add Amazon-specific product columns and attach comments to existing columns.

    NOTE(review): apart from the nine `op.add_column('products', ...)` calls,
    every statement here only adds SQL comment metadata to columns that
    already exist — no other schema change.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # -- price_history: comment metadata only --
    op.alter_column('price_history', 'price',
                    existing_type=sa.NUMERIC(precision=10, scale=2),
                    comment='Product price',
                    existing_nullable=True)
    op.alter_column('price_history', 'shipping_cost',
                    existing_type=sa.NUMERIC(precision=10, scale=2),
                    comment='Shipping cost',
                    existing_nullable=True)
    op.alter_column('price_history', 'stock_status',
                    existing_type=sa.VARCHAR(length=20),
                    comment='Stock status (in_stock, out_of_stock, unknown)',
                    existing_nullable=True)
    op.alter_column('price_history', 'fetch_method',
                    existing_type=sa.VARCHAR(length=20),
                    comment='Fetch method (http, playwright)',
                    existing_nullable=False)
    op.alter_column('price_history', 'fetch_status',
                    existing_type=sa.VARCHAR(length=20),
                    comment='Fetch status (success, partial, failed)',
                    existing_nullable=False)
    op.alter_column('price_history', 'fetched_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment='Scraping timestamp',
                    existing_nullable=False)
    # -- product_images: comment metadata only --
    op.alter_column('product_images', 'image_url',
                    existing_type=sa.TEXT(),
                    comment='Image URL',
                    existing_nullable=False)
    op.alter_column('product_images', 'position',
                    existing_type=sa.INTEGER(),
                    comment='Image position (0=main)',
                    existing_nullable=False)
    # -- product_specs: comment metadata only --
    op.alter_column('product_specs', 'spec_key',
                    existing_type=sa.VARCHAR(length=200),
                    comment="Specification key (e.g., 'Brand', 'Color')",
                    existing_nullable=False)
    op.alter_column('product_specs', 'spec_value',
                    existing_type=sa.TEXT(),
                    comment='Specification value',
                    existing_nullable=False)
    # -- products: the substantive change — nine new nullable Amazon columns --
    op.add_column('products', sa.Column('rating_value', sa.Numeric(precision=3, scale=2), nullable=True, comment='Note moyenne'))
    op.add_column('products', sa.Column('rating_count', sa.Integer(), nullable=True, comment="Nombre d'evaluations"))
    op.add_column('products', sa.Column('amazon_choice', sa.Boolean(), nullable=True, comment="Badge Choix d'Amazon"))
    op.add_column('products', sa.Column('amazon_choice_label', sa.Text(), nullable=True, comment="Libelle Choix d'Amazon"))
    op.add_column('products', sa.Column('discount_text', sa.Text(), nullable=True, comment='Texte de reduction affiche'))
    op.add_column('products', sa.Column('stock_text', sa.Text(), nullable=True, comment='Texte brut du stock'))
    op.add_column('products', sa.Column('in_stock', sa.Boolean(), nullable=True, comment='Disponibilite derivee'))
    op.add_column('products', sa.Column('model_number', sa.Text(), nullable=True, comment='Numero du modele'))
    op.add_column('products', sa.Column('model_name', sa.Text(), nullable=True, comment='Nom du modele'))
    # -- products: comment metadata only --
    op.alter_column('products', 'source',
                    existing_type=sa.VARCHAR(length=50),
                    comment='Store ID (amazon, cdiscount, etc.)',
                    existing_nullable=False)
    op.alter_column('products', 'reference',
                    existing_type=sa.VARCHAR(length=100),
                    comment='Product reference (ASIN, SKU, etc.)',
                    existing_nullable=False)
    op.alter_column('products', 'url',
                    existing_type=sa.TEXT(),
                    comment='Canonical product URL',
                    existing_nullable=False)
    op.alter_column('products', 'title',
                    existing_type=sa.TEXT(),
                    comment='Product title',
                    existing_nullable=True)
    op.alter_column('products', 'category',
                    existing_type=sa.TEXT(),
                    comment='Product category (breadcrumb)',
                    existing_nullable=True)
    op.alter_column('products', 'description',
                    existing_type=sa.TEXT(),
                    comment='Product description',
                    existing_nullable=True)
    op.alter_column('products', 'currency',
                    existing_type=sa.VARCHAR(length=3),
                    comment='Currency code (EUR, USD, GBP)',
                    existing_nullable=True)
    op.alter_column('products', 'msrp',
                    existing_type=sa.NUMERIC(precision=10, scale=2),
                    comment='Recommended price',
                    existing_nullable=True)
    op.alter_column('products', 'first_seen_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment='First scraping timestamp',
                    existing_nullable=False)
    op.alter_column('products', 'last_updated_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment='Last metadata update',
                    existing_nullable=False)
    # -- scraping_logs: comment metadata only --
    op.alter_column('scraping_logs', 'url',
                    existing_type=sa.TEXT(),
                    comment='Scraped URL',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'source',
                    existing_type=sa.VARCHAR(length=50),
                    comment='Store ID (amazon, cdiscount, etc.)',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'reference',
                    existing_type=sa.VARCHAR(length=100),
                    comment='Product reference (if extracted)',
                    existing_nullable=True)
    op.alter_column('scraping_logs', 'fetch_method',
                    existing_type=sa.VARCHAR(length=20),
                    comment='Fetch method (http, playwright)',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'fetch_status',
                    existing_type=sa.VARCHAR(length=20),
                    comment='Fetch status (success, partial, failed)',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'fetched_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment='Scraping timestamp',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'duration_ms',
                    existing_type=sa.INTEGER(),
                    comment='Fetch duration in milliseconds',
                    existing_nullable=True)
    op.alter_column('scraping_logs', 'html_size_bytes',
                    existing_type=sa.INTEGER(),
                    comment='HTML response size in bytes',
                    existing_nullable=True)
    op.alter_column('scraping_logs', 'errors',
                    existing_type=postgresql.JSONB(astext_type=sa.Text()),
                    comment='Error messages (list of strings)',
                    existing_nullable=True)
    op.alter_column('scraping_logs', 'notes',
                    existing_type=postgresql.JSONB(astext_type=sa.Text()),
                    comment='Debug notes (list of strings)',
                    existing_nullable=True)
    # -- webhooks: comment metadata only --
    op.alter_column('webhooks', 'event',
                    existing_type=sa.VARCHAR(length=50),
                    comment='Event name',
                    existing_nullable=False)
    op.alter_column('webhooks', 'url',
                    existing_type=sa.TEXT(),
                    comment='Webhook URL',
                    existing_nullable=False)
    op.alter_column('webhooks', 'secret',
                    existing_type=sa.VARCHAR(length=200),
                    comment='Secret optionnel',
                    existing_nullable=True)
    op.alter_column('webhooks', 'created_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment='Creation timestamp',
                    existing_nullable=False)
    # ### end Alembic commands ###
def downgrade() -> None:
    """Revert upgrade(): remove column comments and drop the nine Amazon columns.

    Mirrors upgrade() in reverse order; only the `op.drop_column` calls
    change the schema, everything else strips comment metadata.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column('webhooks', 'created_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment=None,
                    existing_comment='Creation timestamp',
                    existing_nullable=False)
    op.alter_column('webhooks', 'secret',
                    existing_type=sa.VARCHAR(length=200),
                    comment=None,
                    existing_comment='Secret optionnel',
                    existing_nullable=True)
    op.alter_column('webhooks', 'url',
                    existing_type=sa.TEXT(),
                    comment=None,
                    existing_comment='Webhook URL',
                    existing_nullable=False)
    op.alter_column('webhooks', 'event',
                    existing_type=sa.VARCHAR(length=50),
                    comment=None,
                    existing_comment='Event name',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'notes',
                    existing_type=postgresql.JSONB(astext_type=sa.Text()),
                    comment=None,
                    existing_comment='Debug notes (list of strings)',
                    existing_nullable=True)
    op.alter_column('scraping_logs', 'errors',
                    existing_type=postgresql.JSONB(astext_type=sa.Text()),
                    comment=None,
                    existing_comment='Error messages (list of strings)',
                    existing_nullable=True)
    op.alter_column('scraping_logs', 'html_size_bytes',
                    existing_type=sa.INTEGER(),
                    comment=None,
                    existing_comment='HTML response size in bytes',
                    existing_nullable=True)
    op.alter_column('scraping_logs', 'duration_ms',
                    existing_type=sa.INTEGER(),
                    comment=None,
                    existing_comment='Fetch duration in milliseconds',
                    existing_nullable=True)
    op.alter_column('scraping_logs', 'fetched_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment=None,
                    existing_comment='Scraping timestamp',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'fetch_status',
                    existing_type=sa.VARCHAR(length=20),
                    comment=None,
                    existing_comment='Fetch status (success, partial, failed)',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'fetch_method',
                    existing_type=sa.VARCHAR(length=20),
                    comment=None,
                    existing_comment='Fetch method (http, playwright)',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'reference',
                    existing_type=sa.VARCHAR(length=100),
                    comment=None,
                    existing_comment='Product reference (if extracted)',
                    existing_nullable=True)
    op.alter_column('scraping_logs', 'source',
                    existing_type=sa.VARCHAR(length=50),
                    comment=None,
                    existing_comment='Store ID (amazon, cdiscount, etc.)',
                    existing_nullable=False)
    op.alter_column('scraping_logs', 'url',
                    existing_type=sa.TEXT(),
                    comment=None,
                    existing_comment='Scraped URL',
                    existing_nullable=False)
    op.alter_column('products', 'last_updated_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment=None,
                    existing_comment='Last metadata update',
                    existing_nullable=False)
    op.alter_column('products', 'first_seen_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment=None,
                    existing_comment='First scraping timestamp',
                    existing_nullable=False)
    op.alter_column('products', 'msrp',
                    existing_type=sa.NUMERIC(precision=10, scale=2),
                    comment=None,
                    existing_comment='Recommended price',
                    existing_nullable=True)
    op.alter_column('products', 'currency',
                    existing_type=sa.VARCHAR(length=3),
                    comment=None,
                    existing_comment='Currency code (EUR, USD, GBP)',
                    existing_nullable=True)
    op.alter_column('products', 'description',
                    existing_type=sa.TEXT(),
                    comment=None,
                    existing_comment='Product description',
                    existing_nullable=True)
    op.alter_column('products', 'category',
                    existing_type=sa.TEXT(),
                    comment=None,
                    existing_comment='Product category (breadcrumb)',
                    existing_nullable=True)
    op.alter_column('products', 'title',
                    existing_type=sa.TEXT(),
                    comment=None,
                    existing_comment='Product title',
                    existing_nullable=True)
    op.alter_column('products', 'url',
                    existing_type=sa.TEXT(),
                    comment=None,
                    existing_comment='Canonical product URL',
                    existing_nullable=False)
    op.alter_column('products', 'reference',
                    existing_type=sa.VARCHAR(length=100),
                    comment=None,
                    existing_comment='Product reference (ASIN, SKU, etc.)',
                    existing_nullable=False)
    op.alter_column('products', 'source',
                    existing_type=sa.VARCHAR(length=50),
                    comment=None,
                    existing_comment='Store ID (amazon, cdiscount, etc.)',
                    existing_nullable=False)
    # Drop the columns added by upgrade() (reverse order of addition).
    op.drop_column('products', 'model_name')
    op.drop_column('products', 'model_number')
    op.drop_column('products', 'in_stock')
    op.drop_column('products', 'stock_text')
    op.drop_column('products', 'discount_text')
    op.drop_column('products', 'amazon_choice_label')
    op.drop_column('products', 'amazon_choice')
    op.drop_column('products', 'rating_count')
    op.drop_column('products', 'rating_value')
    op.alter_column('product_specs', 'spec_value',
                    existing_type=sa.TEXT(),
                    comment=None,
                    existing_comment='Specification value',
                    existing_nullable=False)
    op.alter_column('product_specs', 'spec_key',
                    existing_type=sa.VARCHAR(length=200),
                    comment=None,
                    existing_comment="Specification key (e.g., 'Brand', 'Color')",
                    existing_nullable=False)
    op.alter_column('product_images', 'position',
                    existing_type=sa.INTEGER(),
                    comment=None,
                    existing_comment='Image position (0=main)',
                    existing_nullable=False)
    op.alter_column('product_images', 'image_url',
                    existing_type=sa.TEXT(),
                    comment=None,
                    existing_comment='Image URL',
                    existing_nullable=False)
    op.alter_column('price_history', 'fetched_at',
                    existing_type=postgresql.TIMESTAMP(),
                    comment=None,
                    existing_comment='Scraping timestamp',
                    existing_nullable=False)
    op.alter_column('price_history', 'fetch_status',
                    existing_type=sa.VARCHAR(length=20),
                    comment=None,
                    existing_comment='Fetch status (success, partial, failed)',
                    existing_nullable=False)
    op.alter_column('price_history', 'fetch_method',
                    existing_type=sa.VARCHAR(length=20),
                    comment=None,
                    existing_comment='Fetch method (http, playwright)',
                    existing_nullable=False)
    op.alter_column('price_history', 'stock_status',
                    existing_type=sa.VARCHAR(length=20),
                    comment=None,
                    existing_comment='Stock status (in_stock, out_of_stock, unknown)',
                    existing_nullable=True)
    op.alter_column('price_history', 'shipping_cost',
                    existing_type=sa.NUMERIC(precision=10, scale=2),
                    comment=None,
                    existing_comment='Shipping cost',
                    existing_nullable=True)
    op.alter_column('price_history', 'price',
                    existing_type=sa.NUMERIC(precision=10, scale=2),
                    comment=None,
                    existing_comment='Product price',
                    existing_nullable=True)
    # ### end Alembic commands ###

View File

@@ -0,0 +1,28 @@
"""Ajout champs Amazon produit
Revision ID: 1467e98fcbea
Revises: 3e68b0f0c9e4
Create Date: 2026-01-17 20:08:32.991650
"""
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
revision = '1467e98fcbea'
down_revision = '3e68b0f0c9e4'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """No-op revision (empty autogenerate).

    NOTE(review): this revision (1467e98fcbea) and
    20260117_03_classification_rules BOTH declare
    down_revision = '3e68b0f0c9e4', which leaves the migration history with
    two heads — confirm a merge revision exists, otherwise
    `alembic upgrade head` will refuse to run.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    pass
    # ### end Alembic commands ###
def downgrade() -> None:
    """No-op: nothing to revert, upgrade() makes no schema change."""
    # ### commands auto generated by Alembic - please adjust! ###
    pass
    # ### end Alembic commands ###

View File

@@ -0,0 +1,114 @@
"""Ajout classification rules et type produit
Revision ID: 20260117_03_classification_rules
Revises: 3e68b0f0c9e4
Create Date: 2026-01-17 20:05:00.000000
"""
from datetime import datetime, timezone
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers, used by Alembic.
revision = "20260117_03_classification_rules"
down_revision = "3e68b0f0c9e4"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add products.type, create classification_rules, and seed default rules."""
    # New nullable column: products can now carry a type alongside category.
    op.add_column(
        "products",
        sa.Column("type", sa.Text(), nullable=True, comment="Product type"),
    )
    op.create_table(
        "classification_rules",
        sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
        sa.Column("category", sa.String(length=80), nullable=True, comment="Categorie cible"),
        sa.Column("type", sa.String(length=80), nullable=True, comment="Type cible"),
        sa.Column(
            "keywords",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            comment="Mots-cles de matching",
        ),
        sa.Column("sort_order", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("is_active", sa.Boolean(), nullable=False, server_default=sa.text("true")),
        sa.Column(
            "created_at",
            sa.TIMESTAMP(),
            nullable=False,
            server_default=sa.text("CURRENT_TIMESTAMP"),
            comment="Creation timestamp",
        ),
    )
    # Indexes backing the ORM's (sort_order, id) ordering and is_active filter.
    op.create_index("ix_classification_rule_order", "classification_rules", ["sort_order"])
    op.create_index("ix_classification_rule_active", "classification_rules", ["is_active"])
    # Lightweight table stub for bulk_insert (no full model import in a migration).
    rules_table = sa.table(
        "classification_rules",
        sa.column("category", sa.String),
        sa.column("type", sa.String),
        sa.column("keywords", postgresql.JSONB),
        sa.column("sort_order", sa.Integer),
        sa.column("is_active", sa.Boolean),
        sa.column("created_at", sa.TIMESTAMP),
    )
    now = datetime.now(timezone.utc)
    # Seed a default rule set (French IT hardware keywords).
    op.bulk_insert(
        rules_table,
        [
            {
                "category": "Informatique",
                "type": "Ecran",
                "keywords": ["ecran", "moniteur", "display"],
                "sort_order": 0,
                "is_active": True,
                "created_at": now,
            },
            {
                "category": "Informatique",
                "type": "PC portable",
                "keywords": ["pc portable", "ordinateur portable", "laptop", "notebook"],
                "sort_order": 1,
                "is_active": True,
                "created_at": now,
            },
            {
                "category": "Informatique",
                "type": "Unite centrale",
                "keywords": ["unite centrale", "tour", "desktop", "pc fixe"],
                "sort_order": 2,
                "is_active": True,
                "created_at": now,
            },
            {
                "category": "Informatique",
                "type": "Clavier",
                "keywords": ["clavier", "keyboard"],
                "sort_order": 3,
                "is_active": True,
                "created_at": now,
            },
            {
                "category": "Informatique",
                "type": "Souris",
                "keywords": ["souris", "mouse"],
                "sort_order": 4,
                "is_active": True,
                "created_at": now,
            },
        ],
    )
def downgrade() -> None:
    """Revert upgrade(): drop indexes, the classification_rules table, then products.type."""
    op.drop_index("ix_classification_rule_active", table_name="classification_rules")
    op.drop_index("ix_classification_rule_order", table_name="classification_rules")
    op.drop_table("classification_rules")
    op.drop_column("products", "type")

View File

@@ -0,0 +1,28 @@
"""Ajout champs Amazon produit
Revision ID: 3e68b0f0c9e4
Revises: 0014e51c4927
Create Date: 2026-01-17 19:45:03.730218
"""
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
revision = '3e68b0f0c9e4'
down_revision = '0014e51c4927'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """No-op revision (empty autogenerate) kept only to preserve the chain
    between 0014e51c4927 and its successors."""
    # ### commands auto generated by Alembic - please adjust! ###
    pass
    # ### end Alembic commands ###
def downgrade() -> None:
    """No-op: nothing to revert, upgrade() makes no schema change."""
    # ### commands auto generated by Alembic - please adjust! ###
    pass
    # ### end Alembic commands ###

View File

@@ -84,6 +84,36 @@ class Product(Base):
msrp: Mapped[Optional[Decimal]] = mapped_column(
Numeric(10, 2), nullable=True, comment="Recommended price"
)
type: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Product type"
)
rating_value: Mapped[Optional[Decimal]] = mapped_column(
Numeric(3, 2), nullable=True, comment="Note moyenne"
)
rating_count: Mapped[Optional[int]] = mapped_column(
Integer, nullable=True, comment="Nombre d'evaluations"
)
amazon_choice: Mapped[Optional[bool]] = mapped_column(
Boolean, nullable=True, comment="Badge Choix d'Amazon"
)
amazon_choice_label: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Libelle Choix d'Amazon"
)
discount_text: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Texte de reduction affiche"
)
stock_text: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Texte brut du stock"
)
in_stock: Mapped[Optional[bool]] = mapped_column(
Boolean, nullable=True, comment="Disponibilite derivee"
)
model_number: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Numero du modele"
)
model_name: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Nom du modele"
)
# Timestamps
first_seen_at: Mapped[datetime] = mapped_column(
@@ -331,6 +361,45 @@ class ScrapingLog(Base):
return f"<ScrapingLog(id={self.id}, url={self.url}, status={self.fetch_status}, fetched_at={self.fetched_at})>"
class ClassificationRule(Base):
    """
    Keyword-based category/type classification rules.

    Rules are evaluated in (sort_order, id) order; a matching rule assigns
    its category and/or type to a product.
    """

    __tablename__ = "classification_rules"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    category: Mapped[Optional[str]] = mapped_column(
        String(80), nullable=True, comment="Categorie cible"
    )
    type: Mapped[Optional[str]] = mapped_column(
        String(80), nullable=True, comment="Type cible"
    )
    # JSONB on PostgreSQL, generic JSON on other dialects (e.g. SQLite tests).
    keywords: Mapped[list[str]] = mapped_column(
        JSON().with_variant(JSONB, "postgresql"),
        nullable=False,
        default=list,
        comment="Mots-cles de matching",
    )
    sort_order: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0, comment="Ordre de priorite (0=haut)"
    )
    is_active: Mapped[bool] = mapped_column(
        Boolean, nullable=False, default=True, comment="Regle active"
    )
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP, nullable=False, default=utcnow, comment="Creation timestamp"
    )

    # Indexes backing the ordering and the is_active filter used by the API.
    __table_args__ = (
        Index("ix_classification_rule_order", "sort_order"),
        Index("ix_classification_rule_active", "is_active"),
    )

    def __repr__(self) -> str:
        return f"<ClassificationRule(id={self.id}, category={self.category}, type={self.type})>"
class Webhook(Base):
"""
Webhooks pour notifications externes.

View File

@@ -13,7 +13,14 @@ from sqlalchemy.orm import Session
from pricewatch.app.core.logging import get_logger
from pricewatch.app.core.schema import ProductSnapshot
from pricewatch.app.db.models import PriceHistory, Product, ProductImage, ProductSpec, ScrapingLog
from pricewatch.app.db.models import (
ClassificationRule,
PriceHistory,
Product,
ProductImage,
ProductSpec,
ScrapingLog,
)
logger = get_logger("db.repository")
@@ -49,12 +56,58 @@ class ProductRepository:
product.title = snapshot.title
if snapshot.category:
product.category = snapshot.category
if snapshot.type:
product.type = snapshot.type
if snapshot.description:
product.description = snapshot.description
if snapshot.currency:
product.currency = snapshot.currency
if snapshot.msrp is not None:
product.msrp = snapshot.msrp
if snapshot.rating_value is not None:
product.rating_value = snapshot.rating_value
if snapshot.rating_count is not None:
product.rating_count = snapshot.rating_count
if snapshot.amazon_choice is not None:
product.amazon_choice = snapshot.amazon_choice
if snapshot.amazon_choice_label:
product.amazon_choice_label = snapshot.amazon_choice_label
if snapshot.discount_text:
product.discount_text = snapshot.discount_text
if snapshot.stock_text:
product.stock_text = snapshot.stock_text
if snapshot.in_stock is not None:
product.in_stock = snapshot.in_stock
if snapshot.model_number:
product.model_number = snapshot.model_number
if snapshot.model_name:
product.model_name = snapshot.model_name
def apply_classification(self, snapshot: ProductSnapshot) -> None:
    """Apply the active classification rules to the snapshot, in place.

    Rules are evaluated in priority order (``sort_order`` ascending, then
    ``id``). The first rule with a keyword that occurs, case-insensitively,
    in the snapshot title sets the snapshot's category and/or type, after
    which evaluation stops. Snapshots without a title are left untouched.

    Args:
        snapshot: Parsed product data; mutated in place on a match.
    """
    if not snapshot.title:
        return
    rules = (
        self.session.query(ClassificationRule)
        # .is_(True) is the idiomatic SQLAlchemy boolean filter (avoids E712).
        .filter(ClassificationRule.is_active.is_(True))
        .order_by(ClassificationRule.sort_order, ClassificationRule.id)
        .all()
    )
    if not rules:
        return
    title = snapshot.title.lower()
    for rule in rules:
        keywords = rule.keywords or []
        if isinstance(keywords, str):
            # Defensive: tolerate a single keyword stored as a bare string.
            keywords = [keywords]
        if any(keyword and keyword.lower() in title for keyword in keywords):
            if rule.category:
                snapshot.category = rule.category
            if rule.type:
                snapshot.type = rule.type
            return
def add_price_history(self, product: Product, snapshot: ProductSnapshot) -> Optional[PriceHistory]:
"""Ajoute une entree d'historique de prix si inexistante."""

View File

@@ -25,7 +25,12 @@ class ScrapingPipeline:
def __init__(self, config: Optional[AppConfig] = None) -> None:
self.config = config
def process_snapshot(self, snapshot: ProductSnapshot, save_to_db: bool = True) -> Optional[int]:
def process_snapshot(
self,
snapshot: ProductSnapshot,
save_to_db: bool = True,
apply_classification: bool = True,
) -> Optional[int]:
"""
Persiste un snapshot en base si active.
@@ -39,6 +44,8 @@ class ScrapingPipeline:
try:
with get_session(app_config) as session:
repo = ProductRepository(session)
if apply_classification:
repo.apply_classification(snapshot)
product_id = repo.safe_save_snapshot(snapshot)
session.commit()
return product_id

View File

@@ -15,6 +15,13 @@ price:
- "#priceblock_dealprice"
- ".a-price-range .a-price .a-offscreen"
# Texte de réduction explicite
discount_text:
- "#regularprice_savings"
- "#dealprice_savings"
- "#savingsPercentage"
- "span.savingsPercentage"
# Devise (généralement dans le symbole)
currency:
- "span.a-price-symbol"
@@ -32,6 +39,24 @@ stock_status:
- "#availability"
- ".a-declarative .a-size-medium"
# Note moyenne
rating_value:
- "#acrPopover"
- "#averageCustomerReviews .a-icon-alt"
- "#averageCustomerReviews span.a-icon-alt"
# Nombre d'évaluations
rating_count:
- "#acrCustomerReviewText"
- "#acrCustomerReviewLink"
# Badge Choix d'Amazon
amazon_choice:
- "#acBadge_feature_div"
- "#acBadge_feature_div .ac-badge"
- "#acBadge_feature_div .ac-badge-rectangle"
- "#acBadge_feature_div .ac-badge-rectangle-icon"
# Images produit
images:
- "#landingImage"
@@ -44,6 +69,13 @@ category:
- "#wayfinding-breadcrumbs_feature_div"
- ".a-breadcrumb"
# Description (détails de l'article)
description:
- "#detailBullets_feature_div"
- "#detailBulletsWrapper_feature_div"
- "#productDetails_detailBullets_sections1"
- "#feature-bullets"
# Caractéristiques techniques (table specs)
specs_table:
- "#productDetails_techSpec_section_1"

View File

@@ -130,13 +130,19 @@ class AmazonStore(BaseStore):
title = self._extract_title(soup, debug_info)
price = self._extract_price(soup, debug_info)
currency = self._extract_currency(soup, debug_info)
stock_status = self._extract_stock(soup, debug_info)
images = self._extract_images(soup, debug_info)
stock_status, stock_text, in_stock = self._extract_stock_details(soup, debug_info)
main_image, gallery_images, images = self._extract_images(soup, debug_info)
category = self._extract_category(soup, debug_info)
specs = self._extract_specs(soup, debug_info)
description = self._extract_description(soup, debug_info)
msrp = self._extract_msrp(soup, debug_info)
reference = self.extract_reference(url) or self._extract_asin_from_html(soup)
rating_value = self._extract_rating_value(soup, debug_info)
rating_count = self._extract_rating_count(soup, debug_info)
amazon_choice, amazon_choice_label = self._extract_amazon_choice(soup, debug_info)
discount_text = self._extract_discount_text(soup, debug_info)
model_number, model_name = self._extract_model_details(specs)
asin = reference
# Déterminer le statut final (ne pas écraser FAILED)
if debug_info.status != DebugStatus.FAILED:
@@ -153,12 +159,24 @@ class AmazonStore(BaseStore):
currency=currency or "EUR",
shipping_cost=None, # Difficile à extraire
stock_status=stock_status,
stock_text=stock_text,
in_stock=in_stock,
reference=reference,
asin=asin,
category=category,
description=description,
images=images,
main_image=main_image,
gallery_images=gallery_images,
specs=specs,
msrp=msrp,
rating_value=rating_value,
rating_count=rating_count,
amazon_choice=amazon_choice,
amazon_choice_label=amazon_choice_label,
discount_text=discount_text,
model_number=model_number,
model_name=model_name,
debug=debug_info,
)
@@ -203,14 +221,26 @@ class AmazonStore(BaseStore):
return None
def _extract_description(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
    """Extract the product description from the item-detail sections.

    Tries each configured "description" CSS selector in order. For the
    first matching element, prefers the text of its <li> bullets (joined
    with newlines); otherwise falls back to the element's full text.
    Returns None when no selector yields any text.

    NOTE(review): this block contained leftover pre-refactor code (a
    meta-tag fallback with its own docstring) merged with the new
    selector-based implementation; the dead legacy path was removed.
    """
    selectors = self.get_selector("description", [])
    if isinstance(selectors, str):
        selectors = [selectors]
    for selector in selectors:
        element = soup.select_one(selector)
        if not element:
            continue
        items = [
            item.get_text(" ", strip=True)
            for item in element.select("li")
            if item.get_text(strip=True)
        ]
        if items:
            return "\n".join(items)
        text = " ".join(element.stripped_strings)
        if text:
            return text
    return None
def _extract_price(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]:
@@ -271,8 +301,10 @@ class AmazonStore(BaseStore):
# Défaut basé sur le domaine
return "EUR"
def _extract_stock_details(
    self, soup: BeautifulSoup, debug: DebugInfo
) -> tuple[StockStatus, Optional[str], Optional[bool]]:
    """Extract the stock status together with the raw availability text.

    Returns a (status, raw_text, in_stock) triple. The first selector
    whose element text matches a known availability phrase decides the
    result; unmatched elements are skipped and (UNKNOWN, None, None) is
    returned when nothing conclusive is found.

    NOTE(review): this block contained a leftover ``def _extract_stock``
    header and duplicated old/new return lines from a merged diff; only
    the new implementation is kept.
    """
    selectors = self.get_selector("stock_status", [])
    if isinstance(selectors, str):
        selectors = [selectors]
    for selector in selectors:
        element = soup.select_one(selector)
        if element:
            text = element.get_text(strip=True)
            normalized = text.lower()
            if "en stock" in normalized or "available" in normalized or "in stock" in normalized:
                return StockStatus.IN_STOCK, text, True
            elif (
                "rupture" in normalized
                or "indisponible" in normalized
                or "out of stock" in normalized
            ):
                return StockStatus.OUT_OF_STOCK, text, False
    return StockStatus.UNKNOWN, None, None
def _extract_images(self, soup: BeautifulSoup, debug: DebugInfo) -> list[str]:
"""Extrait les URLs d'images."""
images = []
seen = set()
def _extract_images(
self, soup: BeautifulSoup, debug: DebugInfo
) -> tuple[Optional[str], list[str], list[str]]:
"""Extrait l'image principale et la galerie."""
images: list[str] = []
seen: set[str] = set()
main_image: Optional[str] = None
max_gallery = 15
selectors = self.get_selector("images", [])
if isinstance(selectors, str):
selectors = [selectors]
@@ -309,6 +346,8 @@ class AmazonStore(BaseStore):
if self._is_product_image(url) and url not in seen:
images.append(url)
seen.add(url)
if main_image is None:
main_image = url
dynamic = element.get("data-a-dynamic-image")
if dynamic:
urls = self._extract_dynamic_images(dynamic)
@@ -316,6 +355,8 @@ class AmazonStore(BaseStore):
if self._is_product_image(dyn_url) and dyn_url not in seen:
images.append(dyn_url)
seen.add(dyn_url)
if main_image is None:
main_image = dyn_url
# Fallback: chercher tous les img tags si aucune image trouvée
if not images:
@@ -326,8 +367,15 @@ class AmazonStore(BaseStore):
if url not in seen:
images.append(url)
seen.add(url)
if main_image is None:
main_image = url
return images
if main_image is None and images:
main_image = images[0]
gallery_images = [url for url in images if url != main_image]
gallery_images = gallery_images[:max_gallery]
final_images = [main_image] + gallery_images if main_image else gallery_images
return main_image, gallery_images, final_images
def _extract_dynamic_images(self, raw: str) -> list[str]:
"""Extrait les URLs du JSON data-a-dynamic-image."""
@@ -393,8 +441,111 @@ class AmazonStore(BaseStore):
if key and value:
specs[key] = value
# Détails de l'article sous forme de liste
detail_list = soup.select("#detailBullets_feature_div li")
for item in detail_list:
text = item.get_text(" ", strip=True)
if ":" not in text:
continue
key, value = text.split(":", 1)
key = key.strip()
value = value.strip()
if key and value and key not in specs:
specs[key] = value
return specs
def _extract_rating_value(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]:
    """Extract the average star rating as a float, if present."""
    selectors = self.get_selector("rating_value", [])
    if isinstance(selectors, str):
        selectors = [selectors]
    for css in selectors:
        node = soup.select_one(css)
        if not node:
            continue
        # Prefer visible text; fall back to the title attribute.
        raw = node.get_text(" ", strip=True) or node.get("title", "").strip()
        found = re.search(r"([\d.,]+)", raw)
        if not found:
            continue
        candidate = found.group(1).replace(",", ".")
        try:
            return float(candidate)
        except ValueError:
            # Matched punctuation only (e.g. "."): try the next selector.
            continue
    return None
def _extract_rating_count(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[int]:
    """Extract the number of customer ratings.

    Handles locale-specific digit grouping: narrow/no-break/regular
    spaces (fr, e.g. "1 234 évaluations") as well as comma or period
    separators (en/de, e.g. "1,234 ratings") — the original pattern
    dropped everything after a "," and returned 1 for "1,234".
    """
    selectors = self.get_selector("rating_count", [])
    if isinstance(selectors, str):
        selectors = [selectors]
    for selector in selectors:
        element = soup.select_one(selector)
        if not element:
            continue
        text = element.get_text(" ", strip=True)
        # Anchor on a digit so pure whitespace can never match, then allow
        # any grouping separators and strip non-digits before parsing.
        match = re.search(r"(\d[\d\s\u202f\u00a0.,]*)", text)
        if match:
            numeric = re.sub(r"[^\d]", "", match.group(1))
            if numeric:
                return int(numeric)
    return None
def _extract_amazon_choice(
    self, soup: BeautifulSoup, debug: DebugInfo
) -> tuple[Optional[bool], Optional[str]]:
    """Extract the "Amazon's Choice" badge flag and its label.

    Returns (True, label) when a badge element is found — label is None
    when the element carries no usable text — and (None, None) when no
    badge is present.

    The original branched on the badge wording, but every branch returned
    the same (True, label-or-None) result; the dead branches were removed.
    """
    selectors = self.get_selector("amazon_choice", [])
    if isinstance(selectors, str):
        selectors = [selectors]
    for selector in selectors:
        element = soup.select_one(selector)
        if not element:
            continue
        # First non-empty label source wins: visible text, then
        # accessibility/tooltip attributes.
        label_candidates = (
            element.get_text(" ", strip=True),
            element.get("aria-label", "").strip(),
            element.get("title", "").strip(),
            element.get("data-a-badge-label", "").strip(),
        )
        label = next((candidate for candidate in label_candidates if candidate), "")
        return True, (label or None)
    return None, None
def _extract_discount_text(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
    """Extract the explicit discount/savings text, if any."""
    selectors = self.get_selector("discount_text", [])
    if isinstance(selectors, str):
        selectors = [selectors]
    for css in selectors:
        node = soup.select_one(css)
        if not node:
            continue
        label = node.get_text(" ", strip=True)
        if label:
            return label
    return None
def _extract_model_details(self, specs: dict[str, str]) -> tuple[Optional[str], Optional[str]]:
"""Extrait le numero et le nom du modele depuis les specs."""
model_number = None
model_name = None
for key, value in specs.items():
normalized = key.lower()
if "numéro du modèle de l'article" in normalized or "numero du modele de l'article" in normalized:
model_number = value
if "nom du modèle" in normalized or "nom du modele" in normalized:
model_name = value
return model_number, model_name
def _extract_asin_from_html(self, soup: BeautifulSoup) -> Optional[str]:
"""Extrait l'ASIN depuis le HTML (fallback)."""
selectors = self.get_selector("asin", [])

View File

@@ -6,6 +6,7 @@ from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
import hashlib
from typing import Optional
import redis
@@ -127,11 +128,13 @@ class ScrapingScheduler:
interval_hours: int = 24,
use_playwright: Optional[bool] = None,
save_db: bool = True,
job_id: Optional[str] = None,
) -> ScheduledJobInfo:
"""Planifie un scraping recurrent (intervalle en heures)."""
interval_seconds = int(timedelta(hours=interval_hours).total_seconds())
next_run = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)
resolved_job_id = job_id or self._job_id_for_url(url)
job = self.scheduler.schedule(
scheduled_time=next_run,
func=scrape_product,
@@ -139,6 +142,13 @@ class ScrapingScheduler:
kwargs={"use_playwright": use_playwright, "save_db": save_db},
interval=interval_seconds,
repeat=None,
id=resolved_job_id,
)
logger.info(f"Job planifie: {job.id}, prochaine execution: {next_run.isoformat()}")
return ScheduledJobInfo(job_id=job.id, next_run=next_run)
@staticmethod
def _job_id_for_url(url: str) -> str:
"""Genere un job_id stable pour eviter les doublons."""
fingerprint = hashlib.sha1(url.strip().lower().encode("utf-8")).hexdigest()
return f"scrape_{fingerprint}"

View File

@@ -157,6 +157,36 @@ def scrape_product(
)
success = False
fetch_error = str(exc)
# Si captcha detecte via HTTP, forcer une tentative Playwright.
if (
fetch_method == FetchMethod.HTTP
and use_playwright
and snapshot.debug.errors
and any("captcha" in error.lower() for error in snapshot.debug.errors)
):
logger.info("[FETCH] Captcha detecte, tentative Playwright")
pw_result = fetch_playwright(
canonical_url,
headless=not headful,
timeout_ms=timeout_ms,
save_screenshot=save_screenshot,
)
if pw_result.success and pw_result.html:
try:
snapshot = store.parse(pw_result.html, canonical_url)
snapshot.debug.method = FetchMethod.PLAYWRIGHT
snapshot.debug.duration_ms = pw_result.duration_ms
snapshot.debug.html_size_bytes = len(pw_result.html.encode("utf-8"))
snapshot.add_note("Captcha detecte via HTTP, fallback Playwright")
success = snapshot.debug.status != DebugStatus.FAILED
except Exception as exc:
snapshot.add_note(f"Fallback Playwright echoue: {exc}")
logger.error(f"[PARSE] Exception fallback Playwright: {exc}")
fetch_error = str(exc)
else:
error = pw_result.error or "Erreur Playwright"
snapshot.add_note(f"Fallback Playwright echoue: {error}")
fetch_error = error
else:
snapshot = ProductSnapshot(
source=store.store_id,