2026-01-14 07:03:38 +01:00
parent ecda149a4b
commit c91c0f1fc9
61 changed files with 4388 additions and 38 deletions


@@ -13,20 +13,28 @@ import sys
from pathlib import Path
from typing import Optional
import redis
import typer
from rq import Connection, Worker
from alembic import command as alembic_command
from alembic.config import Config as AlembicConfig
from rich import print as rprint
from rich.console import Console
from rich.table import Table
from pricewatch.app.core import logging as app_logging
from pricewatch.app.core.config import get_config
from pricewatch.app.core.io import read_yaml_config, write_json_results
from pricewatch.app.core.logging import get_logger, set_level
from pricewatch.app.core.registry import get_registry, register_store
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod
from pricewatch.app.db.connection import init_db
from pricewatch.app.scraping.http_fetch import fetch_http
from pricewatch.app.scraping.pipeline import ScrapingPipeline
from pricewatch.app.scraping.pw_fetch import fetch_playwright
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.stores.cdiscount.store import CdiscountStore
from pricewatch.app.tasks.scheduler import ScrapingScheduler
# Create the Typer application
app = typer.Typer(
@@ -46,6 +54,75 @@ def setup_stores():
registry.register(CdiscountStore())
def get_alembic_config() -> AlembicConfig:
"""Construit la configuration Alembic à partir du repository."""
root_path = Path(__file__).resolve().parents[3]
config_path = root_path / "alembic.ini"
migrations_path = root_path / "pricewatch" / "app" / "db" / "migrations"
if not config_path.exists():
logger.error(f"alembic.ini introuvable: {config_path}")
raise typer.Exit(code=1)
alembic_cfg = AlembicConfig(str(config_path))
alembic_cfg.set_main_option("script_location", str(migrations_path))
alembic_cfg.set_main_option("sqlalchemy.url", get_config().db.url)
return alembic_cfg
@app.command("init-db")
def init_db_command():
"""
Initialize the database (create all tables).
"""
try:
init_db(get_config())
except Exception as e:
logger.error(f"Init DB echoue: {e}")
raise typer.Exit(code=1)
@app.command()
def migrate(
message: str = typer.Argument(..., help="Message de migration"),
autogenerate: bool = typer.Option(True, "--autogenerate/--no-autogenerate"),
):
"""
Generate an Alembic migration.
"""
try:
alembic_cfg = get_alembic_config()
alembic_command.revision(alembic_cfg, message=message, autogenerate=autogenerate)
except Exception as e:
logger.error(f"Migration echouee: {e}")
raise typer.Exit(code=1)
@app.command()
def upgrade(revision: str = typer.Argument("head", help="Revision cible")):
"""
Apply Alembic migrations up to the target revision.
"""
try:
alembic_cfg = get_alembic_config()
alembic_command.upgrade(alembic_cfg, revision)
except Exception as e:
logger.error(f"Upgrade echoue: {e}")
raise typer.Exit(code=1)
@app.command()
def downgrade(revision: str = typer.Argument("-1", help="Revision cible")):
"""
Roll back Alembic migrations to the target revision.
"""
try:
alembic_cfg = get_alembic_config()
alembic_command.downgrade(alembic_cfg, revision)
except Exception as e:
logger.error(f"Downgrade echoue: {e}")
raise typer.Exit(code=1)
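These migration commands can also be exercised in-process with Typer's test runner; a minimal sketch, assuming the Typer `app` above is importable as `pricewatch.app.cli` (hypothetical path) and that the database configured via `PW_DB_*` is reachable:

```python
# Hedged sketch: drive the migration commands through Typer's CliRunner.
from typer.testing import CliRunner

from pricewatch.app.cli import app  # hypothetical import path for the Typer app defined above

runner = CliRunner()

# Apply all pending migrations (same as `upgrade head` on the command line).
result = runner.invoke(app, ["upgrade", "head"])
assert result.exit_code == 0, result.output

# Roll back one revision (the default target is "-1").
result = runner.invoke(app, ["downgrade"])
assert result.exit_code == 0, result.output
```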
@app.command()
def run(
yaml: Path = typer.Option(
@@ -67,6 +144,11 @@ def run(
"-d",
help="Activer le mode debug",
),
save_db: Optional[bool] = typer.Option(
None,
"--save-db/--no-db",
help="Activer la persistence en base de donnees",
),
):
"""
Full pipeline: scrape every URL from the YAML file and generate the JSON output.
@@ -88,6 +170,12 @@ def run(
logger.error(f"Erreur lecture YAML: {e}")
raise typer.Exit(code=1)
app_config = get_config()
if save_db is None:
save_db = app_config.enable_db
pipeline = ScrapingPipeline(config=app_config)
logger.info(f"{len(config.urls)} URL(s) à scraper")
# Scrape each URL
@@ -158,6 +246,11 @@ def run(
snapshot = store.parse(html, canonical_url)
snapshot.debug.method = fetch_method
if save_db:
product_id = pipeline.process_snapshot(snapshot, save_to_db=True)
if product_id:
logger.info(f"DB: produit id={product_id}")
snapshots.append(snapshot)
status_emoji = "✓" if snapshot.is_complete() else "⚠"
@@ -180,6 +273,8 @@ def run(
errors=[f"Parsing failed: {str(e)}"],
),
)
if save_db:
pipeline.process_snapshot(snapshot, save_to_db=True)
snapshots.append(snapshot)
else:
# No HTML retrieved
@@ -194,6 +289,8 @@ def run(
errors=[f"Fetch failed: {fetch_error or 'Unknown error'}"],
),
)
if save_db:
pipeline.process_snapshot(snapshot, save_to_db=True)
snapshots.append(snapshot)
# Write the results
@@ -359,5 +456,65 @@ def doctor():
rprint("\n[green]✓ PriceWatch est prêt![/green]")
@app.command()
def worker(
queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"),
with_scheduler: bool = typer.Option(
True, "--with-scheduler/--no-scheduler", help="Activer le scheduler RQ"
),
):
"""
Start an RQ worker.
"""
config = get_config()
connection = redis.from_url(config.redis.url)
with Connection(connection):
worker_instance = Worker([queue])
worker_instance.work(with_scheduler=with_scheduler)
@app.command()
def enqueue(
url: str = typer.Argument(..., help="URL du produit a scraper"),
queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"),
save_db: bool = typer.Option(True, "--save-db/--no-db", help="Activer la DB"),
use_playwright: Optional[bool] = typer.Option(
None, "--playwright/--no-playwright", help="Forcer Playwright"
),
):
"""
Enqueue an immediate scraping job.
"""
scheduler = ScrapingScheduler(get_config(), queue_name=queue)
job = scheduler.enqueue_immediate(url, use_playwright=use_playwright, save_db=save_db)
rprint(f"[green]✓ Job enqueued: {job.id}[/green]")
@app.command()
def schedule(
url: str = typer.Argument(..., help="URL du produit a planifier"),
interval: int = typer.Option(24, "--interval", help="Intervalle en heures"),
queue: str = typer.Option("default", "--queue", "-q", help="Nom de la queue RQ"),
save_db: bool = typer.Option(True, "--save-db/--no-db", help="Activer la DB"),
use_playwright: Optional[bool] = typer.Option(
None, "--playwright/--no-playwright", help="Forcer Playwright"
),
):
"""
Schedule a recurring scraping job.
"""
scheduler = ScrapingScheduler(get_config(), queue_name=queue)
job_info = scheduler.schedule_product(
url,
interval_hours=interval,
use_playwright=use_playwright,
save_db=save_db,
)
rprint(
f"[green]✓ Job planifie: {job_info.job_id} (next={job_info.next_run.isoformat()})[/green]"
)
if __name__ == "__main__":
app()


pricewatch/app/core/config.py Executable file (186 lines)

@@ -0,0 +1,186 @@
"""
Centralized configuration for PriceWatch Phase 2.
Handles database, Redis, and global application configuration.
Uses Pydantic Settings for validation and loading from environment variables.
Technical rationale:
- 12-factor app pattern: configuration via env vars
- Pydantic validation guarantees a valid config at startup
- Sensible defaults for local development
- .env file support to ease setup
"""
from typing import Optional
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from pricewatch.app.core.logging import get_logger
logger = get_logger("core.config")
class DatabaseConfig(BaseSettings):
"""Configuration PostgreSQL."""
host: str = Field(default="localhost", description="PostgreSQL host")
port: int = Field(default=5432, description="PostgreSQL port")
database: str = Field(default="pricewatch", description="Database name")
user: str = Field(default="pricewatch", description="Database user")
password: str = Field(default="pricewatch", description="Database password")
model_config = SettingsConfigDict(
env_prefix="PW_DB_", # PW_DB_HOST, PW_DB_PORT, etc.
env_file=".env",
env_file_encoding="utf-8",
extra="ignore",
)
@property
def url(self) -> str:
"""
SQLAlchemy connection URL.
Format: postgresql://user:password@host:port/database
"""
return f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
@property
def url_async(self) -> str:
"""
Async SQLAlchemy connection URL (for future use with asyncpg).
Format: postgresql+asyncpg://user:password@host:port/database
"""
return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}"
class RedisConfig(BaseSettings):
"""Configuration Redis pour RQ worker."""
host: str = Field(default="localhost", description="Redis host")
port: int = Field(default=6379, description="Redis port")
db: int = Field(default=0, description="Redis database number (0-15)")
password: Optional[str] = Field(default=None, description="Redis password (optional)")
model_config = SettingsConfigDict(
env_prefix="PW_REDIS_", # PW_REDIS_HOST, PW_REDIS_PORT, etc.
env_file=".env",
env_file_encoding="utf-8",
extra="ignore",
)
@property
def url(self) -> str:
"""
Redis connection URL for RQ.
Format: redis://[:password@]host:port/db
"""
auth = f":{self.password}@" if self.password else ""
return f"redis://{auth}{self.host}:{self.port}/{self.db}"
class AppConfig(BaseSettings):
"""Configuration globale de l'application."""
# Debug mode
debug: bool = Field(
default=False, description="Enable debug mode (verbose logging, SQL echo)"
)
# Worker configuration
worker_timeout: int = Field(
default=300, description="Worker job timeout in seconds (5 minutes)"
)
worker_concurrency: int = Field(
default=2, description="Number of concurrent worker processes"
)
# Feature flags
enable_db: bool = Field(
default=True, description="Enable database persistence (can disable for testing)"
)
enable_worker: bool = Field(
default=True, description="Enable background worker functionality"
)
# Scraping defaults
default_playwright_timeout: int = Field(
default=60000, description="Default Playwright timeout in milliseconds"
)
default_use_playwright: bool = Field(
default=True, description="Use Playwright fallback by default"
)
model_config = SettingsConfigDict(
env_prefix="PW_", # PW_DEBUG, PW_WORKER_TIMEOUT, etc.
env_file=".env",
env_file_encoding="utf-8",
extra="ignore",
)
# Nested configs (instances, not classes)
db: DatabaseConfig = Field(default_factory=DatabaseConfig)
redis: RedisConfig = Field(default_factory=RedisConfig)
def log_config(self) -> None:
"""Log la configuration active (sans password)."""
logger.info("=== Configuration PriceWatch ===")
logger.info(f"Debug mode: {self.debug}")
logger.info(f"Database: {self.db.host}:{self.db.port}/{self.db.database}")
logger.info(f"Redis: {self.redis.host}:{self.redis.port}/{self.redis.db}")
logger.info(f"DB enabled: {self.enable_db}")
logger.info(f"Worker enabled: {self.enable_worker}")
logger.info(f"Worker timeout: {self.worker_timeout}s")
logger.info(f"Worker concurrency: {self.worker_concurrency}")
logger.info("================================")
# Singleton global config instance
_config: Optional[AppConfig] = None
def get_config() -> AppConfig:
"""
Return the global configuration instance (singleton).
Returns:
AppConfig instance
Rationale:
- Avoids reloading the config on every call
- Centralizes configuration for the whole application
- Can be overridden for tests
"""
global _config
if _config is None:
_config = AppConfig()
if _config.debug:
_config.log_config()
return _config
def set_config(config: AppConfig) -> None:
"""
Override the global configuration (mainly for tests).
Args:
config: AppConfig instance to use
"""
global _config
_config = config
logger.debug("Configuration overridden")
def reset_config() -> None:
"""Reset la configuration globale (pour tests)."""
global _config
_config = None
logger.debug("Configuration reset")

pricewatch/app/db/__init__.py Executable file (41 lines)

@@ -0,0 +1,41 @@
"""
Database module for PriceWatch Phase 2.
Handles PostgreSQL persistence with the SQLAlchemy ORM.
"""
from pricewatch.app.db.connection import (
check_db_connection,
get_engine,
get_session,
get_session_factory,
init_db,
reset_engine,
)
from pricewatch.app.db.repository import ProductRepository
from pricewatch.app.db.models import (
Base,
Product,
PriceHistory,
ProductImage,
ProductSpec,
ScrapingLog,
)
__all__ = [
# Models
"Base",
"Product",
"PriceHistory",
"ProductImage",
"ProductSpec",
"ScrapingLog",
"ProductRepository",
# Connection
"get_engine",
"get_session_factory",
"get_session",
"init_db",
"check_db_connection",
"reset_engine",
]


pricewatch/app/db/connection.py Executable file (238 lines)

@@ -0,0 +1,238 @@
"""
PostgreSQL connection management for PriceWatch Phase 2.
Provides:
- SQLAlchemy engine with connection pooling
- Session factory with a context manager
- Table initialization
- Health check
Technical rationale:
- Connection pooling: connections are reused for performance
- Context manager: guarantees the session is closed (no leaks)
- pool_pre_ping: checks the connection before use (robustness)
- echo=debug: SQL logging in debug mode
"""
from contextlib import contextmanager
from typing import Generator, Optional
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine
from sqlalchemy.engine.url import make_url
from sqlalchemy.exc import OperationalError, SQLAlchemyError
from sqlalchemy.orm import Session, sessionmaker
from pricewatch.app.core.config import AppConfig, get_config
from pricewatch.app.core.logging import get_logger
from pricewatch.app.db.models import Base
logger = get_logger("db.connection")
# Global engine instance (singleton)
_engine: Optional[Engine] = None
_session_factory: Optional[sessionmaker] = None
def get_engine(config: Optional[AppConfig] = None) -> Engine:
"""
Return or create the SQLAlchemy Engine (singleton).
Args:
config: App configuration (uses get_config() if None)
Returns:
Configured SQLAlchemy Engine
Rationale:
- Singleton: a single connection pool per application
- pool_pre_ping: checks the connection before use (avoids "connection closed")
- pool_size=5, max_overflow=10: caps connections (15 max)
- echo=debug: SQL logging for debugging
"""
global _engine
if _engine is None:
if config is None:
config = get_config()
db_url = config.db.url
url = make_url(db_url)
is_sqlite = url.get_backend_name() == "sqlite"
logger.info(f"Creating database engine: {db_url}")
engine_kwargs = {
"pool_pre_ping": True,
"pool_recycle": 3600,
"echo": config.debug,
}
if not is_sqlite:
engine_kwargs.update(
{
"pool_size": 5,
"max_overflow": 10,
}
)
_engine = create_engine(db_url, **engine_kwargs)
logger.info("Database engine created successfully")
return _engine
def init_db(config: Optional[AppConfig] = None) -> None:
"""
Initialize the database (create all tables).
Args:
config: App configuration (uses get_config() if None)
Raises:
OperationalError: if the connection fails
SQLAlchemyError: if table creation fails
Note:
Uses Base.metadata.create_all() - idempotent (does not fail if tables already exist)
"""
if config is None:
config = get_config()
logger.info("Initializing database...")
try:
engine = get_engine(config)
# Create every table defined in Base.metadata
Base.metadata.create_all(bind=engine)
logger.info("Database initialized successfully")
logger.info(f"Tables created: {', '.join(Base.metadata.tables.keys())}")
except OperationalError as e:
logger.error(f"Failed to connect to database: {e}")
raise
except SQLAlchemyError as e:
logger.error(f"Failed to create tables: {e}")
raise
def get_session_factory(config: Optional[AppConfig] = None) -> sessionmaker:
"""
Return or create the session factory (singleton).
Args:
config: App configuration (uses get_config() if None)
Returns:
SQLAlchemy session factory
Rationale:
- expire_on_commit=False: objects stay usable after commit
- autocommit=False, autoflush=False: explicit control
"""
global _session_factory
if _session_factory is None:
engine = get_engine(config)
_session_factory = sessionmaker(
bind=engine,
expire_on_commit=False, # Objects stay usable after commit
autocommit=False, # Explicit commit control
autoflush=False, # Explicit flush control
)
logger.debug("Session factory created")
return _session_factory
@contextmanager
def get_session(config: Optional[AppConfig] = None) -> Generator[Session, None, None]:
"""
Context manager for a SQLAlchemy session.
Args:
config: App configuration (uses get_config() if None)
Yields:
SQLAlchemy Session
Usage:
with get_session() as session:
product = session.query(Product).filter_by(reference="B08N5WRWNW").first()
session.commit()
Rationale:
- Context manager: guarantees the session is closed (no leaks)
- Automatic rollback on exception
- Automatic close at the end of the block
"""
factory = get_session_factory(config)
session = factory()
try:
logger.debug("Session opened")
yield session
except Exception as e:
logger.error(f"Session error, rolling back: {e}")
session.rollback()
raise
finally:
logger.debug("Session closed")
session.close()
def check_db_connection(config: Optional[AppConfig] = None) -> bool:
"""
Check the database connection (health check).
Args:
config: App configuration (uses get_config() if None)
Returns:
True if the connection is OK, False otherwise
Note:
Runs a trivial query: SELECT 1
"""
if config is None:
config = get_config()
try:
engine = get_engine(config)
with engine.connect() as conn:
result = conn.execute(text("SELECT 1"))
result.scalar()
logger.info("Database connection OK")
return True
except OperationalError as e:
logger.error(f"Database connection failed: {e}")
return False
except SQLAlchemyError as e:
logger.error(f"Database health check failed: {e}")
return False
def reset_engine() -> None:
"""
Reset the global engine (for tests).
Note:
Disposes the engine and resets the singletons.
"""
global _engine, _session_factory
if _engine is not None:
logger.debug("Disposing database engine")
_engine.dispose()
_engine = None
_session_factory = None
logger.debug("Engine reset complete")



@@ -0,0 +1,80 @@
"""
Alembic configuration for PriceWatch.
Pulls the DB URL from AppConfig to keep the setup consistent.
"""
from logging.config import fileConfig
from alembic import context
from sqlalchemy import engine_from_config, pool
from pricewatch.app.core.config import get_config
from pricewatch.app.db.models import Base
# Alembic Config object
config = context.config
# Configure logging
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# SQLAlchemy metadata for autogenerate
target_metadata = Base.metadata
def _get_database_url() -> str:
"""Construit l'URL DB depuis la config applicative."""
app_config = get_config()
return app_config.db.url
def run_migrations_offline() -> None:
"""
Run migrations in offline mode.
Configures the context with the DB URL without creating an engine.
"""
url = _get_database_url()
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
compare_type=True,
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""
Run migrations in online mode.
Creates a SQLAlchemy engine and opens the connection.
"""
configuration = config.get_section(config.config_ini_section) or {}
configuration["sqlalchemy.url"] = _get_database_url()
connectable = engine_from_config(
configuration,
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata,
compare_type=True,
)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()


@@ -0,0 +1,24 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# Revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}


@@ -0,0 +1,124 @@
"""Initial schema
Revision ID: 20260114_01
Revises: None
Create Date: 2026-01-14 00:00:00
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers, used by Alembic.
revision = "20260114_01"
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"products",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("source", sa.String(length=50), nullable=False),
sa.Column("reference", sa.String(length=100), nullable=False),
sa.Column("url", sa.Text(), nullable=False),
sa.Column("title", sa.Text(), nullable=True),
sa.Column("category", sa.Text(), nullable=True),
sa.Column("currency", sa.String(length=3), nullable=True),
sa.Column("first_seen_at", sa.TIMESTAMP(), nullable=False),
sa.Column("last_updated_at", sa.TIMESTAMP(), nullable=False),
sa.UniqueConstraint("source", "reference", name="uq_product_source_reference"),
)
op.create_index("ix_product_source", "products", ["source"], unique=False)
op.create_index("ix_product_reference", "products", ["reference"], unique=False)
op.create_index("ix_product_last_updated", "products", ["last_updated_at"], unique=False)
op.create_table(
"price_history",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("product_id", sa.Integer(), nullable=False),
sa.Column("price", sa.Numeric(10, 2), nullable=True),
sa.Column("shipping_cost", sa.Numeric(10, 2), nullable=True),
sa.Column("stock_status", sa.String(length=20), nullable=True),
sa.Column("fetch_method", sa.String(length=20), nullable=False),
sa.Column("fetch_status", sa.String(length=20), nullable=False),
sa.Column("fetched_at", sa.TIMESTAMP(), nullable=False),
sa.ForeignKeyConstraint(["product_id"], ["products.id"], ondelete="CASCADE"),
sa.UniqueConstraint("product_id", "fetched_at", name="uq_price_history_product_time"),
sa.CheckConstraint("stock_status IN ('in_stock', 'out_of_stock', 'unknown')"),
sa.CheckConstraint("fetch_method IN ('http', 'playwright')"),
sa.CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"),
)
op.create_index("ix_price_history_product_id", "price_history", ["product_id"], unique=False)
op.create_index("ix_price_history_fetched_at", "price_history", ["fetched_at"], unique=False)
op.create_table(
"product_images",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("product_id", sa.Integer(), nullable=False),
sa.Column("image_url", sa.Text(), nullable=False),
sa.Column("position", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(["product_id"], ["products.id"], ondelete="CASCADE"),
sa.UniqueConstraint("product_id", "image_url", name="uq_product_image_url"),
)
op.create_index("ix_product_image_product_id", "product_images", ["product_id"], unique=False)
op.create_table(
"product_specs",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("product_id", sa.Integer(), nullable=False),
sa.Column("spec_key", sa.String(length=200), nullable=False),
sa.Column("spec_value", sa.Text(), nullable=False),
sa.ForeignKeyConstraint(["product_id"], ["products.id"], ondelete="CASCADE"),
sa.UniqueConstraint("product_id", "spec_key", name="uq_product_spec_key"),
)
op.create_index("ix_product_spec_product_id", "product_specs", ["product_id"], unique=False)
op.create_index("ix_product_spec_key", "product_specs", ["spec_key"], unique=False)
op.create_table(
"scraping_logs",
sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
sa.Column("product_id", sa.Integer(), nullable=True),
sa.Column("url", sa.Text(), nullable=False),
sa.Column("source", sa.String(length=50), nullable=False),
sa.Column("reference", sa.String(length=100), nullable=True),
sa.Column("fetch_method", sa.String(length=20), nullable=False),
sa.Column("fetch_status", sa.String(length=20), nullable=False),
sa.Column("fetched_at", sa.TIMESTAMP(), nullable=False),
sa.Column("duration_ms", sa.Integer(), nullable=True),
sa.Column("html_size_bytes", sa.Integer(), nullable=True),
sa.Column("errors", postgresql.JSONB(), nullable=True),
sa.Column("notes", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(["product_id"], ["products.id"], ondelete="SET NULL"),
sa.CheckConstraint("fetch_method IN ('http', 'playwright')"),
sa.CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"),
)
op.create_index("ix_scraping_log_product_id", "scraping_logs", ["product_id"], unique=False)
op.create_index("ix_scraping_log_source", "scraping_logs", ["source"], unique=False)
op.create_index("ix_scraping_log_fetched_at", "scraping_logs", ["fetched_at"], unique=False)
op.create_index("ix_scraping_log_fetch_status", "scraping_logs", ["fetch_status"], unique=False)
def downgrade() -> None:
op.drop_index("ix_scraping_log_fetch_status", table_name="scraping_logs")
op.drop_index("ix_scraping_log_fetched_at", table_name="scraping_logs")
op.drop_index("ix_scraping_log_source", table_name="scraping_logs")
op.drop_index("ix_scraping_log_product_id", table_name="scraping_logs")
op.drop_table("scraping_logs")
op.drop_index("ix_product_spec_key", table_name="product_specs")
op.drop_index("ix_product_spec_product_id", table_name="product_specs")
op.drop_table("product_specs")
op.drop_index("ix_product_image_product_id", table_name="product_images")
op.drop_table("product_images")
op.drop_index("ix_price_history_fetched_at", table_name="price_history")
op.drop_index("ix_price_history_product_id", table_name="price_history")
op.drop_table("price_history")
op.drop_index("ix_product_last_updated", table_name="products")
op.drop_index("ix_product_reference", table_name="products")
op.drop_index("ix_product_source", table_name="products")
op.drop_table("products")

pricewatch/app/db/models.py Executable file (320 lines)

@@ -0,0 +1,320 @@
"""
SQLAlchemy models for PriceWatch Phase 2.
Normalized schema for PostgreSQL persistence:
- products: product catalogue (deduplicated on source + reference)
- price_history: time-series price history
- product_images: product images (N per product)
- product_specs: product characteristics (key-value)
- scraping_logs: observability logs for debugging
Technical rationale:
- Normalization: products kept separate from price_history (catalogue vs time-series)
- Natural key: (source, reference) unique constraint (Amazon ASIN, etc.)
- No JSONB for structured data: separate tables for images/specs
- JSONB only for variable data: errors, notes in logs
"""
from datetime import datetime
from decimal import Decimal
from typing import List, Optional
from sqlalchemy import (
TIMESTAMP,
CheckConstraint,
Column,
ForeignKey,
Index,
Integer,
JSON,
Numeric,
String,
Text,
UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
class Base(DeclarativeBase):
"""Base class pour tous les modèles SQLAlchemy."""
pass
class Product(Base):
"""
Product catalogue (one row per unique product).
Natural key: (source, reference) - e.g. (amazon, B08N5WRWNW)
Updated on every scrape: title, category, url
Price history: 1-N relationship to PriceHistory
"""
__tablename__ = "products"
# Primary key
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
# Natural key (unique)
source: Mapped[str] = mapped_column(
String(50), nullable=False, comment="Store ID (amazon, cdiscount, etc.)"
)
reference: Mapped[str] = mapped_column(
String(100), nullable=False, comment="Product reference (ASIN, SKU, etc.)"
)
# Product metadata
url: Mapped[str] = mapped_column(Text, nullable=False, comment="Canonical product URL")
title: Mapped[Optional[str]] = mapped_column(Text, nullable=True, comment="Product title")
category: Mapped[Optional[str]] = mapped_column(
Text, nullable=True, comment="Product category (breadcrumb)"
)
currency: Mapped[Optional[str]] = mapped_column(
String(3), nullable=True, comment="Currency code (EUR, USD, GBP)"
)
# Timestamps
first_seen_at: Mapped[datetime] = mapped_column(
TIMESTAMP, nullable=False, default=datetime.utcnow, comment="First scraping timestamp"
)
last_updated_at: Mapped[datetime] = mapped_column(
TIMESTAMP,
nullable=False,
default=datetime.utcnow,
onupdate=datetime.utcnow,
comment="Last metadata update",
)
# Relationships
price_history: Mapped[List["PriceHistory"]] = relationship(
"PriceHistory", back_populates="product", cascade="all, delete-orphan"
)
images: Mapped[List["ProductImage"]] = relationship(
"ProductImage", back_populates="product", cascade="all, delete-orphan"
)
specs: Mapped[List["ProductSpec"]] = relationship(
"ProductSpec", back_populates="product", cascade="all, delete-orphan"
)
logs: Mapped[List["ScrapingLog"]] = relationship(
"ScrapingLog", back_populates="product", cascade="all, delete-orphan"
)
# Constraints
__table_args__ = (
UniqueConstraint("source", "reference", name="uq_product_source_reference"),
Index("ix_product_source", "source"),
Index("ix_product_reference", "reference"),
Index("ix_product_last_updated", "last_updated_at"),
)
def __repr__(self) -> str:
return f"<Product(id={self.id}, source={self.source}, reference={self.reference})>"
class PriceHistory(Base):
"""
Price history (time-series).
One row per successful scrape with an extracted price.
Unique constraint on (product_id, fetched_at) avoids duplicates.
"""
__tablename__ = "price_history"
# Primary key
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
# Foreign key
product_id: Mapped[int] = mapped_column(
Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False
)
# Price data
price: Mapped[Optional[Decimal]] = mapped_column(
Numeric(10, 2), nullable=True, comment="Product price"
)
shipping_cost: Mapped[Optional[Decimal]] = mapped_column(
Numeric(10, 2), nullable=True, comment="Shipping cost"
)
stock_status: Mapped[Optional[str]] = mapped_column(
String(20), nullable=True, comment="Stock status (in_stock, out_of_stock, unknown)"
)
# Fetch metadata
fetch_method: Mapped[str] = mapped_column(
String(20), nullable=False, comment="Fetch method (http, playwright)"
)
fetch_status: Mapped[str] = mapped_column(
String(20), nullable=False, comment="Fetch status (success, partial, failed)"
)
fetched_at: Mapped[datetime] = mapped_column(
TIMESTAMP, nullable=False, comment="Scraping timestamp"
)
# Relationship
product: Mapped["Product"] = relationship("Product", back_populates="price_history")
# Constraints
__table_args__ = (
UniqueConstraint("product_id", "fetched_at", name="uq_price_history_product_time"),
Index("ix_price_history_product_id", "product_id"),
Index("ix_price_history_fetched_at", "fetched_at"),
CheckConstraint("stock_status IN ('in_stock', 'out_of_stock', 'unknown')"),
CheckConstraint("fetch_method IN ('http', 'playwright')"),
CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"),
)
def __repr__(self) -> str:
return f"<PriceHistory(id={self.id}, product_id={self.product_id}, price={self.price}, fetched_at={self.fetched_at})>"
class ProductImage(Base):
"""
Product images (N images per product).
Unique constraint on (product_id, image_url) avoids duplicates.
Position preserves image ordering.
"""
__tablename__ = "product_images"
# Primary key
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
# Foreign key
product_id: Mapped[int] = mapped_column(
Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False
)
# Image data
image_url: Mapped[str] = mapped_column(Text, nullable=False, comment="Image URL")
position: Mapped[int] = mapped_column(
Integer, nullable=False, default=0, comment="Image position (0=main)"
)
# Relationship
product: Mapped["Product"] = relationship("Product", back_populates="images")
# Constraints
__table_args__ = (
UniqueConstraint("product_id", "image_url", name="uq_product_image_url"),
Index("ix_product_image_product_id", "product_id"),
)
def __repr__(self) -> str:
return f"<ProductImage(id={self.id}, product_id={self.product_id}, position={self.position})>"
class ProductSpec(Base):
"""
Product specifications (key-value).
Unique constraint on (product_id, spec_key) avoids duplicates.
Enables efficient per-key queries.
"""
__tablename__ = "product_specs"
# Primary key
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
# Foreign key
product_id: Mapped[int] = mapped_column(
Integer, ForeignKey("products.id", ondelete="CASCADE"), nullable=False
)
# Spec data
spec_key: Mapped[str] = mapped_column(
String(200), nullable=False, comment="Specification key (e.g., 'Brand', 'Color')"
)
spec_value: Mapped[str] = mapped_column(Text, nullable=False, comment="Specification value")
# Relationship
product: Mapped["Product"] = relationship("Product", back_populates="specs")
# Constraints
__table_args__ = (
UniqueConstraint("product_id", "spec_key", name="uq_product_spec_key"),
Index("ix_product_spec_product_id", "product_id"),
Index("ix_product_spec_key", "spec_key"),
)
def __repr__(self) -> str:
return f"<ProductSpec(id={self.id}, product_id={self.product_id}, key={self.spec_key})>"
class ScrapingLog(Base):
"""
Observability logs for debugging.
Optional FK to products (allows logging even when no product was created).
JSONB for errors/notes because their structure varies.
Enables analytics: success rate, average duration, etc.
"""
__tablename__ = "scraping_logs"
# Primary key
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
# Foreign key (optional)
product_id: Mapped[Optional[int]] = mapped_column(
Integer, ForeignKey("products.id", ondelete="SET NULL"), nullable=True
)
# Scraping metadata
url: Mapped[str] = mapped_column(Text, nullable=False, comment="Scraped URL")
source: Mapped[str] = mapped_column(
String(50), nullable=False, comment="Store ID (amazon, cdiscount, etc.)"
)
reference: Mapped[Optional[str]] = mapped_column(
String(100), nullable=True, comment="Product reference (if extracted)"
)
# Fetch metadata
fetch_method: Mapped[str] = mapped_column(
String(20), nullable=False, comment="Fetch method (http, playwright)"
)
fetch_status: Mapped[str] = mapped_column(
String(20), nullable=False, comment="Fetch status (success, partial, failed)"
)
fetched_at: Mapped[datetime] = mapped_column(
TIMESTAMP, nullable=False, default=datetime.utcnow, comment="Scraping timestamp"
)
# Performance metrics
duration_ms: Mapped[Optional[int]] = mapped_column(
Integer, nullable=True, comment="Fetch duration in milliseconds"
)
html_size_bytes: Mapped[Optional[int]] = mapped_column(
Integer, nullable=True, comment="HTML response size in bytes"
)
# Debug data (JSONB)
errors: Mapped[Optional[list[str]]] = mapped_column(
JSON().with_variant(JSONB, "postgresql"),
nullable=True,
comment="Error messages (list of strings)",
)
notes: Mapped[Optional[list[str]]] = mapped_column(
JSON().with_variant(JSONB, "postgresql"),
nullable=True,
comment="Debug notes (list of strings)",
)
# Relationship
product: Mapped[Optional["Product"]] = relationship("Product", back_populates="logs")
# Constraints
__table_args__ = (
Index("ix_scraping_log_product_id", "product_id"),
Index("ix_scraping_log_source", "source"),
Index("ix_scraping_log_fetched_at", "fetched_at"),
Index("ix_scraping_log_fetch_status", "fetch_status"),
CheckConstraint("fetch_method IN ('http', 'playwright')"),
CheckConstraint("fetch_status IN ('success', 'partial', 'failed')"),
)
def __repr__(self) -> str:
return f"<ScrapingLog(id={self.id}, url={self.url}, status={self.fetch_status}, fetched_at={self.fetched_at})>"

pricewatch/app/db/repository.py Executable file (140 lines)

@@ -0,0 +1,140 @@
"""
Repository pattern for SQLAlchemy persistence.
Centralizes CRUD operations on the DB models from a ProductSnapshot.
"""
from __future__ import annotations
from typing import Optional
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session
from pricewatch.app.core.logging import get_logger
from pricewatch.app.core.schema import ProductSnapshot
from pricewatch.app.db.models import PriceHistory, Product, ProductImage, ProductSpec, ScrapingLog
logger = get_logger("db.repository")
class ProductRepository:
"""Repository de persistence pour ProductSnapshot."""
def __init__(self, session: Session) -> None:
self.session = session
def get_or_create(self, source: str, reference: str, url: str) -> Product:
"""
Fetch or create a product by its natural key (source, reference).
"""
product = (
self.session.query(Product)
.filter(Product.source == source, Product.reference == reference)
.one_or_none()
)
if product:
return product
product = Product(source=source, reference=reference, url=url)
self.session.add(product)
self.session.flush()
return product
def update_product_metadata(self, product: Product, snapshot: ProductSnapshot) -> None:
"""Met a jour les metadonnees produit si disponibles."""
if snapshot.url:
product.url = snapshot.url
if snapshot.title:
product.title = snapshot.title
if snapshot.category:
product.category = snapshot.category
if snapshot.currency:
product.currency = snapshot.currency
def add_price_history(self, product: Product, snapshot: ProductSnapshot) -> Optional[PriceHistory]:
"""Ajoute une entree d'historique de prix si inexistante."""
existing = (
self.session.query(PriceHistory)
.filter(
PriceHistory.product_id == product.id,
PriceHistory.fetched_at == snapshot.fetched_at,
)
.one_or_none()
)
if existing:
return existing
price_entry = PriceHistory(
product_id=product.id,
price=snapshot.price,
shipping_cost=snapshot.shipping_cost,
stock_status=snapshot.stock_status,
fetch_method=snapshot.debug.method,
fetch_status=snapshot.debug.status,
fetched_at=snapshot.fetched_at,
)
self.session.add(price_entry)
return price_entry
def sync_images(self, product: Product, images: list[str]) -> None:
"""Synchronise les images (ajout des nouvelles)."""
existing_urls = {image.image_url for image in product.images}
for position, url in enumerate(images):
if url in existing_urls:
continue
self.session.add(ProductImage(product_id=product.id, image_url=url, position=position))
def sync_specs(self, product: Product, specs: dict[str, str]) -> None:
"""Synchronise les specs (upsert par cle)."""
existing_specs = {spec.spec_key: spec for spec in product.specs}
for key, value in specs.items():
if key in existing_specs:
existing_specs[key].spec_value = value
else:
self.session.add(ProductSpec(product_id=product.id, spec_key=key, spec_value=value))
def add_scraping_log(self, snapshot: ProductSnapshot, product_id: Optional[int]) -> ScrapingLog:
"""Ajoute un log de scraping pour observabilite."""
log_entry = ScrapingLog(
product_id=product_id,
url=snapshot.url,
source=snapshot.source,
reference=snapshot.reference,
fetch_method=snapshot.debug.method,
fetch_status=snapshot.debug.status,
fetched_at=snapshot.fetched_at,
duration_ms=snapshot.debug.duration_ms,
html_size_bytes=snapshot.debug.html_size_bytes,
errors=snapshot.debug.errors or None,
notes=snapshot.debug.notes or None,
)
self.session.add(log_entry)
return log_entry
def save_snapshot(self, snapshot: ProductSnapshot) -> Optional[int]:
"""
Persist a complete ProductSnapshot to the database.
Returns the product id, or None when the reference is missing.
"""
if not snapshot.reference:
logger.warning("Reference absente: persistence ignoree")
self.add_scraping_log(snapshot, product_id=None)
return None
product = self.get_or_create(snapshot.source, snapshot.reference, snapshot.url)
self.update_product_metadata(product, snapshot)
self.add_price_history(product, snapshot)
self.sync_images(product, snapshot.images)
self.sync_specs(product, snapshot.specs)
self.add_scraping_log(snapshot, product_id=product.id)
return product.id
def safe_save_snapshot(self, snapshot: ProductSnapshot) -> Optional[int]:
"""Sauvegarde avec gestion d'erreur SQLAlchemy."""
try:
return self.save_snapshot(snapshot)
except SQLAlchemyError as exc:
logger.error(f"Erreur SQLAlchemy: {exc}")
raise
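Used on its own, the repository expects the caller to own the transaction; a minimal sketch, assuming the snapshot is a ProductSnapshot produced by one of the store parsers:

```python
# Sketch: persist one snapshot through the repository, committing explicitly.
from typing import Optional

from pricewatch.app.core.schema import ProductSnapshot
from pricewatch.app.db.connection import get_session
from pricewatch.app.db.repository import ProductRepository


def persist(snapshot: ProductSnapshot) -> Optional[int]:
    with get_session() as session:
        repo = ProductRepository(session)
        product_id = repo.safe_save_snapshot(snapshot)
        session.commit()  # flushed rows only become durable here
        return product_id
```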


@@ -0,0 +1,3 @@
from pricewatch.app.scraping.pipeline import ScrapingPipeline
__all__ = ["ScrapingPipeline"]



@@ -0,0 +1,52 @@
"""
Persistence pipeline for scraping snapshots.
Must never block the main pipeline when the DB is unavailable.
"""
from __future__ import annotations
from typing import Optional
from sqlalchemy.exc import SQLAlchemyError
from pricewatch.app.core.config import AppConfig, get_config
from pricewatch.app.core.logging import get_logger
from pricewatch.app.core.schema import ProductSnapshot
from pricewatch.app.db.connection import get_session
from pricewatch.app.db.repository import ProductRepository
logger = get_logger("scraping.pipeline")
class ScrapingPipeline:
"""Orchestration de persistence DB pour un ProductSnapshot."""
def __init__(self, config: Optional[AppConfig] = None) -> None:
self.config = config
def process_snapshot(self, snapshot: ProductSnapshot, save_to_db: bool = True) -> Optional[int]:
"""
Persist a snapshot to the database when persistence is enabled.
Returns the product id when saved, otherwise None.
"""
app_config = self.config or get_config()
if not save_to_db or not app_config.enable_db:
logger.debug("Persistence DB desactivee")
return None
try:
with get_session(app_config) as session:
repo = ProductRepository(session)
product_id = repo.safe_save_snapshot(snapshot)
session.commit()
return product_id
except SQLAlchemyError as exc:
snapshot.add_note(f"Persistence DB echouee: {exc}")
logger.error(f"Persistence DB echouee: {exc}")
return None
except Exception as exc:
snapshot.add_note(f"Erreur pipeline DB: {exc}")
logger.error(f"Erreur pipeline DB: {exc}")
return None
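Both the `run` command and the RQ task funnel snapshots through this class; a stripped-down sketch, where the snapshot below is a hand-built stand-in for what `store.parse()` would normally return and the URL is hypothetical:

```python
# Sketch: persist a minimal snapshot; real snapshots come from store.parse().
from pricewatch.app.core.config import get_config
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.scraping.pipeline import ScrapingPipeline

snapshot = ProductSnapshot(
    source="amazon",
    url="https://www.amazon.fr/dp/B08N5WRWNW",  # hypothetical product URL
    debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.FAILED, errors=["Fetch failed: example"]),
)

pipeline = ScrapingPipeline(config=get_config())
product_id = pipeline.process_snapshot(snapshot, save_to_db=True)
# None means persistence was disabled or the DB call failed; either way the caller keeps going.
print(product_id)
```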


@@ -214,6 +214,18 @@ class AmazonStore(BaseStore):
except ValueError:
continue
# Fallback: look for the separate a-price-whole and a-price-fraction spans
whole = soup.select_one("span.a-price-whole")
fraction = soup.select_one("span.a-price-fraction")
if whole and fraction:
# The whole part may include grouping spaces and a trailing decimal comma
# (e.g. "1 299,"); strip those so the recombined value parses as a float.
whole_text = whole.get_text(strip=True).replace("\u202f", "").replace("\xa0", "").replace(" ", "").rstrip(",.")
fraction_text = fraction.get_text(strip=True)
try:
price_str = f"{whole_text}.{fraction_text}"
return float(price_str)
except ValueError:
pass
debug.errors.append("Prix non trouvé")
return None
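To show what the whole/fraction fallback above is matching, here is a standalone parse over a fabricated fragment that mimics Amazon's price markup; the values are made up.

```python
# Standalone illustration of the a-price-whole / a-price-fraction fallback.
from bs4 import BeautifulSoup

html = """
<span class="a-price">
  <span class="a-price-whole">1 299<span class="a-price-decimal">,</span></span>
  <span class="a-price-fraction">99</span>
</span>
"""  # fabricated fragment, not real Amazon output

soup = BeautifulSoup(html, "html.parser")
whole = soup.select_one("span.a-price-whole").get_text(strip=True)
fraction = soup.select_one("span.a-price-fraction").get_text(strip=True)
whole = whole.replace("\u202f", "").replace("\xa0", "").replace(" ", "").rstrip(",.")
print(float(f"{whole}.{fraction}"))  # 1299.99
```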
@@ -270,6 +282,14 @@ class AmazonStore(BaseStore):
if url and url.startswith("http"):
images.append(url)
# Fallback: collect all img tags when no image was found
if not images:
all_imgs = soup.find_all("img")
for img in all_imgs:
url = img.get("src") or img.get("data-src")
if url and url.startswith("http"):
images.append(url)
return list(set(images)) # Deduplicate (set() does not preserve order)
def _extract_category(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:


@@ -0,0 +1,8 @@
"""
Tasks module for RQ jobs.
"""
from pricewatch.app.tasks.scrape import scrape_product
from pricewatch.app.tasks.scheduler import ScrapingScheduler
__all__ = ["scrape_product", "ScrapingScheduler"]


@@ -0,0 +1,75 @@
"""
Scheduling of scraping jobs via RQ Scheduler.
"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Optional
import redis
from rq import Queue
from rq_scheduler import Scheduler
from pricewatch.app.core.config import AppConfig, get_config
from pricewatch.app.core.logging import get_logger
from pricewatch.app.tasks.scrape import scrape_product
logger = get_logger("tasks.scheduler")
@dataclass
class ScheduledJobInfo:
"""Infos de retour pour un job planifie."""
job_id: str
next_run: datetime
class ScrapingScheduler:
"""Scheduler pour les jobs de scraping avec RQ."""
def __init__(self, config: Optional[AppConfig] = None, queue_name: str = "default") -> None:
self.config = config or get_config()
self.redis = redis.from_url(self.config.redis.url)
self.queue = Queue(queue_name, connection=self.redis)
self.scheduler = Scheduler(queue=self.queue, connection=self.redis)
def enqueue_immediate(
self,
url: str,
use_playwright: Optional[bool] = None,
save_db: bool = True,
):
"""Enqueue un job immediat."""
job = self.queue.enqueue(
scrape_product,
url,
use_playwright=use_playwright,
save_db=save_db,
)
logger.info(f"Job enqueued: {job.id}")
return job
def schedule_product(
self,
url: str,
interval_hours: int = 24,
use_playwright: Optional[bool] = None,
save_db: bool = True,
) -> ScheduledJobInfo:
"""Planifie un scraping recurrent (intervalle en heures)."""
interval_seconds = int(timedelta(hours=interval_hours).total_seconds())
next_run = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)
job = self.scheduler.schedule(
scheduled_time=next_run,
func=scrape_product,
args=[url],
kwargs={"use_playwright": use_playwright, "save_db": save_db},
interval=interval_seconds,
repeat=None,
)
logger.info(f"Job planifie: {job.id}, prochaine execution: {next_run.isoformat()}")
return ScheduledJobInfo(job_id=job.id, next_run=next_run)
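Putting the two entry points together, a sketch that assumes Redis is reachable at the configured `PW_REDIS_*` address, a worker is running to consume the queue, and the product URL is a placeholder:

```python
# Sketch: enqueue one immediate job and one recurring job.
from pricewatch.app.core.config import get_config
from pricewatch.app.tasks.scheduler import ScrapingScheduler

scheduler = ScrapingScheduler(get_config(), queue_name="default")

# One-off job, picked up as soon as a worker is free.
job = scheduler.enqueue_immediate("https://www.amazon.fr/dp/B08N5WRWNW", save_db=True)
print("enqueued:", job.id)

# Recurring job every 12 hours; rq-scheduler re-enqueues it after each run.
info = scheduler.schedule_product("https://www.amazon.fr/dp/B08N5WRWNW", interval_hours=12)
print("scheduled:", info.job_id, "next run:", info.next_run.isoformat())
```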

pricewatch/app/tasks/scrape.py Executable file (160 lines)

@@ -0,0 +1,160 @@
"""
Asynchronous scraping task for RQ.
"""
from __future__ import annotations
from typing import Any, Optional
from pricewatch.app.core.config import AppConfig, get_config
from pricewatch.app.core.logging import get_logger
from pricewatch.app.core.registry import get_registry
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.scraping.http_fetch import fetch_http
from pricewatch.app.scraping.pipeline import ScrapingPipeline
from pricewatch.app.scraping.pw_fetch import fetch_playwright
from pricewatch.app.stores.aliexpress.store import AliexpressStore
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.stores.backmarket.store import BackmarketStore
from pricewatch.app.stores.cdiscount.store import CdiscountStore
logger = get_logger("tasks.scrape")
def setup_stores() -> None:
"""Enregistre les stores disponibles si besoin."""
registry = get_registry()
if registry.list_stores():
return
registry.register(AmazonStore())
registry.register(CdiscountStore())
registry.register(BackmarketStore())
registry.register(AliexpressStore())
def scrape_product(
url: str,
use_playwright: Optional[bool] = None,
save_db: bool = True,
save_html: bool = False,
save_screenshot: bool = False,
headful: bool = False,
timeout_ms: Optional[int] = None,
) -> dict[str, Any]:
"""
Scrape a product and persist it via ScrapingPipeline.
Returns a dict with success, product_id, snapshot, error.
"""
config: AppConfig = get_config()
setup_stores()
if use_playwright is None:
use_playwright = config.default_use_playwright
if timeout_ms is None:
timeout_ms = config.default_playwright_timeout
registry = get_registry()
store = registry.detect_store(url)
if not store:
snapshot = ProductSnapshot(
source="unknown",
url=url,
debug=DebugInfo(
method=FetchMethod.HTTP,
status=DebugStatus.FAILED,
errors=["Aucun store detecte"],
),
)
ScrapingPipeline(config=config).process_snapshot(snapshot, save_to_db=save_db)
return {"success": False, "product_id": None, "snapshot": snapshot, "error": "store"}
canonical_url = store.canonicalize(url)
html = None
fetch_method = FetchMethod.HTTP
fetch_error = None
duration_ms = None
html_size_bytes = None
pw_result = None
http_result = fetch_http(canonical_url)
duration_ms = http_result.duration_ms
if http_result.success:
html = http_result.html
fetch_method = FetchMethod.HTTP
elif use_playwright:
pw_result = fetch_playwright(
canonical_url,
headless=not headful,
timeout_ms=timeout_ms,
save_screenshot=save_screenshot,
)
duration_ms = pw_result.duration_ms
if pw_result.success:
html = pw_result.html
fetch_method = FetchMethod.PLAYWRIGHT
else:
fetch_error = pw_result.error
else:
fetch_error = http_result.error
if html:
html_size_bytes = len(html.encode("utf-8"))
if save_html:
from pricewatch.app.core.io import save_debug_html
ref = store.extract_reference(canonical_url) or "unknown"
save_debug_html(html, f"{store.store_id}_{ref}")
if save_screenshot and fetch_method == FetchMethod.PLAYWRIGHT and pw_result:
from pricewatch.app.core.io import save_debug_screenshot
if pw_result and pw_result.screenshot:
ref = store.extract_reference(canonical_url) or "unknown"
save_debug_screenshot(pw_result.screenshot, f"{store.store_id}_{ref}")
try:
snapshot = store.parse(html, canonical_url)
snapshot.debug.method = fetch_method
snapshot.debug.duration_ms = duration_ms
snapshot.debug.html_size_bytes = html_size_bytes
success = snapshot.debug.status != DebugStatus.FAILED
except Exception as exc:
snapshot = ProductSnapshot(
source=store.store_id,
url=canonical_url,
debug=DebugInfo(
method=fetch_method,
status=DebugStatus.FAILED,
errors=[f"Parsing failed: {exc}"],
duration_ms=duration_ms,
html_size_bytes=html_size_bytes,
),
)
success = False
fetch_error = str(exc)
else:
snapshot = ProductSnapshot(
source=store.store_id,
url=canonical_url,
debug=DebugInfo(
method=fetch_method,
status=DebugStatus.FAILED,
errors=[f"Fetch failed: {fetch_error or 'Unknown error'}"],
duration_ms=duration_ms,
),
)
success = False
product_id = ScrapingPipeline(config=config).process_snapshot(snapshot, save_to_db=save_db)
return {
"success": success,
"product_id": product_id,
"snapshot": snapshot,
"error": fetch_error,
}
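
As a final sketch of how this task is meant to be driven, the snippet below first calls it synchronously (no worker involved), then enqueues the same callable on RQ; the URL is a placeholder and Redis is assumed reachable at the configured address.

```python
# Sketch: synchronous dry run, then background execution via RQ.
import redis
from rq import Queue

from pricewatch.app.core.config import get_config
from pricewatch.app.tasks.scrape import scrape_product

url = "https://www.amazon.fr/dp/B08N5WRWNW"  # placeholder product URL

# Synchronous call: returns the result dict directly; save_db=False skips persistence.
result = scrape_product(url, save_db=False)
print(result["success"], result["error"])

# Background call: the worker started by the `worker` CLI command will pick this up.
config = get_config()
queue = Queue("default", connection=redis.from_url(config.redis.url))
job = queue.enqueue(scrape_product, url, save_db=True)
print("job id:", job.id)
```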