"""End-to-end tests for the RQ scraping task with database persistence."""

from dataclasses import dataclass
from datetime import datetime

from pricewatch.app.core.registry import get_registry
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product, ScrapingLog
from pricewatch.app.stores.base import BaseStore
from pricewatch.app.tasks import scrape as scrape_task


@dataclass
class FakeDbConfig:
    """Minimal stand-in for the database section of the app config."""

    url: str


@dataclass
class FakeAppConfig:
    """Minimal stand-in for the application config read by the scrape task."""

    db: FakeDbConfig
    debug: bool = False
    enable_db: bool = True
    default_use_playwright: bool = False
    default_playwright_timeout: int = 1000


class DummyStore(BaseStore):
    """Store stub that claims example.com URLs and yields a fixed snapshot."""

    def __init__(self) -> None:
        super().__init__(store_id="dummy")

    def match(self, url: str) -> float:
        # Full-confidence match for example.com, no match for anything else.
        return 1.0 if "example.com" in url else 0.0

    def canonicalize(self, url: str) -> str:
        # The stub performs no URL normalization.
        return url

    def extract_reference(self, url: str) -> str | None:
        return "REF-TEST"

    def parse(self, html: str, url: str) -> ProductSnapshot:
        # The HTML body is ignored; a deterministic snapshot is returned
        # so DB assertions have stable values to check against.
        return ProductSnapshot(
            source=self.store_id,
            url=url,
            fetched_at=datetime(2026, 1, 14, 10, 0, 0),
            title="Produit test",
            price=19.99,
            currency="EUR",
            reference="REF-TEST",
            debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
        )


class DummyFetchResult:
    """Always-successful fetch result stub with canned metadata."""

    def __init__(self, html: str) -> None:
        self.success = True
        self.html = html
        self.error = None
        self.duration_ms = 123


def test_scrape_product_persists_db(tmp_path, monkeypatch):
    """The scrape_product task persists to the DB and records one scraping log."""
    reset_engine()
    database_file = tmp_path / "scrape.db"
    config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{database_file}"))
    init_db(config)

    # Swap the registered stores for the dummy one; the original list is
    # restored in the finally block below.
    registry = get_registry()
    saved_stores = list(registry._stores)
    registry._stores = []
    registry.register(DummyStore())

    monkeypatch.setattr(scrape_task, "get_config", lambda: config)
    monkeypatch.setattr(scrape_task, "setup_stores", lambda: None)
    monkeypatch.setattr(scrape_task, "fetch_http", lambda url: DummyFetchResult(""))

    try:
        result = scrape_task.scrape_product("https://example.com/product", save_db=True)
    finally:
        registry._stores = saved_stores
        reset_engine()

    assert result["success"] is True
    assert result["product_id"] is not None

    # get_session rebuilds the engine from the same sqlite file, so the
    # rows written by the task are still visible after reset_engine().
    with get_session(config) as session:
        assert session.query(Product).count() == 1
        assert session.query(ScrapingLog).count() == 1