scrap/tests/scraping/test_pipeline.py

"""
Tests pour ScrapingPipeline.
"""
from dataclasses import dataclass
from datetime import datetime
import pytest
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product
from pricewatch.app.scraping.pipeline import ScrapingPipeline


@dataclass
class FakeDbConfig:
    url: str


@dataclass
class FakeAppConfig:
    db: FakeDbConfig
    debug: bool = False
    enable_db: bool = True
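
# The Fake* configs stand in for the real AppConfig. Judging by these tests,
# the pipeline only reads db.url, debug, and enable_db; any other attribute of
# the real config is assumed to be irrelevant here.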


@pytest.fixture(autouse=True)
def reset_db_engine():
    """Reset the global engine between tests."""
    reset_engine()
    yield
    reset_engine()
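
# Note: the assertions below rely on get_session()/init_db() sharing one cached
# engine per process. With "sqlite:///:memory:", every freshly created engine
# would get its own empty database, so the autouse reset_engine() fixture is
# what keeps tests isolated while still letting the pipeline and the queries
# below see the same in-memory data. (This engine-caching behaviour is inferred
# from the tests, not from the connection module itself.)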


def test_pipeline_persists_snapshot():
    """The pipeline persists a snapshot to the SQLite database."""
    config = FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"))
    init_db(config)
    snapshot = ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 30, 0),
        title="Produit pipeline",
        price=99.99,
        currency="EUR",
        reference="B08PIPE",
        debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
    )
    pipeline = ScrapingPipeline(config=config)
    product_id = pipeline.process_snapshot(snapshot, save_to_db=True)
    assert product_id is not None
    with get_session(config) as session:
        assert session.query(Product).count() == 1


def test_pipeline_respects_disable_flag():
    """The pipeline skips persistence when enable_db=False."""
    config = FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"), enable_db=False)
    init_db(config)
    snapshot = ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 45, 0),
        title="Produit pipeline",
        price=99.99,
        currency="EUR",
        reference="B08PIPE",
        debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
    )
    pipeline = ScrapingPipeline(config=config)
    product_id = pipeline.process_snapshot(snapshot, save_to_db=True)
    assert product_id is None
    with get_session(config) as session:
        assert session.query(Product).count() == 0


def test_pipeline_db_error_adds_note(monkeypatch):
    """A DB error adds a debug note and returns None."""
    from sqlalchemy.exc import SQLAlchemyError

    class DummyError(SQLAlchemyError):
        pass

    def raise_session(*args, **kwargs):
        raise DummyError("db down")

    # Patch get_session at the pipeline's import site so the pipeline hits the
    # simulated failure instead of a real database.
    monkeypatch.setattr("pricewatch.app.scraping.pipeline.get_session", raise_session)
    snapshot = ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 13, 0, 0),
        title="Produit",
        price=10.0,
        currency="EUR",
        reference="B08PIPE",
        debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
    )
    pipeline = ScrapingPipeline(config=FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:")))
    product_id = pipeline.process_snapshot(snapshot, save_to_db=True)
    assert product_id is None
    # The note text is produced by the pipeline itself, so the French string
    # must be matched verbatim here.
    assert any("Persistence DB echouee" in note for note in snapshot.debug.notes)
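
# To run just this module (assuming the repository's standard pytest layout):
#   pytest scrap/tests/scraping/test_pipeline.py -v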