This commit is contained in:
Gilles Soulier
2026-01-14 21:54:55 +01:00
parent c91c0f1fc9
commit d0b73b9319
140 changed files with 5822 additions and 161 deletions

View File

@@ -0,0 +1,127 @@
"""
Tests pour la gestion des erreurs Redis dans le scheduler.
"""
import pytest
from redis.exceptions import ConnectionError as RedisConnectionError
from redis.exceptions import RedisError, TimeoutError as RedisTimeoutError
from pricewatch.app.tasks.scheduler import RedisUnavailableError, ScrapingScheduler, check_redis_connection
class DummyRedisOk:
    """Redis stub whose ping always succeeds."""

    def ping(self) -> bool:
        """Report a healthy connection."""
        return True
class DummyRedisError:
    """Redis stub whose ping raises a pre-configured exception."""

    def __init__(self, exc: Exception) -> None:
        # Exception instance re-raised on every ping() call.
        self._exc = exc

    def ping(self) -> None:
        """Always fail by raising the stored exception."""
        raise self._exc
class DummyQueue:
    """Minimal rq.Queue replacement that only records its arguments."""

    def __init__(self, name: str, connection=None) -> None:
        # Mirror the attributes the scheduler code may inspect.
        self.name, self.connection = name, connection
class DummyScheduler:
    """Minimal rq_scheduler.Scheduler replacement returning a canned job."""

    def __init__(self, queue=None, connection=None) -> None:
        self.queue, self.connection = queue, connection

    def schedule(self, scheduled_time, func, args=None, kwargs=None, interval=None, repeat=None):
        """Pretend to schedule and hand back a stub job with a fixed id."""
        return type("Job", (), {"id": "job-redis"})()
class FakeRedisConfig:
    """Config fragment exposing only the Redis URL."""

    def __init__(self, url: str) -> None:
        self.url = url
class FakeAppConfig:
    """App config stub exposing only the nested redis section."""

    def __init__(self, redis_url: str) -> None:
        # Wrap the URL so `config.redis.url` works like the real config.
        self.redis = FakeRedisConfig(redis_url)
def test_check_redis_connection_success(monkeypatch):
    """A successful ping makes check_redis_connection return True."""
    def fake_from_url(url):
        return DummyRedisOk()

    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", fake_from_url)
    assert check_redis_connection("redis://localhost:6379/0") is True
def test_check_redis_connection_failure_connection(monkeypatch):
    """A ConnectionError during ping makes check_redis_connection return False."""
    def fake_from_url(url):
        return DummyRedisError(RedisConnectionError("no"))

    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", fake_from_url)
    assert check_redis_connection("redis://localhost:6379/0") is False
def test_check_redis_connection_failure_timeout(monkeypatch):
    """A Redis timeout during ping makes check_redis_connection return False."""
    def fake_from_url(url):
        return DummyRedisError(RedisTimeoutError("timeout"))

    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", fake_from_url)
    assert check_redis_connection("redis://localhost:6379/0") is False
def test_scheduler_lazy_connection(monkeypatch):
    """The Redis connection is only opened when the queue is first accessed."""
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: DummyRedisOk())
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
    scheduler = ScrapingScheduler(config=FakeAppConfig("redis://localhost:6379/0"))
    # No connection is made at construction time.
    assert scheduler._redis is None
    # First access to the queue property triggers the connection.
    _ = scheduler.queue
    assert scheduler._redis is not None
def test_scheduler_redis_connection_error(monkeypatch):
    """A connection failure surfaces as RedisUnavailableError."""
    def failing_from_url(url):
        raise RedisConnectionError("no")

    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", failing_from_url)
    scheduler = ScrapingScheduler(config=FakeAppConfig("redis://localhost:6379/0"))
    with pytest.raises(RedisUnavailableError):
        _ = scheduler.queue
def test_scheduler_schedule_redis_error(monkeypatch):
    """A RedisError during schedule_product surfaces as RedisUnavailableError."""
    def fake_from_url(url):
        return DummyRedisError(RedisError("boom"))

    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", fake_from_url)
    scheduler = ScrapingScheduler(config=FakeAppConfig("redis://localhost:6379/0"))
    with pytest.raises(RedisUnavailableError):
        scheduler.schedule_product("https://example.com/product", interval_hours=1)
def test_scheduler_enqueue_redis_error(monkeypatch):
    """A RedisError during enqueue_immediate surfaces as RedisUnavailableError."""
    def fake_from_url(url):
        return DummyRedisError(RedisError("boom"))

    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", fake_from_url)
    scheduler = ScrapingScheduler(config=FakeAppConfig("redis://localhost:6379/0"))
    with pytest.raises(RedisUnavailableError):
        scheduler.enqueue_immediate("https://example.com/product")

View File

@@ -0,0 +1,184 @@
"""
Tests pour ScrapingScheduler avec mocks Redis/RQ.
"""
from dataclasses import dataclass
import pytest
from redis.exceptions import ConnectionError as RedisConnectionError
from pricewatch.app.tasks.scheduler import (
RedisUnavailableError,
ScheduledJobInfo,
ScrapingScheduler,
check_redis_connection,
)
@dataclass
class FakeRedis:
    """Fake Redis client returned by the patched ``redis.from_url``."""

    # Connection URL passed through by redis.from_url.
    url: str

    def ping(self):
        """Simulate a successful ping."""
        return True
class FakeRedisConnectionError:
    """Fake Redis client whose ping always fails with a connection error."""

    def __init__(self, url: str):
        self.url = url

    def ping(self):
        """Raise the same error a dead Redis server would."""
        raise RedisConnectionError("Connection refused")
class DummyQueue:
    """Minimal rq.Queue replacement recording every enqueue call."""

    def __init__(self, name: str, connection=None) -> None:
        self.name = name
        self.connection = connection
        # One (func, args, kwargs) tuple per enqueue() call.
        self.enqueued = []

    def enqueue(self, func, *args, **kwargs):
        """Record the call and return a stub job with a fixed id."""
        self.enqueued.append((func, args, kwargs))
        return type("Job", (), {"id": "job-123"})()
class DummyScheduler:
    """Minimal rq_scheduler.Scheduler replacement recording schedule calls."""

    def __init__(self, queue=None, connection=None) -> None:
        self.queue = queue
        self.connection = connection
        # One tuple per schedule() call, mirroring its arguments.
        self.scheduled = []

    def schedule(self, scheduled_time, func, args=None, kwargs=None, interval=None, repeat=None):
        """Record the call and return a stub job with a fixed id."""
        self.scheduled.append((scheduled_time, func, args, kwargs, interval, repeat))
        return type("Job", (), {"id": "job-456"})()
@dataclass
class FakeRedisConfig:
    """Redis section of the fake application config."""

    # Connection URL handed to redis.from_url by the scheduler.
    url: str
@dataclass
class FakeAppConfig:
    """Minimal application config consumed by ScrapingScheduler."""

    # Nested redis section; the scheduler reads `config.redis.url`.
    redis: FakeRedisConfig
def test_scheduler_enqueue_immediate(monkeypatch):
    """enqueue_immediate pushes the job onto the RQ queue."""
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", FakeRedis)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
    config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
    scheduler = ScrapingScheduler(config=config, queue_name="default")
    job = scheduler.enqueue_immediate("https://example.com/product")
    assert job.id == "job-123"
    assert len(scheduler.queue.enqueued) == 1
def test_scheduler_schedule_product(monkeypatch):
    """schedule_product creates a recurring job via the RQ scheduler."""
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", FakeRedis)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
    config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
    scheduler = ScrapingScheduler(config=config, queue_name="default")
    info = scheduler.schedule_product("https://example.com/product", interval_hours=1)
    assert isinstance(info, ScheduledJobInfo)
    assert info.job_id == "job-456"
    assert len(scheduler.scheduler.scheduled) == 1
# ============================================================================
# Tests gestion erreurs Redis
# ============================================================================
def test_scheduler_redis_connection_error(monkeypatch):
    """enqueue_immediate raises RedisUnavailableError when Redis is down."""
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", FakeRedisConnectionError)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
    config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
    scheduler = ScrapingScheduler(config=config, queue_name="default")
    with pytest.raises(RedisUnavailableError) as exc_info:
        scheduler.enqueue_immediate("https://example.com/product")
    # The error carries a message mentioning Redis and keeps the original cause.
    assert "Redis" in str(exc_info.value.message)
    assert exc_info.value.cause is not None
def test_scheduler_lazy_connection(monkeypatch):
    """The Redis connection is only opened on first use and then reused."""
    calls = []

    def tracking_from_url(url):
        calls.append(url)
        return FakeRedis(url)

    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", tracking_from_url)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
    config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
    scheduler = ScrapingScheduler(config=config, queue_name="default")
    # Nothing connects at construction time.
    assert not calls
    # The first call opens exactly one connection...
    scheduler.enqueue_immediate("https://example.com/product")
    assert len(calls) == 1
    # ...which is reused by subsequent calls.
    scheduler.enqueue_immediate("https://example.com/product2")
    assert len(calls) == 1
def test_check_redis_connection_success(monkeypatch):
    """check_redis_connection returns True when Redis answers the ping."""
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: FakeRedis(url))
    assert check_redis_connection("redis://localhost:6379/0") is True
def test_check_redis_connection_failure(monkeypatch):
    """check_redis_connection returns False when the ping fails."""
    monkeypatch.setattr(
        "pricewatch.app.tasks.scheduler.redis.from_url",
        lambda url: FakeRedisConnectionError(url),
    )
    assert check_redis_connection("redis://localhost:6379/0") is False
def test_scheduler_schedule_redis_error(monkeypatch):
    """schedule_product raises RedisUnavailableError when Redis is down."""
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", FakeRedisConnectionError)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
    monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
    config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
    scheduler = ScrapingScheduler(config=config, queue_name="default")
    with pytest.raises(RedisUnavailableError):
        scheduler.schedule_product("https://example.com/product", interval_hours=24)

View File

@@ -0,0 +1,91 @@
"""
Tests end-to-end pour la tache RQ de scraping avec persistence DB.
"""
from dataclasses import dataclass
from datetime import datetime
from pricewatch.app.core.registry import get_registry
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product, ScrapingLog
from pricewatch.app.stores.base import BaseStore
from pricewatch.app.tasks import scrape as scrape_task
@dataclass
class FakeDbConfig:
    """Database section of the fake application config."""

    # SQLAlchemy-style database URL (a temporary SQLite file in these tests).
    url: str
@dataclass
class FakeAppConfig:
    """Minimal application config consumed by the scrape task."""

    # Database section; only `db.url` is available on FakeDbConfig.
    db: FakeDbConfig
    debug: bool = False
    enable_db: bool = True
    # Defaults presumably mirror the real AppConfig fields — TODO confirm.
    default_use_playwright: bool = False
    default_playwright_timeout: int = 1000
class DummyStore(BaseStore):
    """Store stub matching example.com URLs and returning a fixed snapshot."""

    def __init__(self) -> None:
        super().__init__(store_id="dummy")

    def match(self, url: str) -> float:
        """Match example.com URLs with full confidence, others not at all."""
        if "example.com" in url:
            return 1.0
        return 0.0

    def canonicalize(self, url: str) -> str:
        """URLs are treated as already canonical."""
        return url

    def extract_reference(self, url: str) -> str | None:
        """Every product resolves to the same test reference."""
        return "REF-TEST"

    def parse(self, html: str, url: str) -> ProductSnapshot:
        """Ignore the HTML and return a deterministic snapshot."""
        snapshot = ProductSnapshot(
            source=self.store_id,
            url=url,
            fetched_at=datetime(2026, 1, 14, 10, 0, 0),
            title="Produit test",
            price=19.99,
            currency="EUR",
            reference="REF-TEST",
            debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
        )
        return snapshot
class DummyFetchResult:
    """Successful fetch result carrying canned HTML."""

    def __init__(self, html: str) -> None:
        self.html = html
        self.success = True
        self.error = None
        self.duration_ms = 123
def test_scrape_product_persists_db(tmp_path, monkeypatch):
    """The scrape_product task persists the product and logs the scraping run."""
    # Start from a clean engine so the temporary SQLite file is used.
    reset_engine()
    db_path = tmp_path / "scrape.db"
    config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}"))
    init_db(config)
    # Swap the global store registry for one containing only the dummy store.
    registry = get_registry()
    previous_stores = list(registry._stores)
    registry._stores = []
    registry.register(DummyStore())
    # Isolate the task from real config loading, store setup and HTTP fetching.
    monkeypatch.setattr(scrape_task, "get_config", lambda: config)
    monkeypatch.setattr(scrape_task, "setup_stores", lambda: None)
    monkeypatch.setattr(scrape_task, "fetch_http", lambda url: DummyFetchResult("<html></html>"))
    try:
        result = scrape_task.scrape_product("https://example.com/product", save_db=True)
    finally:
        # Restore the shared registry and engine even if the task fails.
        registry._stores = previous_stores
        reset_engine()
    assert result["success"] is True
    assert result["product_id"] is not None
    # One product row and one scraping log row must have been written.
    with get_session(config) as session:
        assert session.query(Product).count() == 1
        assert session.query(ScrapingLog).count() == 1

View File

@@ -0,0 +1,110 @@
"""
Test end-to-end: enqueue -> worker -> DB via Redis.
"""
from dataclasses import dataclass
from datetime import datetime
import pytest
import redis
from rq import Queue
from rq.worker import SimpleWorker
from pricewatch.app.core.registry import get_registry
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product, ScrapingLog
from pricewatch.app.stores.base import BaseStore
from pricewatch.app.tasks import scrape as scrape_task
@dataclass
class FakeDbConfig:
    """Database section of the fake application config."""

    # SQLAlchemy-style database URL (a temporary SQLite file in these tests).
    url: str
@dataclass
class FakeAppConfig:
    """Minimal application config consumed by the scrape task."""

    # Database section; only `db.url` is available on FakeDbConfig.
    db: FakeDbConfig
    debug: bool = False
    enable_db: bool = True
    # Defaults presumably mirror the real AppConfig fields — TODO confirm.
    default_use_playwright: bool = False
    default_playwright_timeout: int = 1000
class DummyStore(BaseStore):
    """Store stub matching example.com URLs and returning a fixed snapshot."""

    def __init__(self) -> None:
        super().__init__(store_id="dummy")

    def match(self, url: str) -> float:
        """Match example.com URLs with full confidence, others not at all."""
        if "example.com" in url:
            return 1.0
        return 0.0

    def canonicalize(self, url: str) -> str:
        """URLs are treated as already canonical."""
        return url

    def extract_reference(self, url: str) -> str | None:
        """Every product resolves to the same worker-test reference."""
        return "REF-WORKER"

    def parse(self, html: str, url: str) -> ProductSnapshot:
        """Ignore the HTML and return a deterministic snapshot."""
        snapshot = ProductSnapshot(
            source=self.store_id,
            url=url,
            fetched_at=datetime(2026, 1, 14, 11, 0, 0),
            title="Produit worker",
            price=29.99,
            currency="EUR",
            reference="REF-WORKER",
            debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
        )
        return snapshot
class DummyFetchResult:
    """Successful fetch result carrying canned HTML."""

    def __init__(self, html: str) -> None:
        self.html = html
        self.success = True
        self.error = None
        self.duration_ms = 50
def _redis_available(redis_url: str) -> bool:
try:
conn = redis.from_url(redis_url)
conn.ping()
return True
except Exception:
return False
@pytest.mark.skipif(not _redis_available("redis://localhost:6379/0"), reason="Redis indisponible")
def test_enqueue_worker_persists_db(tmp_path, monkeypatch):
    """The enqueued job is processed by the worker and persisted to the DB."""
    # Point the engine at a temporary SQLite database.
    reset_engine()
    db_path = tmp_path / "worker.db"
    config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}"))
    init_db(config)
    # Swap the global store registry for one containing only the dummy store.
    registry = get_registry()
    previous_stores = list(registry._stores)
    registry._stores = []
    registry.register(DummyStore())
    # Isolate the task from real config loading, store setup and HTTP fetching.
    monkeypatch.setattr(scrape_task, "get_config", lambda: config)
    monkeypatch.setattr(scrape_task, "setup_stores", lambda: None)
    monkeypatch.setattr(scrape_task, "fetch_http", lambda url: DummyFetchResult("<html></html>"))
    # Real Redis connection: the skipif above guarantees the server is up.
    redis_conn = redis.from_url("redis://localhost:6379/0")
    queue = Queue("default", connection=redis_conn)
    try:
        job = queue.enqueue(scrape_task.scrape_product, "https://example.com/product", save_db=True)
        # SimpleWorker in burst mode drains the queue in this process, so the
        # monkeypatched task functions stay in effect.
        worker = SimpleWorker([queue], connection=redis_conn)
        worker.work(burst=True)
    finally:
        # Restore the shared registry and engine even if the worker fails.
        registry._stores = previous_stores
        reset_engine()
    assert job.is_finished
    # One product row and one scraping log row must have been written.
    with get_session(config) as session:
        assert session.query(Product).count() == 1
        assert session.query(ScrapingLog).count() == 1