Gilles Soulier
2026-01-14 21:54:55 +01:00
parent c91c0f1fc9
commit d0b73b9319
140 changed files with 5822 additions and 161 deletions

56
tests/api/test_auth.py Normal file

@@ -0,0 +1,56 @@
"""
Tests for the auth API.
"""
from dataclasses import dataclass
import pytest
from fastapi import HTTPException
from pricewatch.app.api.main import require_token
@dataclass
class FakeRedisConfig:
url: str
@dataclass
class FakeDbConfig:
url: str
@dataclass
class FakeAppConfig:
db: FakeDbConfig
redis: FakeRedisConfig
api_token: str
def test_missing_token_returns_401(monkeypatch):
"""Sans token, retourne 401."""
config = FakeAppConfig(
db=FakeDbConfig(url="sqlite:///:memory:"),
redis=FakeRedisConfig(url="redis://localhost:6379/0"),
api_token="secret",
)
monkeypatch.setattr("pricewatch.app.api.main.get_config", lambda: config)
with pytest.raises(HTTPException) as excinfo:
require_token(None)
assert excinfo.value.status_code == 401
def test_bad_token_returns_403(monkeypatch):
"""Token invalide retourne 403."""
config = FakeAppConfig(
db=FakeDbConfig(url="sqlite:///:memory:"),
redis=FakeRedisConfig(url="redis://localhost:6379/0"),
api_token="secret",
)
monkeypatch.setattr("pricewatch.app.api.main.get_config", lambda: config)
with pytest.raises(HTTPException) as excinfo:
require_token("Bearer nope")
assert excinfo.value.status_code == 403


@@ -0,0 +1,30 @@
"""
Tests for the backend logs API.
"""
from pricewatch.app.api.main import BACKEND_LOGS, list_backend_logs, preview_scrape
from pricewatch.app.api.schemas import ScrapePreviewRequest
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
def test_backend_logs_capture_preview(monkeypatch):
BACKEND_LOGS.clear()
snapshot = ProductSnapshot(
source="amazon",
url="https://example.com",
title="Produit",
price=9.99,
currency="EUR",
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
)
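# Fake scrape_product: skips any network access and returns a canned result dict (success, snapshot, error).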
def fake_scrape(url, use_playwright=None, save_db=False):
return {"success": True, "snapshot": snapshot, "error": None}
monkeypatch.setattr("pricewatch.app.api.main.scrape_product", fake_scrape)
preview_scrape(ScrapePreviewRequest(url="https://example.com"))
logs = list_backend_logs()
assert logs
assert logs[-1].message.startswith("Preview scraping")


@@ -0,0 +1,239 @@
"""
Tests for advanced filters and API exports.
"""
from datetime import datetime, timedelta
import json
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from pricewatch.app.api.main import (
export_logs,
export_prices,
export_products,
list_logs,
list_prices,
list_products,
)
from pricewatch.app.db.models import Base, PriceHistory, Product, ScrapingLog
def _make_session():
engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
return engine, session
def test_list_products_filters_latest_price_and_stock():
engine, session = _make_session()
try:
product_a = Product(
source="amazon",
reference="REF-A",
url="https://example.com/a",
title="A",
category="Test",
currency="EUR",
first_seen_at=datetime(2026, 1, 14, 10, 0, 0),
last_updated_at=datetime(2026, 1, 15, 9, 0, 0),
)
product_b = Product(
source="amazon",
reference="REF-B",
url="https://example.com/b",
title="B",
category="Test",
currency="EUR",
first_seen_at=datetime(2026, 1, 14, 10, 0, 0),
last_updated_at=datetime(2026, 1, 15, 9, 5, 0),
)
session.add_all([product_a, product_b])
session.commit()
history = [
PriceHistory(
product_id=product_a.id,
price=80,
shipping_cost=0,
stock_status="out_of_stock",
fetch_method="http",
fetch_status="success",
fetched_at=datetime(2026, 1, 15, 8, 0, 0),
),
PriceHistory(
product_id=product_a.id,
price=100,
shipping_cost=0,
stock_status="in_stock",
fetch_method="http",
fetch_status="success",
fetched_at=datetime(2026, 1, 15, 9, 0, 0),
),
PriceHistory(
product_id=product_b.id,
price=200,
shipping_cost=10,
stock_status="in_stock",
fetch_method="http",
fetch_status="success",
fetched_at=datetime(2026, 1, 15, 9, 5, 0),
),
]
session.add_all(history)
session.commit()
filtered = list_products(price_min=150, session=session)
assert len(filtered) == 1
assert filtered[0].reference == "REF-B"
filtered_stock = list_products(stock_status="in_stock", session=session)
assert {item.reference for item in filtered_stock} == {"REF-A", "REF-B"}
finally:
session.close()
engine.dispose()
def test_list_prices_filters():
engine, session = _make_session()
try:
product = Product(
source="amazon",
reference="REF-1",
url="https://example.com/1",
title="Produit",
category="Test",
currency="EUR",
first_seen_at=datetime(2026, 1, 14, 10, 0, 0),
last_updated_at=datetime(2026, 1, 14, 11, 0, 0),
)
session.add(product)
session.commit()
history = [
PriceHistory(
product_id=product.id,
price=50,
shipping_cost=0,
stock_status="in_stock",
fetch_method="http",
fetch_status="success",
fetched_at=datetime(2026, 1, 14, 12, 0, 0),
),
PriceHistory(
product_id=product.id,
price=120,
shipping_cost=0,
stock_status="in_stock",
fetch_method="http",
fetch_status="failed",
fetched_at=datetime(2026, 1, 15, 12, 0, 0),
),
]
session.add_all(history)
session.commit()
results = list_prices(
product_id=product.id,
price_min=100,
fetch_status="failed",
session=session,
)
assert len(results) == 1
assert results[0].price == 120
finally:
session.close()
engine.dispose()
def test_list_logs_filters():
engine, session = _make_session()
try:
now = datetime(2026, 1, 15, 10, 0, 0)
logs = [
ScrapingLog(
product_id=None,
url="https://example.com/a",
source="amazon",
reference="REF-A",
fetch_method="http",
fetch_status="success",
fetched_at=now,
),
ScrapingLog(
product_id=None,
url="https://example.com/b",
source="amazon",
reference="REF-B",
fetch_method="http",
fetch_status="failed",
fetched_at=now - timedelta(hours=2),
),
]
session.add_all(logs)
session.commit()
filtered = list_logs(
fetch_status="success",
fetched_after=now - timedelta(hours=1),
session=session,
)
assert len(filtered) == 1
assert filtered[0].reference == "REF-A"
finally:
session.close()
engine.dispose()
def test_exports_csv_and_json():
engine, session = _make_session()
try:
product = Product(
source="amazon",
reference="REF-EXPORT",
url="https://example.com/export",
title="Export",
category="Test",
currency="EUR",
first_seen_at=datetime(2026, 1, 14, 10, 0, 0),
last_updated_at=datetime(2026, 1, 14, 11, 0, 0),
)
session.add(product)
session.commit()
session.add(
PriceHistory(
product_id=product.id,
price=99,
shipping_cost=0,
stock_status="in_stock",
fetch_method="http",
fetch_status="success",
fetched_at=datetime(2026, 1, 14, 12, 0, 0),
)
)
session.add(
ScrapingLog(
product_id=product.id,
url=product.url,
source=product.source,
reference=product.reference,
fetch_method="http",
fetch_status="success",
fetched_at=datetime(2026, 1, 14, 12, 0, 0),
)
)
session.commit()
csv_response = export_products(format="csv", session=session)
assert csv_response.media_type == "text/csv"
assert "products.csv" in csv_response.headers.get("Content-Disposition", "")
assert "REF-EXPORT" in csv_response.body.decode("utf-8")
json_response = export_logs(format="json", session=session)
payload = json.loads(json_response.body.decode("utf-8"))
assert payload[0]["reference"] == "REF-EXPORT"
finally:
session.close()
engine.dispose()

40
tests/api/test_health.py Normal file

@@ -0,0 +1,40 @@
"""
Tests for the /health endpoint.
"""
from dataclasses import dataclass
from pricewatch.app.api.main import health_check
@dataclass
class FakeRedisConfig:
url: str
@dataclass
class FakeDbConfig:
url: str
@dataclass
class FakeAppConfig:
db: FakeDbConfig
redis: FakeRedisConfig
api_token: str
def test_health_ok(monkeypatch):
"""Health retourne db/redis true."""
config = FakeAppConfig(
db=FakeDbConfig(url="sqlite:///:memory:"),
redis=FakeRedisConfig(url="redis://localhost:6379/0"),
api_token="secret",
)
monkeypatch.setattr("pricewatch.app.api.main.get_config", lambda: config)
monkeypatch.setattr("pricewatch.app.api.main.check_db_connection", lambda cfg: True)
monkeypatch.setattr("pricewatch.app.api.main.check_redis_connection", lambda url: True)
result = health_check()
assert result.db is True
assert result.redis is True


@@ -0,0 +1,47 @@
"""
HTTP integration tests against the Docker API.
"""
import os
import pytest
import httpx
API_BASE = os.getenv("PW_API_BASE", "http://localhost:8001")
API_TOKEN = os.getenv("PW_API_TOKEN", "change_me")
def _client() -> httpx.Client:
return httpx.Client(base_url=API_BASE, timeout=2.0)
def _is_api_up() -> bool:
try:
with _client() as client:
resp = client.get("/health")
return resp.status_code == 200
except Exception:
return False
@pytest.mark.skipif(not _is_api_up(), reason="API Docker indisponible")
def test_health_endpoint():
"""/health repond avec db/redis."""
with _client() as client:
resp = client.get("/health")
assert resp.status_code == 200
payload = resp.json()
assert "db" in payload and "redis" in payload
@pytest.mark.skipif(not _is_api_up(), reason="API Docker indisponible")
def test_products_requires_token():
"""/products demande un token valide."""
with _client() as client:
resp = client.get("/products")
assert resp.status_code == 401
resp = client.get("/products", headers={"Authorization": f"Bearer {API_TOKEN}"})
assert resp.status_code == 200
assert isinstance(resp.json(), list)


@@ -0,0 +1,37 @@
"""
Read-only products API tests.
"""
from datetime import datetime
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from pricewatch.app.api.main import list_products
from pricewatch.app.db.models import Base, Product
def test_list_products():
"""Liste des produits."""
engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
product = Product(
source="amazon",
reference="REF1",
url="https://example.com",
title="Produit",
category="Test",
currency="EUR",
first_seen_at=datetime(2026, 1, 14, 16, 0, 0),
last_updated_at=datetime(2026, 1, 14, 16, 0, 0),
)
session.add(product)
session.commit()
data = list_products(session=session, limit=50, offset=0)
assert len(data) == 1
assert data[0].reference == "REF1"
session.close()
engine.dispose()


@@ -0,0 +1,55 @@
"""
Tests for the scraping preview/commit API.
"""
from datetime import datetime
from pricewatch.app.api.main import commit_scrape, preview_scrape
from pricewatch.app.api.schemas import ScrapeCommitRequest, ScrapePreviewRequest
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
def test_preview_scrape_returns_snapshot(monkeypatch):
snapshot = ProductSnapshot(
source="amazon",
url="https://example.com",
title="Produit",
price=9.99,
currency="EUR",
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
)
def fake_scrape(url, use_playwright=None, save_db=False):
return {"success": True, "snapshot": snapshot, "error": None}
monkeypatch.setattr("pricewatch.app.api.main.scrape_product", fake_scrape)
response = preview_scrape(ScrapePreviewRequest(url="https://example.com"))
assert response.success is True
assert response.snapshot["source"] == "amazon"
assert response.snapshot["price"] == 9.99
def test_commit_scrape_persists_snapshot(monkeypatch):
snapshot = ProductSnapshot(
source="amazon",
url="https://example.com",
title="Produit",
price=19.99,
currency="EUR",
fetched_at=datetime(2026, 1, 15, 10, 0, 0),
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
)
class FakePipeline:
def __init__(self, config=None):
self.config = config
def process_snapshot(self, snapshot, save_to_db=True):
return 42
monkeypatch.setattr("pricewatch.app.api.main.ScrapingPipeline", FakePipeline)
response = commit_scrape(ScrapeCommitRequest(snapshot=snapshot.model_dump(mode="json")))
assert response.success is True
assert response.product_id == 42


@@ -0,0 +1,16 @@
"""
Tests for the Uvicorn logs API.
"""
from pricewatch.app.api.main import list_uvicorn_logs
def test_list_uvicorn_logs_reads_file(monkeypatch, tmp_path):
log_file = tmp_path / "uvicorn.log"
log_file.write_text("ligne-1\nligne-2\n", encoding="utf-8")
monkeypatch.setattr("pricewatch.app.api.main.UVICORN_LOG_PATH", log_file)
response = list_uvicorn_logs(limit=1)
assert len(response) == 1
assert response[0].line == "ligne-2"

11
tests/api/test_version.py Normal file

@@ -0,0 +1,11 @@
"""
Tests for the version API.
"""
from pricewatch.app.api.main import version_info
def test_version_info():
"""Retourne la version API."""
response = version_info()
assert response.api_version


@@ -0,0 +1,72 @@
"""
Tests for the webhooks API.
"""
import pytest
from fastapi import HTTPException
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from pricewatch.app.api.main import (
create_webhook,
delete_webhook,
list_webhooks,
send_webhook_test,
update_webhook,
)
from pricewatch.app.api.schemas import WebhookCreate, WebhookUpdate
from pricewatch.app.db.models import Base
def _make_session():
engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
return engine, session
def test_webhook_crud_and_test(monkeypatch):
engine, session = _make_session()
try:
payload = WebhookCreate(event="price_changed", url="https://example.com/webhook")
created = create_webhook(payload, session=session)
assert created.id > 0
items = list_webhooks(session=session)
assert len(items) == 1
updated = update_webhook(created.id, WebhookUpdate(enabled=False), session=session)
assert updated.enabled is False
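# Sending a test call to a disabled webhook is expected to be rejected with HTTP 409.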
with pytest.raises(HTTPException) as excinfo:
send_webhook_test(created.id, session=session)
assert excinfo.value.status_code == 409
update_webhook(created.id, WebhookUpdate(enabled=True), session=session)
called = {}
def fake_post(url, json, headers, timeout):
called["url"] = url
called["json"] = json
called["headers"] = headers
called["timeout"] = timeout
class FakeResponse:
status_code = 200
def raise_for_status(self):
return None
return FakeResponse()
monkeypatch.setattr("pricewatch.app.api.main.httpx.post", fake_post)
response = send_webhook_test(created.id, session=session)
assert response.status == "sent"
assert called["json"]["event"] == "test"
delete_webhook(created.id, session=session)
assert list_webhooks(session=session) == []
finally:
session.close()
engine.dispose()


@@ -0,0 +1,130 @@
"""
End-to-end test: CLI enqueue -> worker -> DB via Redis.
"""
from dataclasses import dataclass
from datetime import datetime
import pytest
import redis
from rq import Queue
from rq.worker import SimpleWorker
from typer.testing import CliRunner
from pricewatch.app.cli import main as cli_main
from pricewatch.app.core.registry import get_registry
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product, ScrapingLog
from pricewatch.app.stores.base import BaseStore
from pricewatch.app.tasks import scrape as scrape_task
@dataclass
class FakeDbConfig:
url: str
@dataclass
class FakeRedisConfig:
url: str
@dataclass
class FakeAppConfig:
db: FakeDbConfig
redis: FakeRedisConfig
debug: bool = False
enable_db: bool = True
default_use_playwright: bool = False
default_playwright_timeout: int = 1000
class DummyStore(BaseStore):
def __init__(self) -> None:
super().__init__(store_id="dummy")
def match(self, url: str) -> float:
return 1.0 if "example.com" in url else 0.0
def canonicalize(self, url: str) -> str:
return url
def extract_reference(self, url: str) -> str | None:
return "REF-CLI"
def parse(self, html: str, url: str) -> ProductSnapshot:
return ProductSnapshot(
source=self.store_id,
url=url,
fetched_at=datetime(2026, 1, 14, 15, 0, 0),
title="Produit cli",
price=49.99,
currency="EUR",
reference="REF-CLI",
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
)
class DummyFetchResult:
def __init__(self, html: str) -> None:
self.success = True
self.html = html
self.error = None
self.duration_ms = 20
def _redis_available(redis_url: str) -> bool:
try:
conn = redis.from_url(redis_url)
conn.ping()
return True
except Exception:
return False
@pytest.mark.skipif(not _redis_available("redis://localhost:6379/0"), reason="Redis indisponible")
def test_cli_enqueue_worker_persists_db(tmp_path, monkeypatch):
"""Enqueue via CLI, execution worker, persistence DB."""
reset_engine()
db_path = tmp_path / "cli-worker.db"
redis_url = "redis://localhost:6379/0"
config = FakeAppConfig(
db=FakeDbConfig(url=f"sqlite:///{db_path}"),
redis=FakeRedisConfig(url=redis_url),
)
init_db(config)
registry = get_registry()
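# Temporarily replace the registered stores with DummyStore only; the original list is restored in the finally block.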
previous_stores = list(registry._stores)
registry._stores = []
registry.register(DummyStore())
monkeypatch.setattr(cli_main, "get_config", lambda: config)
monkeypatch.setattr(scrape_task, "get_config", lambda: config)
monkeypatch.setattr(scrape_task, "setup_stores", lambda: None)
monkeypatch.setattr(scrape_task, "fetch_http", lambda url: DummyFetchResult("<html></html>"))
queue_name = "test-cli"
redis_conn = redis.from_url(redis_url)
queue = Queue(queue_name, connection=redis_conn)
queue.empty()
runner = CliRunner()
try:
result = runner.invoke(
cli_main.app,
["enqueue", "https://example.com/product", "--queue", queue_name, "--save-db"],
)
assert result.exit_code == 0
worker = SimpleWorker([queue], connection=redis_conn)
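# SimpleWorker executes jobs in the current process (no fork); burst=True drains the queue and then returns.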
worker.work(burst=True)
finally:
queue.empty()
registry._stores = previous_stores
reset_engine()
with get_session(config) as session:
assert session.query(Product).count() == 1
assert session.query(ScrapingLog).count() == 1


@@ -0,0 +1,83 @@
"""
CLI tests for enqueue/schedule with Redis handling.
"""
from types import SimpleNamespace
from typer.testing import CliRunner
from pricewatch.app.cli import main as cli_main
class DummyScheduler:
def __init__(self, *args, **kwargs) -> None:
self.enqueue_calls = []
self.schedule_calls = []
def enqueue_immediate(self, url, use_playwright=None, save_db=True):
self.enqueue_calls.append((url, use_playwright, save_db))
return SimpleNamespace(id="job-123")
def schedule_product(self, url, interval_hours=24, use_playwright=None, save_db=True):
self.schedule_calls.append((url, interval_hours, use_playwright, save_db))
return SimpleNamespace(job_id="job-456", next_run=SimpleNamespace(isoformat=lambda: "2026"))
def test_enqueue_cli_success(monkeypatch):
"""La commande enqueue retourne un job id."""
runner = CliRunner()
dummy = DummyScheduler()
monkeypatch.setattr(cli_main, "ScrapingScheduler", lambda *args, **kwargs: dummy)
result = runner.invoke(cli_main.app, ["enqueue", "https://example.com/product"])
assert result.exit_code == 0
assert "job-123" in result.output
def test_schedule_cli_success(monkeypatch):
"""La commande schedule retourne un job id et une date."""
runner = CliRunner()
dummy = DummyScheduler()
monkeypatch.setattr(cli_main, "ScrapingScheduler", lambda *args, **kwargs: dummy)
result = runner.invoke(
cli_main.app,
["schedule", "https://example.com/product", "--interval", "12"],
)
assert result.exit_code == 0
assert "job-456" in result.output
assert "2026" in result.output
def test_enqueue_cli_redis_unavailable(monkeypatch):
"""La commande enqueue echoue si Redis est indisponible."""
runner = CliRunner()
def raise_redis(*args, **kwargs):
raise cli_main.RedisUnavailableError("Redis non disponible")
monkeypatch.setattr(cli_main, "ScrapingScheduler", raise_redis)
result = runner.invoke(cli_main.app, ["enqueue", "https://example.com/product"])
assert result.exit_code == 1
assert "Redis non disponible" in result.output
def test_schedule_cli_redis_unavailable(monkeypatch):
"""La commande schedule echoue si Redis est indisponible."""
runner = CliRunner()
def raise_redis(*args, **kwargs):
raise cli_main.RedisUnavailableError("Redis non disponible")
monkeypatch.setattr(cli_main, "ScrapingScheduler", raise_redis)
result = runner.invoke(cli_main.app, ["schedule", "https://example.com/product"])
assert result.exit_code == 1
assert "Redis non disponible" in result.output

0
tests/cli/test_run_db.py Executable file → Normal file

106
tests/cli/test_run_no_db.py Normal file

@@ -0,0 +1,106 @@
"""
Tests for --no-db compatibility.
"""
from dataclasses import dataclass
from pathlib import Path
from typer.testing import CliRunner
from pricewatch.app.cli import main as cli_main
from pricewatch.app.core.registry import get_registry
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product
from pricewatch.app.stores.base import BaseStore
@dataclass
class FakeDbConfig:
url: str
@dataclass
class FakeAppConfig:
db: FakeDbConfig
debug: bool = False
enable_db: bool = True
class DummyStore(BaseStore):
def __init__(self) -> None:
super().__init__(store_id="dummy")
def match(self, url: str) -> float:
return 1.0 if "example.com" in url else 0.0
def canonicalize(self, url: str) -> str:
return url
def extract_reference(self, url: str) -> str | None:
return "REF-NODB"
def parse(self, html: str, url: str) -> ProductSnapshot:
return ProductSnapshot(
source=self.store_id,
url=url,
title="Produit nodb",
price=9.99,
currency="EUR",
reference="REF-NODB",
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
)
class DummyFetchResult:
def __init__(self, html: str) -> None:
self.success = True
self.html = html
self.error = None
def test_cli_run_no_db(tmp_path, monkeypatch):
"""Le flag --no-db evite toute ecriture DB."""
reset_engine()
db_path = tmp_path / "nodb.db"
config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}"))
init_db(config)
yaml_path = tmp_path / "config.yaml"
out_path = tmp_path / "out.json"
yaml_path.write_text(
"""
urls:
- "https://example.com/product"
options:
use_playwright: false
save_html: false
save_screenshot: false
""",
encoding="utf-8",
)
registry = get_registry()
previous_stores = list(registry._stores)
registry._stores = []
registry.register(DummyStore())
monkeypatch.setattr(cli_main, "get_config", lambda: config)
monkeypatch.setattr(cli_main, "setup_stores", lambda: None)
monkeypatch.setattr(cli_main, "fetch_http", lambda url: DummyFetchResult("<html></html>"))
runner = CliRunner()
try:
result = runner.invoke(
cli_main.app,
["run", "--yaml", str(yaml_path), "--out", str(out_path), "--no-db"],
)
finally:
registry._stores = previous_stores
reset_engine()
assert result.exit_code == 0
assert out_path.exists()
with get_session(config) as session:
assert session.query(Product).count() == 0


@@ -0,0 +1,54 @@
"""
Tests pour les commandes worker RQ via CLI.
"""
from types import SimpleNamespace
import pytest
from typer.testing import CliRunner
from pricewatch.app.cli import main as cli_main
class DummyRedis:
def ping(self) -> bool:
return True
class DummyWorker:
def __init__(self, queues, connection=None) -> None:
self.queues = queues
self.connection = connection
self.work_calls = []
def work(self, with_scheduler: bool = True):
self.work_calls.append(with_scheduler)
def test_worker_cli_success(monkeypatch):
"""Le worker demarre quand Redis est disponible."""
runner = CliRunner()
dummy_worker = DummyWorker([])
monkeypatch.setattr(cli_main, "Worker", lambda queues, connection=None: dummy_worker)
monkeypatch.setattr(cli_main.redis, "from_url", lambda url: DummyRedis())
result = runner.invoke(cli_main.app, ["worker", "--no-scheduler"])
assert result.exit_code == 0
assert dummy_worker.work_calls == [False]
def test_worker_cli_redis_down(monkeypatch):
"""Le worker echoue proprement si Redis est indisponible."""
runner = CliRunner()
def raise_connection(url):
raise cli_main.redis.exceptions.ConnectionError("redis down")
monkeypatch.setattr(cli_main.redis, "from_url", raise_connection)
result = runner.invoke(cli_main.app, ["worker"])
assert result.exit_code == 1
assert "Impossible de se connecter a Redis" in result.output


0
tests/core/test_io.py Executable file → Normal file

0
tests/core/test_registry_integration.py Executable file → Normal file


@@ -0,0 +1,40 @@
"""
Light load tests for persistence (100 snapshots).
"""
from datetime import datetime
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.models import Base, Product
from pricewatch.app.db.repository import ProductRepository
def test_bulk_save_100_snapshots():
"""Le repository persiste 100 snapshots sans erreur."""
engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
try:
repo = ProductRepository(session)
for idx in range(100):
snapshot = ProductSnapshot(
source="amazon",
url=f"https://example.com/product/{idx}",
fetched_at=datetime(2026, 1, 14, 14, 0, 0),
title=f"Produit {idx}",
price=10.0 + idx,
currency="EUR",
reference=f"REF-{idx}",
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
)
repo.save_snapshot(snapshot)
session.commit()
assert session.query(Product).count() == 100
finally:
session.close()
engine.dispose()

0
tests/db/test_connection.py Executable file → Normal file

7
tests/db/test_models.py Executable file → Normal file

@@ -2,7 +2,7 @@
Tests for the SQLAlchemy models.
"""
from datetime import datetime
from datetime import datetime, timezone
import pytest
from sqlalchemy import create_engine
@@ -30,6 +30,7 @@ def session() -> Session:
yield session
finally:
session.close()
engine.dispose()
def test_product_relationships(session: Session):
@@ -42,7 +43,7 @@ def test_product_relationships(session: Session):
stock_status="in_stock",
fetch_method="http",
fetch_status="success",
fetched_at=datetime.utcnow(),
fetched_at=datetime.now(timezone.utc),
)
image = ProductImage(image_url="https://example.com/image.jpg", position=0)
spec = ProductSpec(spec_key="Couleur", spec_value="Noir")
@@ -52,7 +53,7 @@ def test_product_relationships(session: Session):
reference="B08N5WRWNW",
fetch_method="http",
fetch_status="success",
fetched_at=datetime.utcnow(),
fetched_at=datetime.now(timezone.utc),
duration_ms=1200,
html_size_bytes=2048,
errors={"items": []},

0
tests/db/test_repository.py Executable file → Normal file

0
tests/scraping/__init__.py Executable file → Normal file

0
tests/scraping/__pycache__/__init__.cpython-313.pyc Executable file → Normal file

Binary file not shown.


0
tests/scraping/test_http_fetch.py Executable file → Normal file

30
tests/scraping/test_pipeline.py Executable file → Normal file

@@ -80,3 +80,33 @@ def test_pipeline_respects_disable_flag():
assert product_id is None
with get_session(config) as session:
assert session.query(Product).count() == 0
def test_pipeline_db_error_adds_note(monkeypatch):
"""Une erreur DB ajoute une note et retourne None."""
from sqlalchemy.exc import SQLAlchemyError
class DummyError(SQLAlchemyError):
pass
def raise_session(*args, **kwargs):
raise DummyError("db down")
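# Every DB session access in the pipeline now fails, simulating a database outage.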
monkeypatch.setattr("pricewatch.app.scraping.pipeline.get_session", raise_session)
snapshot = ProductSnapshot(
source="amazon",
url="https://example.com/product",
fetched_at=datetime(2026, 1, 14, 13, 0, 0),
title="Produit",
price=10.0,
currency="EUR",
reference="B08PIPE",
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
)
pipeline = ScrapingPipeline(config=FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:")))
product_id = pipeline.process_snapshot(snapshot, save_to_db=True)
assert product_id is None
assert any("Persistence DB echouee" in note for note in snapshot.debug.notes)

0
tests/scraping/test_pw_fetch.py Executable file → Normal file


@@ -0,0 +1,29 @@
"""
Tests for price parsing with thousands separators.
"""
from pricewatch.app.stores.price_parser import parse_price_text
def test_parse_price_with_thousands_space():
assert parse_price_text("1 259,00") == 1259.00
def test_parse_price_with_narrow_nbsp():
assert parse_price_text("1\u202f259,00") == 1259.00
def test_parse_price_with_dot_thousands():
assert parse_price_text("1.259,00") == 1259.00
def test_parse_price_with_comma_thousands():
assert parse_price_text("1,259.00") == 1259.00
def test_parse_price_without_decimal():
assert parse_price_text("1259") == 1259.00
def test_parse_price_with_currency():
assert parse_price_text("EUR 1 259,00") == 1259.00


@@ -0,0 +1,127 @@
"""
Tests for Redis error handling in the scheduler.
"""
import pytest
from redis.exceptions import ConnectionError as RedisConnectionError
from redis.exceptions import RedisError, TimeoutError as RedisTimeoutError
from pricewatch.app.tasks.scheduler import RedisUnavailableError, ScrapingScheduler, check_redis_connection
class DummyRedisOk:
def ping(self) -> bool:
return True
class DummyRedisError:
def __init__(self, exc: Exception) -> None:
self._exc = exc
def ping(self) -> None:
raise self._exc
class DummyQueue:
def __init__(self, name: str, connection=None) -> None:
self.name = name
self.connection = connection
class DummyScheduler:
def __init__(self, queue=None, connection=None) -> None:
self.queue = queue
self.connection = connection
def schedule(self, scheduled_time, func, args=None, kwargs=None, interval=None, repeat=None):
return type("Job", (), {"id": "job-redis"})()
class FakeRedisConfig:
def __init__(self, url: str) -> None:
self.url = url
class FakeAppConfig:
def __init__(self, redis_url: str) -> None:
self.redis = FakeRedisConfig(redis_url)
def test_check_redis_connection_success(monkeypatch):
"""Ping OK retourne True."""
monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: DummyRedisOk())
assert check_redis_connection("redis://localhost:6379/0") is True
def test_check_redis_connection_failure_connection(monkeypatch):
"""Ping en echec retourne False."""
monkeypatch.setattr(
"pricewatch.app.tasks.scheduler.redis.from_url",
lambda url: DummyRedisError(RedisConnectionError("no")),
)
assert check_redis_connection("redis://localhost:6379/0") is False
def test_check_redis_connection_failure_timeout(monkeypatch):
"""Timeout Redis retourne False."""
monkeypatch.setattr(
"pricewatch.app.tasks.scheduler.redis.from_url",
lambda url: DummyRedisError(RedisTimeoutError("timeout")),
)
assert check_redis_connection("redis://localhost:6379/0") is False
def test_scheduler_lazy_connection(monkeypatch):
"""La connexion Redis est lazy."""
config = FakeAppConfig("redis://localhost:6379/0")
monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: DummyRedisOk())
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
scheduler = ScrapingScheduler(config=config)
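# No Redis connection yet: the private _redis handle is only set once the queue property is first accessed.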
assert scheduler._redis is None
_ = scheduler.queue
assert scheduler._redis is not None
def test_scheduler_redis_connection_error(monkeypatch):
"""Une erreur de connexion leve RedisUnavailableError."""
config = FakeAppConfig("redis://localhost:6379/0")
def raise_connection(url):
raise RedisConnectionError("no")
monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", raise_connection)
scheduler = ScrapingScheduler(config=config)
with pytest.raises(RedisUnavailableError):
_ = scheduler.queue
def test_scheduler_schedule_redis_error(monkeypatch):
"""Une erreur Redis leve RedisUnavailableError lors du schedule."""
config = FakeAppConfig("redis://localhost:6379/0")
monkeypatch.setattr(
"pricewatch.app.tasks.scheduler.redis.from_url",
lambda url: DummyRedisError(RedisError("boom")),
)
scheduler = ScrapingScheduler(config=config)
with pytest.raises(RedisUnavailableError):
scheduler.schedule_product("https://example.com/product", interval_hours=1)
def test_scheduler_enqueue_redis_error(monkeypatch):
"""Une erreur Redis leve RedisUnavailableError lors de l'enqueue."""
config = FakeAppConfig("redis://localhost:6379/0")
monkeypatch.setattr(
"pricewatch.app.tasks.scheduler.redis.from_url",
lambda url: DummyRedisError(RedisError("boom")),
)
scheduler = ScrapingScheduler(config=config)
with pytest.raises(RedisUnavailableError):
scheduler.enqueue_immediate("https://example.com/product")


@@ -0,0 +1,184 @@
"""
Tests for ScrapingScheduler with Redis/RQ mocks.
"""
from dataclasses import dataclass
import pytest
from redis.exceptions import ConnectionError as RedisConnectionError
from pricewatch.app.tasks.scheduler import (
RedisUnavailableError,
ScheduledJobInfo,
ScrapingScheduler,
check_redis_connection,
)
@dataclass
class FakeRedis:
url: str
def ping(self):
"""Simule un ping reussi."""
return True
class FakeRedisConnectionError:
"""FakeRedis qui leve une erreur a la connexion."""
def __init__(self, url: str):
self.url = url
def ping(self):
raise RedisConnectionError("Connection refused")
class DummyQueue:
def __init__(self, name: str, connection=None) -> None:
self.name = name
self.connection = connection
self.enqueued = []
def enqueue(self, func, *args, **kwargs):
job = type("Job", (), {"id": "job-123"})()
self.enqueued.append((func, args, kwargs))
return job
class DummyScheduler:
def __init__(self, queue=None, connection=None) -> None:
self.queue = queue
self.connection = connection
self.scheduled = []
def schedule(self, scheduled_time, func, args=None, kwargs=None, interval=None, repeat=None):
job = type("Job", (), {"id": "job-456"})()
self.scheduled.append((scheduled_time, func, args, kwargs, interval, repeat))
return job
@dataclass
class FakeRedisConfig:
url: str
@dataclass
class FakeAppConfig:
redis: FakeRedisConfig
def test_scheduler_enqueue_immediate(monkeypatch):
"""Enqueue immediate utilise la queue RQ."""
config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: FakeRedis(url))
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
scheduler = ScrapingScheduler(config=config, queue_name="default")
job = scheduler.enqueue_immediate("https://example.com/product")
assert job.id == "job-123"
assert len(scheduler.queue.enqueued) == 1
def test_scheduler_schedule_product(monkeypatch):
"""Schedule product cree un job recurrent."""
config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", lambda url: FakeRedis(url))
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
scheduler = ScrapingScheduler(config=config, queue_name="default")
info = scheduler.schedule_product("https://example.com/product", interval_hours=1)
assert isinstance(info, ScheduledJobInfo)
assert info.job_id == "job-456"
assert len(scheduler.scheduler.scheduled) == 1
# ============================================================================
# Redis error handling tests
# ============================================================================
def test_scheduler_redis_connection_error(monkeypatch):
"""Leve RedisUnavailableError quand Redis n'est pas accessible."""
config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
monkeypatch.setattr(
"pricewatch.app.tasks.scheduler.redis.from_url",
lambda url: FakeRedisConnectionError(url),
)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
scheduler = ScrapingScheduler(config=config, queue_name="default")
with pytest.raises(RedisUnavailableError) as exc_info:
scheduler.enqueue_immediate("https://example.com/product")
assert "Redis" in str(exc_info.value.message)
assert exc_info.value.cause is not None
def test_scheduler_lazy_connection(monkeypatch):
"""La connexion Redis n'est etablie qu'au premier appel."""
config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
connection_calls = []
def track_from_url(url):
connection_calls.append(url)
return FakeRedis(url)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", track_from_url)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
scheduler = ScrapingScheduler(config=config, queue_name="default")
# No connection at creation time
assert len(connection_calls) == 0
# Connection on the first call
scheduler.enqueue_immediate("https://example.com/product")
assert len(connection_calls) == 1
# No new connection on the second call
scheduler.enqueue_immediate("https://example.com/product2")
assert len(connection_calls) == 1
def test_check_redis_connection_success(monkeypatch):
"""check_redis_connection retourne True si Redis repond."""
monkeypatch.setattr("pricewatch.app.tasks.scheduler.redis.from_url", FakeRedis)
assert check_redis_connection("redis://localhost:6379/0") is True
def test_check_redis_connection_failure(monkeypatch):
"""check_redis_connection retourne False si Redis ne repond pas."""
monkeypatch.setattr(
"pricewatch.app.tasks.scheduler.redis.from_url", FakeRedisConnectionError
)
assert check_redis_connection("redis://localhost:6379/0") is False
def test_scheduler_schedule_redis_error(monkeypatch):
"""schedule_product leve RedisUnavailableError si Redis down."""
config = FakeAppConfig(redis=FakeRedisConfig(url="redis://localhost:6379/0"))
monkeypatch.setattr(
"pricewatch.app.tasks.scheduler.redis.from_url",
lambda url: FakeRedisConnectionError(url),
)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Queue", DummyQueue)
monkeypatch.setattr("pricewatch.app.tasks.scheduler.Scheduler", DummyScheduler)
scheduler = ScrapingScheduler(config=config, queue_name="default")
with pytest.raises(RedisUnavailableError):
scheduler.schedule_product("https://example.com/product", interval_hours=24)


@@ -0,0 +1,91 @@
"""
End-to-end tests for the RQ scraping task with DB persistence.
"""
from dataclasses import dataclass
from datetime import datetime
from pricewatch.app.core.registry import get_registry
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product, ScrapingLog
from pricewatch.app.stores.base import BaseStore
from pricewatch.app.tasks import scrape as scrape_task
@dataclass
class FakeDbConfig:
url: str
@dataclass
class FakeAppConfig:
db: FakeDbConfig
debug: bool = False
enable_db: bool = True
default_use_playwright: bool = False
default_playwright_timeout: int = 1000
class DummyStore(BaseStore):
def __init__(self) -> None:
super().__init__(store_id="dummy")
def match(self, url: str) -> float:
return 1.0 if "example.com" in url else 0.0
def canonicalize(self, url: str) -> str:
return url
def extract_reference(self, url: str) -> str | None:
return "REF-TEST"
def parse(self, html: str, url: str) -> ProductSnapshot:
return ProductSnapshot(
source=self.store_id,
url=url,
fetched_at=datetime(2026, 1, 14, 10, 0, 0),
title="Produit test",
price=19.99,
currency="EUR",
reference="REF-TEST",
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
)
class DummyFetchResult:
def __init__(self, html: str) -> None:
self.success = True
self.html = html
self.error = None
self.duration_ms = 123
def test_scrape_product_persists_db(tmp_path, monkeypatch):
"""La tache scrape_product persiste en DB et logge un scraping."""
reset_engine()
db_path = tmp_path / "scrape.db"
config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}"))
init_db(config)
registry = get_registry()
previous_stores = list(registry._stores)
registry._stores = []
registry.register(DummyStore())
monkeypatch.setattr(scrape_task, "get_config", lambda: config)
monkeypatch.setattr(scrape_task, "setup_stores", lambda: None)
monkeypatch.setattr(scrape_task, "fetch_http", lambda url: DummyFetchResult("<html></html>"))
try:
result = scrape_task.scrape_product("https://example.com/product", save_db=True)
finally:
registry._stores = previous_stores
reset_engine()
assert result["success"] is True
assert result["product_id"] is not None
with get_session(config) as session:
assert session.query(Product).count() == 1
assert session.query(ScrapingLog).count() == 1


@@ -0,0 +1,110 @@
"""
End-to-end test: enqueue -> worker -> DB via Redis.
"""
from dataclasses import dataclass
from datetime import datetime
import pytest
import redis
from rq import Queue
from rq.worker import SimpleWorker
from pricewatch.app.core.registry import get_registry
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product, ScrapingLog
from pricewatch.app.stores.base import BaseStore
from pricewatch.app.tasks import scrape as scrape_task
@dataclass
class FakeDbConfig:
url: str
@dataclass
class FakeAppConfig:
db: FakeDbConfig
debug: bool = False
enable_db: bool = True
default_use_playwright: bool = False
default_playwright_timeout: int = 1000
class DummyStore(BaseStore):
def __init__(self) -> None:
super().__init__(store_id="dummy")
def match(self, url: str) -> float:
return 1.0 if "example.com" in url else 0.0
def canonicalize(self, url: str) -> str:
return url
def extract_reference(self, url: str) -> str | None:
return "REF-WORKER"
def parse(self, html: str, url: str) -> ProductSnapshot:
return ProductSnapshot(
source=self.store_id,
url=url,
fetched_at=datetime(2026, 1, 14, 11, 0, 0),
title="Produit worker",
price=29.99,
currency="EUR",
reference="REF-WORKER",
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
)
class DummyFetchResult:
def __init__(self, html: str) -> None:
self.success = True
self.html = html
self.error = None
self.duration_ms = 50
def _redis_available(redis_url: str) -> bool:
try:
conn = redis.from_url(redis_url)
conn.ping()
return True
except Exception:
return False
@pytest.mark.skipif(not _redis_available("redis://localhost:6379/0"), reason="Redis indisponible")
def test_enqueue_worker_persists_db(tmp_path, monkeypatch):
"""Le job enqueued est traite par le worker et persiste en DB."""
reset_engine()
db_path = tmp_path / "worker.db"
config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}"))
init_db(config)
registry = get_registry()
previous_stores = list(registry._stores)
registry._stores = []
registry.register(DummyStore())
monkeypatch.setattr(scrape_task, "get_config", lambda: config)
monkeypatch.setattr(scrape_task, "setup_stores", lambda: None)
monkeypatch.setattr(scrape_task, "fetch_http", lambda url: DummyFetchResult("<html></html>"))
redis_conn = redis.from_url("redis://localhost:6379/0")
queue = Queue("default", connection=redis_conn)
try:
job = queue.enqueue(scrape_task.scrape_product, "https://example.com/product", save_db=True)
worker = SimpleWorker([queue], connection=redis_conn)
worker.work(burst=True)
finally:
registry._stores = previous_stores
reset_engine()
assert job.is_finished
with get_session(config) as session:
assert session.query(Product).count() == 1
assert session.query(ScrapingLog).count() == 1