This commit is contained in:
2026-01-14 07:03:38 +01:00
parent ecda149a4b
commit c91c0f1fc9
61 changed files with 4388 additions and 38 deletions

106
tests/cli/test_run_db.py Executable file
View File

@@ -0,0 +1,106 @@
"""
Tests end-to-end pour la commande CLI run avec persistence DB.
"""
from dataclasses import dataclass
from pathlib import Path
from typer.testing import CliRunner
from pricewatch.app.cli import main as cli_main
from pricewatch.app.core.registry import get_registry
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product
from pricewatch.app.stores.base import BaseStore
@dataclass
class FakeDbConfig:
    """Minimal stand-in for the app's DB config: only the SQLAlchemy URL."""

    # Full SQLAlchemy connection URL, e.g. "sqlite:///path/to/test.db".
    url: str
@dataclass
class FakeAppConfig:
    """Minimal stand-in for the app config consumed by the CLI/DB layer."""

    # Nested DB configuration; the connection layer reads .url from it.
    db: FakeDbConfig
    debug: bool = False
    # Mirrors the real config flag gating DB persistence in the CLI.
    enable_db: bool = True
class DummyStore(BaseStore):
    """Stub store: claims every example.com URL and parses a fixed product."""

    def __init__(self) -> None:
        super().__init__(store_id="dummy")

    def match(self, url: str) -> float:
        # Perfect confidence for example.com URLs, zero for anything else.
        if "example.com" in url:
            return 1.0
        return 0.0

    def canonicalize(self, url: str) -> str:
        # The stub performs no URL normalization.
        return url

    def extract_reference(self, url: str) -> str | None:
        return "REF123"

    def parse(self, html: str, url: str) -> ProductSnapshot:
        # The HTML payload is deliberately ignored: a constant snapshot is
        # enough to exercise the CLI persistence path.
        snapshot = ProductSnapshot(
            source=self.store_id,
            url=url,
            title="Produit dummy",
            price=9.99,
            currency="EUR",
            reference="REF123",
            debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
        )
        return snapshot
class DummyFetchResult:
    """Always-successful fetch result carrying the supplied HTML payload."""

    def __init__(self, html: str) -> None:
        self.html = html
        self.error = None
        self.success = True
def test_cli_run_persists_db(tmp_path, monkeypatch):
    """The `run` CLI command persists to the database when --save-db is set."""
    # Start from a clean engine so state from a previous test is not reused.
    reset_engine()
    db_path = tmp_path / "test.db"
    config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}"))
    init_db(config)
    yaml_path = tmp_path / "config.yaml"
    out_path = tmp_path / "out.json"
    # Minimal scraping config: one URL, all heavy options disabled.
    yaml_path.write_text(
        """
urls:
  - "https://example.com/product"
options:
  use_playwright: false
  save_html: false
  save_screenshot: false
""",
        encoding="utf-8",
    )
    # Swap the global registry's stores for the dummy one; the originals are
    # restored in the finally block below.
    registry = get_registry()
    previous_stores = list(registry._stores)
    registry._stores = []
    registry.register(DummyStore())
    # Isolate the CLI from real config loading, store setup and HTTP fetching.
    monkeypatch.setattr(cli_main, "get_config", lambda: config)
    monkeypatch.setattr(cli_main, "setup_stores", lambda: None)
    monkeypatch.setattr(cli_main, "fetch_http", lambda url: DummyFetchResult("<html></html>"))
    runner = CliRunner()
    try:
        result = runner.invoke(
            cli_main.app,
            ["run", "--yaml", str(yaml_path), "--out", str(out_path), "--save-db"],
        )
    finally:
        # Always restore the registry and drop the engine, even on failure.
        registry._stores = previous_stores
        reset_engine()
    assert result.exit_code == 0
    assert out_path.exists()
    # A fresh session against the file-backed DB must see the saved product.
    with get_session(config) as session:
        assert session.query(Product).count() == 1

Binary file not shown.

462
tests/core/test_io.py Executable file
View File

@@ -0,0 +1,462 @@
"""
Tests pour pricewatch.app.core.io
Teste la lecture/écriture YAML/JSON et les fonctions de sauvegarde debug.
"""
import json
import tempfile
from datetime import datetime
from pathlib import Path
import pytest
import yaml
from pricewatch.app.core.io import (
ScrapingConfig,
ScrapingOptions,
read_json_results,
read_yaml_config,
save_debug_html,
save_debug_screenshot,
write_json_results,
)
from pricewatch.app.core.schema import (
DebugInfo,
DebugStatus,
FetchMethod,
ProductSnapshot,
StockStatus,
)
class TestScrapingOptions:
    """Tests for the ScrapingOptions model."""

    def test_default_values(self):
        """The documented defaults are applied when nothing is given."""
        opts = ScrapingOptions()
        assert opts.use_playwright is True
        assert opts.headful is False
        assert opts.save_html is True
        assert opts.save_screenshot is True
        assert opts.timeout_ms == 60000

    def test_custom_values(self):
        """Every field accepts an explicit override."""
        overrides = {
            "use_playwright": False,
            "headful": True,
            "save_html": False,
            "save_screenshot": False,
            "timeout_ms": 30000,
        }
        opts = ScrapingOptions(**overrides)
        for field_name, expected in overrides.items():
            assert getattr(opts, field_name) == expected

    def test_timeout_validation_min(self):
        """A timeout below 1000 ms is rejected."""
        with pytest.raises(ValueError):
            ScrapingOptions(timeout_ms=500)

    def test_timeout_validation_valid(self):
        """A timeout of exactly 1000 ms (the minimum) is accepted."""
        opts = ScrapingOptions(timeout_ms=1000)
        assert opts.timeout_ms == 1000
class TestScrapingConfig:
    """Tests for the ScrapingConfig model (URL list validation)."""

    def test_minimal_config(self):
        """A minimal config with URLs only gets default options."""
        config = ScrapingConfig(urls=["https://example.com"])
        assert len(config.urls) == 1
        assert config.urls[0] == "https://example.com"
        # Options fall back to a fully-populated ScrapingOptions instance.
        assert isinstance(config.options, ScrapingOptions)

    def test_config_with_options(self):
        """A config with URLs and explicit options keeps both."""
        options = ScrapingOptions(use_playwright=False, timeout_ms=10000)
        config = ScrapingConfig(
            urls=["https://example.com", "https://test.com"], options=options
        )
        assert len(config.urls) == 2
        assert config.options.use_playwright is False
        assert config.options.timeout_ms == 10000

    def test_validate_urls_empty_list(self):
        """An empty URL list is rejected."""
        # The match patterns below are the French validation messages
        # raised by the model.
        with pytest.raises(ValueError, match="Au moins une URL"):
            ScrapingConfig(urls=[])

    def test_validate_urls_strips_whitespace(self):
        """Surrounding whitespace is stripped from each URL."""
        config = ScrapingConfig(urls=[" https://example.com ", "https://test.com"])
        assert config.urls == ["https://example.com", "https://test.com"]

    def test_validate_urls_removes_empty(self):
        """Empty/blank URLs are dropped from the list."""
        config = ScrapingConfig(
            urls=["https://example.com", "", " ", "https://test.com"]
        )
        assert len(config.urls) == 2
        assert config.urls == ["https://example.com", "https://test.com"]

    def test_validate_urls_all_empty(self):
        """If every URL is blank, validation fails."""
        with pytest.raises(ValueError, match="Aucune URL valide"):
            ScrapingConfig(urls=["", " ", "\t"])
class TestReadYamlConfig:
    """Tests for read_yaml_config().

    The repeated `open(path, "w")` boilerplate (which also omitted an
    explicit encoding) is factored into a single `_write_yaml` helper
    that writes UTF-8 via `Path.write_text`.
    """

    @staticmethod
    def _write_yaml(path: Path, payload: dict) -> None:
        """Serialize *payload* as YAML to *path* with an explicit encoding."""
        path.write_text(yaml.dump(payload), encoding="utf-8")

    def test_read_valid_yaml(self, tmp_path):
        """Reads a valid YAML file with URLs and options."""
        yaml_path = tmp_path / "config.yaml"
        self._write_yaml(
            yaml_path,
            {
                "urls": ["https://example.com", "https://test.com"],
                "options": {"use_playwright": False, "timeout_ms": 30000},
            },
        )
        config = read_yaml_config(yaml_path)
        assert len(config.urls) == 2
        assert config.urls[0] == "https://example.com"
        assert config.options.use_playwright is False
        assert config.options.timeout_ms == 30000

    def test_read_yaml_minimal(self, tmp_path):
        """Reads a minimal YAML (URLs only); options fall back to defaults."""
        yaml_path = tmp_path / "config.yaml"
        self._write_yaml(yaml_path, {"urls": ["https://example.com"]})
        config = read_yaml_config(yaml_path)
        assert len(config.urls) == 1
        # Default options
        assert config.options.use_playwright is True
        assert config.options.timeout_ms == 60000

    def test_read_yaml_file_not_found(self, tmp_path):
        """A missing file raises FileNotFoundError."""
        yaml_path = tmp_path / "nonexistent.yaml"
        with pytest.raises(FileNotFoundError):
            read_yaml_config(yaml_path)

    def test_read_yaml_empty_file(self, tmp_path):
        """An empty YAML file raises ValueError."""
        yaml_path = tmp_path / "empty.yaml"
        yaml_path.write_text("")
        # Match pattern is the French error message from read_yaml_config.
        with pytest.raises(ValueError, match="Fichier YAML vide"):
            read_yaml_config(yaml_path)

    def test_read_yaml_invalid_syntax(self, tmp_path):
        """YAML with invalid syntax raises ValueError."""
        yaml_path = tmp_path / "invalid.yaml"
        yaml_path.write_text("urls: [invalid yaml syntax")
        with pytest.raises(ValueError, match="YAML invalide"):
            read_yaml_config(yaml_path)

    def test_read_yaml_missing_urls(self, tmp_path):
        """YAML without a 'urls' field fails model validation."""
        yaml_path = tmp_path / "config.yaml"
        self._write_yaml(yaml_path, {"options": {"use_playwright": False}})
        with pytest.raises(Exception):  # Pydantic validation error
            read_yaml_config(yaml_path)

    def test_read_yaml_accepts_path_string(self, tmp_path):
        """Accepts a plain string as the path argument."""
        yaml_path = tmp_path / "config.yaml"
        self._write_yaml(yaml_path, {"urls": ["https://example.com"]})
        config = read_yaml_config(str(yaml_path))
        assert len(config.urls) == 1
class TestWriteJsonResults:
    """Tests for write_json_results()."""

    @pytest.fixture
    def sample_snapshot(self) -> ProductSnapshot:
        """Fixture: example ProductSnapshot with all main fields populated."""
        return ProductSnapshot(
            source="test",
            url="https://example.com/product",
            fetched_at=datetime(2024, 1, 1, 12, 0, 0),
            title="Test Product",
            price=99.99,
            currency="EUR",
            stock_status=StockStatus.IN_STOCK,
            reference="TEST123",
            images=["https://example.com/img1.jpg"],
            category="Test Category",
            specs={"Brand": "TestBrand"},
            debug=DebugInfo(
                method=FetchMethod.HTTP,
                status=DebugStatus.SUCCESS,
                errors=[],
                notes=[],
            ),
        )

    def test_write_single_snapshot(self, tmp_path, sample_snapshot):
        """Writes a single snapshot."""
        json_path = tmp_path / "results.json"
        write_json_results([sample_snapshot], json_path)
        assert json_path.exists()
        # Verify the serialized content round-trips through json.load.
        with open(json_path) as f:
            data = json.load(f)
        assert isinstance(data, list)
        assert len(data) == 1
        assert data[0]["source"] == "test"
        assert data[0]["title"] == "Test Product"

    def test_write_multiple_snapshots(self, tmp_path, sample_snapshot):
        """Writes several snapshots, preserving their order."""
        snapshot2 = ProductSnapshot(
            source="test2",
            url="https://example.com/product2",
            fetched_at=datetime(2024, 1, 2, 12, 0, 0),
            title="Test Product 2",
            price=49.99,
            currency="EUR",
            stock_status=StockStatus.OUT_OF_STOCK,
            debug=DebugInfo(
                method=FetchMethod.PLAYWRIGHT,
                status=DebugStatus.PARTIAL,
                errors=["Test error"],
                notes=[],
            ),
        )
        json_path = tmp_path / "results.json"
        write_json_results([sample_snapshot, snapshot2], json_path)
        with open(json_path) as f:
            data = json.load(f)
        assert len(data) == 2
        assert data[0]["source"] == "test"
        assert data[1]["source"] == "test2"

    def test_write_creates_parent_dirs(self, tmp_path, sample_snapshot):
        """Creates missing parent directories before writing."""
        json_path = tmp_path / "sub" / "dir" / "results.json"
        write_json_results([sample_snapshot], json_path)
        assert json_path.exists()
        assert json_path.parent.exists()

    def test_write_empty_list(self, tmp_path):
        """Writes an empty list as an empty JSON array."""
        json_path = tmp_path / "empty.json"
        write_json_results([], json_path)
        assert json_path.exists()
        with open(json_path) as f:
            data = json.load(f)
        assert data == []

    def test_write_indent_control(self, tmp_path, sample_snapshot):
        """The indent parameter switches between pretty and compact output."""
        # With indent
        json_path1 = tmp_path / "pretty.json"
        write_json_results([sample_snapshot], json_path1, indent=2)
        content1 = json_path1.read_text()
        assert "\n" in content1  # Pretty-printed
        # Without indent (compact)
        json_path2 = tmp_path / "compact.json"
        write_json_results([sample_snapshot], json_path2, indent=None)
        content2 = json_path2.read_text()
        assert len(content2) < len(content1)  # More compact

    def test_write_accepts_path_string(self, tmp_path, sample_snapshot):
        """Accepts a plain string as the path."""
        json_path = tmp_path / "results.json"
        write_json_results([sample_snapshot], str(json_path))
        assert json_path.exists()
class TestReadJsonResults:
    """Tests for read_json_results()."""

    @pytest.fixture
    def json_file_with_snapshot(self, tmp_path) -> Path:
        """Fixture: JSON file containing one fully-populated snapshot dict."""
        json_path = tmp_path / "results.json"
        # Hand-written dict mirroring the ProductSnapshot JSON schema.
        snapshot_data = {
            "source": "test",
            "url": "https://example.com/product",
            "fetched_at": "2024-01-01T12:00:00",
            "title": "Test Product",
            "price": 99.99,
            "currency": "EUR",
            "shipping_cost": None,
            "stock_status": "in_stock",
            "reference": "TEST123",
            "images": ["https://example.com/img.jpg"],
            "category": "Test",
            "specs": {"Brand": "Test"},
            "debug": {
                "method": "http",
                "status": "success",
                "errors": [],
                "notes": [],
                "duration_ms": None,
                "html_size_bytes": None,
            },
        }
        with open(json_path, "w") as f:
            json.dump([snapshot_data], f)
        return json_path

    def test_read_single_snapshot(self, json_file_with_snapshot):
        """Reads a file containing one snapshot and rebuilds the model."""
        snapshots = read_json_results(json_file_with_snapshot)
        assert len(snapshots) == 1
        assert isinstance(snapshots[0], ProductSnapshot)
        assert snapshots[0].source == "test"
        assert snapshots[0].title == "Test Product"
        assert snapshots[0].price == 99.99

    def test_read_file_not_found(self, tmp_path):
        """A missing file raises FileNotFoundError."""
        json_path = tmp_path / "nonexistent.json"
        with pytest.raises(FileNotFoundError):
            read_json_results(json_path)

    def test_read_invalid_json(self, tmp_path):
        """Invalid JSON raises ValueError."""
        json_path = tmp_path / "invalid.json"
        json_path.write_text("{invalid json")
        # Match pattern is the French error message from read_json_results.
        with pytest.raises(ValueError, match="JSON invalide"):
            read_json_results(json_path)

    def test_read_not_a_list(self, tmp_path):
        """JSON that is not a list raises ValueError."""
        json_path = tmp_path / "notlist.json"
        with open(json_path, "w") as f:
            json.dump({"key": "value"}, f)
        with pytest.raises(ValueError, match="doit contenir une liste"):
            read_json_results(json_path)

    def test_read_empty_list(self, tmp_path):
        """An empty list is accepted and returned as-is."""
        json_path = tmp_path / "empty.json"
        with open(json_path, "w") as f:
            json.dump([], f)
        snapshots = read_json_results(json_path)
        assert snapshots == []

    def test_read_accepts_path_string(self, json_file_with_snapshot):
        """Accepts a plain string as the path."""
        snapshots = read_json_results(str(json_file_with_snapshot))
        assert len(snapshots) == 1
class TestSaveDebugHtml:
    """Tests for save_debug_html()."""

    # Shared fixture payload used by the directory-related tests.
    SAMPLE_HTML = "<html><body>Test</body></html>"

    def test_save_html_default_dir(self, tmp_path, monkeypatch):
        """Saves HTML into the default directory (relative to the CWD)."""
        # Point the working directory at tmp_path for the default-dir case.
        monkeypatch.chdir(tmp_path)
        saved = save_debug_html(self.SAMPLE_HTML, "test_page")
        assert saved.exists()
        assert saved.name == "test_page.html"
        assert saved.read_text(encoding="utf-8") == self.SAMPLE_HTML

    def test_save_html_custom_dir(self, tmp_path):
        """Saves HTML into a caller-provided directory."""
        target = tmp_path / "debug_html"
        saved = save_debug_html(self.SAMPLE_HTML, "test_page", target)
        assert saved.parent == target
        assert saved.name == "test_page.html"
        assert saved.read_text(encoding="utf-8") == self.SAMPLE_HTML

    def test_save_html_creates_dir(self, tmp_path):
        """Creates the output directory when it does not exist yet."""
        target = tmp_path / "sub" / "dir" / "html"
        saved = save_debug_html(self.SAMPLE_HTML, "test_page", target)
        assert target.exists()
        assert saved.exists()

    def test_save_html_large_content(self, tmp_path):
        """Handles a large HTML payload without truncation."""
        big = "<html><body>" + ("x" * 100000) + "</body></html>"
        saved = save_debug_html(big, "large_page", tmp_path)
        assert saved.exists()
        assert len(saved.read_text(encoding="utf-8")) == len(big)
class TestSaveDebugScreenshot:
    """Tests for save_debug_screenshot()."""

    # Fake PNG payload shared by all tests (content is never parsed).
    PNG_BYTES = b"\x89PNG fake image data"

    def test_save_screenshot_default_dir(self, tmp_path, monkeypatch):
        """Saves a screenshot into the default directory (CWD-relative)."""
        monkeypatch.chdir(tmp_path)
        saved = save_debug_screenshot(self.PNG_BYTES, "test_screenshot")
        assert saved.exists()
        assert saved.name == "test_screenshot.png"
        assert saved.read_bytes() == self.PNG_BYTES

    def test_save_screenshot_custom_dir(self, tmp_path):
        """Saves a screenshot into a caller-provided directory."""
        target = tmp_path / "screenshots"
        saved = save_debug_screenshot(self.PNG_BYTES, "test_screenshot", target)
        assert saved.parent == target
        assert saved.name == "test_screenshot.png"
        assert saved.read_bytes() == self.PNG_BYTES

    def test_save_screenshot_creates_dir(self, tmp_path):
        """Creates the output directory when it does not exist yet."""
        target = tmp_path / "sub" / "dir" / "screenshots"
        saved = save_debug_screenshot(self.PNG_BYTES, "test_screenshot", target)
        assert target.exists()
        assert saved.exists()

View File

@@ -0,0 +1,174 @@
"""
Tests d'intégration pour le registry avec les stores réels.
Teste la détection automatique du bon store pour des URLs
Amazon, Cdiscount, Backmarket et AliExpress.
"""
import pytest
from pricewatch.app.core.registry import StoreRegistry
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.stores.cdiscount.store import CdiscountStore
from pricewatch.app.stores.backmarket.store import BackmarketStore
from pricewatch.app.stores.aliexpress.store import AliexpressStore
class TestRegistryRealStores:
    """Integration tests with the 4 real stores."""

    @pytest.fixture
    def registry_with_all_stores(self) -> StoreRegistry:
        """Fixture: fresh registry with the 4 real stores registered."""
        registry = StoreRegistry()
        registry.register(AmazonStore())
        registry.register(CdiscountStore())
        registry.register(BackmarketStore())
        registry.register(AliexpressStore())
        return registry

    def test_all_stores_registered(self, registry_with_all_stores):
        """All 4 stores are registered and listed by id."""
        assert len(registry_with_all_stores) == 4
        stores = registry_with_all_stores.list_stores()
        assert "amazon" in stores
        assert "cdiscount" in stores
        assert "backmarket" in stores
        assert "aliexpress" in stores

    def test_detect_amazon_fr(self, registry_with_all_stores):
        """Detects Amazon.fr correctly."""
        url = "https://www.amazon.fr/dp/B08N5WRWNW"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "amazon"

    def test_detect_amazon_com(self, registry_with_all_stores):
        """Detects Amazon.com correctly."""
        url = "https://www.amazon.com/dp/B08N5WRWNW"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "amazon"

    def test_detect_amazon_with_product_name(self, registry_with_all_stores):
        """Detects Amazon when the URL embeds a product name and ref suffix."""
        url = "https://www.amazon.fr/Product-Name-Here/dp/B08N5WRWNW/ref=sr_1_1"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "amazon"

    def test_detect_cdiscount(self, registry_with_all_stores):
        """Detects Cdiscount correctly."""
        url = "https://www.cdiscount.com/informatique/clavier-souris-webcam/example/f-1070123-example.html"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "cdiscount"

    def test_detect_backmarket(self, registry_with_all_stores):
        """Detects Backmarket correctly."""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "backmarket"

    def test_detect_backmarket_locale_en(self, registry_with_all_stores):
        """Detects Backmarket with an English locale path segment."""
        url = "https://www.backmarket.fr/en-fr/p/macbook-air-15-2024"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "backmarket"

    def test_detect_aliexpress_fr(self, registry_with_all_stores):
        """Detects AliExpress.fr correctly."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "aliexpress"

    def test_detect_aliexpress_com(self, registry_with_all_stores):
        """Detects AliExpress.com correctly."""
        url = "https://www.aliexpress.com/item/1005007187023722.html"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "aliexpress"

    def test_detect_unknown_store(self, registry_with_all_stores):
        """An unknown retailer URL returns None."""
        url = "https://www.ebay.com/itm/123456789"
        store = registry_with_all_stores.detect_store(url)
        assert store is None

    def test_detect_invalid_url(self, registry_with_all_stores):
        """An invalid URL returns None instead of raising."""
        url = "not-a-valid-url"
        store = registry_with_all_stores.detect_store(url)
        assert store is None

    def test_detect_priority_amazon_over_others(self, registry_with_all_stores):
        """Amazon must win the score competition for its own URLs."""
        url = "https://www.amazon.fr/dp/B08N5WRWNW"
        store = registry_with_all_stores.detect_store(url)
        # Amazon.fr should score 0.9 here while the others score 0.0
        # (assumed from the store implementations — TODO confirm).
        assert store.store_id == "amazon"

    def test_each_store_matches_only_own_urls(self, registry_with_all_stores):
        """Each store matches only its own URLs."""
        test_cases = [
            ("https://www.amazon.fr/dp/B08N5WRWNW", "amazon"),
            ("https://www.cdiscount.com/product", "cdiscount"),
            ("https://www.backmarket.fr/fr-fr/p/product", "backmarket"),
            ("https://fr.aliexpress.com/item/12345.html", "aliexpress"),
        ]
        for url, expected_store_id in test_cases:
            store = registry_with_all_stores.detect_store(url)
            assert store is not None, f"Aucun store détecté pour {url}"
            assert store.store_id == expected_store_id, (
                f"Mauvais store pour {url}: "
                f"attendu {expected_store_id}, obtenu {store.store_id}"
            )

    def test_get_store_by_id(self, registry_with_all_stores):
        """Each store can be fetched by its id and has the right type."""
        amazon = registry_with_all_stores.get_store("amazon")
        assert amazon is not None
        assert isinstance(amazon, AmazonStore)
        cdiscount = registry_with_all_stores.get_store("cdiscount")
        assert cdiscount is not None
        assert isinstance(cdiscount, CdiscountStore)
        backmarket = registry_with_all_stores.get_store("backmarket")
        assert backmarket is not None
        assert isinstance(backmarket, BackmarketStore)
        aliexpress = registry_with_all_stores.get_store("aliexpress")
        assert aliexpress is not None
        assert isinstance(aliexpress, AliexpressStore)

    def test_unregister_store(self, registry_with_all_stores):
        """Unregistering a store removes it from detection."""
        assert len(registry_with_all_stores) == 4
        # Unregister Amazon
        removed = registry_with_all_stores.unregister("amazon")
        assert removed is True
        assert len(registry_with_all_stores) == 3
        # Amazon must no longer be detected
        store = registry_with_all_stores.detect_store("https://www.amazon.fr/dp/B08N5WRWNW")
        assert store is None
        # The remaining stores must keep working
        store = registry_with_all_stores.detect_store("https://www.cdiscount.com/product")
        assert store is not None
        assert store.store_id == "cdiscount"

    def test_repr_includes_all_stores(self, registry_with_all_stores):
        """The string representation lists every registered store."""
        repr_str = repr(registry_with_all_stores)
        assert "StoreRegistry" in repr_str
        assert "amazon" in repr_str
        assert "cdiscount" in repr_str
        assert "backmarket" in repr_str
        assert "aliexpress" in repr_str

87
tests/db/test_connection.py Executable file
View File

@@ -0,0 +1,87 @@
"""
Tests pour la couche de connexion SQLAlchemy.
"""
from dataclasses import dataclass
import pytest
from sqlalchemy import inspect
from pricewatch.app.db.connection import (
check_db_connection,
get_engine,
get_session,
init_db,
reset_engine,
)
from pricewatch.app.db.models import Product
@dataclass
class FakeDbConfig:
    """Minimal DB config for SQLite tests."""

    # Full SQLAlchemy URL; the remaining fields only mirror the real
    # config's shape so attribute access does not fail.
    url: str
    host: str = "sqlite"
    port: int = 0
    database: str = ":memory:"
@dataclass
class FakeAppConfig:
    """Minimal app config for tests: just the DB section and a debug flag."""

    db: FakeDbConfig
    debug: bool = False
@pytest.fixture(autouse=True)
def reset_db_engine():
    """Reset the global engine before and after every test (autouse),
    so no test inherits a cached engine from a previous one."""
    reset_engine()
    yield
    reset_engine()
@pytest.fixture
def sqlite_config() -> FakeAppConfig:
    """In-memory SQLite config for tests (no file is created)."""
    return FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"))
def test_get_engine_sqlite(sqlite_config: FakeAppConfig):
    """An in-memory SQLite config yields a SQLite-backed engine."""
    backend = get_engine(sqlite_config).url.get_backend_name()
    assert backend == "sqlite"
def test_init_db_creates_tables(sqlite_config: FakeAppConfig):
    """init_db creates every expected table."""
    init_db(sqlite_config)
    inspector = inspect(get_engine(sqlite_config))
    created = set(inspector.get_table_names())
    # Every table of the schema must exist after init_db.
    expected = {
        "products",
        "price_history",
        "product_images",
        "product_specs",
        "scraping_logs",
    }
    assert expected <= created
def test_get_session_commit(sqlite_config: FakeAppConfig):
    """The session context manager allows a simple commit."""
    init_db(sqlite_config)
    with get_session(sqlite_config) as session:
        product = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com")
        session.add(product)
        session.commit()
    # A second session must observe the committed row.
    with get_session(sqlite_config) as session:
        assert session.query(Product).count() == 1
def test_check_db_connection(sqlite_config: FakeAppConfig):
    """The DB health check returns True against SQLite."""
    init_db(sqlite_config)
    assert check_db_connection(sqlite_config) is True

89
tests/db/test_models.py Executable file
View File

@@ -0,0 +1,89 @@
"""
Tests pour les modeles SQLAlchemy.
"""
from datetime import datetime
import pytest
from sqlalchemy import create_engine
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, sessionmaker
from pricewatch.app.db.models import (
Base,
PriceHistory,
Product,
ProductImage,
ProductSpec,
ScrapingLog,
)
@pytest.fixture
def session() -> Session:
    """In-memory SQLite session for model tests.

    Yields a session bound to a private engine; both the session and the
    engine are torn down after the test.
    """
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    SessionLocal = sessionmaker(bind=engine)
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
        # Dispose the engine so the in-memory DB and its connection pool
        # are released — consistent with the repository test fixture.
        engine.dispose()
def test_product_relationships(session: Session):
    """The main relationships work (prices, images, specs, logs)."""
    product = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com")
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
    # consider datetime.now(timezone.utc) if the models accept aware values.
    price = PriceHistory(
        price=199.99,
        shipping_cost=0,
        stock_status="in_stock",
        fetch_method="http",
        fetch_status="success",
        fetched_at=datetime.utcnow(),
    )
    image = ProductImage(image_url="https://example.com/image.jpg", position=0)
    spec = ProductSpec(spec_key="Couleur", spec_value="Noir")
    log = ScrapingLog(
        url="https://example.com",
        source="amazon",
        reference="B08N5WRWNW",
        fetch_method="http",
        fetch_status="success",
        fetched_at=datetime.utcnow(),
        duration_ms=1200,
        html_size_bytes=2048,
        errors={"items": []},
        notes={"items": ["OK"]},
    )
    # Attach one child of each kind through the relationship collections.
    product.price_history.append(price)
    product.images.append(image)
    product.specs.append(spec)
    product.logs.append(log)
    session.add(product)
    session.commit()
    # Reload from the DB and check every collection was cascaded.
    loaded = session.query(Product).first()
    assert loaded is not None
    assert len(loaded.price_history) == 1
    assert len(loaded.images) == 1
    assert len(loaded.specs) == 1
    assert len(loaded.logs) == 1
def test_unique_product_constraint(session: Session):
    """The unique (source, reference) constraint is enforced."""
    first = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com/a")
    duplicate = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com/b")
    session.add(first)
    session.commit()
    # Committing a second product with the same source+reference must fail.
    session.add(duplicate)
    with pytest.raises(IntegrityError):
        session.commit()
    session.rollback()

82
tests/db/test_repository.py Executable file
View File

@@ -0,0 +1,82 @@
"""
Tests pour le repository SQLAlchemy.
"""
from datetime import datetime
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.models import Base, Product, ScrapingLog
from pricewatch.app.db.repository import ProductRepository
@pytest.fixture
def session() -> Session:
    """In-memory SQLite session for repository tests.

    Yields a session bound to a private engine; the session is closed and
    the engine disposed after the test.
    """
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    SessionLocal = sessionmaker(bind=engine)
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
        engine.dispose()
def _make_snapshot(reference: str | None) -> ProductSnapshot:
    """Build a fully-populated test snapshot; *reference* may be None to
    exercise the no-reference persistence path."""
    return ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 0, 0),
        title="Produit test",
        price=199.99,
        currency="EUR",
        shipping_cost=0.0,
        reference=reference,
        images=["https://example.com/img1.jpg"],
        specs={"Couleur": "Noir"},
        debug=DebugInfo(
            method=FetchMethod.HTTP,
            status=DebugStatus.SUCCESS,
            errors=["Avertissement"],
            notes=["OK"],
        ),
    )
def test_save_snapshot_creates_product(session: Session):
    """The repository persists the product plus its scraping log."""
    repo = ProductRepository(session)
    snapshot = _make_snapshot(reference="B08N5WRWNW")
    product_id = repo.save_snapshot(snapshot)
    session.commit()
    # Exactly one product must exist, with all child rows attached.
    product = session.query(Product).one()
    assert product.id == product_id
    assert product.reference == "B08N5WRWNW"
    assert len(product.images) == 1
    assert len(product.specs) == 1
    assert len(product.price_history) == 1
    # The scraping log must be linked to the product and carry the
    # snapshot's debug errors/notes.
    log = session.query(ScrapingLog).one()
    assert log.product_id == product_id
    assert log.errors == ["Avertissement"]
    assert log.notes == ["OK"]
def test_save_snapshot_without_reference(session: Session):
    """Without a reference, no product is created but the log still is."""
    repo = ProductRepository(session)
    result = repo.save_snapshot(_make_snapshot(reference=None))
    session.commit()
    assert result is None
    assert session.query(Product).count() == 0
    assert session.query(ScrapingLog).count() == 1

0
tests/scraping/__init__.py Executable file
View File

Binary file not shown.

290
tests/scraping/test_http_fetch.py Executable file
View File

@@ -0,0 +1,290 @@
"""
Tests pour pricewatch.app.scraping.http_fetch
Teste la récupération HTTP avec mocks pour éviter les vraies requêtes.
"""
from unittest.mock import Mock, patch
import pytest
import requests
from requests.exceptions import RequestException, Timeout
from pricewatch.app.scraping.http_fetch import FetchResult, fetch_http
class TestFetchResult:
    """Tests for the FetchResult class."""

    def test_success_result(self):
        """A successful result keeps html/status/duration and no error."""
        outcome = FetchResult(
            success=True,
            html="<html>Test</html>",
            status_code=200,
            duration_ms=150,
        )
        assert outcome.success is True
        assert outcome.html == "<html>Test</html>"
        assert outcome.error is None
        assert outcome.status_code == 200
        assert outcome.duration_ms == 150

    def test_error_result(self):
        """An error result keeps the message and status code, with no HTML."""
        outcome = FetchResult(
            success=False,
            error="403 Forbidden",
            status_code=403,
            duration_ms=100,
        )
        assert outcome.success is False
        assert outcome.html is None
        assert outcome.error == "403 Forbidden"
        assert outcome.status_code == 403
        assert outcome.duration_ms == 100

    def test_minimal_result(self):
        """With only `success`, every optional field defaults to None."""
        outcome = FetchResult(success=False)
        assert outcome.success is False
        for attr in ("html", "error", "status_code", "duration_ms"):
            assert getattr(outcome, attr) is None
class TestFetchHttp:
"""Tests pour la fonction fetch_http()."""
    def test_fetch_success(self, mocker):
        """Successful HTTP request (200 OK)."""
        # Mock requests.get so no real network traffic happens.
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "<html><body>Test Page</body></html>"
        mocker.patch("requests.get", return_value=mock_response)
        result = fetch_http("https://example.com")
        assert result.success is True
        assert result.html == "<html><body>Test Page</body></html>"
        assert result.status_code == 200
        assert result.error is None
        # Duration is measured even on the happy path.
        assert result.duration_ms is not None
        assert result.duration_ms >= 0
    def test_fetch_with_custom_timeout(self, mocker):
        """A custom timeout is forwarded to requests.get."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "<html>OK</html>"
        mock_get = mocker.patch("requests.get", return_value=mock_response)
        fetch_http("https://example.com", timeout=60)
        # Verify the timeout is passed through to requests.get.
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert call_kwargs["timeout"] == 60
    def test_fetch_with_custom_headers(self, mocker):
        """Custom headers are merged with the defaults."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "<html>OK</html>"
        mock_get = mocker.patch("requests.get", return_value=mock_response)
        custom_headers = {"X-Custom-Header": "test-value"}
        fetch_http("https://example.com", headers=custom_headers)
        # The custom header must be present in the outgoing request.
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert "X-Custom-Header" in call_kwargs["headers"]
        assert call_kwargs["headers"]["X-Custom-Header"] == "test-value"
        # Default headers must also be present (merge, not replace).
        assert "User-Agent" in call_kwargs["headers"]
def test_fetch_403_forbidden(self, mocker):
"""Requête bloquée (403 Forbidden)."""
mock_response = Mock()
mock_response.status_code = 403
mocker.patch("requests.get", return_value=mock_response)
result = fetch_http("https://example.com")
assert result.success is False
assert result.html is None
assert result.status_code == 403
assert "403 Forbidden" in result.error
assert "Anti-bot" in result.error
def test_fetch_404_not_found(self, mocker):
"""Page introuvable (404 Not Found)."""
mock_response = Mock()
mock_response.status_code = 404
mocker.patch("requests.get", return_value=mock_response)
result = fetch_http("https://example.com")
assert result.success is False
assert result.status_code == 404
assert "404 Not Found" in result.error
def test_fetch_429_rate_limit(self, mocker):
"""Rate limit atteint (429 Too Many Requests)."""
mock_response = Mock()
mock_response.status_code = 429
mocker.patch("requests.get", return_value=mock_response)
result = fetch_http("https://example.com")
assert result.success is False
assert result.status_code == 429
assert "429" in result.error
assert "Rate limit" in result.error
def test_fetch_500_server_error(self, mocker):
"""Erreur serveur (500 Internal Server Error)."""
mock_response = Mock()
mock_response.status_code = 500
mocker.patch("requests.get", return_value=mock_response)
result = fetch_http("https://example.com")
assert result.success is False
assert result.status_code == 500
assert "500" in result.error
assert "Server Error" in result.error
def test_fetch_503_service_unavailable(self, mocker):
"""Service indisponible (503)."""
mock_response = Mock()
mock_response.status_code = 503
mocker.patch("requests.get", return_value=mock_response)
result = fetch_http("https://example.com")
assert result.success is False
assert result.status_code == 503
assert "503" in result.error
def test_fetch_unknown_status_code(self, mocker):
"""Code de statut inconnu (par ex. 418 I'm a teapot)."""
mock_response = Mock()
mock_response.status_code = 418
mocker.patch("requests.get", return_value=mock_response)
result = fetch_http("https://example.com")
assert result.success is False
assert result.status_code == 418
assert "418" in result.error
def test_fetch_timeout_error(self, mocker):
"""Timeout lors de la requête."""
mocker.patch("requests.get", side_effect=Timeout("Connection timed out"))
result = fetch_http("https://example.com", timeout=10)
assert result.success is False
assert result.html is None
assert "Timeout" in result.error
assert result.duration_ms is not None
def test_fetch_request_exception(self, mocker):
"""Exception réseau générique."""
mocker.patch(
"requests.get",
side_effect=RequestException("Network error"),
)
result = fetch_http("https://example.com")
assert result.success is False
assert "Erreur réseau" in result.error
assert result.duration_ms is not None
def test_fetch_unexpected_exception(self, mocker):
"""Exception inattendue."""
mocker.patch("requests.get", side_effect=ValueError("Unexpected error"))
result = fetch_http("https://example.com")
assert result.success is False
assert "Erreur inattendue" in result.error
assert result.duration_ms is not None
def test_fetch_empty_url(self):
"""URL vide retourne une erreur."""
result = fetch_http("")
assert result.success is False
assert "URL vide" in result.error
assert result.html is None
def test_fetch_whitespace_url(self):
"""URL avec espaces uniquement retourne une erreur."""
result = fetch_http(" ")
assert result.success is False
assert "URL vide" in result.error
def test_fetch_no_redirects(self, mocker):
"""Requête sans suivre les redirections."""
mock_response = Mock()
mock_response.status_code = 200
mock_response.text = "<html>OK</html>"
mock_get = mocker.patch("requests.get", return_value=mock_response)
fetch_http("https://example.com", follow_redirects=False)
mock_get.assert_called_once()
call_kwargs = mock_get.call_args.kwargs
assert call_kwargs["allow_redirects"] is False
def test_fetch_uses_random_user_agent(self, mocker):
"""Vérifie qu'un User-Agent aléatoire est utilisé."""
mock_response = Mock()
mock_response.status_code = 200
mock_response.text = "<html>OK</html>"
mock_get = mocker.patch("requests.get", return_value=mock_response)
fetch_http("https://example.com")
# Vérifier qu'un User-Agent est présent
mock_get.assert_called_once()
call_kwargs = mock_get.call_args.kwargs
assert "User-Agent" in call_kwargs["headers"]
# User-Agent doit contenir "Mozilla" (présent dans tous les UA)
assert "Mozilla" in call_kwargs["headers"]["User-Agent"]
def test_fetch_duration_is_measured(self, mocker):
"""Vérifie que la durée est mesurée."""
mock_response = Mock()
mock_response.status_code = 200
mock_response.text = "<html>OK</html>"
mocker.patch("requests.get", return_value=mock_response)
result = fetch_http("https://example.com")
assert result.duration_ms is not None
assert isinstance(result.duration_ms, int)
assert result.duration_ms >= 0
def test_fetch_large_response(self, mocker):
"""Requête avec réponse volumineuse."""
mock_response = Mock()
mock_response.status_code = 200
# Simuler une grosse page HTML (1 MB)
mock_response.text = "<html>" + ("x" * 1000000) + "</html>"
mocker.patch("requests.get", return_value=mock_response)
result = fetch_http("https://example.com")
assert result.success is True
assert len(result.html) > 1000000

82
tests/scraping/test_pipeline.py Executable file
View File

@@ -0,0 +1,82 @@
"""
Tests pour ScrapingPipeline.
"""
from dataclasses import dataclass
from datetime import datetime
import pytest
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
from pricewatch.app.db.connection import get_session, init_db, reset_engine
from pricewatch.app.db.models import Product
from pricewatch.app.scraping.pipeline import ScrapingPipeline
@dataclass
class FakeDbConfig:
    """Minimal stand-in for the application's database configuration."""

    url: str  # database URL, e.g. "sqlite:///:memory:" in these tests
@dataclass
class FakeAppConfig:
    """Minimal stand-in for the application's top-level config object."""

    db: FakeDbConfig  # database settings consumed by init_db()/get_session()
    debug: bool = False  # presumably mirrors the real config's debug flag — unused here
    enable_db: bool = True  # when False the pipeline must skip persistence entirely
@pytest.fixture(autouse=True)
def reset_db_engine():
    """Reset the global DB engine before and after each test.

    Ensures every test starts with a fresh engine so in-memory SQLite
    databases are never shared across tests (teardown runs after ``yield``).
    """
    reset_engine()
    yield
    reset_engine()
def test_pipeline_persists_snapshot():
    """A processed snapshot is persisted as one Product row in SQLite."""
    cfg = FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"))
    init_db(cfg)
    snap = ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 30, 0),
        title="Produit pipeline",
        price=99.99,
        currency="EUR",
        reference="B08PIPE",
        debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
    )
    pid = ScrapingPipeline(config=cfg).process_snapshot(snap, save_to_db=True)
    assert pid is not None
    with get_session(cfg) as session:
        assert session.query(Product).count() == 1
def test_pipeline_respects_disable_flag():
    """With enable_db=False the pipeline skips persistence entirely."""
    cfg = FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"), enable_db=False)
    init_db(cfg)
    snap = ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 45, 0),
        title="Produit pipeline",
        price=99.99,
        currency="EUR",
        reference="B08PIPE",
        debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
    )
    pid = ScrapingPipeline(config=cfg).process_snapshot(snap, save_to_db=True)
    assert pid is None
    with get_session(cfg) as session:
        assert session.query(Product).count() == 0

388
tests/scraping/test_pw_fetch.py Executable file
View File

@@ -0,0 +1,388 @@
"""
Tests pour pricewatch.app.scraping.pw_fetch
Teste la récupération Playwright avec mocks pour éviter de lancer vraiment un navigateur.
"""
from unittest.mock import Mock, patch
import pytest
from playwright.sync_api import TimeoutError as PlaywrightTimeout
from pricewatch.app.scraping.pw_fetch import (
PlaywrightFetchResult,
fetch_playwright,
fetch_with_fallback,
)
class TestPlaywrightFetchResult:
    """Tests for the PlaywrightFetchResult container."""

    def test_success_result(self):
        """A successful result keeps html, screenshot and timing."""
        res = PlaywrightFetchResult(
            success=True,
            html="<html>Test</html>",
            screenshot=b"fake_screenshot_bytes",
            duration_ms=2500,
        )
        assert res.error is None
        assert res.success is True
        assert res.html == "<html>Test</html>"
        assert res.screenshot == b"fake_screenshot_bytes"
        assert res.duration_ms == 2500

    def test_error_result(self):
        """A failed result keeps the error message and any screenshot."""
        res = PlaywrightFetchResult(
            success=False,
            error="Timeout",
            screenshot=b"error_screenshot",
            duration_ms=3000,
        )
        assert res.html is None
        assert res.success is False
        assert res.error == "Timeout"
        assert res.screenshot == b"error_screenshot"
        assert res.duration_ms == 3000

    def test_minimal_result(self):
        """Only ``success`` is required; the other fields default to None."""
        res = PlaywrightFetchResult(success=False)
        assert res.success is False
        for field in ("html", "screenshot", "error", "duration_ms"):
            assert getattr(res, field) is None
class TestFetchPlaywright:
    """Tests for fetch_playwright()."""

    @pytest.fixture
    def mock_playwright_stack(self, mocker):
        """Mock the whole Playwright chain: sync_playwright() -> browser -> page."""
        page = Mock(**{
            "content.return_value": "<html><body>Playwright Test</body></html>",
            "screenshot.return_value": b"fake_screenshot_data",
            "goto.return_value": Mock(status=200),
        })
        context = Mock(**{"new_page.return_value": page})
        browser = Mock(**{"new_context.return_value": context})
        pw = Mock()
        pw.chromium = Mock(**{"launch.return_value": browser})
        mocker.patch(
            "pricewatch.app.scraping.pw_fetch.sync_playwright",
            return_value=Mock(**{"start.return_value": pw}),
        )
        return {"playwright": pw, "browser": browser, "context": context, "page": page}

    def test_fetch_success(self, mock_playwright_stack):
        """A successful fetch returns the rendered HTML."""
        res = fetch_playwright("https://example.com")
        assert res.success is True
        assert res.html == "<html><body>Playwright Test</body></html>"
        assert res.screenshot is None  # no screenshot unless explicitly requested
        assert res.error is None
        assert res.duration_ms is not None
        assert res.duration_ms >= 0
        # The page must have been navigated exactly once.
        mock_playwright_stack["page"].goto.assert_called_once_with(
            "https://example.com", wait_until="domcontentloaded"
        )

    def test_fetch_with_screenshot(self, mock_playwright_stack):
        """save_screenshot=True captures and returns the screenshot bytes."""
        res = fetch_playwright("https://example.com", save_screenshot=True)
        assert res.success is True
        assert res.screenshot == b"fake_screenshot_data"
        mock_playwright_stack["page"].screenshot.assert_called_once()

    def test_fetch_headful_mode(self, mock_playwright_stack):
        """headless=False is forwarded to chromium.launch()."""
        res = fetch_playwright("https://example.com", headless=False)
        assert res.success is True
        launch = mock_playwright_stack["playwright"].chromium.launch
        launch.assert_called_once()
        assert launch.call_args.kwargs["headless"] is False

    def test_fetch_with_custom_timeout(self, mock_playwright_stack):
        """A custom timeout is applied via set_default_timeout()."""
        res = fetch_playwright("https://example.com", timeout_ms=30000)
        assert res.success is True
        mock_playwright_stack["page"].set_default_timeout.assert_called_once_with(30000)

    def test_fetch_with_wait_for_selector(self, mock_playwright_stack):
        """A CSS selector to wait for is passed through to the page."""
        res = fetch_playwright(
            "https://example.com", wait_for_selector=".product-title"
        )
        assert res.success is True
        mock_playwright_stack["page"].wait_for_selector.assert_called_once_with(
            ".product-title", timeout=60000
        )

    def test_fetch_wait_for_selector_timeout(self, mock_playwright_stack):
        """A selector timeout is non-fatal: the fetch still succeeds."""
        mock_playwright_stack["page"].wait_for_selector.side_effect = PlaywrightTimeout(
            "Selector timeout"
        )
        res = fetch_playwright(
            "https://example.com", wait_for_selector=".non-existent"
        )
        assert res.success is True
        assert res.html is not None

    def test_fetch_empty_url(self):
        """An empty URL fails without launching a browser."""
        res = fetch_playwright("")
        assert res.success is False
        assert "URL vide" in res.error
        assert res.html is None

    def test_fetch_whitespace_url(self):
        """A whitespace-only URL is treated as empty."""
        res = fetch_playwright("   ")
        assert res.success is False
        assert "URL vide" in res.error

    def test_fetch_no_response_from_server(self, mock_playwright_stack):
        """A None response from goto() is reported as no server response."""
        mock_playwright_stack["page"].goto.return_value = None
        res = fetch_playwright("https://example.com")
        assert res.success is False
        assert "Pas de réponse du serveur" in res.error

    def test_fetch_playwright_timeout(self, mock_playwright_stack):
        """A navigation timeout is surfaced in the error message."""
        mock_playwright_stack["page"].goto.side_effect = PlaywrightTimeout(
            "Navigation timeout"
        )
        res = fetch_playwright("https://example.com", timeout_ms=10000)
        assert res.success is False
        assert "Timeout" in res.error
        assert res.duration_ms is not None

    def test_fetch_playwright_generic_error(self, mock_playwright_stack):
        """Any other exception is reported as a Playwright error."""
        mock_playwright_stack["page"].goto.side_effect = Exception(
            "Generic Playwright error"
        )
        res = fetch_playwright("https://example.com")
        assert res.success is False
        assert "Erreur Playwright" in res.error
        assert res.duration_ms is not None

    def test_fetch_cleanup_on_success(self, mock_playwright_stack):
        """Page, browser and playwright are all released on success."""
        res = fetch_playwright("https://example.com")
        assert res.success is True
        mock_playwright_stack["page"].close.assert_called_once()
        mock_playwright_stack["browser"].close.assert_called_once()
        mock_playwright_stack["playwright"].stop.assert_called_once()

    def test_fetch_cleanup_on_error(self, mock_playwright_stack):
        """Resources are released even when navigation raises."""
        mock_playwright_stack["page"].goto.side_effect = Exception("Test error")
        res = fetch_playwright("https://example.com")
        assert res.success is False
        mock_playwright_stack["page"].close.assert_called_once()
        mock_playwright_stack["browser"].close.assert_called_once()
        mock_playwright_stack["playwright"].stop.assert_called_once()

    def test_fetch_screenshot_on_error(self, mock_playwright_stack):
        """A screenshot is still captured when the fetch fails."""
        mock_playwright_stack["page"].goto.side_effect = PlaywrightTimeout("Timeout")
        res = fetch_playwright("https://example.com", save_screenshot=True)
        assert res.success is False
        assert res.screenshot == b"fake_screenshot_data"
        mock_playwright_stack["page"].screenshot.assert_called_once()
class TestFetchWithFallback:
    """Tests for fetch_with_fallback()."""

    def test_http_success_no_playwright(self, mocker):
        """When plain HTTP succeeds, Playwright is never launched."""
        http_ok = Mock(success=True, html="<html>HTTP Success</html>", duration_ms=150)
        mocker.patch(
            "pricewatch.app.scraping.http_fetch.fetch_http",
            return_value=http_ok,
        )
        pw_spy = mocker.patch(
            "pricewatch.app.scraping.pw_fetch.fetch_playwright"
        )
        res = fetch_with_fallback("https://example.com")
        assert res.success is True
        assert res.html == "<html>HTTP Success</html>"
        assert res.duration_ms == 150
        pw_spy.assert_not_called()

    def test_http_fails_playwright_fallback(self, mocker):
        """When HTTP fails, the fetch falls back to Playwright."""
        http_ko = Mock(success=False, error="403 Forbidden")
        mocker.patch(
            "pricewatch.app.scraping.http_fetch.fetch_http",
            return_value=http_ko,
        )
        pw_spy = mocker.patch(
            "pricewatch.app.scraping.pw_fetch.fetch_playwright",
            return_value=PlaywrightFetchResult(
                success=True,
                html="<html>Playwright Success</html>",
                duration_ms=2500,
            ),
        )
        res = fetch_with_fallback("https://example.com")
        assert res.success is True
        assert res.html == "<html>Playwright Success</html>"
        pw_spy.assert_called_once()

    def test_skip_http_direct_playwright(self, mocker):
        """try_http_first=False goes straight to Playwright."""
        http_spy = mocker.patch("pricewatch.app.scraping.http_fetch.fetch_http")
        pw_spy = mocker.patch(
            "pricewatch.app.scraping.pw_fetch.fetch_playwright",
            return_value=PlaywrightFetchResult(
                success=True,
                html="<html>Playwright Direct</html>",
                duration_ms=2500,
            ),
        )
        res = fetch_with_fallback("https://example.com", try_http_first=False)
        assert res.success is True
        assert res.html == "<html>Playwright Direct</html>"
        http_spy.assert_not_called()
        pw_spy.assert_called_once()
def test_playwright_options_passed(self, mocker):
    """Playwright options are forwarded unchanged to fetch_playwright()."""
    # Mock fetch_http so it fails, forcing the Playwright fallback.
    mock_http_result = Mock()
    mock_http_result.success = False
    mock_http_result.error = "403 Forbidden"
    mocker.patch(
        "pricewatch.app.scraping.http_fetch.fetch_http",
        return_value=mock_http_result,
    )
    # Mock fetch_playwright so we can inspect how it is called.
    mock_playwright_result = PlaywrightFetchResult(
        success=True,
        html="<html>OK</html>",
        duration_ms=2500,
    )
    mock_playwright = mocker.patch(
        "pricewatch.app.scraping.pw_fetch.fetch_playwright",
        return_value=mock_playwright_result,
    )
    # Custom options that must reach fetch_playwright as keyword arguments.
    options = {"headless": False, "timeout_ms": 30000, "save_screenshot": True}
    result = fetch_with_fallback("https://example.com", playwright_options=options)
    assert result.success is True
    # Each option must be forwarded verbatim as a keyword argument.
    mock_playwright.assert_called_once_with("https://example.com", **options)