codex
This commit is contained in:
BIN
tests/cli/__pycache__/test_run_db.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/cli/__pycache__/test_run_db.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
106
tests/cli/test_run_db.py
Executable file
106
tests/cli/test_run_db.py
Executable file
@@ -0,0 +1,106 @@
|
||||
"""
|
||||
Tests end-to-end pour la commande CLI run avec persistence DB.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from pricewatch.app.cli import main as cli_main
|
||||
from pricewatch.app.core.registry import get_registry
|
||||
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
|
||||
from pricewatch.app.db.connection import get_session, init_db, reset_engine
|
||||
from pricewatch.app.db.models import Product
|
||||
from pricewatch.app.stores.base import BaseStore
|
||||
|
||||
|
||||
@dataclass
|
||||
class FakeDbConfig:
|
||||
url: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class FakeAppConfig:
|
||||
db: FakeDbConfig
|
||||
debug: bool = False
|
||||
enable_db: bool = True
|
||||
|
||||
|
||||
class DummyStore(BaseStore):
|
||||
def __init__(self) -> None:
|
||||
super().__init__(store_id="dummy")
|
||||
|
||||
def match(self, url: str) -> float:
|
||||
return 1.0 if "example.com" in url else 0.0
|
||||
|
||||
def canonicalize(self, url: str) -> str:
|
||||
return url
|
||||
|
||||
def extract_reference(self, url: str) -> str | None:
|
||||
return "REF123"
|
||||
|
||||
def parse(self, html: str, url: str) -> ProductSnapshot:
|
||||
return ProductSnapshot(
|
||||
source=self.store_id,
|
||||
url=url,
|
||||
title="Produit dummy",
|
||||
price=9.99,
|
||||
currency="EUR",
|
||||
reference="REF123",
|
||||
debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
|
||||
)
|
||||
|
||||
|
||||
class DummyFetchResult:
|
||||
def __init__(self, html: str) -> None:
|
||||
self.success = True
|
||||
self.html = html
|
||||
self.error = None
|
||||
|
||||
|
||||
def test_cli_run_persists_db(tmp_path, monkeypatch):
|
||||
"""Le CLI run persiste en base quand --save-db est active."""
|
||||
reset_engine()
|
||||
db_path = tmp_path / "test.db"
|
||||
config = FakeAppConfig(db=FakeDbConfig(url=f"sqlite:///{db_path}"))
|
||||
init_db(config)
|
||||
|
||||
yaml_path = tmp_path / "config.yaml"
|
||||
out_path = tmp_path / "out.json"
|
||||
yaml_path.write_text(
|
||||
"""
|
||||
urls:
|
||||
- "https://example.com/product"
|
||||
options:
|
||||
use_playwright: false
|
||||
save_html: false
|
||||
save_screenshot: false
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
registry = get_registry()
|
||||
previous_stores = list(registry._stores)
|
||||
registry._stores = []
|
||||
registry.register(DummyStore())
|
||||
|
||||
monkeypatch.setattr(cli_main, "get_config", lambda: config)
|
||||
monkeypatch.setattr(cli_main, "setup_stores", lambda: None)
|
||||
monkeypatch.setattr(cli_main, "fetch_http", lambda url: DummyFetchResult("<html></html>"))
|
||||
|
||||
runner = CliRunner()
|
||||
try:
|
||||
result = runner.invoke(
|
||||
cli_main.app,
|
||||
["run", "--yaml", str(yaml_path), "--out", str(out_path), "--save-db"],
|
||||
)
|
||||
finally:
|
||||
registry._stores = previous_stores
|
||||
reset_engine()
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert out_path.exists()
|
||||
|
||||
with get_session(config) as session:
|
||||
assert session.query(Product).count() == 1
|
||||
BIN
tests/core/__pycache__/test_io.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/core/__pycache__/test_io.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
BIN
tests/core/__pycache__/test_registry_integration.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/core/__pycache__/test_registry_integration.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
462
tests/core/test_io.py
Executable file
462
tests/core/test_io.py
Executable file
@@ -0,0 +1,462 @@
|
||||
"""
|
||||
Tests pour pricewatch.app.core.io
|
||||
|
||||
Teste la lecture/écriture YAML/JSON et les fonctions de sauvegarde debug.
|
||||
"""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from pricewatch.app.core.io import (
|
||||
ScrapingConfig,
|
||||
ScrapingOptions,
|
||||
read_json_results,
|
||||
read_yaml_config,
|
||||
save_debug_html,
|
||||
save_debug_screenshot,
|
||||
write_json_results,
|
||||
)
|
||||
from pricewatch.app.core.schema import (
|
||||
DebugInfo,
|
||||
DebugStatus,
|
||||
FetchMethod,
|
||||
ProductSnapshot,
|
||||
StockStatus,
|
||||
)
|
||||
|
||||
|
||||
class TestScrapingOptions:
|
||||
"""Tests pour le modèle ScrapingOptions."""
|
||||
|
||||
def test_default_values(self):
|
||||
"""Les valeurs par défaut sont correctes."""
|
||||
options = ScrapingOptions()
|
||||
assert options.use_playwright is True
|
||||
assert options.headful is False
|
||||
assert options.save_html is True
|
||||
assert options.save_screenshot is True
|
||||
assert options.timeout_ms == 60000
|
||||
|
||||
def test_custom_values(self):
|
||||
"""Les valeurs personnalisées sont acceptées."""
|
||||
options = ScrapingOptions(
|
||||
use_playwright=False,
|
||||
headful=True,
|
||||
save_html=False,
|
||||
save_screenshot=False,
|
||||
timeout_ms=30000,
|
||||
)
|
||||
assert options.use_playwright is False
|
||||
assert options.headful is True
|
||||
assert options.save_html is False
|
||||
assert options.save_screenshot is False
|
||||
assert options.timeout_ms == 30000
|
||||
|
||||
def test_timeout_validation_min(self):
|
||||
"""Timeout inférieur à 1000ms est rejeté."""
|
||||
with pytest.raises(ValueError):
|
||||
ScrapingOptions(timeout_ms=500)
|
||||
|
||||
def test_timeout_validation_valid(self):
|
||||
"""Timeout >= 1000ms est accepté."""
|
||||
options = ScrapingOptions(timeout_ms=1000)
|
||||
assert options.timeout_ms == 1000
|
||||
|
||||
|
||||
class TestScrapingConfig:
|
||||
"""Tests pour le modèle ScrapingConfig."""
|
||||
|
||||
def test_minimal_config(self):
|
||||
"""Config minimale avec URLs uniquement."""
|
||||
config = ScrapingConfig(urls=["https://example.com"])
|
||||
assert len(config.urls) == 1
|
||||
assert config.urls[0] == "https://example.com"
|
||||
assert isinstance(config.options, ScrapingOptions)
|
||||
|
||||
def test_config_with_options(self):
|
||||
"""Config avec URLs et options."""
|
||||
options = ScrapingOptions(use_playwright=False, timeout_ms=10000)
|
||||
config = ScrapingConfig(
|
||||
urls=["https://example.com", "https://test.com"], options=options
|
||||
)
|
||||
assert len(config.urls) == 2
|
||||
assert config.options.use_playwright is False
|
||||
assert config.options.timeout_ms == 10000
|
||||
|
||||
def test_validate_urls_empty_list(self):
|
||||
"""Liste d'URLs vide est rejetée."""
|
||||
with pytest.raises(ValueError, match="Au moins une URL"):
|
||||
ScrapingConfig(urls=[])
|
||||
|
||||
def test_validate_urls_strips_whitespace(self):
|
||||
"""Les espaces sont nettoyés."""
|
||||
config = ScrapingConfig(urls=[" https://example.com ", "https://test.com"])
|
||||
assert config.urls == ["https://example.com", "https://test.com"]
|
||||
|
||||
def test_validate_urls_removes_empty(self):
|
||||
"""Les URLs vides sont supprimées."""
|
||||
config = ScrapingConfig(
|
||||
urls=["https://example.com", "", " ", "https://test.com"]
|
||||
)
|
||||
assert len(config.urls) == 2
|
||||
assert config.urls == ["https://example.com", "https://test.com"]
|
||||
|
||||
def test_validate_urls_all_empty(self):
|
||||
"""Si toutes les URLs sont vides, erreur."""
|
||||
with pytest.raises(ValueError, match="Aucune URL valide"):
|
||||
ScrapingConfig(urls=["", " ", "\t"])
|
||||
|
||||
|
||||
class TestReadYamlConfig:
|
||||
"""Tests pour read_yaml_config()."""
|
||||
|
||||
def test_read_valid_yaml(self, tmp_path):
|
||||
"""Lit un fichier YAML valide."""
|
||||
yaml_path = tmp_path / "config.yaml"
|
||||
yaml_content = {
|
||||
"urls": ["https://example.com", "https://test.com"],
|
||||
"options": {"use_playwright": False, "timeout_ms": 30000},
|
||||
}
|
||||
with open(yaml_path, "w") as f:
|
||||
yaml.dump(yaml_content, f)
|
||||
|
||||
config = read_yaml_config(yaml_path)
|
||||
assert len(config.urls) == 2
|
||||
assert config.urls[0] == "https://example.com"
|
||||
assert config.options.use_playwright is False
|
||||
assert config.options.timeout_ms == 30000
|
||||
|
||||
def test_read_yaml_minimal(self, tmp_path):
|
||||
"""Lit un YAML minimal (URLs uniquement)."""
|
||||
yaml_path = tmp_path / "config.yaml"
|
||||
yaml_content = {"urls": ["https://example.com"]}
|
||||
with open(yaml_path, "w") as f:
|
||||
yaml.dump(yaml_content, f)
|
||||
|
||||
config = read_yaml_config(yaml_path)
|
||||
assert len(config.urls) == 1
|
||||
# Options par défaut
|
||||
assert config.options.use_playwright is True
|
||||
assert config.options.timeout_ms == 60000
|
||||
|
||||
def test_read_yaml_file_not_found(self, tmp_path):
|
||||
"""Fichier introuvable lève FileNotFoundError."""
|
||||
yaml_path = tmp_path / "nonexistent.yaml"
|
||||
with pytest.raises(FileNotFoundError):
|
||||
read_yaml_config(yaml_path)
|
||||
|
||||
def test_read_yaml_empty_file(self, tmp_path):
|
||||
"""Fichier YAML vide lève ValueError."""
|
||||
yaml_path = tmp_path / "empty.yaml"
|
||||
yaml_path.write_text("")
|
||||
|
||||
with pytest.raises(ValueError, match="Fichier YAML vide"):
|
||||
read_yaml_config(yaml_path)
|
||||
|
||||
def test_read_yaml_invalid_syntax(self, tmp_path):
|
||||
"""YAML avec syntaxe invalide lève ValueError."""
|
||||
yaml_path = tmp_path / "invalid.yaml"
|
||||
yaml_path.write_text("urls: [invalid yaml syntax")
|
||||
|
||||
with pytest.raises(ValueError, match="YAML invalide"):
|
||||
read_yaml_config(yaml_path)
|
||||
|
||||
def test_read_yaml_missing_urls(self, tmp_path):
|
||||
"""YAML sans champ 'urls' lève erreur de validation."""
|
||||
yaml_path = tmp_path / "config.yaml"
|
||||
yaml_content = {"options": {"use_playwright": False}}
|
||||
with open(yaml_path, "w") as f:
|
||||
yaml.dump(yaml_content, f)
|
||||
|
||||
with pytest.raises(Exception): # Pydantic validation error
|
||||
read_yaml_config(yaml_path)
|
||||
|
||||
def test_read_yaml_accepts_path_string(self, tmp_path):
|
||||
"""Accepte un string comme chemin."""
|
||||
yaml_path = tmp_path / "config.yaml"
|
||||
yaml_content = {"urls": ["https://example.com"]}
|
||||
with open(yaml_path, "w") as f:
|
||||
yaml.dump(yaml_content, f)
|
||||
|
||||
config = read_yaml_config(str(yaml_path))
|
||||
assert len(config.urls) == 1
|
||||
|
||||
|
||||
class TestWriteJsonResults:
|
||||
"""Tests pour write_json_results()."""
|
||||
|
||||
@pytest.fixture
|
||||
def sample_snapshot(self) -> ProductSnapshot:
|
||||
"""Fixture: ProductSnapshot exemple."""
|
||||
return ProductSnapshot(
|
||||
source="test",
|
||||
url="https://example.com/product",
|
||||
fetched_at=datetime(2024, 1, 1, 12, 0, 0),
|
||||
title="Test Product",
|
||||
price=99.99,
|
||||
currency="EUR",
|
||||
stock_status=StockStatus.IN_STOCK,
|
||||
reference="TEST123",
|
||||
images=["https://example.com/img1.jpg"],
|
||||
category="Test Category",
|
||||
specs={"Brand": "TestBrand"},
|
||||
debug=DebugInfo(
|
||||
method=FetchMethod.HTTP,
|
||||
status=DebugStatus.SUCCESS,
|
||||
errors=[],
|
||||
notes=[],
|
||||
),
|
||||
)
|
||||
|
||||
def test_write_single_snapshot(self, tmp_path, sample_snapshot):
|
||||
"""Écrit un seul snapshot."""
|
||||
json_path = tmp_path / "results.json"
|
||||
write_json_results([sample_snapshot], json_path)
|
||||
|
||||
assert json_path.exists()
|
||||
|
||||
# Vérifier le contenu
|
||||
with open(json_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
assert isinstance(data, list)
|
||||
assert len(data) == 1
|
||||
assert data[0]["source"] == "test"
|
||||
assert data[0]["title"] == "Test Product"
|
||||
|
||||
def test_write_multiple_snapshots(self, tmp_path, sample_snapshot):
|
||||
"""Écrit plusieurs snapshots."""
|
||||
snapshot2 = ProductSnapshot(
|
||||
source="test2",
|
||||
url="https://example.com/product2",
|
||||
fetched_at=datetime(2024, 1, 2, 12, 0, 0),
|
||||
title="Test Product 2",
|
||||
price=49.99,
|
||||
currency="EUR",
|
||||
stock_status=StockStatus.OUT_OF_STOCK,
|
||||
debug=DebugInfo(
|
||||
method=FetchMethod.PLAYWRIGHT,
|
||||
status=DebugStatus.PARTIAL,
|
||||
errors=["Test error"],
|
||||
notes=[],
|
||||
),
|
||||
)
|
||||
|
||||
json_path = tmp_path / "results.json"
|
||||
write_json_results([sample_snapshot, snapshot2], json_path)
|
||||
|
||||
with open(json_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
assert len(data) == 2
|
||||
assert data[0]["source"] == "test"
|
||||
assert data[1]["source"] == "test2"
|
||||
|
||||
def test_write_creates_parent_dirs(self, tmp_path, sample_snapshot):
|
||||
"""Crée les dossiers parents si nécessaire."""
|
||||
json_path = tmp_path / "sub" / "dir" / "results.json"
|
||||
write_json_results([sample_snapshot], json_path)
|
||||
|
||||
assert json_path.exists()
|
||||
assert json_path.parent.exists()
|
||||
|
||||
def test_write_empty_list(self, tmp_path):
|
||||
"""Écrit une liste vide."""
|
||||
json_path = tmp_path / "empty.json"
|
||||
write_json_results([], json_path)
|
||||
|
||||
assert json_path.exists()
|
||||
|
||||
with open(json_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
assert data == []
|
||||
|
||||
def test_write_indent_control(self, tmp_path, sample_snapshot):
|
||||
"""Contrôle l'indentation."""
|
||||
# Avec indent
|
||||
json_path1 = tmp_path / "pretty.json"
|
||||
write_json_results([sample_snapshot], json_path1, indent=2)
|
||||
content1 = json_path1.read_text()
|
||||
assert "\n" in content1 # Pretty-printed
|
||||
|
||||
# Sans indent (compact)
|
||||
json_path2 = tmp_path / "compact.json"
|
||||
write_json_results([sample_snapshot], json_path2, indent=None)
|
||||
content2 = json_path2.read_text()
|
||||
assert len(content2) < len(content1) # Plus compact
|
||||
|
||||
def test_write_accepts_path_string(self, tmp_path, sample_snapshot):
|
||||
"""Accepte un string comme chemin."""
|
||||
json_path = tmp_path / "results.json"
|
||||
write_json_results([sample_snapshot], str(json_path))
|
||||
assert json_path.exists()
|
||||
|
||||
|
||||
class TestReadJsonResults:
|
||||
"""Tests pour read_json_results()."""
|
||||
|
||||
@pytest.fixture
|
||||
def json_file_with_snapshot(self, tmp_path) -> Path:
|
||||
"""Fixture: Fichier JSON avec un snapshot."""
|
||||
json_path = tmp_path / "results.json"
|
||||
snapshot_data = {
|
||||
"source": "test",
|
||||
"url": "https://example.com/product",
|
||||
"fetched_at": "2024-01-01T12:00:00",
|
||||
"title": "Test Product",
|
||||
"price": 99.99,
|
||||
"currency": "EUR",
|
||||
"shipping_cost": None,
|
||||
"stock_status": "in_stock",
|
||||
"reference": "TEST123",
|
||||
"images": ["https://example.com/img.jpg"],
|
||||
"category": "Test",
|
||||
"specs": {"Brand": "Test"},
|
||||
"debug": {
|
||||
"method": "http",
|
||||
"status": "success",
|
||||
"errors": [],
|
||||
"notes": [],
|
||||
"duration_ms": None,
|
||||
"html_size_bytes": None,
|
||||
},
|
||||
}
|
||||
|
||||
with open(json_path, "w") as f:
|
||||
json.dump([snapshot_data], f)
|
||||
|
||||
return json_path
|
||||
|
||||
def test_read_single_snapshot(self, json_file_with_snapshot):
|
||||
"""Lit un fichier avec un snapshot."""
|
||||
snapshots = read_json_results(json_file_with_snapshot)
|
||||
|
||||
assert len(snapshots) == 1
|
||||
assert isinstance(snapshots[0], ProductSnapshot)
|
||||
assert snapshots[0].source == "test"
|
||||
assert snapshots[0].title == "Test Product"
|
||||
assert snapshots[0].price == 99.99
|
||||
|
||||
def test_read_file_not_found(self, tmp_path):
|
||||
"""Fichier introuvable lève FileNotFoundError."""
|
||||
json_path = tmp_path / "nonexistent.json"
|
||||
with pytest.raises(FileNotFoundError):
|
||||
read_json_results(json_path)
|
||||
|
||||
def test_read_invalid_json(self, tmp_path):
|
||||
"""JSON invalide lève ValueError."""
|
||||
json_path = tmp_path / "invalid.json"
|
||||
json_path.write_text("{invalid json")
|
||||
|
||||
with pytest.raises(ValueError, match="JSON invalide"):
|
||||
read_json_results(json_path)
|
||||
|
||||
def test_read_not_a_list(self, tmp_path):
|
||||
"""JSON qui n'est pas une liste lève ValueError."""
|
||||
json_path = tmp_path / "notlist.json"
|
||||
with open(json_path, "w") as f:
|
||||
json.dump({"key": "value"}, f)
|
||||
|
||||
with pytest.raises(ValueError, match="doit contenir une liste"):
|
||||
read_json_results(json_path)
|
||||
|
||||
def test_read_empty_list(self, tmp_path):
|
||||
"""Liste vide est acceptée."""
|
||||
json_path = tmp_path / "empty.json"
|
||||
with open(json_path, "w") as f:
|
||||
json.dump([], f)
|
||||
|
||||
snapshots = read_json_results(json_path)
|
||||
assert snapshots == []
|
||||
|
||||
def test_read_accepts_path_string(self, json_file_with_snapshot):
|
||||
"""Accepte un string comme chemin."""
|
||||
snapshots = read_json_results(str(json_file_with_snapshot))
|
||||
assert len(snapshots) == 1
|
||||
|
||||
|
||||
class TestSaveDebugHtml:
|
||||
"""Tests pour save_debug_html()."""
|
||||
|
||||
def test_save_html_default_dir(self, tmp_path, monkeypatch):
|
||||
"""Sauvegarde HTML dans le dossier par défaut."""
|
||||
# Changer le répertoire de travail pour le test
|
||||
monkeypatch.chdir(tmp_path)
|
||||
|
||||
html = "<html><body>Test</body></html>"
|
||||
result_path = save_debug_html(html, "test_page")
|
||||
|
||||
assert result_path.exists()
|
||||
assert result_path.name == "test_page.html"
|
||||
assert result_path.read_text(encoding="utf-8") == html
|
||||
|
||||
def test_save_html_custom_dir(self, tmp_path):
|
||||
"""Sauvegarde HTML dans un dossier personnalisé."""
|
||||
output_dir = tmp_path / "debug_html"
|
||||
html = "<html><body>Test</body></html>"
|
||||
|
||||
result_path = save_debug_html(html, "test_page", output_dir)
|
||||
|
||||
assert result_path.parent == output_dir
|
||||
assert result_path.name == "test_page.html"
|
||||
assert result_path.read_text(encoding="utf-8") == html
|
||||
|
||||
def test_save_html_creates_dir(self, tmp_path):
|
||||
"""Crée le dossier de sortie s'il n'existe pas."""
|
||||
output_dir = tmp_path / "sub" / "dir" / "html"
|
||||
html = "<html><body>Test</body></html>"
|
||||
|
||||
result_path = save_debug_html(html, "test_page", output_dir)
|
||||
|
||||
assert output_dir.exists()
|
||||
assert result_path.exists()
|
||||
|
||||
def test_save_html_large_content(self, tmp_path):
|
||||
"""Sauvegarde du HTML volumineux."""
|
||||
html = "<html><body>" + ("x" * 100000) + "</body></html>"
|
||||
result_path = save_debug_html(html, "large_page", tmp_path)
|
||||
|
||||
assert result_path.exists()
|
||||
assert len(result_path.read_text(encoding="utf-8")) == len(html)
|
||||
|
||||
|
||||
class TestSaveDebugScreenshot:
|
||||
"""Tests pour save_debug_screenshot()."""
|
||||
|
||||
def test_save_screenshot_default_dir(self, tmp_path, monkeypatch):
|
||||
"""Sauvegarde screenshot dans le dossier par défaut."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
|
||||
screenshot_bytes = b"\x89PNG fake image data"
|
||||
result_path = save_debug_screenshot(screenshot_bytes, "test_screenshot")
|
||||
|
||||
assert result_path.exists()
|
||||
assert result_path.name == "test_screenshot.png"
|
||||
assert result_path.read_bytes() == screenshot_bytes
|
||||
|
||||
def test_save_screenshot_custom_dir(self, tmp_path):
|
||||
"""Sauvegarde screenshot dans un dossier personnalisé."""
|
||||
output_dir = tmp_path / "screenshots"
|
||||
screenshot_bytes = b"\x89PNG fake image data"
|
||||
|
||||
result_path = save_debug_screenshot(screenshot_bytes, "test_screenshot", output_dir)
|
||||
|
||||
assert result_path.parent == output_dir
|
||||
assert result_path.name == "test_screenshot.png"
|
||||
assert result_path.read_bytes() == screenshot_bytes
|
||||
|
||||
def test_save_screenshot_creates_dir(self, tmp_path):
|
||||
"""Crée le dossier de sortie s'il n'existe pas."""
|
||||
output_dir = tmp_path / "sub" / "dir" / "screenshots"
|
||||
screenshot_bytes = b"\x89PNG fake image data"
|
||||
|
||||
result_path = save_debug_screenshot(screenshot_bytes, "test_screenshot", output_dir)
|
||||
|
||||
assert output_dir.exists()
|
||||
assert result_path.exists()
|
||||
174
tests/core/test_registry_integration.py
Executable file
174
tests/core/test_registry_integration.py
Executable file
@@ -0,0 +1,174 @@
|
||||
"""
|
||||
Tests d'intégration pour le registry avec les stores réels.
|
||||
|
||||
Teste la détection automatique du bon store pour des URLs
|
||||
Amazon, Cdiscount, Backmarket et AliExpress.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from pricewatch.app.core.registry import StoreRegistry
|
||||
from pricewatch.app.stores.amazon.store import AmazonStore
|
||||
from pricewatch.app.stores.cdiscount.store import CdiscountStore
|
||||
from pricewatch.app.stores.backmarket.store import BackmarketStore
|
||||
from pricewatch.app.stores.aliexpress.store import AliexpressStore
|
||||
|
||||
|
||||
class TestRegistryRealStores:
|
||||
"""Tests d'intégration avec les 4 stores réels."""
|
||||
|
||||
@pytest.fixture
|
||||
def registry_with_all_stores(self) -> StoreRegistry:
|
||||
"""Fixture: Registry avec les 4 stores réels enregistrés."""
|
||||
registry = StoreRegistry()
|
||||
registry.register(AmazonStore())
|
||||
registry.register(CdiscountStore())
|
||||
registry.register(BackmarketStore())
|
||||
registry.register(AliexpressStore())
|
||||
return registry
|
||||
|
||||
def test_all_stores_registered(self, registry_with_all_stores):
|
||||
"""Vérifie que les 4 stores sont enregistrés."""
|
||||
assert len(registry_with_all_stores) == 4
|
||||
stores = registry_with_all_stores.list_stores()
|
||||
assert "amazon" in stores
|
||||
assert "cdiscount" in stores
|
||||
assert "backmarket" in stores
|
||||
assert "aliexpress" in stores
|
||||
|
||||
def test_detect_amazon_fr(self, registry_with_all_stores):
|
||||
"""Détecte Amazon.fr correctement."""
|
||||
url = "https://www.amazon.fr/dp/B08N5WRWNW"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is not None
|
||||
assert store.store_id == "amazon"
|
||||
|
||||
def test_detect_amazon_com(self, registry_with_all_stores):
|
||||
"""Détecte Amazon.com correctement."""
|
||||
url = "https://www.amazon.com/dp/B08N5WRWNW"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is not None
|
||||
assert store.store_id == "amazon"
|
||||
|
||||
def test_detect_amazon_with_product_name(self, registry_with_all_stores):
|
||||
"""Détecte Amazon avec nom de produit dans l'URL."""
|
||||
url = "https://www.amazon.fr/Product-Name-Here/dp/B08N5WRWNW/ref=sr_1_1"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is not None
|
||||
assert store.store_id == "amazon"
|
||||
|
||||
def test_detect_cdiscount(self, registry_with_all_stores):
|
||||
"""Détecte Cdiscount correctement."""
|
||||
url = "https://www.cdiscount.com/informatique/clavier-souris-webcam/example/f-1070123-example.html"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is not None
|
||||
assert store.store_id == "cdiscount"
|
||||
|
||||
def test_detect_backmarket(self, registry_with_all_stores):
|
||||
"""Détecte Backmarket correctement."""
|
||||
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is not None
|
||||
assert store.store_id == "backmarket"
|
||||
|
||||
def test_detect_backmarket_locale_en(self, registry_with_all_stores):
|
||||
"""Détecte Backmarket avec locale anglais."""
|
||||
url = "https://www.backmarket.fr/en-fr/p/macbook-air-15-2024"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is not None
|
||||
assert store.store_id == "backmarket"
|
||||
|
||||
def test_detect_aliexpress_fr(self, registry_with_all_stores):
|
||||
"""Détecte AliExpress.fr correctement."""
|
||||
url = "https://fr.aliexpress.com/item/1005007187023722.html"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is not None
|
||||
assert store.store_id == "aliexpress"
|
||||
|
||||
def test_detect_aliexpress_com(self, registry_with_all_stores):
|
||||
"""Détecte AliExpress.com correctement."""
|
||||
url = "https://www.aliexpress.com/item/1005007187023722.html"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is not None
|
||||
assert store.store_id == "aliexpress"
|
||||
|
||||
def test_detect_unknown_store(self, registry_with_all_stores):
|
||||
"""URL inconnue retourne None."""
|
||||
url = "https://www.ebay.com/itm/123456789"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is None
|
||||
|
||||
def test_detect_invalid_url(self, registry_with_all_stores):
|
||||
"""URL invalide retourne None."""
|
||||
url = "not-a-valid-url"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is None
|
||||
|
||||
def test_detect_priority_amazon_over_others(self, registry_with_all_stores):
|
||||
"""Amazon.fr doit avoir le meilleur score pour ses URLs."""
|
||||
url = "https://www.amazon.fr/dp/B08N5WRWNW"
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
# Amazon.fr devrait avoir score 0.9, les autres 0.0
|
||||
assert store.store_id == "amazon"
|
||||
|
||||
def test_each_store_matches_only_own_urls(self, registry_with_all_stores):
|
||||
"""Chaque store ne matche que ses propres URLs."""
|
||||
test_cases = [
|
||||
("https://www.amazon.fr/dp/B08N5WRWNW", "amazon"),
|
||||
("https://www.cdiscount.com/product", "cdiscount"),
|
||||
("https://www.backmarket.fr/fr-fr/p/product", "backmarket"),
|
||||
("https://fr.aliexpress.com/item/12345.html", "aliexpress"),
|
||||
]
|
||||
|
||||
for url, expected_store_id in test_cases:
|
||||
store = registry_with_all_stores.detect_store(url)
|
||||
assert store is not None, f"Aucun store détecté pour {url}"
|
||||
assert store.store_id == expected_store_id, (
|
||||
f"Mauvais store pour {url}: "
|
||||
f"attendu {expected_store_id}, obtenu {store.store_id}"
|
||||
)
|
||||
|
||||
def test_get_store_by_id(self, registry_with_all_stores):
|
||||
"""Récupère chaque store par son ID."""
|
||||
amazon = registry_with_all_stores.get_store("amazon")
|
||||
assert amazon is not None
|
||||
assert isinstance(amazon, AmazonStore)
|
||||
|
||||
cdiscount = registry_with_all_stores.get_store("cdiscount")
|
||||
assert cdiscount is not None
|
||||
assert isinstance(cdiscount, CdiscountStore)
|
||||
|
||||
backmarket = registry_with_all_stores.get_store("backmarket")
|
||||
assert backmarket is not None
|
||||
assert isinstance(backmarket, BackmarketStore)
|
||||
|
||||
aliexpress = registry_with_all_stores.get_store("aliexpress")
|
||||
assert aliexpress is not None
|
||||
assert isinstance(aliexpress, AliexpressStore)
|
||||
|
||||
def test_unregister_store(self, registry_with_all_stores):
|
||||
"""Désenregistre un store et vérifie qu'il n'est plus détecté."""
|
||||
assert len(registry_with_all_stores) == 4
|
||||
|
||||
# Désenregistrer Amazon
|
||||
removed = registry_with_all_stores.unregister("amazon")
|
||||
assert removed is True
|
||||
assert len(registry_with_all_stores) == 3
|
||||
|
||||
# Amazon ne doit plus être détecté
|
||||
store = registry_with_all_stores.detect_store("https://www.amazon.fr/dp/B08N5WRWNW")
|
||||
assert store is None
|
||||
|
||||
# Les autres stores doivent toujours fonctionner
|
||||
store = registry_with_all_stores.detect_store("https://www.cdiscount.com/product")
|
||||
assert store is not None
|
||||
assert store.store_id == "cdiscount"
|
||||
|
||||
def test_repr_includes_all_stores(self, registry_with_all_stores):
|
||||
"""La représentation string inclut tous les stores."""
|
||||
repr_str = repr(registry_with_all_stores)
|
||||
assert "StoreRegistry" in repr_str
|
||||
assert "amazon" in repr_str
|
||||
assert "cdiscount" in repr_str
|
||||
assert "backmarket" in repr_str
|
||||
assert "aliexpress" in repr_str
|
||||
BIN
tests/db/__pycache__/test_connection.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/db/__pycache__/test_connection.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
BIN
tests/db/__pycache__/test_models.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/db/__pycache__/test_models.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
BIN
tests/db/__pycache__/test_repository.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/db/__pycache__/test_repository.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
87
tests/db/test_connection.py
Executable file
87
tests/db/test_connection.py
Executable file
@@ -0,0 +1,87 @@
|
||||
"""
|
||||
Tests pour la couche de connexion SQLAlchemy.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import inspect
|
||||
|
||||
from pricewatch.app.db.connection import (
|
||||
check_db_connection,
|
||||
get_engine,
|
||||
get_session,
|
||||
init_db,
|
||||
reset_engine,
|
||||
)
|
||||
from pricewatch.app.db.models import Product
|
||||
|
||||
|
||||
@dataclass
|
||||
class FakeDbConfig:
|
||||
"""Config DB minimale pour tests SQLite."""
|
||||
|
||||
url: str
|
||||
host: str = "sqlite"
|
||||
port: int = 0
|
||||
database: str = ":memory:"
|
||||
|
||||
|
||||
@dataclass
|
||||
class FakeAppConfig:
|
||||
"""Config App minimale pour tests."""
|
||||
|
||||
db: FakeDbConfig
|
||||
debug: bool = False
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_db_engine():
|
||||
"""Reset l'engine global entre les tests."""
|
||||
reset_engine()
|
||||
yield
|
||||
reset_engine()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sqlite_config() -> FakeAppConfig:
|
||||
"""Config SQLite in-memory pour tests."""
|
||||
return FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"))
|
||||
|
||||
|
||||
def test_get_engine_sqlite(sqlite_config: FakeAppConfig):
|
||||
"""Cree un engine SQLite fonctionnel."""
|
||||
engine = get_engine(sqlite_config)
|
||||
assert engine.url.get_backend_name() == "sqlite"
|
||||
|
||||
|
||||
def test_init_db_creates_tables(sqlite_config: FakeAppConfig):
|
||||
"""Init DB cree toutes les tables attendues."""
|
||||
init_db(sqlite_config)
|
||||
engine = get_engine(sqlite_config)
|
||||
inspector = inspect(engine)
|
||||
tables = set(inspector.get_table_names())
|
||||
assert "products" in tables
|
||||
assert "price_history" in tables
|
||||
assert "product_images" in tables
|
||||
assert "product_specs" in tables
|
||||
assert "scraping_logs" in tables
|
||||
|
||||
|
||||
def test_get_session_commit(sqlite_config: FakeAppConfig):
|
||||
"""La session permet un commit simple."""
|
||||
init_db(sqlite_config)
|
||||
|
||||
with get_session(sqlite_config) as session:
|
||||
product = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com")
|
||||
session.add(product)
|
||||
session.commit()
|
||||
|
||||
with get_session(sqlite_config) as session:
|
||||
assert session.query(Product).count() == 1
|
||||
|
||||
|
||||
def test_check_db_connection(sqlite_config: FakeAppConfig):
|
||||
"""Le health check DB retourne True en SQLite."""
|
||||
init_db(sqlite_config)
|
||||
assert check_db_connection(sqlite_config) is True
|
||||
89
tests/db/test_models.py
Executable file
89
tests/db/test_models.py
Executable file
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
Tests pour les modeles SQLAlchemy.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from pricewatch.app.db.models import (
|
||||
Base,
|
||||
PriceHistory,
|
||||
Product,
|
||||
ProductImage,
|
||||
ProductSpec,
|
||||
ScrapingLog,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def session() -> Session:
    """Yield an in-memory SQLite session for the model tests.

    Creates the full schema up front, hands the session to the test,
    then closes the session and disposes of the engine so pooled
    connections do not leak between tests.
    """
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    SessionLocal = sessionmaker(bind=engine)
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
        # Fix: the engine was never disposed, leaking pooled connections;
        # this mirrors the equivalent fixture in test_repository.py.
        engine.dispose()
|
||||
|
||||
|
||||
def test_product_relationships(session: Session):
    """The main relationships work (prices, images, specs, logs).

    Builds one Product with one child row per relationship, commits,
    reloads, and checks each collection round-trips with one element.
    """
    product = Product(source="amazon", reference="B08N5WRWNW", url="https://example.com")

    # One child row per related table.
    price = PriceHistory(
        price=199.99,
        shipping_cost=0,
        stock_status="in_stock",
        fetch_method="http",
        fetch_status="success",
        fetched_at=datetime.utcnow(),
    )
    image = ProductImage(image_url="https://example.com/image.jpg", position=0)
    spec = ProductSpec(spec_key="Couleur", spec_value="Noir")
    log = ScrapingLog(
        url="https://example.com",
        source="amazon",
        reference="B08N5WRWNW",
        fetch_method="http",
        fetch_status="success",
        fetched_at=datetime.utcnow(),
        duration_ms=1200,
        html_size_bytes=2048,
        # JSON columns, presumably — stored as dicts here; TODO confirm in models
        errors={"items": []},
        notes={"items": ["OK"]},
    )

    # Attaching via the relationship collections should cascade on add().
    product.price_history.append(price)
    product.images.append(image)
    product.specs.append(spec)
    product.logs.append(log)

    session.add(product)
    session.commit()

    # Reload from the DB and verify every collection persisted.
    loaded = session.query(Product).first()
    assert loaded is not None
    assert len(loaded.price_history) == 1
    assert len(loaded.images) == 1
    assert len(loaded.specs) == 1
    assert len(loaded.logs) == 1
|
||||
|
||||
|
||||
def test_unique_product_constraint(session: Session):
    """The unique constraint on (source, reference) is enforced."""
    shared_ref = "B08N5WRWNW"
    first = Product(source="amazon", reference=shared_ref, url="https://example.com/a")
    second = Product(source="amazon", reference=shared_ref, url="https://example.com/b")

    # First row commits cleanly.
    session.add(first)
    session.commit()

    # Second row with the same (source, reference) must be rejected.
    session.add(second)
    with pytest.raises(IntegrityError):
        session.commit()
    session.rollback()
|
||||
82
tests/db/test_repository.py
Executable file
82
tests/db/test_repository.py
Executable file
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
Tests pour le repository SQLAlchemy.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
|
||||
from pricewatch.app.db.models import Base, Product, ScrapingLog
|
||||
from pricewatch.app.db.repository import ProductRepository
|
||||
|
||||
|
||||
@pytest.fixture
def session() -> Session:
    """Yield an in-memory SQLite session for the repository tests.

    The schema is created up front; the session is closed and the
    engine disposed during teardown so no connections leak.
    """
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    SessionLocal = sessionmaker(bind=engine)
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
        engine.dispose()
|
||||
|
||||
|
||||
def _make_snapshot(reference: str | None) -> ProductSnapshot:
    """Build a deterministic test snapshot, with or without a reference."""
    debug_info = DebugInfo(
        method=FetchMethod.HTTP,
        status=DebugStatus.SUCCESS,
        errors=["Avertissement"],
        notes=["OK"],
    )
    return ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 0, 0),
        title="Produit test",
        price=199.99,
        currency="EUR",
        shipping_cost=0.0,
        reference=reference,
        images=["https://example.com/img1.jpg"],
        specs={"Couleur": "Noir"},
        debug=debug_info,
    )
|
||||
|
||||
|
||||
def test_save_snapshot_creates_product(session: Session):
    """The repository persists the product plus its scraping log."""
    repo = ProductRepository(session)
    snapshot = _make_snapshot(reference="B08N5WRWNW")

    product_id = repo.save_snapshot(snapshot)
    session.commit()

    # Exactly one product row, carrying the snapshot's child collections.
    product = session.query(Product).one()
    assert product.id == product_id
    assert product.reference == "B08N5WRWNW"
    assert len(product.images) == 1
    assert len(product.specs) == 1
    assert len(product.price_history) == 1

    # A single log row is linked back to the product and keeps the
    # debug errors/notes from the snapshot.
    log = session.query(ScrapingLog).one()
    assert log.product_id == product_id
    assert log.errors == ["Avertissement"]
    assert log.notes == ["OK"]
|
||||
|
||||
|
||||
def test_save_snapshot_without_reference(session: Session):
    """Without a reference, no product is created but the log still is."""
    repo = ProductRepository(session)
    snapshot = _make_snapshot(reference=None)

    product_id = repo.save_snapshot(snapshot)
    session.commit()

    # No reference -> no product row, but the scrape attempt is logged.
    assert product_id is None
    assert session.query(Product).count() == 0
    assert session.query(ScrapingLog).count() == 1
|
||||
0
tests/scraping/__init__.py
Executable file
0
tests/scraping/__init__.py
Executable file
BIN
tests/scraping/__pycache__/__init__.cpython-313.pyc
Executable file
BIN
tests/scraping/__pycache__/__init__.cpython-313.pyc
Executable file
Binary file not shown.
BIN
tests/scraping/__pycache__/test_http_fetch.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/scraping/__pycache__/test_http_fetch.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
BIN
tests/scraping/__pycache__/test_pipeline.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/scraping/__pycache__/test_pipeline.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
BIN
tests/scraping/__pycache__/test_pw_fetch.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/scraping/__pycache__/test_pw_fetch.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
290
tests/scraping/test_http_fetch.py
Executable file
290
tests/scraping/test_http_fetch.py
Executable file
@@ -0,0 +1,290 @@
|
||||
"""
|
||||
Tests pour pricewatch.app.scraping.http_fetch
|
||||
|
||||
Teste la récupération HTTP avec mocks pour éviter les vraies requêtes.
|
||||
"""
|
||||
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from requests.exceptions import RequestException, Timeout
|
||||
|
||||
from pricewatch.app.scraping.http_fetch import FetchResult, fetch_http
|
||||
|
||||
|
||||
class TestFetchResult:
    """Tests for the FetchResult container class."""

    def test_success_result(self):
        """Building a successful result keeps all fields as given."""
        result = FetchResult(
            success=True,
            html="<html>Test</html>",
            status_code=200,
            duration_ms=150,
        )

        assert result.success is True
        assert result.html == "<html>Test</html>"
        assert result.error is None
        assert result.status_code == 200
        assert result.duration_ms == 150

    def test_error_result(self):
        """Building an error result: html stays None, error is kept."""
        result = FetchResult(
            success=False,
            error="403 Forbidden",
            status_code=403,
            duration_ms=100,
        )

        assert result.success is False
        assert result.html is None
        assert result.error == "403 Forbidden"
        assert result.status_code == 403
        assert result.duration_ms == 100

    def test_minimal_result(self):
        """Minimal result with only `success`; other fields default to None."""
        result = FetchResult(success=False)

        assert result.success is False
        assert result.html is None
        assert result.error is None
        assert result.status_code is None
        assert result.duration_ms is None
|
||||
|
||||
|
||||
class TestFetchHttp:
    """Tests for fetch_http(); requests.get is mocked throughout."""

    def test_fetch_success(self, mocker):
        """Successful HTTP request (200 OK)."""
        # Mock requests.get
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "<html><body>Test Page</body></html>"
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is True
        assert result.html == "<html><body>Test Page</body></html>"
        assert result.status_code == 200
        assert result.error is None
        assert result.duration_ms is not None
        assert result.duration_ms >= 0

    def test_fetch_with_custom_timeout(self, mocker):
        """Request with a custom timeout."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "<html>OK</html>"
        mock_get = mocker.patch("requests.get", return_value=mock_response)

        fetch_http("https://example.com", timeout=60)

        # Verify the timeout is forwarded to requests.get
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert call_kwargs["timeout"] == 60

    def test_fetch_with_custom_headers(self, mocker):
        """Request with custom headers merged into the defaults."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "<html>OK</html>"
        mock_get = mocker.patch("requests.get", return_value=mock_response)

        custom_headers = {"X-Custom-Header": "test-value"}
        fetch_http("https://example.com", headers=custom_headers)

        # Verify the custom headers are included
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert "X-Custom-Header" in call_kwargs["headers"]
        assert call_kwargs["headers"]["X-Custom-Header"] == "test-value"
        # Default headers must still be present as well
        assert "User-Agent" in call_kwargs["headers"]

    def test_fetch_403_forbidden(self, mocker):
        """Blocked request (403 Forbidden) is reported as an anti-bot error."""
        mock_response = Mock()
        mock_response.status_code = 403
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.html is None
        assert result.status_code == 403
        assert "403 Forbidden" in result.error
        assert "Anti-bot" in result.error

    def test_fetch_404_not_found(self, mocker):
        """Missing page (404 Not Found)."""
        mock_response = Mock()
        mock_response.status_code = 404
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 404
        assert "404 Not Found" in result.error

    def test_fetch_429_rate_limit(self, mocker):
        """Rate limit hit (429 Too Many Requests)."""
        mock_response = Mock()
        mock_response.status_code = 429
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 429
        assert "429" in result.error
        assert "Rate limit" in result.error

    def test_fetch_500_server_error(self, mocker):
        """Server error (500 Internal Server Error)."""
        mock_response = Mock()
        mock_response.status_code = 500
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 500
        assert "500" in result.error
        assert "Server Error" in result.error

    def test_fetch_503_service_unavailable(self, mocker):
        """Service unavailable (503)."""
        mock_response = Mock()
        mock_response.status_code = 503
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 503
        assert "503" in result.error

    def test_fetch_unknown_status_code(self, mocker):
        """Unknown status code (e.g. 418 I'm a teapot) still fails cleanly."""
        mock_response = Mock()
        mock_response.status_code = 418
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 418
        assert "418" in result.error

    def test_fetch_timeout_error(self, mocker):
        """Timeout during the request."""
        mocker.patch("requests.get", side_effect=Timeout("Connection timed out"))

        result = fetch_http("https://example.com", timeout=10)

        assert result.success is False
        assert result.html is None
        assert "Timeout" in result.error
        assert result.duration_ms is not None

    def test_fetch_request_exception(self, mocker):
        """Generic network exception."""
        mocker.patch(
            "requests.get",
            side_effect=RequestException("Network error"),
        )

        result = fetch_http("https://example.com")

        assert result.success is False
        # Error message is French ("Erreur réseau" = network error)
        assert "Erreur réseau" in result.error
        assert result.duration_ms is not None

    def test_fetch_unexpected_exception(self, mocker):
        """Unexpected (non-requests) exception."""
        mocker.patch("requests.get", side_effect=ValueError("Unexpected error"))

        result = fetch_http("https://example.com")

        assert result.success is False
        # "Erreur inattendue" = unexpected error
        assert "Erreur inattendue" in result.error
        assert result.duration_ms is not None

    def test_fetch_empty_url(self):
        """An empty URL returns an error without any network call."""
        result = fetch_http("")

        assert result.success is False
        # "URL vide" = empty URL
        assert "URL vide" in result.error
        assert result.html is None

    def test_fetch_whitespace_url(self):
        """A whitespace-only URL returns the same empty-URL error."""
        result = fetch_http("   ")

        assert result.success is False
        assert "URL vide" in result.error

    def test_fetch_no_redirects(self, mocker):
        """Request made without following redirects."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "<html>OK</html>"
        mock_get = mocker.patch("requests.get", return_value=mock_response)

        fetch_http("https://example.com", follow_redirects=False)

        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert call_kwargs["allow_redirects"] is False

    def test_fetch_uses_random_user_agent(self, mocker):
        """Verify that a (randomized) User-Agent header is sent."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "<html>OK</html>"
        mock_get = mocker.patch("requests.get", return_value=mock_response)

        fetch_http("https://example.com")

        # Verify a User-Agent is present
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert "User-Agent" in call_kwargs["headers"]
        # The User-Agent must contain "Mozilla" (present in every UA string used)
        assert "Mozilla" in call_kwargs["headers"]["User-Agent"]

    def test_fetch_duration_is_measured(self, mocker):
        """Verify the duration is measured and reported in whole milliseconds."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "<html>OK</html>"
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.duration_ms is not None
        assert isinstance(result.duration_ms, int)
        assert result.duration_ms >= 0

    def test_fetch_large_response(self, mocker):
        """Request with a large response body."""
        mock_response = Mock()
        mock_response.status_code = 200
        # Simulate a big HTML page (~1 MB)
        mock_response.text = "<html>" + ("x" * 1000000) + "</html>"
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is True
        assert len(result.html) > 1000000
|
||||
82
tests/scraping/test_pipeline.py
Executable file
82
tests/scraping/test_pipeline.py
Executable file
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
Tests pour ScrapingPipeline.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
|
||||
from pricewatch.app.db.connection import get_session, init_db, reset_engine
|
||||
from pricewatch.app.db.models import Product
|
||||
from pricewatch.app.scraping.pipeline import ScrapingPipeline
|
||||
|
||||
|
||||
@dataclass
class FakeDbConfig:
    """Minimal stand-in for the app's database config section."""

    # SQLAlchemy database URL (e.g. "sqlite:///:memory:")
    url: str
|
||||
|
||||
|
||||
@dataclass
class FakeAppConfig:
    """Minimal stand-in for the application config used by the pipeline."""

    # Database sub-config
    db: FakeDbConfig
    # Verbose/debug mode flag
    debug: bool = False
    # When False, the pipeline must skip DB persistence entirely
    enable_db: bool = True
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_db_engine():
    """Reset the global DB engine before and after every test.

    Autouse: keeps the module-level engine cache from leaking state
    between tests that use different configs.
    """
    reset_engine()
    yield
    reset_engine()
|
||||
|
||||
|
||||
def test_pipeline_persists_snapshot():
    """The pipeline persists a snapshot into the SQLite database."""
    config = FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"))
    init_db(config)

    snapshot = ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 30, 0),
        title="Produit pipeline",
        price=99.99,
        currency="EUR",
        reference="B08PIPE",
        debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
    )

    pipeline = ScrapingPipeline(config=config)
    product_id = pipeline.process_snapshot(snapshot, save_to_db=True)

    # A product id means the row was created...
    assert product_id is not None

    # ...and it is actually readable back through a new session.
    with get_session(config) as session:
        assert session.query(Product).count() == 1
|
||||
|
||||
|
||||
def test_pipeline_respects_disable_flag():
    """The pipeline skips persistence when enable_db=False.

    Even with save_to_db=True, the config-level flag must win.
    """
    config = FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"), enable_db=False)
    init_db(config)

    snapshot = ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 45, 0),
        title="Produit pipeline",
        price=99.99,
        currency="EUR",
        reference="B08PIPE",
        debug=DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS),
    )

    pipeline = ScrapingPipeline(config=config)
    product_id = pipeline.process_snapshot(snapshot, save_to_db=True)

    # No id returned and nothing written to the database.
    assert product_id is None
    with get_session(config) as session:
        assert session.query(Product).count() == 0
|
||||
388
tests/scraping/test_pw_fetch.py
Executable file
388
tests/scraping/test_pw_fetch.py
Executable file
@@ -0,0 +1,388 @@
|
||||
"""
|
||||
Tests pour pricewatch.app.scraping.pw_fetch
|
||||
|
||||
Teste la récupération Playwright avec mocks pour éviter de lancer vraiment un navigateur.
|
||||
"""
|
||||
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from playwright.sync_api import TimeoutError as PlaywrightTimeout
|
||||
|
||||
from pricewatch.app.scraping.pw_fetch import (
|
||||
PlaywrightFetchResult,
|
||||
fetch_playwright,
|
||||
fetch_with_fallback,
|
||||
)
|
||||
|
||||
|
||||
class TestPlaywrightFetchResult:
    """Tests for the PlaywrightFetchResult container class."""

    def test_success_result(self):
        """Building a successful result keeps html, screenshot and duration."""
        result = PlaywrightFetchResult(
            success=True,
            html="<html>Test</html>",
            screenshot=b"fake_screenshot_bytes",
            duration_ms=2500,
        )

        assert result.success is True
        assert result.html == "<html>Test</html>"
        assert result.screenshot == b"fake_screenshot_bytes"
        assert result.error is None
        assert result.duration_ms == 2500

    def test_error_result(self):
        """An error result may still carry a screenshot for debugging."""
        result = PlaywrightFetchResult(
            success=False,
            error="Timeout",
            screenshot=b"error_screenshot",
            duration_ms=3000,
        )

        assert result.success is False
        assert result.html is None
        assert result.error == "Timeout"
        assert result.screenshot == b"error_screenshot"
        assert result.duration_ms == 3000

    def test_minimal_result(self):
        """Minimal result: every optional field defaults to None."""
        result = PlaywrightFetchResult(success=False)

        assert result.success is False
        assert result.html is None
        assert result.screenshot is None
        assert result.error is None
        assert result.duration_ms is None
|
||||
|
||||
|
||||
class TestFetchPlaywright:
    """Tests for fetch_playwright(); the whole Playwright stack is mocked."""

    @pytest.fixture
    def mock_playwright_stack(self, mocker):
        """Fixture: full mock of the Playwright stack.

        Builds the chain sync_playwright().start() -> chromium.launch()
        -> new_context() -> new_page() and returns the individual mocks
        so tests can assert on any layer.
        """
        # Mock the page
        mock_page = Mock()
        mock_page.content.return_value = "<html><body>Playwright Test</body></html>"
        mock_page.screenshot.return_value = b"fake_screenshot_data"
        mock_page.goto.return_value = Mock(status=200)

        # Mock the context
        mock_context = Mock()
        mock_context.new_page.return_value = mock_page

        # Mock the browser
        mock_browser = Mock()
        mock_browser.new_context.return_value = mock_context

        # Mock playwright chromium
        mock_chromium = Mock()
        mock_chromium.launch.return_value = mock_browser

        # Mock the playwright object
        mock_playwright_obj = Mock()
        mock_playwright_obj.chromium = mock_chromium

        # Mock sync_playwright().start()
        mock_sync_playwright = Mock()
        mock_sync_playwright.start.return_value = mock_playwright_obj

        mocker.patch(
            "pricewatch.app.scraping.pw_fetch.sync_playwright",
            return_value=mock_sync_playwright,
        )

        return {
            "playwright": mock_playwright_obj,
            "browser": mock_browser,
            "context": mock_context,
            "page": mock_page,
        }

    def test_fetch_success(self, mock_playwright_stack):
        """Successful Playwright fetch."""
        result = fetch_playwright("https://example.com")

        assert result.success is True
        assert result.html == "<html><body>Playwright Test</body></html>"
        assert result.screenshot is None  # No screenshot by default
        assert result.error is None
        assert result.duration_ms is not None
        assert result.duration_ms >= 0

        # Verify the page was visited
        mock_playwright_stack["page"].goto.assert_called_once_with(
            "https://example.com", wait_until="domcontentloaded"
        )

    def test_fetch_with_screenshot(self, mock_playwright_stack):
        """Fetch with a screenshot requested."""
        result = fetch_playwright("https://example.com", save_screenshot=True)

        assert result.success is True
        assert result.screenshot == b"fake_screenshot_data"

        # Verify screenshot() was called
        mock_playwright_stack["page"].screenshot.assert_called_once()

    def test_fetch_headful_mode(self, mock_playwright_stack):
        """Headful mode (visible browser)."""
        result = fetch_playwright("https://example.com", headless=False)

        assert result.success is True

        # Verify headless=False was forwarded to chromium.launch
        mock_playwright_stack["playwright"].chromium.launch.assert_called_once()
        call_kwargs = mock_playwright_stack["playwright"].chromium.launch.call_args.kwargs
        assert call_kwargs["headless"] is False

    def test_fetch_with_custom_timeout(self, mock_playwright_stack):
        """Custom timeout."""
        result = fetch_playwright("https://example.com", timeout_ms=30000)

        assert result.success is True

        # Verify set_default_timeout was called with the custom value
        mock_playwright_stack["page"].set_default_timeout.assert_called_once_with(30000)

    def test_fetch_with_wait_for_selector(self, mock_playwright_stack):
        """Waiting on a specific CSS selector."""
        result = fetch_playwright(
            "https://example.com", wait_for_selector=".product-title"
        )

        assert result.success is True

        # Verify wait_for_selector was called (60000 ms is the default timeout)
        mock_playwright_stack["page"].wait_for_selector.assert_called_once_with(
            ".product-title", timeout=60000
        )

    def test_fetch_wait_for_selector_timeout(self, mock_playwright_stack):
        """Timeout while waiting for the selector is non-fatal."""
        # The selector times out but the page itself loaded
        mock_playwright_stack["page"].wait_for_selector.side_effect = PlaywrightTimeout(
            "Selector timeout"
        )

        result = fetch_playwright(
            "https://example.com", wait_for_selector=".non-existent"
        )

        # Must still succeed (wait_for_selector is non-blocking)
        assert result.success is True
        assert result.html is not None

    def test_fetch_empty_url(self):
        """An empty URL returns an error without launching a browser."""
        result = fetch_playwright("")

        assert result.success is False
        # "URL vide" = empty URL
        assert "URL vide" in result.error
        assert result.html is None

    def test_fetch_whitespace_url(self):
        """A whitespace-only URL returns the same empty-URL error."""
        result = fetch_playwright("   ")

        assert result.success is False
        assert "URL vide" in result.error

    def test_fetch_no_response_from_server(self, mock_playwright_stack):
        """No response object from goto() is treated as a failure."""
        mock_playwright_stack["page"].goto.return_value = None

        result = fetch_playwright("https://example.com")

        assert result.success is False
        # "Pas de réponse du serveur" = no response from the server
        assert "Pas de réponse du serveur" in result.error

    def test_fetch_playwright_timeout(self, mock_playwright_stack):
        """Playwright timeout during navigation."""
        mock_playwright_stack["page"].goto.side_effect = PlaywrightTimeout(
            "Navigation timeout"
        )

        result = fetch_playwright("https://example.com", timeout_ms=10000)

        assert result.success is False
        assert "Timeout" in result.error
        assert result.duration_ms is not None

    def test_fetch_playwright_generic_error(self, mock_playwright_stack):
        """Generic Playwright error."""
        mock_playwright_stack["page"].goto.side_effect = Exception(
            "Generic Playwright error"
        )

        result = fetch_playwright("https://example.com")

        assert result.success is False
        # "Erreur Playwright" = Playwright error
        assert "Erreur Playwright" in result.error
        assert result.duration_ms is not None

    def test_fetch_cleanup_on_success(self, mock_playwright_stack):
        """Resources are cleaned up on success."""
        result = fetch_playwright("https://example.com")

        assert result.success is True

        # Verify page, browser and playwright are all released
        mock_playwright_stack["page"].close.assert_called_once()
        mock_playwright_stack["browser"].close.assert_called_once()
        mock_playwright_stack["playwright"].stop.assert_called_once()

    def test_fetch_cleanup_on_error(self, mock_playwright_stack):
        """Resources are cleaned up on error as well."""
        mock_playwright_stack["page"].goto.side_effect = Exception("Test error")

        result = fetch_playwright("https://example.com")

        assert result.success is False

        # Cleanup must happen even when the fetch fails
        mock_playwright_stack["page"].close.assert_called_once()
        mock_playwright_stack["browser"].close.assert_called_once()
        mock_playwright_stack["playwright"].stop.assert_called_once()

    def test_fetch_screenshot_on_error(self, mock_playwright_stack):
        """A screenshot is still captured on error when requested."""
        mock_playwright_stack["page"].goto.side_effect = PlaywrightTimeout("Timeout")

        result = fetch_playwright("https://example.com", save_screenshot=True)

        assert result.success is False
        assert result.screenshot == b"fake_screenshot_data"

        # The screenshot must have been attempted
        mock_playwright_stack["page"].screenshot.assert_called_once()
|
||||
|
||||
|
||||
class TestFetchWithFallback:
    """Tests for fetch_with_fallback() (HTTP first, Playwright fallback)."""

    def test_http_success_no_playwright(self, mocker):
        """If HTTP succeeds, Playwright is never invoked."""
        # Mock a successful fetch_http
        mock_http_result = Mock()
        mock_http_result.success = True
        mock_http_result.html = "<html>HTTP Success</html>"
        mock_http_result.duration_ms = 150

        mocker.patch(
            "pricewatch.app.scraping.http_fetch.fetch_http",
            return_value=mock_http_result,
        )

        # Mock fetch_playwright (must not be called)
        mock_playwright = mocker.patch(
            "pricewatch.app.scraping.pw_fetch.fetch_playwright"
        )

        result = fetch_with_fallback("https://example.com")

        assert result.success is True
        assert result.html == "<html>HTTP Success</html>"
        assert result.duration_ms == 150

        # Playwright must not have been called
        mock_playwright.assert_not_called()

    def test_http_fails_playwright_fallback(self, mocker):
        """If HTTP fails, fall back to Playwright."""
        # Mock a failing fetch_http
        mock_http_result = Mock()
        mock_http_result.success = False
        mock_http_result.error = "403 Forbidden"

        mocker.patch(
            "pricewatch.app.scraping.http_fetch.fetch_http",
            return_value=mock_http_result,
        )

        # Mock a successful fetch_playwright
        mock_playwright_result = PlaywrightFetchResult(
            success=True,
            html="<html>Playwright Success</html>",
            duration_ms=2500,
        )

        mock_playwright = mocker.patch(
            "pricewatch.app.scraping.pw_fetch.fetch_playwright",
            return_value=mock_playwright_result,
        )

        result = fetch_with_fallback("https://example.com")

        assert result.success is True
        assert result.html == "<html>Playwright Success</html>"

        # Playwright must have been called exactly once
        mock_playwright.assert_called_once()

    def test_skip_http_direct_playwright(self, mocker):
        """Direct Playwright mode (without trying HTTP first)."""
        # Mock fetch_http (must not be called)
        mock_http = mocker.patch("pricewatch.app.scraping.http_fetch.fetch_http")

        # Mock fetch_playwright
        mock_playwright_result = PlaywrightFetchResult(
            success=True,
            html="<html>Playwright Direct</html>",
            duration_ms=2500,
        )

        mock_playwright = mocker.patch(
            "pricewatch.app.scraping.pw_fetch.fetch_playwright",
            return_value=mock_playwright_result,
        )

        result = fetch_with_fallback("https://example.com", try_http_first=False)

        assert result.success is True
        assert result.html == "<html>Playwright Direct</html>"

        # HTTP must not have been called
        mock_http.assert_not_called()

        # Playwright must have been called
        mock_playwright.assert_called_once()

    def test_playwright_options_passed(self, mocker):
        """Playwright options are forwarded as keyword arguments."""
        # Mock a failing fetch_http to force the fallback
        mock_http_result = Mock()
        mock_http_result.success = False
        mock_http_result.error = "403 Forbidden"

        mocker.patch(
            "pricewatch.app.scraping.http_fetch.fetch_http",
            return_value=mock_http_result,
        )

        # Mock fetch_playwright
        mock_playwright_result = PlaywrightFetchResult(
            success=True,
            html="<html>OK</html>",
            duration_ms=2500,
        )

        mock_playwright = mocker.patch(
            "pricewatch.app.scraping.pw_fetch.fetch_playwright",
            return_value=mock_playwright_result,
        )

        # Custom options
        options = {"headless": False, "timeout_ms": 30000, "save_screenshot": True}

        result = fetch_with_fallback("https://example.com", playwright_options=options)

        assert result.success is True

        # Verify the options are forwarded to fetch_playwright
        mock_playwright.assert_called_once_with("https://example.com", **options)
|
||||
Binary file not shown.
Reference in New Issue
Block a user