codex
This commit is contained in:
0
tests/scraping/__init__.py
Executable file
0
tests/scraping/__init__.py
Executable file
BIN
tests/scraping/__pycache__/__init__.cpython-313.pyc
Executable file
BIN
tests/scraping/__pycache__/__init__.cpython-313.pyc
Executable file
Binary file not shown.
BIN
tests/scraping/__pycache__/test_http_fetch.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/scraping/__pycache__/test_http_fetch.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
BIN
tests/scraping/__pycache__/test_pipeline.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/scraping/__pycache__/test_pipeline.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
BIN
tests/scraping/__pycache__/test_pw_fetch.cpython-313-pytest-9.0.2.pyc
Executable file
BIN
tests/scraping/__pycache__/test_pw_fetch.cpython-313-pytest-9.0.2.pyc
Executable file
Binary file not shown.
290
tests/scraping/test_http_fetch.py
Executable file
290
tests/scraping/test_http_fetch.py
Executable file
@@ -0,0 +1,290 @@
|
||||
"""
|
||||
Tests pour pricewatch.app.scraping.http_fetch
|
||||
|
||||
Teste la récupération HTTP avec mocks pour éviter les vraies requêtes.
|
||||
"""
|
||||
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from requests.exceptions import RequestException, Timeout
|
||||
|
||||
from pricewatch.app.scraping.http_fetch import FetchResult, fetch_http
|
||||
|
||||
|
||||
class TestFetchResult:
    """Tests for the FetchResult class."""

    def test_success_result(self):
        """Build a successful result and check every field."""
        fields = {
            "success": True,
            "html": "<html>Test</html>",
            "status_code": 200,
            "duration_ms": 150,
        }
        outcome = FetchResult(**fields)

        assert outcome.success is True
        assert outcome.html == "<html>Test</html>"
        # `error` was not supplied, so it is expected to be None.
        assert outcome.error is None
        assert outcome.status_code == 200
        assert outcome.duration_ms == 150

    def test_error_result(self):
        """Build an error result and check every field."""
        fields = {
            "success": False,
            "error": "403 Forbidden",
            "status_code": 403,
            "duration_ms": 100,
        }
        outcome = FetchResult(**fields)

        assert outcome.success is False
        # `html` was not supplied, so it is expected to be None.
        assert outcome.html is None
        assert outcome.error == "403 Forbidden"
        assert outcome.status_code == 403
        assert outcome.duration_ms == 100

    def test_minimal_result(self):
        """Build a result from `success` alone; every other field is None."""
        outcome = FetchResult(success=False)

        assert outcome.success is False
        assert outcome.html is None
        assert outcome.error is None
        assert outcome.status_code is None
        assert outcome.duration_ms is None
|
||||
|
||||
|
||||
class TestFetchHttp:
    """Tests for fetch_http(), with requests.get mocked out."""

    @staticmethod
    def _stub_get(mocker, status_code, text=None):
        """Patch ``requests.get`` with a canned Mock response.

        The response's ``status_code`` is always set; ``text`` is only set
        when a value is supplied. Returns the mock that replaces
        ``requests.get`` so callers can inspect how it was called.
        """
        response = Mock()
        response.status_code = status_code
        if text is not None:
            response.text = text
        return mocker.patch("requests.get", return_value=response)

    def test_fetch_success(self, mocker):
        """Successful HTTP request (200 OK)."""
        self._stub_get(mocker, 200, "<html><body>Test Page</body></html>")

        outcome = fetch_http("https://example.com")

        assert outcome.success is True
        assert outcome.html == "<html><body>Test Page</body></html>"
        assert outcome.status_code == 200
        assert outcome.error is None
        assert outcome.duration_ms is not None
        assert outcome.duration_ms >= 0

    def test_fetch_with_custom_timeout(self, mocker):
        """Request with a custom timeout."""
        get_mock = self._stub_get(mocker, 200, "<html>OK</html>")

        fetch_http("https://example.com", timeout=60)

        # The timeout must be forwarded to requests.get.
        get_mock.assert_called_once()
        passed = get_mock.call_args.kwargs
        assert passed["timeout"] == 60

    def test_fetch_with_custom_headers(self, mocker):
        """Request with custom headers."""
        get_mock = self._stub_get(mocker, 200, "<html>OK</html>")
        extra_headers = {"X-Custom-Header": "test-value"}

        fetch_http("https://example.com", headers=extra_headers)

        # The custom header must be part of the headers that were sent.
        get_mock.assert_called_once()
        sent_headers = get_mock.call_args.kwargs["headers"]
        assert "X-Custom-Header" in sent_headers
        assert sent_headers["X-Custom-Header"] == "test-value"
        # Default headers must still be present.
        assert "User-Agent" in sent_headers

    def test_fetch_403_forbidden(self, mocker):
        """Blocked request (403 Forbidden)."""
        self._stub_get(mocker, 403)

        outcome = fetch_http("https://example.com")

        assert outcome.success is False
        assert outcome.html is None
        assert outcome.status_code == 403
        assert "403 Forbidden" in outcome.error
        assert "Anti-bot" in outcome.error

    def test_fetch_404_not_found(self, mocker):
        """Page not found (404 Not Found)."""
        self._stub_get(mocker, 404)

        outcome = fetch_http("https://example.com")

        assert outcome.success is False
        assert outcome.status_code == 404
        assert "404 Not Found" in outcome.error

    def test_fetch_429_rate_limit(self, mocker):
        """Rate limit reached (429 Too Many Requests)."""
        self._stub_get(mocker, 429)

        outcome = fetch_http("https://example.com")

        assert outcome.success is False
        assert outcome.status_code == 429
        assert "429" in outcome.error
        assert "Rate limit" in outcome.error

    def test_fetch_500_server_error(self, mocker):
        """Server error (500 Internal Server Error)."""
        self._stub_get(mocker, 500)

        outcome = fetch_http("https://example.com")

        assert outcome.success is False
        assert outcome.status_code == 500
        assert "500" in outcome.error
        assert "Server Error" in outcome.error

    def test_fetch_503_service_unavailable(self, mocker):
        """Service unavailable (503)."""
        self._stub_get(mocker, 503)

        outcome = fetch_http("https://example.com")

        assert outcome.success is False
        assert outcome.status_code == 503
        assert "503" in outcome.error

    def test_fetch_unknown_status_code(self, mocker):
        """Unknown status code (e.g. 418 I'm a teapot)."""
        self._stub_get(mocker, 418)

        outcome = fetch_http("https://example.com")

        assert outcome.success is False
        assert outcome.status_code == 418
        assert "418" in outcome.error

    def test_fetch_timeout_error(self, mocker):
        """Timeout raised while performing the request."""
        failure = Timeout("Connection timed out")
        mocker.patch("requests.get", side_effect=failure)

        outcome = fetch_http("https://example.com", timeout=10)

        assert outcome.success is False
        assert outcome.html is None
        assert "Timeout" in outcome.error
        assert outcome.duration_ms is not None

    def test_fetch_request_exception(self, mocker):
        """Generic network exception."""
        failure = RequestException("Network error")
        mocker.patch("requests.get", side_effect=failure)

        outcome = fetch_http("https://example.com")

        assert outcome.success is False
        assert "Erreur réseau" in outcome.error
        assert outcome.duration_ms is not None

    def test_fetch_unexpected_exception(self, mocker):
        """Unexpected (non-requests) exception."""
        failure = ValueError("Unexpected error")
        mocker.patch("requests.get", side_effect=failure)

        outcome = fetch_http("https://example.com")

        assert outcome.success is False
        assert "Erreur inattendue" in outcome.error
        assert outcome.duration_ms is not None

    def test_fetch_empty_url(self):
        """An empty URL produces an error result."""
        outcome = fetch_http("")

        assert outcome.success is False
        assert "URL vide" in outcome.error
        assert outcome.html is None

    def test_fetch_whitespace_url(self):
        """A whitespace-only URL produces an error result."""
        outcome = fetch_http(" ")

        assert outcome.success is False
        assert "URL vide" in outcome.error

    def test_fetch_no_redirects(self, mocker):
        """Request that must not follow redirects."""
        get_mock = self._stub_get(mocker, 200, "<html>OK</html>")

        fetch_http("https://example.com", follow_redirects=False)

        get_mock.assert_called_once()
        passed = get_mock.call_args.kwargs
        assert passed["allow_redirects"] is False

    def test_fetch_uses_random_user_agent(self, mocker):
        """A User-Agent header is sent with the request."""
        get_mock = self._stub_get(mocker, 200, "<html>OK</html>")

        fetch_http("https://example.com")

        # A User-Agent must be present in the outgoing headers.
        get_mock.assert_called_once()
        sent_headers = get_mock.call_args.kwargs["headers"]
        assert "User-Agent" in sent_headers
        # The test expects every candidate User-Agent to contain "Mozilla".
        assert "Mozilla" in sent_headers["User-Agent"]

    def test_fetch_duration_is_measured(self, mocker):
        """The request duration is reported as a non-negative int."""
        self._stub_get(mocker, 200, "<html>OK</html>")

        outcome = fetch_http("https://example.com")

        elapsed = outcome.duration_ms
        assert elapsed is not None
        assert isinstance(elapsed, int)
        assert elapsed >= 0

    def test_fetch_large_response(self, mocker):
        """Request whose response body is large."""
        # Simulate a big HTML page (about 1 MB).
        big_page = "<html>" + ("x" * 1000000) + "</html>"
        self._stub_get(mocker, 200, big_page)

        outcome = fetch_http("https://example.com")

        assert outcome.success is True
        assert len(outcome.html) > 1000000
|
||||
82
tests/scraping/test_pipeline.py
Executable file
82
tests/scraping/test_pipeline.py
Executable file
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
Tests pour ScrapingPipeline.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from pricewatch.app.core.schema import DebugInfo, DebugStatus, FetchMethod, ProductSnapshot
|
||||
from pricewatch.app.db.connection import get_session, init_db, reset_engine
|
||||
from pricewatch.app.db.models import Product
|
||||
from pricewatch.app.scraping.pipeline import ScrapingPipeline
|
||||
|
||||
|
||||
@dataclass
class FakeDbConfig:
    """Test stand-in for the database section of the app configuration."""

    # Database URL; the tests in this module pass "sqlite:///:memory:".
    url: str
|
||||
|
||||
|
||||
@dataclass
class FakeAppConfig:
    """Test stand-in for the application configuration object."""

    # Database settings (a FakeDbConfig carrying the connection URL).
    db: FakeDbConfig
    # Debug flag; off by default.
    debug: bool = False
    # Persistence switch; one test sets it to False and expects nothing
    # to be written to the database.
    enable_db: bool = True
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_db_engine():
    """Reset the global engine before and after every test in this module."""
    # Setup: reset before the test body runs.
    reset_engine()
    yield
    # Teardown: reset again once the test has finished.
    reset_engine()
|
||||
|
||||
|
||||
def test_pipeline_persists_snapshot():
    """The pipeline persists a snapshot into the SQLite database."""
    app_config = FakeAppConfig(db=FakeDbConfig(url="sqlite:///:memory:"))
    init_db(app_config)

    debug_info = DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS)
    snapshot = ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 30, 0),
        title="Produit pipeline",
        price=99.99,
        currency="EUR",
        reference="B08PIPE",
        debug=debug_info,
    )

    saved_id = ScrapingPipeline(config=app_config).process_snapshot(
        snapshot, save_to_db=True
    )

    assert saved_id is not None

    # Exactly one product row must exist after processing.
    with get_session(app_config) as session:
        row_count = session.query(Product).count()
        assert row_count == 1
|
||||
|
||||
|
||||
def test_pipeline_respects_disable_flag():
    """The pipeline skips persistence when enable_db is False."""
    app_config = FakeAppConfig(
        db=FakeDbConfig(url="sqlite:///:memory:"),
        enable_db=False,
    )
    init_db(app_config)

    debug_info = DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS)
    snapshot = ProductSnapshot(
        source="amazon",
        url="https://example.com/product",
        fetched_at=datetime(2026, 1, 14, 12, 45, 0),
        title="Produit pipeline",
        price=99.99,
        currency="EUR",
        reference="B08PIPE",
        debug=debug_info,
    )

    # save_to_db=True is requested, but the config flag is off.
    saved_id = ScrapingPipeline(config=app_config).process_snapshot(
        snapshot, save_to_db=True
    )

    assert saved_id is None
    # No product row may have been written.
    with get_session(app_config) as session:
        row_count = session.query(Product).count()
        assert row_count == 0
|
||||
388
tests/scraping/test_pw_fetch.py
Executable file
388
tests/scraping/test_pw_fetch.py
Executable file
@@ -0,0 +1,388 @@
|
||||
"""
|
||||
Tests pour pricewatch.app.scraping.pw_fetch
|
||||
|
||||
Teste la récupération Playwright avec mocks pour éviter de lancer vraiment un navigateur.
|
||||
"""
|
||||
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from playwright.sync_api import TimeoutError as PlaywrightTimeout
|
||||
|
||||
from pricewatch.app.scraping.pw_fetch import (
|
||||
PlaywrightFetchResult,
|
||||
fetch_playwright,
|
||||
fetch_with_fallback,
|
||||
)
|
||||
|
||||
|
||||
class TestPlaywrightFetchResult:
    """Tests for the PlaywrightFetchResult class."""

    def test_success_result(self):
        """Build a successful result and check every field."""
        fields = {
            "success": True,
            "html": "<html>Test</html>",
            "screenshot": b"fake_screenshot_bytes",
            "duration_ms": 2500,
        }
        outcome = PlaywrightFetchResult(**fields)

        assert outcome.success is True
        assert outcome.html == "<html>Test</html>"
        assert outcome.screenshot == b"fake_screenshot_bytes"
        # `error` was not supplied, so it is expected to be None.
        assert outcome.error is None
        assert outcome.duration_ms == 2500

    def test_error_result(self):
        """Build an error result and check every field."""
        fields = {
            "success": False,
            "error": "Timeout",
            "screenshot": b"error_screenshot",
            "duration_ms": 3000,
        }
        outcome = PlaywrightFetchResult(**fields)

        assert outcome.success is False
        # `html` was not supplied, so it is expected to be None.
        assert outcome.html is None
        assert outcome.error == "Timeout"
        assert outcome.screenshot == b"error_screenshot"
        assert outcome.duration_ms == 3000

    def test_minimal_result(self):
        """Build a result from `success` alone; every other field is None."""
        outcome = PlaywrightFetchResult(success=False)

        assert outcome.success is False
        assert outcome.html is None
        assert outcome.screenshot is None
        assert outcome.error is None
        assert outcome.duration_ms is None
|
||||
|
||||
|
||||
class TestFetchPlaywright:
    """Tests for fetch_playwright()."""

    @pytest.fixture
    def mock_playwright_stack(self, mocker):
        """Fixture: mock the whole Playwright stack.

        Patches ``sync_playwright`` inside ``pricewatch.app.scraping.pw_fetch``
        and returns a dict exposing the ``playwright``, ``browser``,
        ``context`` and ``page`` mocks so tests can tweak or inspect them.
        """
        # Page: canned HTML, canned screenshot bytes, and a 200 response.
        page = Mock(
            **{
                "content.return_value": "<html><body>Playwright Test</body></html>",
                "screenshot.return_value": b"fake_screenshot_data",
                "goto.return_value": Mock(status=200),
            }
        )

        # Context -> page
        context = Mock()
        context.new_page.return_value = page

        # Browser -> context
        browser = Mock()
        browser.new_context.return_value = context

        # Playwright object: chromium.launch() -> browser
        playwright_obj = Mock()
        playwright_obj.chromium.launch.return_value = browser

        # sync_playwright().start() -> playwright object
        sync_handle = Mock()
        sync_handle.start.return_value = playwright_obj
        mocker.patch(
            "pricewatch.app.scraping.pw_fetch.sync_playwright",
            return_value=sync_handle,
        )

        return {
            "playwright": playwright_obj,
            "browser": browser,
            "context": context,
            "page": page,
        }

    def test_fetch_success(self, mock_playwright_stack):
        """Successful Playwright fetch."""
        outcome = fetch_playwright("https://example.com")

        assert outcome.success is True
        assert outcome.html == "<html><body>Playwright Test</body></html>"
        assert outcome.screenshot is None  # no screenshot by default
        assert outcome.error is None
        assert outcome.duration_ms is not None
        assert outcome.duration_ms >= 0

        # The page must have been navigated to the URL.
        page = mock_playwright_stack["page"]
        page.goto.assert_called_once_with(
            "https://example.com", wait_until="domcontentloaded"
        )

    def test_fetch_with_screenshot(self, mock_playwright_stack):
        """Fetch with a screenshot requested."""
        outcome = fetch_playwright("https://example.com", save_screenshot=True)

        assert outcome.success is True
        assert outcome.screenshot == b"fake_screenshot_data"

        # screenshot() must have been called.
        mock_playwright_stack["page"].screenshot.assert_called_once()

    def test_fetch_headful_mode(self, mock_playwright_stack):
        """Headful mode (visible browser)."""
        outcome = fetch_playwright("https://example.com", headless=False)

        assert outcome.success is True

        # headless=False must have been passed to chromium.launch().
        launch = mock_playwright_stack["playwright"].chromium.launch
        launch.assert_called_once()
        assert launch.call_args.kwargs["headless"] is False

    def test_fetch_with_custom_timeout(self, mock_playwright_stack):
        """Custom timeout."""
        outcome = fetch_playwright("https://example.com", timeout_ms=30000)

        assert outcome.success is True

        # set_default_timeout must have been called with the custom value.
        page = mock_playwright_stack["page"]
        page.set_default_timeout.assert_called_once_with(30000)

    def test_fetch_with_wait_for_selector(self, mock_playwright_stack):
        """Wait for a specific CSS selector."""
        outcome = fetch_playwright(
            "https://example.com",
            wait_for_selector=".product-title",
        )

        assert outcome.success is True

        # wait_for_selector must have been called with the selector.
        page = mock_playwright_stack["page"]
        page.wait_for_selector.assert_called_once_with(
            ".product-title", timeout=60000
        )

    def test_fetch_wait_for_selector_timeout(self, mock_playwright_stack):
        """Timeout while waiting for the selector."""
        # The selector wait times out, but the fetch is expected to continue.
        page = mock_playwright_stack["page"]
        page.wait_for_selector.side_effect = PlaywrightTimeout("Selector timeout")

        outcome = fetch_playwright(
            "https://example.com",
            wait_for_selector=".non-existent",
        )

        # Must still succeed (the selector wait is treated as non-blocking).
        assert outcome.success is True
        assert outcome.html is not None

    def test_fetch_empty_url(self):
        """An empty URL produces an error result."""
        outcome = fetch_playwright("")

        assert outcome.success is False
        assert "URL vide" in outcome.error
        assert outcome.html is None

    def test_fetch_whitespace_url(self):
        """A whitespace-only URL produces an error result."""
        outcome = fetch_playwright(" ")

        assert outcome.success is False
        assert "URL vide" in outcome.error

    def test_fetch_no_response_from_server(self, mock_playwright_stack):
        """No response from the server (goto returns None)."""
        mock_playwright_stack["page"].goto.return_value = None

        outcome = fetch_playwright("https://example.com")

        assert outcome.success is False
        assert "Pas de réponse du serveur" in outcome.error

    def test_fetch_playwright_timeout(self, mock_playwright_stack):
        """Playwright timeout during navigation."""
        page = mock_playwright_stack["page"]
        page.goto.side_effect = PlaywrightTimeout("Navigation timeout")

        outcome = fetch_playwright("https://example.com", timeout_ms=10000)

        assert outcome.success is False
        assert "Timeout" in outcome.error
        assert outcome.duration_ms is not None

    def test_fetch_playwright_generic_error(self, mock_playwright_stack):
        """Generic Playwright error."""
        page = mock_playwright_stack["page"]
        page.goto.side_effect = Exception("Generic Playwright error")

        outcome = fetch_playwright("https://example.com")

        assert outcome.success is False
        assert "Erreur Playwright" in outcome.error
        assert outcome.duration_ms is not None

    def test_fetch_cleanup_on_success(self, mock_playwright_stack):
        """Resources are released after a successful fetch."""
        outcome = fetch_playwright("https://example.com")

        assert outcome.success is True

        # Page, browser and playwright must each be shut down exactly once.
        stack = mock_playwright_stack
        stack["page"].close.assert_called_once()
        stack["browser"].close.assert_called_once()
        stack["playwright"].stop.assert_called_once()

    def test_fetch_cleanup_on_error(self, mock_playwright_stack):
        """Resources are released after a failed fetch."""
        stack = mock_playwright_stack
        stack["page"].goto.side_effect = Exception("Test error")

        outcome = fetch_playwright("https://example.com")

        assert outcome.success is False

        # Resources must be released even when an error occurred.
        stack["page"].close.assert_called_once()
        stack["browser"].close.assert_called_once()
        stack["playwright"].stop.assert_called_once()

    def test_fetch_screenshot_on_error(self, mock_playwright_stack):
        """A screenshot is captured even when the fetch fails."""
        page = mock_playwright_stack["page"]
        page.goto.side_effect = PlaywrightTimeout("Timeout")

        outcome = fetch_playwright("https://example.com", save_screenshot=True)

        assert outcome.success is False
        assert outcome.screenshot == b"fake_screenshot_data"

        # A screenshot must have been attempted.
        page.screenshot.assert_called_once()
|
||||
|
||||
|
||||
class TestFetchWithFallback:
    """Tests for fetch_with_fallback()."""

    # Dotted paths of the two fetchers replaced by mocks in these tests.
    _HTTP_TARGET = "pricewatch.app.scraping.http_fetch.fetch_http"
    _PW_TARGET = "pricewatch.app.scraping.pw_fetch.fetch_playwright"

    def test_http_success_no_playwright(self, mocker):
        """When HTTP succeeds, Playwright is not called."""
        # fetch_http is mocked to succeed.
        http_outcome = Mock(
            success=True,
            html="<html>HTTP Success</html>",
            duration_ms=150,
        )
        mocker.patch(self._HTTP_TARGET, return_value=http_outcome)

        # fetch_playwright is mocked too (it should not be called).
        pw_mock = mocker.patch(self._PW_TARGET)

        outcome = fetch_with_fallback("https://example.com")

        assert outcome.success is True
        assert outcome.html == "<html>HTTP Success</html>"
        assert outcome.duration_ms == 150

        # Playwright must not have been called.
        pw_mock.assert_not_called()

    def test_http_fails_playwright_fallback(self, mocker):
        """When HTTP fails, fall back to Playwright."""
        # fetch_http is mocked to fail.
        http_outcome = Mock(success=False, error="403 Forbidden")
        mocker.patch(self._HTTP_TARGET, return_value=http_outcome)

        # fetch_playwright is mocked to succeed.
        pw_outcome = PlaywrightFetchResult(
            success=True,
            html="<html>Playwright Success</html>",
            duration_ms=2500,
        )
        pw_mock = mocker.patch(self._PW_TARGET, return_value=pw_outcome)

        outcome = fetch_with_fallback("https://example.com")

        assert outcome.success is True
        assert outcome.html == "<html>Playwright Success</html>"

        # Playwright must have been called.
        pw_mock.assert_called_once()

    def test_skip_http_direct_playwright(self, mocker):
        """Direct Playwright mode (without trying HTTP first)."""
        # fetch_http is mocked (it should not be called).
        http_mock = mocker.patch(self._HTTP_TARGET)

        # fetch_playwright is mocked to succeed.
        pw_outcome = PlaywrightFetchResult(
            success=True,
            html="<html>Playwright Direct</html>",
            duration_ms=2500,
        )
        pw_mock = mocker.patch(self._PW_TARGET, return_value=pw_outcome)

        outcome = fetch_with_fallback("https://example.com", try_http_first=False)

        assert outcome.success is True
        assert outcome.html == "<html>Playwright Direct</html>"

        # HTTP must not have been called.
        http_mock.assert_not_called()

        # Playwright must have been called.
        pw_mock.assert_called_once()

    def test_playwright_options_passed(self, mocker):
        """Playwright options are forwarded correctly."""
        # fetch_http is mocked to fail.
        http_outcome = Mock(success=False, error="403 Forbidden")
        mocker.patch(self._HTTP_TARGET, return_value=http_outcome)

        # fetch_playwright is mocked to succeed.
        pw_outcome = PlaywrightFetchResult(
            success=True,
            html="<html>OK</html>",
            duration_ms=2500,
        )
        pw_mock = mocker.patch(self._PW_TARGET, return_value=pw_outcome)

        # Custom options.
        options = {"headless": False, "timeout_ms": 30000, "save_screenshot": True}

        outcome = fetch_with_fallback("https://example.com", playwright_options=options)

        assert outcome.success is True

        # The options must be passed through to fetch_playwright as keywords.
        pw_mock.assert_called_once_with("https://example.com", **options)
|
||||
Reference in New Issue
Block a user