""" Tests pour pricewatch.app.scraping.pw_fetch Teste la récupération Playwright avec mocks pour éviter de lancer vraiment un navigateur. """ from unittest.mock import Mock, patch import pytest from playwright.sync_api import TimeoutError as PlaywrightTimeout from pricewatch.app.scraping.pw_fetch import ( PlaywrightFetchResult, fetch_playwright, fetch_with_fallback, ) class TestPlaywrightFetchResult: """Tests pour la classe PlaywrightFetchResult.""" def test_success_result(self): """Création d'un résultat réussi.""" result = PlaywrightFetchResult( success=True, html="Test", screenshot=b"fake_screenshot_bytes", duration_ms=2500, ) assert result.success is True assert result.html == "Test" assert result.screenshot == b"fake_screenshot_bytes" assert result.error is None assert result.duration_ms == 2500 def test_error_result(self): """Création d'un résultat d'erreur.""" result = PlaywrightFetchResult( success=False, error="Timeout", screenshot=b"error_screenshot", duration_ms=3000, ) assert result.success is False assert result.html is None assert result.error == "Timeout" assert result.screenshot == b"error_screenshot" assert result.duration_ms == 3000 def test_minimal_result(self): """Résultat minimal.""" result = PlaywrightFetchResult(success=False) assert result.success is False assert result.html is None assert result.screenshot is None assert result.error is None assert result.duration_ms is None class TestFetchPlaywright: """Tests pour fetch_playwright().""" @pytest.fixture def mock_playwright_stack(self, mocker): """Fixture: Mock complet de la stack Playwright.""" # Mock de la page mock_page = Mock() mock_page.content.return_value = "Playwright Test" mock_page.screenshot.return_value = b"fake_screenshot_data" mock_page.goto.return_value = Mock(status=200) # Mock du context mock_context = Mock() mock_context.new_page.return_value = mock_page # Mock du browser mock_browser = Mock() mock_browser.new_context.return_value = mock_context # Mock playwright chromium mock_chromium = Mock() mock_chromium.launch.return_value = mock_browser # Mock playwright mock_playwright_obj = Mock() mock_playwright_obj.chromium = mock_chromium # Mock sync_playwright().start() mock_sync_playwright = Mock() mock_sync_playwright.start.return_value = mock_playwright_obj mocker.patch( "pricewatch.app.scraping.pw_fetch.sync_playwright", return_value=mock_sync_playwright, ) return { "playwright": mock_playwright_obj, "browser": mock_browser, "context": mock_context, "page": mock_page, } def test_fetch_success(self, mock_playwright_stack): """Récupération Playwright réussie.""" result = fetch_playwright("https://example.com") assert result.success is True assert result.html == "Playwright Test" assert result.screenshot is None # Par défaut pas de screenshot assert result.error is None assert result.duration_ms is not None assert result.duration_ms >= 0 # Vérifier que la page a été visitée mock_playwright_stack["page"].goto.assert_called_once_with( "https://example.com", wait_until="domcontentloaded" ) def test_fetch_with_screenshot(self, mock_playwright_stack): """Récupération avec screenshot.""" result = fetch_playwright("https://example.com", save_screenshot=True) assert result.success is True assert result.screenshot == b"fake_screenshot_data" # Vérifier que screenshot() a été appelé mock_playwright_stack["page"].screenshot.assert_called_once() def test_fetch_headful_mode(self, mock_playwright_stack): """Mode headful (navigateur visible).""" result = fetch_playwright("https://example.com", headless=False) assert result.success is True # Vérifier que headless=False a été passé mock_playwright_stack["playwright"].chromium.launch.assert_called_once() call_kwargs = mock_playwright_stack["playwright"].chromium.launch.call_args.kwargs assert call_kwargs["headless"] is False def test_fetch_with_custom_timeout(self, mock_playwright_stack): """Timeout personnalisé.""" result = fetch_playwright("https://example.com", timeout_ms=30000) assert result.success is True # Vérifier que set_default_timeout a été appelé mock_playwright_stack["page"].set_default_timeout.assert_called_once_with(30000) def test_fetch_with_wait_for_selector(self, mock_playwright_stack): """Attente d'un sélecteur CSS spécifique.""" result = fetch_playwright( "https://example.com", wait_for_selector=".product-title" ) assert result.success is True # Vérifier que wait_for_selector a été appelé mock_playwright_stack["page"].wait_for_selector.assert_called_once_with( ".product-title", timeout=60000 ) def test_fetch_wait_for_selector_timeout(self, mock_playwright_stack): """Timeout lors de l'attente du sélecteur.""" # Le sélecteur timeout mais la page continue mock_playwright_stack["page"].wait_for_selector.side_effect = PlaywrightTimeout( "Selector timeout" ) result = fetch_playwright( "https://example.com", wait_for_selector=".non-existent" ) # Doit quand même réussir (le wait_for_selector est non-bloquant) assert result.success is True assert result.html is not None def test_fetch_empty_url(self): """URL vide retourne une erreur.""" result = fetch_playwright("") assert result.success is False assert "URL vide" in result.error assert result.html is None def test_fetch_whitespace_url(self): """URL avec espaces retourne une erreur.""" result = fetch_playwright(" ") assert result.success is False assert "URL vide" in result.error def test_fetch_no_response_from_server(self, mock_playwright_stack): """Pas de réponse du serveur.""" mock_playwright_stack["page"].goto.return_value = None result = fetch_playwright("https://example.com") assert result.success is False assert "Pas de réponse du serveur" in result.error def test_fetch_playwright_timeout(self, mock_playwright_stack): """Timeout Playwright lors de la navigation.""" mock_playwright_stack["page"].goto.side_effect = PlaywrightTimeout( "Navigation timeout" ) result = fetch_playwright("https://example.com", timeout_ms=10000) assert result.success is False assert "Timeout" in result.error assert result.duration_ms is not None def test_fetch_playwright_generic_error(self, mock_playwright_stack): """Erreur générique Playwright.""" mock_playwright_stack["page"].goto.side_effect = Exception( "Generic Playwright error" ) result = fetch_playwright("https://example.com") assert result.success is False assert "Erreur Playwright" in result.error assert result.duration_ms is not None def test_fetch_cleanup_on_success(self, mock_playwright_stack): """Nettoyage des ressources sur succès.""" result = fetch_playwright("https://example.com") assert result.success is True # Vérifier que les ressources sont nettoyées mock_playwright_stack["page"].close.assert_called_once() mock_playwright_stack["browser"].close.assert_called_once() mock_playwright_stack["playwright"].stop.assert_called_once() def test_fetch_cleanup_on_error(self, mock_playwright_stack): """Nettoyage des ressources sur erreur.""" mock_playwright_stack["page"].goto.side_effect = Exception("Test error") result = fetch_playwright("https://example.com") assert result.success is False # Vérifier que les ressources sont nettoyées même en cas d'erreur mock_playwright_stack["page"].close.assert_called_once() mock_playwright_stack["browser"].close.assert_called_once() mock_playwright_stack["playwright"].stop.assert_called_once() def test_fetch_screenshot_on_error(self, mock_playwright_stack): """Screenshot capturé même en cas d'erreur.""" mock_playwright_stack["page"].goto.side_effect = PlaywrightTimeout("Timeout") result = fetch_playwright("https://example.com", save_screenshot=True) assert result.success is False assert result.screenshot == b"fake_screenshot_data" # Screenshot doit avoir été tenté mock_playwright_stack["page"].screenshot.assert_called_once() class TestFetchWithFallback: """Tests pour fetch_with_fallback().""" def test_http_success_no_playwright(self, mocker): """Si HTTP réussit, Playwright n'est pas appelé.""" # Mock fetch_http qui réussit mock_http_result = Mock() mock_http_result.success = True mock_http_result.html = "HTTP Success" mock_http_result.duration_ms = 150 mocker.patch( "pricewatch.app.scraping.http_fetch.fetch_http", return_value=mock_http_result, ) # Mock fetch_playwright (ne devrait pas être appelé) mock_playwright = mocker.patch( "pricewatch.app.scraping.pw_fetch.fetch_playwright" ) result = fetch_with_fallback("https://example.com") assert result.success is True assert result.html == "HTTP Success" assert result.duration_ms == 150 # Playwright ne doit pas être appelé mock_playwright.assert_not_called() def test_http_fails_playwright_fallback(self, mocker): """Si HTTP échoue, fallback vers Playwright.""" # Mock fetch_http qui échoue mock_http_result = Mock() mock_http_result.success = False mock_http_result.error = "403 Forbidden" mocker.patch( "pricewatch.app.scraping.http_fetch.fetch_http", return_value=mock_http_result, ) # Mock fetch_playwright qui réussit mock_playwright_result = PlaywrightFetchResult( success=True, html="Playwright Success", duration_ms=2500, ) mock_playwright = mocker.patch( "pricewatch.app.scraping.pw_fetch.fetch_playwright", return_value=mock_playwright_result, ) result = fetch_with_fallback("https://example.com") assert result.success is True assert result.html == "Playwright Success" # Playwright doit avoir été appelé mock_playwright.assert_called_once() def test_skip_http_direct_playwright(self, mocker): """Mode Playwright direct (sans essayer HTTP d'abord).""" # Mock fetch_http (ne devrait pas être appelé) mock_http = mocker.patch("pricewatch.app.scraping.http_fetch.fetch_http") # Mock fetch_playwright mock_playwright_result = PlaywrightFetchResult( success=True, html="Playwright Direct", duration_ms=2500, ) mock_playwright = mocker.patch( "pricewatch.app.scraping.pw_fetch.fetch_playwright", return_value=mock_playwright_result, ) result = fetch_with_fallback("https://example.com", try_http_first=False) assert result.success is True assert result.html == "Playwright Direct" # HTTP ne doit pas être appelé mock_http.assert_not_called() # Playwright doit avoir été appelé mock_playwright.assert_called_once() def test_playwright_options_passed(self, mocker): """Options Playwright passées correctement.""" # Mock fetch_http qui échoue mock_http_result = Mock() mock_http_result.success = False mock_http_result.error = "403 Forbidden" mocker.patch( "pricewatch.app.scraping.http_fetch.fetch_http", return_value=mock_http_result, ) # Mock fetch_playwright mock_playwright_result = PlaywrightFetchResult( success=True, html="OK", duration_ms=2500, ) mock_playwright = mocker.patch( "pricewatch.app.scraping.pw_fetch.fetch_playwright", return_value=mock_playwright_result, ) # Options personnalisées options = {"headless": False, "timeout_ms": 30000, "save_screenshot": True} result = fetch_with_fallback("https://example.com", playwright_options=options) assert result.success is True # Vérifier que les options sont passées à fetch_playwright mock_playwright.assert_called_once_with("https://example.com", **options)