"""
Tests for pricewatch.app.scraping.http_fetch.

Exercises HTTP fetching with mocks so that no real requests are made.
"""

from unittest.mock import Mock, patch

import pytest
import requests
from requests.exceptions import RequestException, Timeout

from pricewatch.app.scraping.http_fetch import FetchResult, fetch_http


class TestFetchResult:
    """Tests for the FetchResult class."""

    def test_success_result(self):
        """Build a successful result and check every field."""
        result = FetchResult(
            success=True,
            html="Test",
            status_code=200,
            duration_ms=150,
        )

        assert result.success is True
        assert result.html == "Test"
        assert result.error is None
        assert result.status_code == 200
        assert result.duration_ms == 150

    def test_error_result(self):
        """Build an error result and check every field."""
        result = FetchResult(
            success=False,
            error="403 Forbidden",
            status_code=403,
            duration_ms=100,
        )

        assert result.success is False
        assert result.html is None
        assert result.error == "403 Forbidden"
        assert result.status_code == 403
        assert result.duration_ms == 100

    def test_minimal_result(self):
        """Build a result from ``success`` only; other fields default to None."""
        result = FetchResult(success=False)

        assert result.success is False
        assert result.html is None
        assert result.error is None
        assert result.status_code is None
        assert result.duration_ms is None


class TestFetchHttp:
    """Tests for the fetch_http() function.

    Every test replaces ``requests.get`` through the ``mocker`` fixture, so
    nothing here touches the network.
    """

    def test_fetch_success(self, mocker):
        """Successful HTTP request (200 OK)."""
        # Stand-in for the response returned by requests.get
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "Test Page"
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is True
        assert result.html == "Test Page"
        assert result.status_code == 200
        assert result.error is None
        assert result.duration_ms is not None
        assert result.duration_ms >= 0

    def test_fetch_with_custom_timeout(self, mocker):
        """Request with a caller-supplied timeout."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "OK"
        mock_get = mocker.patch("requests.get", return_value=mock_response)

        fetch_http("https://example.com", timeout=60)

        # The timeout must be forwarded to requests.get
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert call_kwargs["timeout"] == 60

    def test_fetch_with_custom_headers(self, mocker):
        """Request with caller-supplied headers."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "OK"
        mock_get = mocker.patch("requests.get", return_value=mock_response)

        custom_headers = {"X-Custom-Header": "test-value"}
        fetch_http("https://example.com", headers=custom_headers)

        # The custom headers must be part of the outgoing request
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert "X-Custom-Header" in call_kwargs["headers"]
        assert call_kwargs["headers"]["X-Custom-Header"] == "test-value"
        # Default headers must still be present alongside the custom ones
        assert "User-Agent" in call_kwargs["headers"]

    def test_fetch_403_forbidden(self, mocker):
        """Blocked request (403 Forbidden)."""
        mock_response = Mock()
        mock_response.status_code = 403
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.html is None
        assert result.status_code == 403
        assert "403 Forbidden" in result.error
        assert "Anti-bot" in result.error

    def test_fetch_404_not_found(self, mocker):
        """Missing page (404 Not Found)."""
        mock_response = Mock()
        mock_response.status_code = 404
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 404
        assert "404 Not Found" in result.error

    def test_fetch_429_rate_limit(self, mocker):
        """Rate limit reached (429 Too Many Requests)."""
        mock_response = Mock()
        mock_response.status_code = 429
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 429
        assert "429" in result.error
        assert "Rate limit" in result.error

    def test_fetch_500_server_error(self, mocker):
        """Server error (500 Internal Server Error)."""
        mock_response = Mock()
        mock_response.status_code = 500
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 500
        assert "500" in result.error
        assert "Server Error" in result.error

    def test_fetch_503_service_unavailable(self, mocker):
        """Service unavailable (503)."""
        mock_response = Mock()
        mock_response.status_code = 503
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 503
        assert "503" in result.error

    def test_fetch_unknown_status_code(self, mocker):
        """Status code without a dedicated message (e.g. 418 I'm a teapot)."""
        mock_response = Mock()
        mock_response.status_code = 418
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 418
        assert "418" in result.error

    def test_fetch_timeout_error(self, mocker):
        """Timeout raised while performing the request."""
        mocker.patch("requests.get", side_effect=Timeout("Connection timed out"))

        result = fetch_http("https://example.com", timeout=10)

        assert result.success is False
        assert result.html is None
        assert "Timeout" in result.error
        assert result.duration_ms is not None

    def test_fetch_request_exception(self, mocker):
        """Generic network exception."""
        mocker.patch(
            "requests.get",
            side_effect=RequestException("Network error"),
        )

        result = fetch_http("https://example.com")

        assert result.success is False
        assert "Erreur réseau" in result.error
        assert result.duration_ms is not None

    def test_fetch_unexpected_exception(self, mocker):
        """Exception that is not a requests error."""
        mocker.patch("requests.get", side_effect=ValueError("Unexpected error"))

        result = fetch_http("https://example.com")

        assert result.success is False
        assert "Erreur inattendue" in result.error
        assert result.duration_ms is not None

    def test_fetch_empty_url(self):
        """An empty URL yields an error result."""
        result = fetch_http("")

        assert result.success is False
        assert "URL vide" in result.error
        assert result.html is None

    def test_fetch_whitespace_url(self):
        """A whitespace-only URL yields an error result."""
        result = fetch_http(" ")

        assert result.success is False
        assert "URL vide" in result.error

    def test_fetch_no_redirects(self, mocker):
        """Request that must not follow redirects."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "OK"
        mock_get = mocker.patch("requests.get", return_value=mock_response)

        fetch_http("https://example.com", follow_redirects=False)

        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert call_kwargs["allow_redirects"] is False

    def test_fetch_uses_random_user_agent(self, mocker):
        """A User-Agent header is sent with the request."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "OK"
        mock_get = mocker.patch("requests.get", return_value=mock_response)

        fetch_http("https://example.com")

        # A User-Agent header must be present
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert "User-Agent" in call_kwargs["headers"]
        # Every UA the fetcher may pick is expected to contain "Mozilla"
        assert "Mozilla" in call_kwargs["headers"]["User-Agent"]

    def test_fetch_duration_is_measured(self, mocker):
        """The request duration is reported as a non-negative int."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = "OK"
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.duration_ms is not None
        assert isinstance(result.duration_ms, int)
        assert result.duration_ms >= 0

    def test_fetch_large_response(self, mocker):
        """Request whose response body is large."""
        mock_response = Mock()
        mock_response.status_code = 200
        # Simulate a large HTML page (about 1 MB). The surrounding tags push
        # the length past the 1,000,000 filler characters, which the strict
        # ``>`` comparison below relies on.
        mock_response.text = "<html>" + ("x" * 1000000) + "</html>"
        mocker.patch("requests.get", return_value=mock_response)

        result = fetch_http("https://example.com")

        assert result.success is True
        assert len(result.html) > 1000000