291 lines
9.9 KiB
Python
Executable File
291 lines
9.9 KiB
Python
Executable File
"""
Tests for pricewatch.app.scraping.http_fetch.

Exercises HTTP fetching with mocks so that no real requests are made.
"""
|
|
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pytest
|
|
import requests
|
|
from requests.exceptions import RequestException, Timeout
|
|
|
|
from pricewatch.app.scraping.http_fetch import FetchResult, fetch_http
|
|
|
|
|
|
class TestFetchResult:
    """Tests for the FetchResult class."""

    def test_success_result(self):
        """A successful result exposes the values it was built with."""
        page = "<html>Test</html>"
        fetched = FetchResult(
            success=True, html=page, status_code=200, duration_ms=150
        )

        assert fetched.success is True
        assert fetched.html == page
        assert fetched.status_code == 200
        assert fetched.duration_ms == 150
        # No error was supplied, so the field keeps its default.
        assert fetched.error is None

    def test_error_result(self):
        """A failed result carries the error text and no HTML."""
        message = "403 Forbidden"
        fetched = FetchResult(
            success=False, error=message, status_code=403, duration_ms=100
        )

        assert fetched.success is False
        assert fetched.error == message
        assert fetched.status_code == 403
        assert fetched.duration_ms == 100
        # No HTML was supplied, so the field keeps its default.
        assert fetched.html is None

    def test_minimal_result(self):
        """A result built from ``success`` alone leaves other fields None."""
        fetched = FetchResult(success=False)

        assert fetched.success is False
        for field in ("html", "error", "status_code", "duration_ms"):
            assert getattr(fetched, field) is None
|
|
|
|
|
|
class TestFetchHttp:
    """Tests for the fetch_http() function."""

    @staticmethod
    def _patch_get(mocker, status_code=200, text=None):
        """Replace ``requests.get`` with a mock returning a canned response.

        Args:
            mocker: The pytest-mock ``mocker`` fixture.
            status_code: Value assigned to the fake response's
                ``status_code`` attribute.
            text: Value assigned to the fake response's ``text`` attribute.
                When ``None`` the attribute is not set explicitly and stays
                the auto-created ``Mock`` attribute.

        Returns:
            The mock installed in place of ``requests.get``, so callers can
            inspect how it was called.
        """
        response = Mock()
        response.status_code = status_code
        if text is not None:
            response.text = text
        return mocker.patch("requests.get", return_value=response)

    def test_fetch_success(self, mocker):
        """Successful HTTP request (200 OK)."""
        page = "<html><body>Test Page</body></html>"
        self._patch_get(mocker, text=page)

        result = fetch_http("https://example.com")

        assert result.success is True
        assert result.html == page
        assert result.status_code == 200
        assert result.error is None
        assert result.duration_ms is not None
        assert result.duration_ms >= 0

    def test_fetch_with_custom_timeout(self, mocker):
        """Request with a custom timeout."""
        mock_get = self._patch_get(mocker, text="<html>OK</html>")

        fetch_http("https://example.com", timeout=60)

        # The timeout must be forwarded to requests.get.
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert call_kwargs["timeout"] == 60

    def test_fetch_with_custom_headers(self, mocker):
        """Request with custom headers."""
        mock_get = self._patch_get(mocker, text="<html>OK</html>")

        custom_headers = {"X-Custom-Header": "test-value"}
        fetch_http("https://example.com", headers=custom_headers)

        # The custom headers must be included in the outgoing request.
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert "X-Custom-Header" in call_kwargs["headers"]
        assert call_kwargs["headers"]["X-Custom-Header"] == "test-value"
        # Default headers must still be present alongside the custom ones.
        assert "User-Agent" in call_kwargs["headers"]

    def test_fetch_403_forbidden(self, mocker):
        """Blocked request (403 Forbidden)."""
        self._patch_get(mocker, status_code=403)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.html is None
        assert result.status_code == 403
        assert "403 Forbidden" in result.error
        assert "Anti-bot" in result.error

    def test_fetch_404_not_found(self, mocker):
        """Page not found (404 Not Found)."""
        self._patch_get(mocker, status_code=404)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 404
        assert "404 Not Found" in result.error

    def test_fetch_429_rate_limit(self, mocker):
        """Rate limit reached (429 Too Many Requests)."""
        self._patch_get(mocker, status_code=429)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 429
        assert "429" in result.error
        assert "Rate limit" in result.error

    def test_fetch_500_server_error(self, mocker):
        """Server error (500 Internal Server Error)."""
        self._patch_get(mocker, status_code=500)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 500
        assert "500" in result.error
        assert "Server Error" in result.error

    def test_fetch_503_service_unavailable(self, mocker):
        """Service unavailable (503)."""
        self._patch_get(mocker, status_code=503)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 503
        assert "503" in result.error

    def test_fetch_unknown_status_code(self, mocker):
        """Unrecognised status code (e.g. 418 I'm a teapot)."""
        self._patch_get(mocker, status_code=418)

        result = fetch_http("https://example.com")

        assert result.success is False
        assert result.status_code == 418
        assert "418" in result.error

    def test_fetch_timeout_error(self, mocker):
        """Timeout raised while performing the request."""
        mocker.patch("requests.get", side_effect=Timeout("Connection timed out"))

        result = fetch_http("https://example.com", timeout=10)

        assert result.success is False
        assert result.html is None
        assert "Timeout" in result.error
        assert result.duration_ms is not None

    def test_fetch_request_exception(self, mocker):
        """Generic network exception."""
        mocker.patch(
            "requests.get",
            side_effect=RequestException("Network error"),
        )

        result = fetch_http("https://example.com")

        assert result.success is False
        assert "Erreur réseau" in result.error
        assert result.duration_ms is not None

    def test_fetch_unexpected_exception(self, mocker):
        """Unexpected (non-requests) exception."""
        mocker.patch("requests.get", side_effect=ValueError("Unexpected error"))

        result = fetch_http("https://example.com")

        assert result.success is False
        assert "Erreur inattendue" in result.error
        assert result.duration_ms is not None

    def test_fetch_empty_url(self, mocker):
        """An empty URL returns an error without sending a request."""
        # Patched so a regression in URL validation cannot reach the network.
        mock_get = self._patch_get(mocker)

        result = fetch_http("")

        assert result.success is False
        assert "URL vide" in result.error
        assert result.html is None
        mock_get.assert_not_called()

    def test_fetch_whitespace_url(self, mocker):
        """A whitespace-only URL returns an error without sending a request."""
        # Patched so a regression in URL validation cannot reach the network.
        mock_get = self._patch_get(mocker)

        result = fetch_http(" ")

        assert result.success is False
        assert "URL vide" in result.error
        mock_get.assert_not_called()

    def test_fetch_no_redirects(self, mocker):
        """Request that does not follow redirects."""
        mock_get = self._patch_get(mocker, text="<html>OK</html>")

        fetch_http("https://example.com", follow_redirects=False)

        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert call_kwargs["allow_redirects"] is False

    def test_fetch_uses_random_user_agent(self, mocker):
        """A User-Agent header is sent with the request."""
        mock_get = self._patch_get(mocker, text="<html>OK</html>")

        fetch_http("https://example.com")

        # A User-Agent must be present.
        mock_get.assert_called_once()
        call_kwargs = mock_get.call_args.kwargs
        assert "User-Agent" in call_kwargs["headers"]
        # The User-Agent must contain "Mozilla" (expected in every UA used).
        assert "Mozilla" in call_kwargs["headers"]["User-Agent"]

    def test_fetch_duration_is_measured(self, mocker):
        """The request duration is measured and reported as an int."""
        self._patch_get(mocker, text="<html>OK</html>")

        result = fetch_http("https://example.com")

        assert result.duration_ms is not None
        assert isinstance(result.duration_ms, int)
        assert result.duration_ms >= 0

    def test_fetch_large_response(self, mocker):
        """Request returning a large response body."""
        # Simulate a large HTML page (1 MB).
        self._patch_get(mocker, text="<html>" + ("x" * 1000000) + "</html>")

        result = fetch_http("https://example.com")

        assert result.success is True
        assert len(result.html) > 1000000
|