This commit is contained in:
2026-01-14 07:03:38 +01:00
parent ecda149a4b
commit c91c0f1fc9
61 changed files with 4388 additions and 38 deletions

462
tests/core/test_io.py Executable file
View File

@@ -0,0 +1,462 @@
"""
Tests pour pricewatch.app.core.io
Teste la lecture/écriture YAML/JSON et les fonctions de sauvegarde debug.
"""
import json
import tempfile
from datetime import datetime
from pathlib import Path
import pytest
import yaml
from pricewatch.app.core.io import (
ScrapingConfig,
ScrapingOptions,
read_json_results,
read_yaml_config,
save_debug_html,
save_debug_screenshot,
write_json_results,
)
from pricewatch.app.core.schema import (
DebugInfo,
DebugStatus,
FetchMethod,
ProductSnapshot,
StockStatus,
)
class TestScrapingOptions:
    """Tests for the ScrapingOptions model."""

    def test_default_values(self):
        """A ScrapingOptions built without arguments exposes the expected defaults."""
        opts = ScrapingOptions()

        assert opts.use_playwright is True
        assert opts.headful is False
        assert opts.save_html is True
        assert opts.save_screenshot is True
        assert opts.timeout_ms == 60000

    def test_custom_values(self):
        """Explicitly supplied values are stored as given."""
        overrides = {
            "use_playwright": False,
            "headful": True,
            "save_html": False,
            "save_screenshot": False,
            "timeout_ms": 30000,
        }

        opts = ScrapingOptions(**overrides)

        assert opts.use_playwright is False
        assert opts.headful is True
        assert opts.save_html is False
        assert opts.save_screenshot is False
        assert opts.timeout_ms == 30000

    def test_timeout_validation_min(self):
        """A 500 ms timeout (below the 1000 ms minimum) raises ValueError."""
        with pytest.raises(ValueError):
            ScrapingOptions(timeout_ms=500)

    def test_timeout_validation_valid(self):
        """A timeout of exactly 1000 ms is accepted and kept unchanged."""
        opts = ScrapingOptions(timeout_ms=1000)

        assert opts.timeout_ms == 1000
class TestScrapingConfig:
    """Tests for the ScrapingConfig model."""

    def test_minimal_config(self):
        """A config built from URLs only gets a ScrapingOptions instance as options."""
        cfg = ScrapingConfig(urls=["https://example.com"])

        assert len(cfg.urls) == 1
        assert cfg.urls[0] == "https://example.com"
        assert isinstance(cfg.options, ScrapingOptions)

    def test_config_with_options(self):
        """A config built from URLs plus explicit options keeps both."""
        custom_options = ScrapingOptions(use_playwright=False, timeout_ms=10000)
        url_list = ["https://example.com", "https://test.com"]

        cfg = ScrapingConfig(urls=url_list, options=custom_options)

        assert len(cfg.urls) == 2
        assert cfg.options.use_playwright is False
        assert cfg.options.timeout_ms == 10000

    def test_validate_urls_empty_list(self):
        """An empty URL list is rejected with the 'Au moins une URL' message."""
        with pytest.raises(ValueError, match="Au moins une URL"):
            ScrapingConfig(urls=[])

    def test_validate_urls_strips_whitespace(self):
        """Surrounding whitespace is stripped from each URL."""
        cfg = ScrapingConfig(urls=[" https://example.com ", "https://test.com"])

        assert cfg.urls == ["https://example.com", "https://test.com"]

    def test_validate_urls_removes_empty(self):
        """Empty and whitespace-only entries are dropped from the URL list."""
        raw_urls = ["https://example.com", "", " ", "https://test.com"]

        cfg = ScrapingConfig(urls=raw_urls)

        assert len(cfg.urls) == 2
        assert cfg.urls == ["https://example.com", "https://test.com"]

    def test_validate_urls_all_empty(self):
        """A list made only of blank entries is rejected with 'Aucune URL valide'."""
        with pytest.raises(ValueError, match="Aucune URL valide"):
            ScrapingConfig(urls=["", " ", "\t"])
class TestReadYamlConfig:
    """Tests for read_yaml_config()."""

    @staticmethod
    def _dump_yaml(target, payload):
        """Serialize *payload* as YAML into the file at *target*."""
        with open(target, "w") as handle:
            yaml.dump(payload, handle)

    def test_read_valid_yaml(self, tmp_path):
        """A valid YAML file with URLs and options is parsed into a config."""
        config_file = tmp_path / "config.yaml"
        self._dump_yaml(
            config_file,
            {
                "urls": ["https://example.com", "https://test.com"],
                "options": {"use_playwright": False, "timeout_ms": 30000},
            },
        )

        cfg = read_yaml_config(config_file)

        assert len(cfg.urls) == 2
        assert cfg.urls[0] == "https://example.com"
        assert cfg.options.use_playwright is False
        assert cfg.options.timeout_ms == 30000

    def test_read_yaml_minimal(self, tmp_path):
        """A YAML file containing only URLs falls back to default options."""
        config_file = tmp_path / "config.yaml"
        self._dump_yaml(config_file, {"urls": ["https://example.com"]})

        cfg = read_yaml_config(config_file)

        assert len(cfg.urls) == 1
        # Default options
        assert cfg.options.use_playwright is True
        assert cfg.options.timeout_ms == 60000

    def test_read_yaml_file_not_found(self, tmp_path):
        """A missing file raises FileNotFoundError."""
        missing_file = tmp_path / "nonexistent.yaml"

        with pytest.raises(FileNotFoundError):
            read_yaml_config(missing_file)

    def test_read_yaml_empty_file(self, tmp_path):
        """An empty YAML file raises ValueError ('Fichier YAML vide')."""
        empty_file = tmp_path / "empty.yaml"
        empty_file.write_text("")

        with pytest.raises(ValueError, match="Fichier YAML vide"):
            read_yaml_config(empty_file)

    def test_read_yaml_invalid_syntax(self, tmp_path):
        """Syntactically invalid YAML raises ValueError ('YAML invalide')."""
        broken_file = tmp_path / "invalid.yaml"
        broken_file.write_text("urls: [invalid yaml syntax")

        with pytest.raises(ValueError, match="YAML invalide"):
            read_yaml_config(broken_file)

    def test_read_yaml_missing_urls(self, tmp_path):
        """A YAML file without the 'urls' field raises an error."""
        config_file = tmp_path / "config.yaml"
        self._dump_yaml(config_file, {"options": {"use_playwright": False}})

        # NOTE(review): presumably a Pydantic validation error; the exact type
        # is not visible from this file, so the broad match is kept as is.
        with pytest.raises(Exception):  # Pydantic validation error
            read_yaml_config(config_file)

    def test_read_yaml_accepts_path_string(self, tmp_path):
        """The path may be passed as a plain string."""
        config_file = tmp_path / "config.yaml"
        self._dump_yaml(config_file, {"urls": ["https://example.com"]})

        cfg = read_yaml_config(str(config_file))

        assert len(cfg.urls) == 1
class TestWriteJsonResults:
    """Tests for write_json_results()."""

    @staticmethod
    def _load_json(source):
        """Parse and return the JSON document stored in the file at *source*."""
        with open(source) as handle:
            return json.load(handle)

    @pytest.fixture
    def sample_snapshot(self) -> ProductSnapshot:
        """Fixture: a fully populated example ProductSnapshot."""
        debug_info = DebugInfo(
            method=FetchMethod.HTTP,
            status=DebugStatus.SUCCESS,
            errors=[],
            notes=[],
        )
        return ProductSnapshot(
            source="test",
            url="https://example.com/product",
            fetched_at=datetime(2024, 1, 1, 12, 0, 0),
            title="Test Product",
            price=99.99,
            currency="EUR",
            stock_status=StockStatus.IN_STOCK,
            reference="TEST123",
            images=["https://example.com/img1.jpg"],
            category="Test Category",
            specs={"Brand": "TestBrand"},
            debug=debug_info,
        )

    def test_write_single_snapshot(self, tmp_path, sample_snapshot):
        """A single snapshot is written as a one-element JSON list."""
        out_file = tmp_path / "results.json"

        write_json_results([sample_snapshot], out_file)

        assert out_file.exists()
        # Check the written content
        payload = self._load_json(out_file)
        assert isinstance(payload, list)
        assert len(payload) == 1
        assert payload[0]["source"] == "test"
        assert payload[0]["title"] == "Test Product"

    def test_write_multiple_snapshots(self, tmp_path, sample_snapshot):
        """Several snapshots are written in the order they were given."""
        second_debug = DebugInfo(
            method=FetchMethod.PLAYWRIGHT,
            status=DebugStatus.PARTIAL,
            errors=["Test error"],
            notes=[],
        )
        second_snapshot = ProductSnapshot(
            source="test2",
            url="https://example.com/product2",
            fetched_at=datetime(2024, 1, 2, 12, 0, 0),
            title="Test Product 2",
            price=49.99,
            currency="EUR",
            stock_status=StockStatus.OUT_OF_STOCK,
            debug=second_debug,
        )
        out_file = tmp_path / "results.json"

        write_json_results([sample_snapshot, second_snapshot], out_file)

        payload = self._load_json(out_file)
        assert len(payload) == 2
        assert payload[0]["source"] == "test"
        assert payload[1]["source"] == "test2"

    def test_write_creates_parent_dirs(self, tmp_path, sample_snapshot):
        """Missing parent directories are created when writing."""
        out_file = tmp_path / "sub" / "dir" / "results.json"

        write_json_results([sample_snapshot], out_file)

        assert out_file.exists()
        assert out_file.parent.exists()

    def test_write_empty_list(self, tmp_path):
        """An empty list is written as an empty JSON list."""
        out_file = tmp_path / "empty.json"

        write_json_results([], out_file)

        assert out_file.exists()
        payload = self._load_json(out_file)
        assert payload == []

    def test_write_indent_control(self, tmp_path, sample_snapshot):
        """The indent argument controls how the output is laid out."""
        # With indentation
        pretty_file = tmp_path / "pretty.json"
        write_json_results([sample_snapshot], pretty_file, indent=2)
        pretty_text = pretty_file.read_text()
        assert "\n" in pretty_text  # Pretty-printed

        # Without indentation (compact)
        compact_file = tmp_path / "compact.json"
        write_json_results([sample_snapshot], compact_file, indent=None)
        compact_text = compact_file.read_text()
        assert len(compact_text) < len(pretty_text)  # More compact

    def test_write_accepts_path_string(self, tmp_path, sample_snapshot):
        """The destination may be passed as a plain string."""
        out_file = tmp_path / "results.json"

        write_json_results([sample_snapshot], str(out_file))

        assert out_file.exists()
class TestReadJsonResults:
    """Tests for read_json_results()."""

    @pytest.fixture
    def json_file_with_snapshot(self, tmp_path) -> Path:
        """Fixture: path to a JSON file holding a list with one snapshot."""
        debug_payload = {
            "method": "http",
            "status": "success",
            "errors": [],
            "notes": [],
            "duration_ms": None,
            "html_size_bytes": None,
        }
        snapshot_payload = {
            "source": "test",
            "url": "https://example.com/product",
            "fetched_at": "2024-01-01T12:00:00",
            "title": "Test Product",
            "price": 99.99,
            "currency": "EUR",
            "shipping_cost": None,
            "stock_status": "in_stock",
            "reference": "TEST123",
            "images": ["https://example.com/img.jpg"],
            "category": "Test",
            "specs": {"Brand": "Test"},
            "debug": debug_payload,
        }

        target = tmp_path / "results.json"
        with open(target, "w") as handle:
            json.dump([snapshot_payload], handle)
        return target

    def test_read_single_snapshot(self, json_file_with_snapshot):
        """A file with one snapshot yields a single ProductSnapshot."""
        loaded = read_json_results(json_file_with_snapshot)

        assert len(loaded) == 1
        first = loaded[0]
        assert isinstance(first, ProductSnapshot)
        assert first.source == "test"
        assert first.title == "Test Product"
        assert first.price == 99.99

    def test_read_file_not_found(self, tmp_path):
        """A missing file raises FileNotFoundError."""
        missing_file = tmp_path / "nonexistent.json"

        with pytest.raises(FileNotFoundError):
            read_json_results(missing_file)

    def test_read_invalid_json(self, tmp_path):
        """Malformed JSON raises ValueError ('JSON invalide')."""
        broken_file = tmp_path / "invalid.json"
        broken_file.write_text("{invalid json")

        with pytest.raises(ValueError, match="JSON invalide"):
            read_json_results(broken_file)

    def test_read_not_a_list(self, tmp_path):
        """JSON whose top level is not a list raises ValueError."""
        object_file = tmp_path / "notlist.json"
        with open(object_file, "w") as handle:
            json.dump({"key": "value"}, handle)

        with pytest.raises(ValueError, match="doit contenir une liste"):
            read_json_results(object_file)

    def test_read_empty_list(self, tmp_path):
        """An empty JSON list is accepted and returns an empty list."""
        empty_file = tmp_path / "empty.json"
        with open(empty_file, "w") as handle:
            json.dump([], handle)

        loaded = read_json_results(empty_file)

        assert loaded == []

    def test_read_accepts_path_string(self, json_file_with_snapshot):
        """The path may be passed as a plain string."""
        loaded = read_json_results(str(json_file_with_snapshot))

        assert len(loaded) == 1
class TestSaveDebugHtml:
    """Tests for save_debug_html()."""

    def test_save_html_default_dir(self, tmp_path, monkeypatch):
        """HTML is saved when no output directory is given."""
        # Run from a temporary working directory for the duration of the test
        monkeypatch.chdir(tmp_path)
        markup = "<html><body>Test</body></html>"

        saved = save_debug_html(markup, "test_page")

        assert saved.exists()
        assert saved.name == "test_page.html"
        assert saved.read_text(encoding="utf-8") == markup

    def test_save_html_custom_dir(self, tmp_path):
        """HTML is saved inside the explicitly supplied directory."""
        target_dir = tmp_path / "debug_html"
        markup = "<html><body>Test</body></html>"

        saved = save_debug_html(markup, "test_page", target_dir)

        assert saved.parent == target_dir
        assert saved.name == "test_page.html"
        assert saved.read_text(encoding="utf-8") == markup

    def test_save_html_creates_dir(self, tmp_path):
        """The output directory is created when it does not exist yet."""
        target_dir = tmp_path / "sub" / "dir" / "html"
        markup = "<html><body>Test</body></html>"

        saved = save_debug_html(markup, "test_page", target_dir)

        assert target_dir.exists()
        assert saved.exists()

    def test_save_html_large_content(self, tmp_path):
        """A large HTML document is written out with its full length."""
        markup = "<html><body>" + ("x" * 100000) + "</body></html>"

        saved = save_debug_html(markup, "large_page", tmp_path)

        assert saved.exists()
        assert len(saved.read_text(encoding="utf-8")) == len(markup)
class TestSaveDebugScreenshot:
    """Tests for save_debug_screenshot()."""

    def test_save_screenshot_default_dir(self, tmp_path, monkeypatch):
        """A screenshot is saved when no output directory is given."""
        # Run from a temporary working directory for the duration of the test
        monkeypatch.chdir(tmp_path)
        image_bytes = b"\x89PNG fake image data"

        saved = save_debug_screenshot(image_bytes, "test_screenshot")

        assert saved.exists()
        assert saved.name == "test_screenshot.png"
        assert saved.read_bytes() == image_bytes

    def test_save_screenshot_custom_dir(self, tmp_path):
        """A screenshot is saved inside the explicitly supplied directory."""
        target_dir = tmp_path / "screenshots"
        image_bytes = b"\x89PNG fake image data"

        saved = save_debug_screenshot(image_bytes, "test_screenshot", target_dir)

        assert saved.parent == target_dir
        assert saved.name == "test_screenshot.png"
        assert saved.read_bytes() == image_bytes

    def test_save_screenshot_creates_dir(self, tmp_path):
        """The output directory is created when it does not exist yet."""
        target_dir = tmp_path / "sub" / "dir" / "screenshots"
        image_bytes = b"\x89PNG fake image data"

        saved = save_debug_screenshot(image_bytes, "test_screenshot", target_dir)

        assert target_dir.exists()
        assert saved.exists()