This commit is contained in:
2026-01-14 07:03:38 +01:00
parent ecda149a4b
commit c91c0f1fc9
61 changed files with 4388 additions and 38 deletions

Binary file not shown.

462
tests/core/test_io.py Executable file
View File

@@ -0,0 +1,462 @@
"""
Tests pour pricewatch.app.core.io
Teste la lecture/écriture YAML/JSON et les fonctions de sauvegarde debug.
"""
import json
import tempfile
from datetime import datetime
from pathlib import Path
import pytest
import yaml
from pricewatch.app.core.io import (
ScrapingConfig,
ScrapingOptions,
read_json_results,
read_yaml_config,
save_debug_html,
save_debug_screenshot,
write_json_results,
)
from pricewatch.app.core.schema import (
DebugInfo,
DebugStatus,
FetchMethod,
ProductSnapshot,
StockStatus,
)
class TestScrapingOptions:
    """Tests for the ScrapingOptions model."""

    def test_default_values(self):
        """An instance built without arguments carries the expected defaults."""
        defaults = ScrapingOptions()

        # Boolean switches.
        assert defaults.use_playwright is True
        assert defaults.headful is False
        assert defaults.save_html is True
        assert defaults.save_screenshot is True
        # Numeric setting.
        assert defaults.timeout_ms == 60000

    def test_custom_values(self):
        """Explicitly supplied values override every default."""
        overrides = {
            "use_playwright": False,
            "headful": True,
            "save_html": False,
            "save_screenshot": False,
            "timeout_ms": 30000,
        }

        custom = ScrapingOptions(**overrides)

        assert custom.use_playwright is False
        assert custom.headful is True
        assert custom.save_html is False
        assert custom.save_screenshot is False
        assert custom.timeout_ms == 30000

    def test_timeout_validation_min(self):
        """A timeout of 500 ms is rejected with a ValueError."""
        too_short_ms = 500
        with pytest.raises(ValueError):
            ScrapingOptions(timeout_ms=too_short_ms)

    def test_timeout_validation_valid(self):
        """A timeout of exactly 1000 ms is accepted and stored as given."""
        boundary_ms = 1000
        accepted = ScrapingOptions(timeout_ms=boundary_ms)
        assert accepted.timeout_ms == 1000
class TestScrapingConfig:
    """Tests for the ScrapingConfig model."""

    def test_minimal_config(self):
        """A config given only URLs still gets a ScrapingOptions instance."""
        only_url = "https://example.com"

        minimal = ScrapingConfig(urls=[only_url])

        assert len(minimal.urls) == 1
        assert minimal.urls[0] == "https://example.com"
        assert isinstance(minimal.options, ScrapingOptions)

    def test_config_with_options(self):
        """A config built with both URLs and explicit options keeps both."""
        url_list = ["https://example.com", "https://test.com"]
        explicit_options = ScrapingOptions(use_playwright=False, timeout_ms=10000)

        configured = ScrapingConfig(urls=url_list, options=explicit_options)

        assert len(configured.urls) == 2
        assert configured.options.use_playwright is False
        assert configured.options.timeout_ms == 10000

    def test_validate_urls_empty_list(self):
        """An empty URL list is rejected."""
        no_urls = []
        with pytest.raises(ValueError, match="Au moins une URL"):
            ScrapingConfig(urls=no_urls)

    def test_validate_urls_strips_whitespace(self):
        """Surrounding whitespace is stripped from each URL."""
        padded = [" https://example.com ", "https://test.com"]

        cleaned = ScrapingConfig(urls=padded)

        assert cleaned.urls == ["https://example.com", "https://test.com"]

    def test_validate_urls_removes_empty(self):
        """Empty and whitespace-only entries are dropped from the URL list."""
        mixed = ["https://example.com", "", " ", "https://test.com"]

        filtered = ScrapingConfig(urls=mixed)

        assert len(filtered.urls) == 2
        assert filtered.urls == ["https://example.com", "https://test.com"]

    def test_validate_urls_all_empty(self):
        """A list made only of blank entries is rejected."""
        blanks = ["", " ", "\t"]
        with pytest.raises(ValueError, match="Aucune URL valide"):
            ScrapingConfig(urls=blanks)
class TestReadYamlConfig:
    """Tests for read_yaml_config()."""

    @staticmethod
    def _write_yaml(path, content):
        """Serialize *content* as YAML into *path* and return *path*.

        The encoding is pinned to UTF-8 so the fixture file does not depend
        on the platform's default locale encoding.
        """
        with open(path, "w", encoding="utf-8") as handle:
            yaml.dump(content, handle)
        return path

    def test_read_valid_yaml(self, tmp_path):
        """A YAML file with URLs and options is loaded into a config."""
        yaml_path = self._write_yaml(
            tmp_path / "config.yaml",
            {
                "urls": ["https://example.com", "https://test.com"],
                "options": {"use_playwright": False, "timeout_ms": 30000},
            },
        )

        config = read_yaml_config(yaml_path)

        assert len(config.urls) == 2
        assert config.urls[0] == "https://example.com"
        assert config.options.use_playwright is False
        assert config.options.timeout_ms == 30000

    def test_read_yaml_minimal(self, tmp_path):
        """A YAML file holding only URLs falls back to the default options."""
        yaml_path = self._write_yaml(
            tmp_path / "config.yaml",
            {"urls": ["https://example.com"]},
        )

        config = read_yaml_config(yaml_path)

        assert len(config.urls) == 1
        # Default options
        assert config.options.use_playwright is True
        assert config.options.timeout_ms == 60000

    def test_read_yaml_file_not_found(self, tmp_path):
        """A missing file raises FileNotFoundError."""
        yaml_path = tmp_path / "nonexistent.yaml"
        with pytest.raises(FileNotFoundError):
            read_yaml_config(yaml_path)

    def test_read_yaml_empty_file(self, tmp_path):
        """An empty YAML file raises ValueError."""
        yaml_path = tmp_path / "empty.yaml"
        yaml_path.write_text("")
        with pytest.raises(ValueError, match="Fichier YAML vide"):
            read_yaml_config(yaml_path)

    def test_read_yaml_invalid_syntax(self, tmp_path):
        """A YAML file with broken syntax raises ValueError."""
        yaml_path = tmp_path / "invalid.yaml"
        yaml_path.write_text("urls: [invalid yaml syntax")
        with pytest.raises(ValueError, match="YAML invalide"):
            read_yaml_config(yaml_path)

    def test_read_yaml_missing_urls(self, tmp_path):
        """A YAML file without the 'urls' field fails validation."""
        yaml_path = self._write_yaml(
            tmp_path / "config.yaml",
            {"options": {"use_playwright": False}},
        )
        # Pydantic's ValidationError is a ValueError subclass; catching
        # ValueError instead of a bare Exception keeps unrelated bugs
        # (NameError, TypeError, ...) from making this test pass.
        # NOTE(review): assumes read_yaml_config lets the Pydantic
        # validation error propagate unchanged -- confirm.
        with pytest.raises(ValueError):
            read_yaml_config(yaml_path)

    def test_read_yaml_accepts_path_string(self, tmp_path):
        """The path may be given as a plain string."""
        yaml_path = self._write_yaml(
            tmp_path / "config.yaml",
            {"urls": ["https://example.com"]},
        )

        config = read_yaml_config(str(yaml_path))

        assert len(config.urls) == 1
class TestWriteJsonResults:
    """Tests for write_json_results()."""

    @pytest.fixture
    def sample_snapshot(self) -> ProductSnapshot:
        """Fixture: an example ProductSnapshot."""
        debug_info = DebugInfo(
            method=FetchMethod.HTTP,
            status=DebugStatus.SUCCESS,
            errors=[],
            notes=[],
        )
        return ProductSnapshot(
            source="test",
            url="https://example.com/product",
            fetched_at=datetime(2024, 1, 1, 12, 0, 0),
            title="Test Product",
            price=99.99,
            currency="EUR",
            stock_status=StockStatus.IN_STOCK,
            reference="TEST123",
            images=["https://example.com/img1.jpg"],
            category="Test Category",
            specs={"Brand": "TestBrand"},
            debug=debug_info,
        )

    def test_write_single_snapshot(self, tmp_path, sample_snapshot):
        """Writing one snapshot yields a JSON list with one entry."""
        target = tmp_path / "results.json"

        write_json_results([sample_snapshot], target)

        assert target.exists()
        # Check the file content
        payload = json.loads(target.read_text())
        assert isinstance(payload, list)
        assert len(payload) == 1
        first = payload[0]
        assert first["source"] == "test"
        assert first["title"] == "Test Product"

    def test_write_multiple_snapshots(self, tmp_path, sample_snapshot):
        """Writing several snapshots keeps them all, in order."""
        second_debug = DebugInfo(
            method=FetchMethod.PLAYWRIGHT,
            status=DebugStatus.PARTIAL,
            errors=["Test error"],
            notes=[],
        )
        second_snapshot = ProductSnapshot(
            source="test2",
            url="https://example.com/product2",
            fetched_at=datetime(2024, 1, 2, 12, 0, 0),
            title="Test Product 2",
            price=49.99,
            currency="EUR",
            stock_status=StockStatus.OUT_OF_STOCK,
            debug=second_debug,
        )
        target = tmp_path / "results.json"

        write_json_results([sample_snapshot, second_snapshot], target)

        payload = json.loads(target.read_text())
        assert len(payload) == 2
        assert payload[0]["source"] == "test"
        assert payload[1]["source"] == "test2"

    def test_write_creates_parent_dirs(self, tmp_path, sample_snapshot):
        """Missing parent directories exist after the write."""
        nested_target = tmp_path / "sub" / "dir" / "results.json"

        write_json_results([sample_snapshot], nested_target)

        assert nested_target.exists()
        assert nested_target.parent.exists()

    def test_write_empty_list(self, tmp_path):
        """Writing an empty list yields a file holding an empty JSON list."""
        target = tmp_path / "empty.json"

        write_json_results([], target)

        assert target.exists()
        payload = json.loads(target.read_text())
        assert payload == []

    def test_write_indent_control(self, tmp_path, sample_snapshot):
        """The indent argument controls how the output is laid out."""
        # With indentation
        pretty_target = tmp_path / "pretty.json"
        write_json_results([sample_snapshot], pretty_target, indent=2)
        pretty_text = pretty_target.read_text()
        assert "\n" in pretty_text  # Pretty-printed

        # Without indentation (compact)
        compact_target = tmp_path / "compact.json"
        write_json_results([sample_snapshot], compact_target, indent=None)
        compact_text = compact_target.read_text()
        assert len(compact_text) < len(pretty_text)  # More compact

    def test_write_accepts_path_string(self, tmp_path, sample_snapshot):
        """The destination may be given as a plain string."""
        target = tmp_path / "results.json"

        write_json_results([sample_snapshot], str(target))

        assert target.exists()
class TestReadJsonResults:
    """Tests for read_json_results()."""

    @pytest.fixture
    def json_file_with_snapshot(self, tmp_path) -> Path:
        """Fixture: a JSON file containing a single snapshot."""
        debug_section = {
            "method": "http",
            "status": "success",
            "errors": [],
            "notes": [],
            "duration_ms": None,
            "html_size_bytes": None,
        }
        snapshot_record = {
            "source": "test",
            "url": "https://example.com/product",
            "fetched_at": "2024-01-01T12:00:00",
            "title": "Test Product",
            "price": 99.99,
            "currency": "EUR",
            "shipping_cost": None,
            "stock_status": "in_stock",
            "reference": "TEST123",
            "images": ["https://example.com/img.jpg"],
            "category": "Test",
            "specs": {"Brand": "Test"},
            "debug": debug_section,
        }
        destination = tmp_path / "results.json"
        with open(destination, "w") as handle:
            json.dump([snapshot_record], handle)
        return destination

    def test_read_single_snapshot(self, json_file_with_snapshot):
        """A file with one snapshot is read back as one ProductSnapshot."""
        loaded = read_json_results(json_file_with_snapshot)

        assert len(loaded) == 1
        only = loaded[0]
        assert isinstance(only, ProductSnapshot)
        assert only.source == "test"
        assert only.title == "Test Product"
        assert only.price == 99.99

    def test_read_file_not_found(self, tmp_path):
        """A missing file raises FileNotFoundError."""
        missing = tmp_path / "nonexistent.json"
        with pytest.raises(FileNotFoundError):
            read_json_results(missing)

    def test_read_invalid_json(self, tmp_path):
        """Malformed JSON raises ValueError."""
        broken = tmp_path / "invalid.json"
        broken.write_text("{invalid json")
        with pytest.raises(ValueError, match="JSON invalide"):
            read_json_results(broken)

    def test_read_not_a_list(self, tmp_path):
        """JSON whose top level is not a list raises ValueError."""
        not_a_list = tmp_path / "notlist.json"
        with open(not_a_list, "w") as handle:
            json.dump({"key": "value"}, handle)
        with pytest.raises(ValueError, match="doit contenir une liste"):
            read_json_results(not_a_list)

    def test_read_empty_list(self, tmp_path):
        """An empty JSON list is accepted and read back as an empty list."""
        empty_file = tmp_path / "empty.json"
        with open(empty_file, "w") as handle:
            json.dump([], handle)

        loaded = read_json_results(empty_file)

        assert loaded == []

    def test_read_accepts_path_string(self, json_file_with_snapshot):
        """The source path may be given as a plain string."""
        as_text = str(json_file_with_snapshot)

        loaded = read_json_results(as_text)

        assert len(loaded) == 1
class TestSaveDebugHtml:
    """Tests for save_debug_html()."""

    def test_save_html_default_dir(self, tmp_path, monkeypatch):
        """HTML is saved when no output directory is given."""
        # Switch the working directory so the default location lands in tmp_path
        monkeypatch.chdir(tmp_path)
        markup = "<html><body>Test</body></html>"

        saved = save_debug_html(markup, "test_page")

        assert saved.exists()
        assert saved.name == "test_page.html"
        assert saved.read_text(encoding="utf-8") == markup

    def test_save_html_custom_dir(self, tmp_path):
        """HTML is saved inside an explicitly supplied directory."""
        markup = "<html><body>Test</body></html>"
        destination_dir = tmp_path / "debug_html"

        saved = save_debug_html(markup, "test_page", destination_dir)

        assert saved.parent == destination_dir
        assert saved.name == "test_page.html"
        assert saved.read_text(encoding="utf-8") == markup

    def test_save_html_creates_dir(self, tmp_path):
        """The output directory exists after saving even if it did not before."""
        markup = "<html><body>Test</body></html>"
        destination_dir = tmp_path / "sub" / "dir" / "html"

        saved = save_debug_html(markup, "test_page", destination_dir)

        assert destination_dir.exists()
        assert saved.exists()

    def test_save_html_large_content(self, tmp_path):
        """A large HTML document is written in full."""
        filler = "x" * 100000
        markup = "<html><body>" + filler + "</body></html>"

        saved = save_debug_html(markup, "large_page", tmp_path)

        assert saved.exists()
        written = saved.read_text(encoding="utf-8")
        assert len(written) == len(markup)
class TestSaveDebugScreenshot:
    """Tests for save_debug_screenshot()."""

    def test_save_screenshot_default_dir(self, tmp_path, monkeypatch):
        """A screenshot is saved when no output directory is given."""
        monkeypatch.chdir(tmp_path)
        image_bytes = b"\x89PNG fake image data"

        saved = save_debug_screenshot(image_bytes, "test_screenshot")

        assert saved.exists()
        assert saved.name == "test_screenshot.png"
        assert saved.read_bytes() == image_bytes

    def test_save_screenshot_custom_dir(self, tmp_path):
        """A screenshot is saved inside an explicitly supplied directory."""
        image_bytes = b"\x89PNG fake image data"
        destination_dir = tmp_path / "screenshots"

        saved = save_debug_screenshot(image_bytes, "test_screenshot", destination_dir)

        assert saved.parent == destination_dir
        assert saved.name == "test_screenshot.png"
        assert saved.read_bytes() == image_bytes

    def test_save_screenshot_creates_dir(self, tmp_path):
        """The output directory exists after saving even if it did not before."""
        image_bytes = b"\x89PNG fake image data"
        destination_dir = tmp_path / "sub" / "dir" / "screenshots"

        saved = save_debug_screenshot(image_bytes, "test_screenshot", destination_dir)

        assert destination_dir.exists()
        assert saved.exists()

View File

@@ -0,0 +1,174 @@
"""
Tests d'intégration pour le registry avec les stores réels.
Teste la détection automatique du bon store pour des URLs
Amazon, Cdiscount, Backmarket et AliExpress.
"""
import pytest
from pricewatch.app.core.registry import StoreRegistry
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.stores.cdiscount.store import CdiscountStore
from pricewatch.app.stores.backmarket.store import BackmarketStore
from pricewatch.app.stores.aliexpress.store import AliexpressStore
class TestRegistryRealStores:
    """Integration tests using the 4 real stores."""

    @pytest.fixture
    def registry_with_all_stores(self) -> StoreRegistry:
        """Fixture: a registry with the 4 real stores registered."""
        registry = StoreRegistry()
        registry.register(AmazonStore())
        registry.register(CdiscountStore())
        registry.register(BackmarketStore())
        registry.register(AliexpressStore())
        return registry

    def test_all_stores_registered(self, registry_with_all_stores):
        """All 4 stores are registered and listed by ID."""
        assert len(registry_with_all_stores) == 4
        stores = registry_with_all_stores.list_stores()
        assert "amazon" in stores
        assert "cdiscount" in stores
        assert "backmarket" in stores
        assert "aliexpress" in stores

    def test_detect_amazon_fr(self, registry_with_all_stores):
        """An Amazon.fr URL is detected as the amazon store."""
        url = "https://www.amazon.fr/dp/B08N5WRWNW"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "amazon"

    def test_detect_amazon_com(self, registry_with_all_stores):
        """An Amazon.com URL is detected as the amazon store."""
        url = "https://www.amazon.com/dp/B08N5WRWNW"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "amazon"

    def test_detect_amazon_with_product_name(self, registry_with_all_stores):
        """An Amazon URL with a product name in its path is detected."""
        url = "https://www.amazon.fr/Product-Name-Here/dp/B08N5WRWNW/ref=sr_1_1"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "amazon"

    def test_detect_cdiscount(self, registry_with_all_stores):
        """A Cdiscount URL is detected as the cdiscount store."""
        url = "https://www.cdiscount.com/informatique/clavier-souris-webcam/example/f-1070123-example.html"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "cdiscount"

    def test_detect_backmarket(self, registry_with_all_stores):
        """A Backmarket URL is detected as the backmarket store."""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "backmarket"

    def test_detect_backmarket_locale_en(self, registry_with_all_stores):
        """A Backmarket URL using the English locale segment is detected."""
        url = "https://www.backmarket.fr/en-fr/p/macbook-air-15-2024"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "backmarket"

    def test_detect_aliexpress_fr(self, registry_with_all_stores):
        """An fr.aliexpress.com URL is detected as the aliexpress store."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "aliexpress"

    def test_detect_aliexpress_com(self, registry_with_all_stores):
        """A www.aliexpress.com URL is detected as the aliexpress store."""
        url = "https://www.aliexpress.com/item/1005007187023722.html"
        store = registry_with_all_stores.detect_store(url)
        assert store is not None
        assert store.store_id == "aliexpress"

    def test_detect_unknown_store(self, registry_with_all_stores):
        """A URL belonging to no registered store returns None."""
        url = "https://www.ebay.com/itm/123456789"
        store = registry_with_all_stores.detect_store(url)
        assert store is None

    def test_detect_invalid_url(self, registry_with_all_stores):
        """A string that is not a valid URL returns None."""
        url = "not-a-valid-url"
        store = registry_with_all_stores.detect_store(url)
        assert store is None

    def test_detect_priority_amazon_over_others(self, registry_with_all_stores):
        """With all stores registered, an Amazon.fr URL resolves to amazon."""
        url = "https://www.amazon.fr/dp/B08N5WRWNW"
        store = registry_with_all_stores.detect_store(url)
        # Guard first so a missed detection fails with a clear assertion
        # instead of an AttributeError on None, like the other detect tests.
        assert store is not None
        # Amazon.fr is expected to score 0.9 and the other stores 0.0
        # (per the original author's note; scores are not checked here).
        assert store.store_id == "amazon"

    def test_each_store_matches_only_own_urls(self, registry_with_all_stores):
        """Each sample URL is detected as its own store and no other."""
        test_cases = [
            ("https://www.amazon.fr/dp/B08N5WRWNW", "amazon"),
            ("https://www.cdiscount.com/product", "cdiscount"),
            ("https://www.backmarket.fr/fr-fr/p/product", "backmarket"),
            ("https://fr.aliexpress.com/item/12345.html", "aliexpress"),
        ]
        for url, expected_store_id in test_cases:
            store = registry_with_all_stores.detect_store(url)
            assert store is not None, f"Aucun store détecté pour {url}"
            assert store.store_id == expected_store_id, (
                f"Mauvais store pour {url}: "
                f"attendu {expected_store_id}, obtenu {store.store_id}"
            )

    def test_get_store_by_id(self, registry_with_all_stores):
        """Each store can be fetched by ID and has the expected class."""
        amazon = registry_with_all_stores.get_store("amazon")
        assert amazon is not None
        assert isinstance(amazon, AmazonStore)

        cdiscount = registry_with_all_stores.get_store("cdiscount")
        assert cdiscount is not None
        assert isinstance(cdiscount, CdiscountStore)

        backmarket = registry_with_all_stores.get_store("backmarket")
        assert backmarket is not None
        assert isinstance(backmarket, BackmarketStore)

        aliexpress = registry_with_all_stores.get_store("aliexpress")
        assert aliexpress is not None
        assert isinstance(aliexpress, AliexpressStore)

    def test_unregister_store(self, registry_with_all_stores):
        """An unregistered store is no longer detected; the others still are."""
        assert len(registry_with_all_stores) == 4

        # Unregister Amazon
        removed = registry_with_all_stores.unregister("amazon")
        assert removed is True
        assert len(registry_with_all_stores) == 3

        # Amazon must no longer be detected
        store = registry_with_all_stores.detect_store("https://www.amazon.fr/dp/B08N5WRWNW")
        assert store is None

        # The remaining stores must keep working
        store = registry_with_all_stores.detect_store("https://www.cdiscount.com/product")
        assert store is not None
        assert store.store_id == "cdiscount"

    def test_repr_includes_all_stores(self, registry_with_all_stores):
        """The repr string names the registry class and every store ID."""
        repr_str = repr(registry_with_all_stores)
        assert "StoreRegistry" in repr_str
        assert "amazon" in repr_str
        assert "cdiscount" in repr_str
        assert "backmarket" in repr_str
        assert "aliexpress" in repr_str