chore: sync project files

This commit is contained in:
Gilles Soulier
2026-01-13 19:49:04 +01:00
parent 53f8227941
commit ecda149a4b
149 changed files with 65272 additions and 1 deletions

0
tests/__init__.py Executable file
View File

Binary file not shown.

292
tests/core/test_registry.py Executable file
View File

@@ -0,0 +1,292 @@
"""
Tests pour pricewatch.app.core.registry
Vérifie l'enregistrement des stores, la détection automatique,
et les fonctions helper du registry.
"""
import pytest
from pricewatch.app.core.registry import StoreRegistry
from pricewatch.app.stores.base import BaseStore
from pricewatch.app.core.schema import ProductSnapshot
class MockStore(BaseStore):
    """Minimal store implementation used to exercise the registry in tests."""

    def __init__(self, store_id: str, match_patterns: dict[str, float]):
        """Create a mock store.

        Args:
            store_id: Identifier of the store.
            match_patterns: Mapping ``{substring: score}`` used to simulate
                ``match()``.
        """
        super().__init__(store_id=store_id, selectors_path=None)
        self.match_patterns = match_patterns

    def match(self, url: str) -> float:
        """Return the score of the first configured pattern found in *url*.

        The URL is lowercased before the substring lookup. An empty URL, or a
        URL containing none of the configured patterns, scores ``0.0``.
        """
        if not url:
            return 0.0
        lowered = url.lower()
        # Patterns are checked in dict insertion order; the first hit wins.
        return next(
            (
                score
                for pattern, score in self.match_patterns.items()
                if pattern in lowered
            ),
            0.0,
        )

    def canonicalize(self, url: str) -> str:
        """Return *url* unchanged (mock implementation)."""
        return url

    def extract_reference(self, url: str) -> str | None:
        """Return a fixed reference regardless of *url* (mock implementation)."""
        return "TEST_REF"

    def parse(self, html: str, url: str, **kwargs) -> ProductSnapshot:
        """Always raise: parsing is not exercised by the registry tests."""
        raise NotImplementedError("Mock parse not implemented")
class TestStoreRegistry:
    """Tests for StoreRegistry."""

    @pytest.fixture
    def registry(self) -> StoreRegistry:
        """Fixture: an empty registry."""
        return StoreRegistry()

    @pytest.fixture
    def mock_amazon(self) -> MockStore:
        """Fixture: mock Amazon store."""
        patterns = {"amazon.fr": 0.9, "amazon.com": 0.8}
        return MockStore(store_id="amazon", match_patterns=patterns)

    @pytest.fixture
    def mock_cdiscount(self) -> MockStore:
        """Fixture: mock Cdiscount store."""
        patterns = {"cdiscount.com": 0.9}
        return MockStore(store_id="cdiscount", match_patterns=patterns)

    def test_registry_init_empty(self, registry):
        """A fresh registry contains no store."""
        assert len(registry) == 0
        assert registry.list_stores() == []

    def test_register_single_store(self, registry, mock_amazon):
        """Registering one store makes it listed."""
        registry.register(mock_amazon)

        assert len(registry) == 1
        assert "amazon" in registry.list_stores()

    def test_register_multiple_stores(self, registry, mock_amazon, mock_cdiscount):
        """Registering several stores lists all of them."""
        for candidate in (mock_amazon, mock_cdiscount):
            registry.register(candidate)

        assert len(registry) == 2
        assert set(registry.list_stores()) == {"amazon", "cdiscount"}

    def test_register_invalid_type(self, registry):
        """Registering a non-BaseStore object must fail with TypeError."""
        with pytest.raises(TypeError, match="Expected BaseStore"):
            registry.register("not a store")

    def test_register_duplicate_replaces(self, registry, mock_amazon):
        """Registering the same store_id twice replaces the first store."""
        registry.register(mock_amazon)
        assert len(registry) == 1

        # A second mock sharing the same ID.
        replacement = MockStore(store_id="amazon", match_patterns={"amazon.es": 0.7})
        registry.register(replacement)

        # Still a single entry, and it is the newer store.
        assert len(registry) == 1
        assert "amazon" in registry.list_stores()
        assert registry.get_store("amazon") is replacement

    def test_get_store_existing(self, registry, mock_amazon):
        """get_store() returns the registered instance."""
        registry.register(mock_amazon)

        assert registry.get_store("amazon") is mock_amazon

    def test_get_store_non_existing(self, registry):
        """get_store() returns None for an unknown ID."""
        assert registry.get_store("nonexistent") is None

    def test_unregister_existing(self, registry, mock_amazon):
        """Unregistering a known store removes it and returns True."""
        registry.register(mock_amazon)
        assert len(registry) == 1

        was_removed = registry.unregister("amazon")

        assert was_removed is True
        assert len(registry) == 0
        assert "amazon" not in registry.list_stores()

    def test_unregister_non_existing(self, registry):
        """Unregistering an unknown store returns False."""
        assert registry.unregister("nonexistent") is False

    def test_detect_store_empty_url(self, registry, mock_amazon):
        """An empty URL yields no store."""
        registry.register(mock_amazon)

        assert registry.detect_store("") is None

    def test_detect_store_whitespace_url(self, registry, mock_amazon):
        """A whitespace-only URL yields no store."""
        registry.register(mock_amazon)

        assert registry.detect_store(" ") is None

    def test_detect_store_empty_registry(self, registry):
        """An empty registry yields no store."""
        assert registry.detect_store("https://example.com") is None

    def test_detect_store_single_match(self, registry, mock_amazon):
        """A URL matched by the only registered store returns that store."""
        registry.register(mock_amazon)

        detected = registry.detect_store("https://www.amazon.fr/dp/B08N5WRWNW")

        assert detected is mock_amazon

    def test_detect_store_no_match(self, registry, mock_amazon):
        """A URL matched by no store yields None."""
        registry.register(mock_amazon)

        detected = registry.detect_store("https://www.ebay.com/item/123")

        assert detected is None

    def test_detect_store_multiple_matches_best_score(
        self, registry, mock_amazon, mock_cdiscount
    ):
        """With several stores registered, each URL resolves to its own store."""
        registry.register(mock_amazon)
        registry.register(mock_cdiscount)

        # Amazon URL
        amazon_hit = registry.detect_store("https://www.amazon.fr/dp/B08N5WRWNW")
        assert amazon_hit is mock_amazon

        # Cdiscount URL
        cdiscount_hit = registry.detect_store("https://www.cdiscount.com/product/123")
        assert cdiscount_hit is mock_cdiscount

    def test_detect_store_ambiguous_url_best_score(self, registry):
        """Ambiguous URL: the store with the highest score wins."""
        # Two stores that both match the same URL with different scores.
        low_scorer = MockStore(store_id="store_a", match_patterns={"example.com": 0.7})
        high_scorer = MockStore(store_id="store_b", match_patterns={"example.com": 0.9})
        registry.register(low_scorer)
        registry.register(high_scorer)

        detected = registry.detect_store("https://www.example.com")

        assert detected is high_scorer  # Best score (0.9 vs 0.7)

    def test_detect_store_exception_in_match(self, registry, mock_amazon):
        """If one store's match() raises, detection continues with the others."""

        class BrokenStore(MockStore):
            """Store whose match() always crashes."""

            def match(self, url: str) -> float:
                raise RuntimeError("Simulated crash")

        registry.register(BrokenStore(store_id="broken", match_patterns={}))
        registry.register(mock_amazon)

        # Amazon must still be detected despite the broken store.
        detected = registry.detect_store("https://www.amazon.fr/dp/B08N5WRWNW")
        assert detected is mock_amazon

    def test_list_stores_empty(self, registry):
        """list_stores() on an empty registry returns an empty list."""
        assert registry.list_stores() == []

    def test_list_stores_multiple(self, registry, mock_amazon, mock_cdiscount):
        """list_stores() returns every registered store ID."""
        registry.register(mock_amazon)
        registry.register(mock_cdiscount)

        listed = registry.list_stores()

        assert len(listed) == 2
        assert "amazon" in listed
        assert "cdiscount" in listed

    def test_len_operator(self, registry, mock_amazon, mock_cdiscount):
        """len() tracks the number of registered stores."""
        assert len(registry) == 0

        registry.register(mock_amazon)
        assert len(registry) == 1

        registry.register(mock_cdiscount)
        assert len(registry) == 2

        registry.unregister("amazon")
        assert len(registry) == 1

    def test_repr(self, registry, mock_amazon, mock_cdiscount):
        """repr() mentions the class name and every registered store ID."""
        registry.register(mock_amazon)
        registry.register(mock_cdiscount)

        text = repr(registry)

        assert "StoreRegistry" in text
        assert "amazon" in text
        assert "cdiscount" in text
class TestRegistryGlobalFunctions:
    """Tests for the module-level helper functions of the registry module.

    These tests mutate the process-wide registry, so every store they register
    is removed in a ``finally`` clause: a failing assertion must not leak a
    test store into later tests.
    """

    def test_get_registry_singleton(self):
        """get_registry() always returns the same instance."""
        from pricewatch.app.core.registry import get_registry

        assert get_registry() is get_registry()

    def test_register_store_global(self):
        """register_store() registers into the global registry."""
        from pricewatch.app.core.registry import get_registry, register_store

        registry = get_registry()
        # The global registry may already hold stores; compare against a baseline.
        initial_count = len(registry)
        mock = MockStore(store_id="test_global", match_patterns={})
        try:
            register_store(mock)
            assert len(registry) == initial_count + 1
            assert "test_global" in registry.list_stores()
        finally:
            # Always restore the global registry, even when an assertion fails.
            registry.unregister("test_global")

    def test_detect_store_global(self):
        """detect_store() uses the global registry."""
        from pricewatch.app.core.registry import detect_store, get_registry, register_store

        registry = get_registry()
        mock = MockStore(store_id="test_detect", match_patterns={"testsite.com": 0.9})
        try:
            register_store(mock)
            store = detect_store("https://www.testsite.com/product")
            assert store is not None
            assert store.store_id == "test_detect"
        finally:
            # Always restore the global registry, even when an assertion fails.
            registry.unregister("test_detect")

331
tests/core/test_schema.py Executable file
View File

@@ -0,0 +1,331 @@
"""
Tests pour pricewatch.app.core.schema
Vérifie la validation Pydantic, la serialization JSON,
et les méthodes helper de ProductSnapshot.
"""
import json
from datetime import datetime
import pytest
from pydantic import ValidationError
from pricewatch.app.core.schema import (
DebugInfo,
DebugStatus,
FetchMethod,
ProductSnapshot,
StockStatus,
)
class TestEnums:
    """Tests for the schema enums."""

    def test_stock_status_values(self):
        """StockStatus members carry the expected string values."""
        expectations = [
            (StockStatus.IN_STOCK, "in_stock"),
            (StockStatus.OUT_OF_STOCK, "out_of_stock"),
            (StockStatus.UNKNOWN, "unknown"),
        ]
        for member, raw in expectations:
            assert member.value == raw

    def test_fetch_method_values(self):
        """FetchMethod members carry the expected string values."""
        expectations = [
            (FetchMethod.HTTP, "http"),
            (FetchMethod.PLAYWRIGHT, "playwright"),
        ]
        for member, raw in expectations:
            assert member.value == raw

    def test_debug_status_values(self):
        """DebugStatus members carry the expected string values."""
        expectations = [
            (DebugStatus.SUCCESS, "success"),
            (DebugStatus.PARTIAL, "partial"),
            (DebugStatus.FAILED, "failed"),
        ]
        for member, raw in expectations:
            assert member.value == raw
class TestDebugInfo:
    """Tests for the DebugInfo model."""

    def test_debug_info_creation(self):
        """A fully specified DebugInfo keeps the values it was given."""
        info = DebugInfo(
            method=FetchMethod.HTTP,
            status=DebugStatus.SUCCESS,
            duration_ms=1500,
            html_size_bytes=120000,
        )

        assert info.method == FetchMethod.HTTP
        assert info.status == DebugStatus.SUCCESS
        assert info.duration_ms == 1500
        assert info.html_size_bytes == 120000
        assert info.errors == []
        assert info.notes == []

    def test_debug_info_with_errors(self):
        """Errors and notes passed at construction are stored."""
        failures = ["403 Forbidden", "Captcha detected"]
        remarks = ["Fallback to Playwright triggered"]

        info = DebugInfo(
            method=FetchMethod.PLAYWRIGHT,
            status=DebugStatus.FAILED,
            errors=failures,
            notes=remarks,
        )

        assert len(info.errors) == 2
        assert "403 Forbidden" in info.errors
        assert len(info.notes) == 1

    def test_debug_info_defaults(self):
        """Optional fields default to empty lists or None."""
        info = DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS)

        assert info.errors == []
        assert info.notes == []
        assert info.duration_ms is None
        assert info.html_size_bytes is None
class TestProductSnapshot:
    """Tests for the ProductSnapshot model."""

    @staticmethod
    def _http_success() -> DebugInfo:
        """Build the DebugInfo shared by most snapshots in these tests."""
        return DebugInfo(method=FetchMethod.HTTP, status=DebugStatus.SUCCESS)

    @pytest.fixture
    def minimal_snapshot(self) -> ProductSnapshot:
        """Fixture: smallest valid ProductSnapshot."""
        return ProductSnapshot(
            source="amazon",
            url="https://www.amazon.fr/dp/B08N5WRWNW",
            debug=self._http_success(),
        )

    @pytest.fixture
    def complete_snapshot(self) -> ProductSnapshot:
        """Fixture: ProductSnapshot with every field populated."""
        debug = DebugInfo(
            method=FetchMethod.HTTP,
            status=DebugStatus.SUCCESS,
            duration_ms=1200,
            html_size_bytes=145000,
        )
        return ProductSnapshot(
            source="amazon",
            url="https://www.amazon.fr/dp/B08N5WRWNW",
            fetched_at=datetime(2026, 1, 13, 10, 30, 0),
            title="PlayStation 5",
            price=499.99,
            currency="EUR",
            shipping_cost=0.0,
            stock_status=StockStatus.IN_STOCK,
            reference="B08N5WRWNW",
            category="Jeux vidéo",
            images=[
                "https://example.com/image1.jpg",
                "https://example.com/image2.jpg",
            ],
            specs={
                "Marque": "Sony",
                "Couleur": "Blanc",
                "Poids": "4.5 kg",
            },
            debug=debug,
        )

    def test_create_minimal_snapshot(self, minimal_snapshot):
        """A minimal snapshot exposes its inputs and the documented defaults."""
        assert minimal_snapshot.source == "amazon"
        assert minimal_snapshot.url == "https://www.amazon.fr/dp/B08N5WRWNW"
        assert minimal_snapshot.title is None
        assert minimal_snapshot.price is None
        assert minimal_snapshot.currency == "EUR"  # Default
        assert minimal_snapshot.stock_status == StockStatus.UNKNOWN  # Default

    def test_create_complete_snapshot(self, complete_snapshot):
        """A complete snapshot keeps every provided value."""
        assert complete_snapshot.source == "amazon"
        assert complete_snapshot.title == "PlayStation 5"
        assert complete_snapshot.price == 499.99
        assert complete_snapshot.reference == "B08N5WRWNW"
        assert len(complete_snapshot.images) == 2
        assert len(complete_snapshot.specs) == 3

    def test_url_validation_empty(self):
        """An empty URL must be rejected."""
        with pytest.raises(ValidationError, match="URL cannot be empty"):
            ProductSnapshot(source="amazon", url="", debug=self._http_success())

    def test_url_validation_whitespace(self):
        """A whitespace-only URL must be rejected."""
        with pytest.raises(ValidationError, match="URL cannot be empty"):
            ProductSnapshot(source="amazon", url=" ", debug=self._http_success())

    def test_source_validation_empty(self):
        """An empty source must be rejected."""
        with pytest.raises(ValidationError, match="Source cannot be empty"):
            ProductSnapshot(
                source="",
                url="https://example.com",
                debug=self._http_success(),
            )

    def test_source_normalization(self):
        """The source is normalized to lowercase."""
        snap = ProductSnapshot(
            source="AMAZON",
            url="https://example.com",
            debug=self._http_success(),
        )

        assert snap.source == "amazon"

    def test_price_negative(self):
        """A negative price must be rejected."""
        with pytest.raises(ValidationError):
            ProductSnapshot(
                source="amazon",
                url="https://example.com",
                price=-10.0,
                debug=self._http_success(),
            )

    def test_shipping_cost_negative(self):
        """A negative shipping cost must be rejected."""
        with pytest.raises(ValidationError):
            ProductSnapshot(
                source="amazon",
                url="https://example.com",
                shipping_cost=-5.0,
                debug=self._http_success(),
            )

    def test_images_validation(self):
        """Empty or blank image URLs are filtered out."""
        raw_images = ["https://img1.jpg", "", " ", "https://img2.jpg"]

        snap = ProductSnapshot(
            source="amazon",
            url="https://example.com",
            images=raw_images,
            debug=self._http_success(),
        )

        assert len(snap.images) == 2
        assert "https://img1.jpg" in snap.images
        assert "https://img2.jpg" in snap.images

    def test_is_complete_with_title_and_price(self, complete_snapshot):
        """A snapshot with both title and price is complete."""
        assert complete_snapshot.is_complete() is True

    def test_is_complete_without_price(self, minimal_snapshot):
        """A snapshot lacking a price is not complete."""
        minimal_snapshot.title = "Test Product"

        assert minimal_snapshot.is_complete() is False

    def test_is_complete_without_title(self, minimal_snapshot):
        """A snapshot lacking a title is not complete."""
        minimal_snapshot.price = 99.99

        assert minimal_snapshot.is_complete() is False

    def test_is_complete_minimal(self, minimal_snapshot):
        """A minimal snapshot is not complete."""
        assert minimal_snapshot.is_complete() is False

    def test_add_error(self, minimal_snapshot):
        """add_error() appends to debug.errors."""
        for message in ("Test error 1", "Test error 2"):
            minimal_snapshot.add_error(message)

        assert len(minimal_snapshot.debug.errors) == 2
        assert "Test error 1" in minimal_snapshot.debug.errors

    def test_add_note(self, minimal_snapshot):
        """add_note() appends to debug.notes."""
        for message in ("Test note 1", "Test note 2"):
            minimal_snapshot.add_note(message)

        assert len(minimal_snapshot.debug.notes) == 2
        assert "Test note 1" in minimal_snapshot.debug.notes

    def test_to_dict(self, complete_snapshot):
        """to_dict() produces a plain dict with serialized values."""
        payload = complete_snapshot.to_dict()

        assert isinstance(payload, dict)
        assert payload["source"] == "amazon"
        assert payload["title"] == "PlayStation 5"
        assert payload["price"] == 499.99
        assert isinstance(payload["fetched_at"], str)  # ISO format
        assert payload["debug"]["method"] == "http"

    def test_to_json(self, complete_snapshot):
        """to_json() produces a valid JSON document."""
        encoded = complete_snapshot.to_json()
        assert isinstance(encoded, str)

        # The output must be parseable JSON.
        payload = json.loads(encoded)
        assert payload["source"] == "amazon"
        assert payload["title"] == "PlayStation 5"
        assert payload["price"] == 499.99

    def test_from_json(self, complete_snapshot):
        """from_json() restores a snapshot serialized with to_json()."""
        restored = ProductSnapshot.from_json(complete_snapshot.to_json())

        assert restored.source == complete_snapshot.source
        assert restored.title == complete_snapshot.title
        assert restored.price == complete_snapshot.price
        assert restored.reference == complete_snapshot.reference

    def test_to_dict_and_from_json_roundtrip(self, complete_snapshot):
        """Full roundtrip dict -> JSON -> ProductSnapshot."""
        encoded = json.dumps(complete_snapshot.to_dict())

        restored = ProductSnapshot.from_json(encoded)

        assert restored.source == complete_snapshot.source
        assert restored.title == complete_snapshot.title
        assert restored.price == complete_snapshot.price

    def test_enum_serialization(self):
        """Enums are serialized as their string values."""
        snap = ProductSnapshot(
            source="amazon",
            url="https://example.com",
            stock_status=StockStatus.IN_STOCK,
            debug=DebugInfo(method=FetchMethod.PLAYWRIGHT, status=DebugStatus.PARTIAL),
        )

        payload = snap.to_dict()

        assert payload["stock_status"] == "in_stock"
        assert payload["debug"]["method"] == "playwright"
        assert payload["debug"]["status"] == "partial"

    def test_fetched_at_default(self):
        """fetched_at receives a default datetime value."""
        snap = ProductSnapshot(
            source="amazon",
            url="https://example.com",
            debug=self._http_success(),
        )

        assert snap.fetched_at is not None
        assert isinstance(snap.fetched_at, datetime)

    def test_specs_default(self):
        """specs defaults to an empty dict."""
        snap = ProductSnapshot(
            source="amazon",
            url="https://example.com",
            debug=self._http_success(),
        )

        assert snap.specs == {}
        assert isinstance(snap.specs, dict)

    def test_images_default(self):
        """images defaults to an empty list."""
        snap = ProductSnapshot(
            source="amazon",
            url="https://example.com",
            debug=self._http_success(),
        )

        assert snap.images == []
        assert isinstance(snap.images, list)

271
tests/stores/test_aliexpress.py Executable file
View File

@@ -0,0 +1,271 @@
#!/usr/bin/env python3
"""Tests pour le store AliExpress."""
import pytest
from pathlib import Path
from pricewatch.app.stores.aliexpress.store import AliexpressStore
class TestAliexpressStore:
    """Tests for AliexpressStore."""

    @pytest.fixture
    def store(self):
        """Fixture: AliExpress store instance."""
        return AliexpressStore()

    # ========== match() tests ==========

    def test_match_aliexpress_com_product(self, store):
        """An aliexpress.com/item/ URL is recognized as a product."""
        url = "https://www.aliexpress.com/item/1005007187023722.html"
        score = store.match(url)
        assert score == 0.9

    def test_match_aliexpress_fr_product(self, store):
        """A fr.aliexpress.com/item/ URL is recognized as a product."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        score = store.match(url)
        assert score == 0.9

    def test_match_aliexpress_non_product(self, store):
        """An aliexpress.com URL without /item/ gets a reduced score."""
        url = "https://www.aliexpress.com/category/electronics"
        score = store.match(url)
        assert score == 0.5

    def test_match_other_site(self, store):
        """Other sites (and the empty URL) are not recognized."""
        # The original list also carried a None entry that the loop skipped,
        # so it was never exercised; it is dropped rather than left as
        # misleading dead data. The set of URLs actually checked is unchanged.
        urls = [
            "https://www.amazon.fr/dp/ASIN",
            "https://www.cdiscount.com/f-123-abc.html",
            "",
        ]
        for url in urls:
            assert store.match(url) == 0.0

    def test_match_case_insensitive(self, store):
        """Matching is case-insensitive."""
        url = "https://FR.ALIEXPRESS.COM/ITEM/1234567890.HTML"
        score = store.match(url)
        assert score == 0.9

    # ========== canonicalize() tests ==========

    def test_canonicalize_remove_query_params(self, store):
        """canonicalize() strips query parameters."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html?spm=a2g0o.detail.0.0"
        canonical = store.canonicalize(url)
        assert canonical == "https://fr.aliexpress.com/item/1005007187023722.html"

    def test_canonicalize_remove_fragment(self, store):
        """canonicalize() strips the fragment (#)."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html#reviews"
        canonical = store.canonicalize(url)
        assert canonical == "https://fr.aliexpress.com/item/1005007187023722.html"

    def test_canonicalize_keep_item_path(self, store):
        """canonicalize() keeps the /item/{ID}.html path."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        canonical = store.canonicalize(url)
        assert canonical == "https://fr.aliexpress.com/item/1005007187023722.html"

    def test_canonicalize_empty_url(self, store):
        """canonicalize() returns empty/None input unchanged."""
        assert store.canonicalize("") == ""
        assert store.canonicalize(None) is None

    # ========== extract_reference() tests ==========

    def test_extract_reference_standard_format(self, store):
        """The SKU is extracted from the standard /item/{ID}.html format."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        ref = store.extract_reference(url)
        assert ref == "1005007187023722"

    def test_extract_reference_with_query_params(self, store):
        """SKU extraction ignores query parameters."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html?param=value"
        ref = store.extract_reference(url)
        assert ref == "1005007187023722"

    def test_extract_reference_different_domain(self, store):
        """SKU extraction works across AliExpress domains."""
        url = "https://www.aliexpress.com/item/9876543210987.html"
        ref = store.extract_reference(url)
        assert ref == "9876543210987"

    def test_extract_reference_invalid_url(self, store):
        """SKU extraction returns None for URLs without an item ID."""
        urls = [
            "https://www.aliexpress.com/category/electronics",
            "https://www.aliexpress.com/",
            "",
            None,
        ]
        for url in urls:
            assert store.extract_reference(url) is None

    # ========== parse() tests ==========

    def test_parse_basic_html_with_title(self, store):
        """Basic HTML with an h1: the title comes from the h1."""
        html = """
<html>
<head>
<meta property="og:title" content="Samsung DDR4 RAM - AliExpress">
</head>
<body>
<h1>Samsung DDR4 RAM Server Memory</h1>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        snapshot = store.parse(html, url)
        assert snapshot.source == "aliexpress"
        assert snapshot.url == "https://fr.aliexpress.com/item/1005007187023722.html"
        assert snapshot.title == "Samsung DDR4 RAM Server Memory"
        assert snapshot.reference == "1005007187023722"
        assert snapshot.currency == "EUR"  # fr.aliexpress -> EUR

    def test_parse_title_from_meta_og(self, store):
        """Without an h1, the title comes from og:title."""
        html = """
<html>
<head>
<meta property="og:title" content="Product Name - AliExpress">
</head>
<body>
</body>
</html>
"""
        url = "https://www.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.title == "Product Name"  # "- AliExpress" suffix removed
        assert snapshot.currency == "USD"  # .com -> USD

    def test_parse_price_from_regex(self, store):
        """The price is extracted from the HTML text (euro sign after amount)."""
        html = """
<html>
<head>
<meta property="og:title" content="Test Product - AliExpress">
</head>
<body>
<h1>Test Product</h1>
<div class="price-container">
<span>Prix: 99,99 €</span>
</div>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.price == 99.99
        assert snapshot.currency == "EUR"

    def test_parse_price_euro_before(self, store):
        """The price is extracted when the euro sign precedes the amount."""
        html = """
<html>
<head><meta property="og:title" content="Test - AliExpress"></head>
<body>
<h1>Test</h1>
<span>€ 125.50</span>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.price == 125.50

    def test_parse_images_from_dcdata(self, store):
        """Images are extracted from window._d_c_.DCData."""
        html = """
<html>
<head><meta property="og:title" content="Test - AliExpress"></head>
<body>
<h1>Test</h1>
<script>
window._d_c_ = window._d_c_ || {};
window._d_c_.DCData = {
"imagePathList": [
"https://ae01.alicdn.com/kf/image1.jpg",
"https://ae01.alicdn.com/kf/image2.jpg"
]
};
</script>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert len(snapshot.images) == 2
        assert snapshot.images[0] == "https://ae01.alicdn.com/kf/image1.jpg"
        assert snapshot.images[1] == "https://ae01.alicdn.com/kf/image2.jpg"
        assert any("DCData" in note for note in snapshot.debug.notes)

    def test_parse_images_from_og_fallback(self, store):
        """Images fall back to og:image."""
        html = """
<html>
<head>
<meta property="og:title" content="Test - AliExpress">
<meta property="og:image" content="https://ae01.alicdn.com/kf/product.jpg">
</head>
<body>
<h1>Test</h1>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert len(snapshot.images) == 1
        assert snapshot.images[0] == "https://ae01.alicdn.com/kf/product.jpg"

    def test_parse_missing_title_and_price(self, store):
        """Missing title and price yield a PARTIAL status."""
        html = "<html><body><p>Empty content</p></body></html>"
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.title is None
        assert snapshot.price is None
        assert not snapshot.is_complete()
        assert snapshot.debug.status == "partial"

    def test_parse_small_html_warning(self, store):
        """A small HTML document produces a warning note."""
        html = "<html><head><title>Test</title></head><body></body></html>"
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        # HTML < 200KB is expected to produce a note.
        assert any("non rendu" in note.lower() for note in snapshot.debug.notes)

    def test_parse_stock_status_in_stock(self, store):
        """in_stock is detected from the add-to-cart button."""
        html = """
<html>
<head><meta property="og:title" content="Test - AliExpress"></head>
<body>
<h1>Test</h1>
<button class="add-to-cart-btn">Add to Cart</button>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.stock_status == "in_stock"

View File

@@ -0,0 +1,184 @@
#!/usr/bin/env python3
"""Tests fixtures réelles pour le store AliExpress."""
import pytest
from pathlib import Path
from pricewatch.app.stores.aliexpress.store import AliexpressStore
class TestAliexpressFixtures:
"""Tests avec fixtures HTML réelles d'AliExpress."""
@pytest.fixture
def store(self):
"""Fixture du store AliExpress."""
return AliexpressStore()
@pytest.fixture
def fixture_samsung_ram(self):
"""Fixture HTML Samsung DDR4 RAM."""
fixture_path = (
Path(__file__).parent.parent.parent
/ "pricewatch/app/stores/aliexpress/fixtures/aliexpress_1005007187023722.html"
)
with open(fixture_path, "r", encoding="utf-8") as f:
return f.read()
# ========== Tests de parsing complet ==========
def test_parse_samsung_ram_complete(self, store, fixture_samsung_ram):
"""Parse fixture Samsung RAM - doit extraire toutes les données essentielles."""
url = "https://fr.aliexpress.com/item/1005007187023722.html"
snapshot = store.parse(fixture_samsung_ram, url)
# Identité
assert snapshot.source == "aliexpress"
assert snapshot.url == "https://fr.aliexpress.com/item/1005007187023722.html"
assert snapshot.reference == "1005007187023722"
# Contenu essentiel
assert snapshot.title is not None
assert "Samsung" in snapshot.title
assert "DDR4" in snapshot.title
assert snapshot.price is not None
assert snapshot.price > 0
assert snapshot.currency == "EUR"
# Complet
assert snapshot.is_complete()
def test_parse_samsung_ram_title(self, store, fixture_samsung_ram):
"""Parse fixture - vérifier le titre exact."""
url = "https://fr.aliexpress.com/item/1005007187023722.html"
snapshot = store.parse(fixture_samsung_ram, url)
assert snapshot.title.startswith("Samsung serveur DDR4")
assert "RAM" in snapshot.title
assert len(snapshot.title) > 20
def test_parse_samsung_ram_price(self, store, fixture_samsung_ram):
"""Parse fixture - vérifier le prix."""
url = "https://fr.aliexpress.com/item/1005007187023722.html"
snapshot = store.parse(fixture_samsung_ram, url)
# Prix extrait par regex
assert snapshot.price == 136.69
assert snapshot.currency == "EUR"
def test_parse_samsung_ram_reference(self, store, fixture_samsung_ram):
"""Parse fixture - vérifier la référence (SKU)."""
url = "https://fr.aliexpress.com/item/1005007187023722.html"
snapshot = store.parse(fixture_samsung_ram, url)
assert snapshot.reference == "1005007187023722"
assert len(snapshot.reference) == 16 # ID long (13 chiffres)
def test_parse_samsung_ram_images(self, store, fixture_samsung_ram):
"""Parse fixture - vérifier les images."""
url = "https://fr.aliexpress.com/item/1005007187023722.html"
snapshot = store.parse(fixture_samsung_ram, url)
assert len(snapshot.images) >= 6
# Vérifier que les URLs sont valides
for img_url in snapshot.images:
assert img_url.startswith("http")
assert "alicdn.com" in img_url
def test_parse_samsung_ram_stock(self, store, fixture_samsung_ram):
"""Parse fixture - vérifier le stock."""
url = "https://fr.aliexpress.com/item/1005007187023722.html"
snapshot = store.parse(fixture_samsung_ram, url)
# Devrait être in_stock (bouton "add to cart" présent)
assert snapshot.stock_status == "in_stock"
def test_parse_samsung_ram_debug_success(self, store, fixture_samsung_ram):
"""Parse fixture - vérifier les infos de debug."""
url = "https://fr.aliexpress.com/item/1005007187023722.html"
snapshot = store.parse(fixture_samsung_ram, url)
assert snapshot.debug.status == "success"
assert len(snapshot.debug.errors) == 0
# Devrait avoir une note sur les images DCData
assert any("DCData" in note for note in snapshot.debug.notes)
# ========== Tests de robustesse ==========
def test_parse_with_different_urls(self, store, fixture_samsung_ram):
"""Parse fixture fonctionne avec différentes formes d'URL."""
urls = [
"https://fr.aliexpress.com/item/1005007187023722.html",
"https://fr.aliexpress.com/item/1005007187023722.html?spm=a2g0o.detail",
"https://fr.aliexpress.com/item/1005007187023722.html#reviews",
]
for url in urls:
snapshot = store.parse(fixture_samsung_ram, url)
assert "Samsung" in snapshot.title
assert snapshot.price == 136.69
# URL canonicalisée (sans query params ni fragment)
assert (
snapshot.url == "https://fr.aliexpress.com/item/1005007187023722.html"
)
def test_parse_extracts_images_from_dcdata(self, store, fixture_samsung_ram):
"""Parse fixture extrait les images depuis DCData JSON."""
url = "https://fr.aliexpress.com/item/1005007187023722.html"
snapshot = store.parse(fixture_samsung_ram, url)
# Les images doivent venir de DCData
assert len(snapshot.images) == 6
assert all("alicdn.com" in img for img in snapshot.images)
# Debug note sur DCData
assert any("DCData" in note for note in snapshot.debug.notes)
def test_parse_no_errors(self, store, fixture_samsung_ram):
"""Parse fixture ne génère pas d'erreurs."""
url = "https://fr.aliexpress.com/item/1005007187023722.html"
snapshot = store.parse(fixture_samsung_ram, url)
assert len(snapshot.debug.errors) == 0
# ========== Tests comparatifs ==========
def test_parse_consistent_results(self, store, fixture_samsung_ram):
    """Parsing the same fixture twice gives the same results."""
    product_url = "https://fr.aliexpress.com/item/1005007187023722.html"
    first = store.parse(fixture_samsung_ram, product_url)
    second = store.parse(fixture_samsung_ram, product_url)
    # Everything compared here must match between runs (fetched_at is not compared).
    for field_name in ("title", "price", "currency", "reference", "images"):
        assert getattr(first, field_name) == getattr(second, field_name)
    assert first.is_complete() == second.is_complete()
def test_parse_json_export(self, store, fixture_samsung_ram):
    """Parsing then exporting the snapshot to a dict works without error."""
    parsed = store.parse(
        fixture_samsung_ram,
        "https://fr.aliexpress.com/item/1005007187023722.html",
    )
    # Export to a plain dict and check the main fields.
    exported = parsed.to_dict()
    assert exported["source"] == "aliexpress"
    assert "Samsung" in exported["title"]
    assert exported["price"] == 136.69
    assert exported["currency"] == "EUR"
    assert exported["reference"] == "1005007187023722"
    assert len(exported["images"]) >= 6
    assert "debug" in exported
def test_parse_html_size_adequate(self, store, fixture_samsung_ram):
    """The fixture HTML is large enough to count as a fully rendered page."""
    parsed = store.parse(
        fixture_samsung_ram,
        "https://fr.aliexpress.com/item/1005007187023722.html",
    )
    # Per the original test notes: HTML > 200KB means a complete render,
    # so no debug note about short / non-rendered HTML is expected.
    assert all("non rendu" not in note.lower() for note in parsed.debug.notes)

386
tests/stores/test_amazon.py Executable file
View File

@@ -0,0 +1,386 @@
"""
Tests pour pricewatch.app.stores.amazon.store
Vérifie match(), canonicalize(), extract_reference() et parse()
pour le store Amazon.
"""
import pytest
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.core.schema import DebugStatus, StockStatus
class TestAmazonMatch:
    """Tests for the Amazon store's match() method."""

    @pytest.fixture
    def store(self) -> AmazonStore:
        """Fixture: a new AmazonStore instance."""
        return AmazonStore()

    def test_match_amazon_fr(self, store):
        """An amazon.fr URL must score 0.9."""
        assert store.match("https://www.amazon.fr/dp/B08N5WRWNW") == 0.9

    def test_match_amazon_com(self, store):
        """An amazon.com URL must score 0.8."""
        assert store.match("https://www.amazon.com/dp/B08N5WRWNW") == 0.8

    def test_match_amazon_co_uk(self, store):
        """An amazon.co.uk URL must score 0.8."""
        assert store.match("https://www.amazon.co.uk/dp/B08N5WRWNW") == 0.8

    def test_match_amazon_de(self, store):
        """An amazon.de URL must score 0.7."""
        assert store.match("https://www.amazon.de/dp/B08N5WRWNW") == 0.7

    def test_match_non_amazon(self, store):
        """A non-Amazon URL must score 0.0."""
        assert store.match("https://www.cdiscount.com/product/123") == 0.0

    def test_match_empty_url(self, store):
        """An empty URL must score 0.0."""
        assert store.match("") == 0.0

    def test_match_case_insensitive(self, store):
        """Matching must ignore the letter case of the URL."""
        assert store.match("https://www.AMAZON.FR/dp/B08N5WRWNW") == 0.9
class TestAmazonCanonicalize:
    """Tests for the Amazon store's canonicalize() method."""

    @pytest.fixture
    def store(self) -> AmazonStore:
        """Fixture: a new AmazonStore instance."""
        return AmazonStore()

    def test_canonicalize_with_product_name(self, store):
        """A URL carrying a product-name slug is normalised."""
        result = store.canonicalize(
            "https://www.amazon.fr/Product-Name-Here/dp/B08N5WRWNW/ref=sr_1_1"
        )
        assert result == "https://www.amazon.fr/dp/B08N5WRWNW"

    def test_canonicalize_already_canonical(self, store):
        """An already canonical URL is returned unchanged."""
        result = store.canonicalize("https://www.amazon.fr/dp/B08N5WRWNW")
        assert result == "https://www.amazon.fr/dp/B08N5WRWNW"

    def test_canonicalize_with_query_params(self, store):
        """A URL with query parameters is normalised."""
        result = store.canonicalize(
            "https://www.amazon.fr/dp/B08N5WRWNW?ref=abc&tag=xyz"
        )
        assert result == "https://www.amazon.fr/dp/B08N5WRWNW"

    def test_canonicalize_gp_product(self, store):
        """A /gp/product/ URL is normalised to the /dp/ form."""
        result = store.canonicalize("https://www.amazon.fr/gp/product/B08N5WRWNW")
        assert result == "https://www.amazon.fr/dp/B08N5WRWNW"

    def test_canonicalize_no_asin(self, store):
        """A URL without an ASIN comes back cleaned of its query string."""
        result = store.canonicalize("https://www.amazon.fr/some-page?ref=abc")
        assert result == "https://www.amazon.fr/some-page"
        assert "?" not in result

    def test_canonicalize_empty_url(self, store):
        """An empty URL yields an empty URL."""
        assert store.canonicalize("") == ""

    def test_canonicalize_preserves_domain(self, store):
        """The domain of the input URL must be kept."""
        expected_by_input = {
            "https://www.amazon.fr/dp/B08N5WRWNW/ref=123": "https://www.amazon.fr/dp/B08N5WRWNW",
            "https://www.amazon.com/dp/B08N5WRWNW/ref=123": "https://www.amazon.com/dp/B08N5WRWNW",
        }
        for raw_url, expected in expected_by_input.items():
            assert store.canonicalize(raw_url) == expected
class TestAmazonExtractReference:
    """Tests for the Amazon store's extract_reference() method."""

    @pytest.fixture
    def store(self) -> AmazonStore:
        """Fixture: a new AmazonStore instance."""
        return AmazonStore()

    def test_extract_reference_dp(self, store):
        """The ASIN is extracted from a /dp/ URL."""
        reference = store.extract_reference("https://www.amazon.fr/dp/B08N5WRWNW")
        assert reference == "B08N5WRWNW"

    def test_extract_reference_dp_with_path(self, store):
        """The ASIN is extracted from a /dp/ URL surrounded by extra path parts."""
        reference = store.extract_reference(
            "https://www.amazon.fr/Product-Name/dp/B08N5WRWNW/ref=sr_1_1"
        )
        assert reference == "B08N5WRWNW"

    def test_extract_reference_gp_product(self, store):
        """The ASIN is extracted from a /gp/product/ URL."""
        reference = store.extract_reference(
            "https://www.amazon.fr/gp/product/B08N5WRWNW"
        )
        assert reference == "B08N5WRWNW"

    def test_extract_reference_invalid_url(self, store):
        """A URL without an ASIN yields None."""
        assert store.extract_reference("https://www.amazon.fr/some-page") is None

    def test_extract_reference_empty_url(self, store):
        """An empty URL yields None."""
        assert store.extract_reference("") is None

    def test_extract_reference_asin_format(self, store):
        """The ASIN must be exactly 10 alphanumeric characters."""
        # Valid ASIN: 10 characters.
        assert store.extract_reference("https://www.amazon.fr/dp/B08N5WRWNW") == "B08N5WRWNW"
        # Invalid ASIN: too short.
        assert store.extract_reference("https://www.amazon.fr/dp/B08N5") is None
        # Invalid ASIN: too long.
        assert store.extract_reference("https://www.amazon.fr/dp/B08N5WRWNW123") is None
class TestAmazonParse:
    """Tests for the Amazon store's parse() method.

    The HTML literals below are reproduced byte-for-byte, which is why their
    lines start at column 0 instead of following the code indentation.
    """

    @pytest.fixture
    def store(self) -> AmazonStore:
        """Fixture: a new AmazonStore instance."""
        return AmazonStore()

    @pytest.fixture
    def minimal_html(self) -> str:
        """Fixture: minimal Amazon HTML with a title and a price."""
        page = """
<html>
<head><title>Test Product</title></head>
<body>
<span id="productTitle">Test Amazon Product</span>
<span class="a-price-whole">299,99 €</span>
<span class="a-price-symbol">€</span>
<div id="availability">
<span class="a-size-medium a-color-success">En stock</span>
</div>
</body>
</html>
"""
        return page

    @pytest.fixture
    def complete_html(self) -> str:
        """Fixture: full Amazon HTML (title, price, stock, ASIN, breadcrumbs, images, specs)."""
        page = """
<html>
<head><title>Test Product</title></head>
<body>
<span id="productTitle">PlayStation 5 Console</span>
<span class="a-price-whole">499,99 €</span>
<span class="a-price-symbol">€</span>
<div id="availability">
<span class="a-size-medium a-color-success">En stock</span>
</div>
<input type="hidden" name="ASIN" value="B08N5WRWNW" />
<div id="wayfinding-breadcrumbs_feature_div">
<ul>
<li><a>Accueil</a></li>
<li><a>High-Tech</a></li>
<li><a>Jeux vidéo</a></li>
</ul>
</div>
<img src="https://m.media-amazon.com/images/I/image1.jpg" />
<img src="https://m.media-amazon.com/images/I/image2.jpg" />
<table id="productDetails_techSpec_section_1">
<tr>
<th>Marque</th>
<td>Sony</td>
</tr>
<tr>
<th>Couleur</th>
<td>Blanc</td>
</tr>
</table>
</body>
</html>
"""
        return page

    @pytest.fixture
    def captcha_html(self) -> str:
        """Fixture: HTML of a captcha page."""
        page = """
<html>
<body>
<div>
<p>Sorry, we just need to make sure you're not a robot.</p>
<form action="/captcha">
<input type="text" name="captcha" />
</form>
</div>
</body>
</html>
"""
        return page

    @pytest.fixture
    def out_of_stock_html(self) -> str:
        """Fixture: HTML of a product that is out of stock."""
        page = """
<html>
<body>
<span id="productTitle">Out of Stock Product</span>
<span class="a-price-whole">199,99 €</span>
<div id="availability">
<span class="a-size-medium a-color-price">Actuellement indisponible</span>
</div>
</body>
</html>
"""
        return page

    def test_parse_minimal_html(self, store, minimal_html):
        """Minimal HTML with title and price parses into a complete snapshot."""
        snap = store.parse(minimal_html, "https://www.amazon.fr/dp/B08N5WRWNW")
        assert snap.source == "amazon"
        assert snap.url == "https://www.amazon.fr/dp/B08N5WRWNW"
        assert snap.title == "Test Amazon Product"
        assert snap.price == 299.99
        assert snap.currency == "EUR"
        assert snap.stock_status == StockStatus.IN_STOCK
        assert snap.is_complete() is True

    def test_parse_complete_html(self, store, complete_html):
        """Full HTML parses with every piece of data extracted."""
        snap = store.parse(complete_html, "https://www.amazon.fr/ps5/dp/B08N5WRWNW")
        assert snap.title == "PlayStation 5 Console"
        assert snap.price == 499.99
        assert snap.reference == "B08N5WRWNW"
        assert snap.stock_status == StockStatus.IN_STOCK
        assert snap.category == "Jeux vidéo"
        assert len(snap.images) >= 2
        assert "Marque" in snap.specs
        assert snap.specs["Marque"] == "Sony"
        assert snap.is_complete() is True
        assert snap.debug.status == DebugStatus.SUCCESS

    def test_parse_captcha_html(self, store, captcha_html):
        """A captcha page must be reported as a failure mentioning the captcha."""
        snap = store.parse(captcha_html, "https://www.amazon.fr/dp/B08N5WRWNW")
        assert snap.debug.status == DebugStatus.FAILED
        assert any("captcha" in message.lower() for message in snap.debug.errors)
        assert snap.is_complete() is False

    def test_parse_out_of_stock(self, store, out_of_stock_html):
        """An out-of-stock product is parsed with the matching stock status."""
        snap = store.parse(out_of_stock_html, "https://www.amazon.fr/dp/B08N5WRWNW")
        assert snap.title == "Out of Stock Product"
        assert snap.price == 199.99
        assert snap.stock_status == StockStatus.OUT_OF_STOCK

    def test_parse_empty_html(self, store):
        """Empty HTML must produce a partial snapshot."""
        snap = store.parse(
            "<html><body></body></html>",
            "https://www.amazon.fr/dp/B08N5WRWNW",
        )
        assert snap.source == "amazon"
        assert snap.title is None
        assert snap.price is None
        assert snap.is_complete() is False
        assert snap.debug.status == DebugStatus.PARTIAL

    def test_parse_canonicalizes_url(self, store, minimal_html):
        """parse() must store the canonical form of the URL."""
        snap = store.parse(
            minimal_html,
            "https://www.amazon.fr/Product-Name/dp/B08N5WRWNW/ref=sr_1_1?tag=xyz",
        )
        assert snap.url == "https://www.amazon.fr/dp/B08N5WRWNW"

    def test_parse_extracts_reference_from_url(self, store, minimal_html):
        """parse() must take the ASIN from the URL."""
        snap = store.parse(
            minimal_html,
            "https://www.amazon.fr/Product-Name/dp/B08N5WRWNW",
        )
        assert snap.reference == "B08N5WRWNW"

    def test_parse_sets_fetched_at(self, store, minimal_html):
        """parse() must set fetched_at."""
        snap = store.parse(minimal_html, "https://www.amazon.fr/dp/B08N5WRWNW")
        assert snap.fetched_at is not None

    def test_parse_partial_status_without_title(self, store):
        """Parsing a page without a title must give status PARTIAL."""
        price_only_page = """
<html><body>
<span class="a-price-whole">299</span>
<span class="a-price-fraction">99</span>
</body></html>
"""
        snap = store.parse(price_only_page, "https://www.amazon.fr/dp/B08N5WRWNW")
        assert snap.debug.status == DebugStatus.PARTIAL
        assert snap.title is None
        assert snap.price == 299.99

    def test_parse_partial_status_without_price(self, store):
        """Parsing a page without a price must give status PARTIAL."""
        title_only_page = """
<html><body>
<span id="productTitle">Test Product</span>
</body></html>
"""
        snap = store.parse(title_only_page, "https://www.amazon.fr/dp/B08N5WRWNW")
        assert snap.debug.status == DebugStatus.PARTIAL
        assert snap.title == "Test Product"
        assert snap.price is None
class TestAmazonStoreInit:
    """Tests for the initialisation of the Amazon store."""

    def test_store_id(self):
        """The store_id must be 'amazon'."""
        assert AmazonStore().store_id == "amazon"

    def test_selectors_loaded(self):
        """Selectors must be loaded (from selectors.yml, per the original test notes)."""
        selectors = AmazonStore().selectors
        # The selectors are exposed as a dict ...
        assert isinstance(selectors, dict)
        # ... and at least one of them must have been loaded.
        assert len(selectors) > 0

    def test_repr(self):
        """The string representation names both the class and the store id."""
        text = repr(AmazonStore())
        assert "AmazonStore" in text
        assert "amazon" in text

View File

@@ -0,0 +1,198 @@
"""
Tests pour pricewatch.app.stores.amazon.store avec fixtures HTML réels.
Teste le parsing de vraies pages HTML capturées depuis Amazon.fr.
"""
import pytest
from pathlib import Path
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.core.schema import DebugStatus, StockStatus
class TestAmazonRealFixtures:
    """Tests using real HTML fixtures captured from Amazon."""

    @staticmethod
    def _read_fixture(relative_path: str) -> str:
        """Read a UTF-8 text file located three directory levels above this module."""
        base_dir = Path(__file__).parent.parent.parent
        return (base_dir / relative_path).read_text(encoding="utf-8")

    @pytest.fixture
    def store(self) -> AmazonStore:
        """Fixture: a new AmazonStore instance."""
        return AmazonStore()

    @pytest.fixture
    def fixture_b0d4dx8ph3(self) -> str:
        """Fixture: Amazon HTML for B0D4DX8PH3 (UGREEN Uno Qi2 induction charger)."""
        return self._read_fixture(
            "pricewatch/app/stores/amazon/fixtures/amazon_B0D4DX8PH3.html"
        )

    @pytest.fixture
    def fixture_b0f6mwnj6j(self) -> str:
        """Fixture: Amazon HTML for B0F6MWNJ6J (Baseus Docking Station)."""
        return self._read_fixture(
            "pricewatch/app/stores/amazon/fixtures/amazon_B0F6MWNJ6J.html"
        )

    @pytest.fixture
    def fixture_captcha(self) -> str:
        """Fixture: HTML of an Amazon captcha page."""
        return self._read_fixture(
            "pricewatch/app/stores/amazon/fixtures/captcha.html"
        )

    def test_parse_b0d4dx8ph3_complete(self, store, fixture_b0d4dx8ph3):
        """Fixture B0D4DX8PH3: every essential field must be extracted."""
        snap = store.parse(fixture_b0d4dx8ph3, "https://www.amazon.fr/dp/B0D4DX8PH3")
        # Metadata
        assert snap.source == "amazon"
        assert snap.url == "https://www.amazon.fr/dp/B0D4DX8PH3"
        assert snap.reference == "B0D4DX8PH3"
        assert snap.fetched_at is not None
        # Title (must contain "UGREEN" or similar)
        assert snap.title is not None
        assert len(snap.title) > 0
        assert "UGREEN" in snap.title.upper() or "Chargeur" in snap.title
        # Price
        assert snap.price is not None
        assert snap.price > 0
        assert snap.currency == "EUR"
        # Status (success, or partial when the parsing is incomplete)
        assert snap.debug.status in (DebugStatus.SUCCESS, DebugStatus.PARTIAL)

    def test_parse_b0f6mwnj6j_complete(self, store, fixture_b0f6mwnj6j):
        """Fixture B0F6MWNJ6J: every essential field must be extracted."""
        snap = store.parse(fixture_b0f6mwnj6j, "https://www.amazon.fr/dp/B0F6MWNJ6J")
        # Metadata
        assert snap.source == "amazon"
        assert snap.url == "https://www.amazon.fr/dp/B0F6MWNJ6J"
        assert snap.reference == "B0F6MWNJ6J"
        # Title
        assert snap.title is not None
        assert "Baseus" in snap.title or "Docking Station" in snap.title
        # Price
        assert snap.price is not None
        assert snap.price > 0
        assert snap.currency == "EUR"
        # Status
        assert snap.debug.status in (DebugStatus.SUCCESS, DebugStatus.PARTIAL)

    def test_parse_b0d4dx8ph3_images(self, store, fixture_b0d4dx8ph3):
        """Fixture B0D4DX8PH3: at least one image must be extracted."""
        snap = store.parse(fixture_b0d4dx8ph3, "https://www.amazon.fr/dp/B0D4DX8PH3")
        # There must be at least one image.
        assert len(snap.images) > 0
        # Each image must look like an Amazon-hosted URL.
        for image_url in snap.images:
            assert image_url.startswith("http")
            assert "amazon" in image_url.lower()

    def test_parse_b0f6mwnj6j_specs(self, store, fixture_b0f6mwnj6j):
        """Fixture B0F6MWNJ6J: specs, when extracted, must be a non-empty dict."""
        snap = store.parse(fixture_b0f6mwnj6j, "https://www.amazon.fr/dp/B0F6MWNJ6J")
        # Nothing to verify when no specs were extracted.
        if not snap.specs:
            return
        assert isinstance(snap.specs, dict)
        assert len(snap.specs) > 0

    def test_parse_b0d4dx8ph3_category(self, store, fixture_b0d4dx8ph3):
        """Fixture B0D4DX8PH3: the category, when extracted, must be non-empty."""
        snap = store.parse(fixture_b0d4dx8ph3, "https://www.amazon.fr/dp/B0D4DX8PH3")
        # Nothing to verify when no category was extracted.
        if not snap.category:
            return
        assert len(snap.category) > 0

    def test_parse_captcha_fixture(self, store, fixture_captcha):
        """Captcha fixture: the captcha must be detected and reported as an error."""
        snap = store.parse(fixture_captcha, "https://www.amazon.fr/dp/B0DFWRHZ7L")
        # Parsing must end with status FAILED or PARTIAL.
        assert snap.debug.status in (DebugStatus.FAILED, DebugStatus.PARTIAL)
        # At least one error must mention the captcha.
        assert len(snap.debug.errors) > 0
        assert any("captcha" in message.lower() for message in snap.debug.errors)
        # No product data may be extracted.
        assert snap.title is None
        assert snap.price is None
        assert snap.is_complete() is False

    def test_parse_b0d4dx8ph3_stock_status(self, store, fixture_b0d4dx8ph3):
        """Fixture B0D4DX8PH3: a stock status must be set."""
        snap = store.parse(fixture_b0d4dx8ph3, "https://www.amazon.fr/dp/B0D4DX8PH3")
        # The status must be one of in_stock, out_of_stock or unknown.
        assert snap.stock_status in (
            StockStatus.IN_STOCK,
            StockStatus.OUT_OF_STOCK,
            StockStatus.UNKNOWN,
        )

    def test_parse_b0f6mwnj6j_stock_status(self, store, fixture_b0f6mwnj6j):
        """Fixture B0F6MWNJ6J: a stock status must be set."""
        snap = store.parse(fixture_b0f6mwnj6j, "https://www.amazon.fr/dp/B0F6MWNJ6J")
        # The status must be one of in_stock, out_of_stock or unknown.
        assert snap.stock_status in (
            StockStatus.IN_STOCK,
            StockStatus.OUT_OF_STOCK,
            StockStatus.UNKNOWN,
        )

    def test_parse_b0d4dx8ph3_completeness(self, store, fixture_b0d4dx8ph3):
        """Fixture B0D4DX8PH3: is_complete() must agree with title/price presence."""
        snap = store.parse(fixture_b0d4dx8ph3, "https://www.amazon.fr/dp/B0D4DX8PH3")
        # is_complete() must be True exactly when title AND price are both truthy.
        expected = bool(snap.title and snap.price)
        assert snap.is_complete() is expected

    def test_parse_b0f6mwnj6j_completeness(self, store, fixture_b0f6mwnj6j):
        """Fixture B0F6MWNJ6J: is_complete() must agree with title/price presence."""
        snap = store.parse(fixture_b0f6mwnj6j, "https://www.amazon.fr/dp/B0F6MWNJ6J")
        # is_complete() must be True exactly when title AND price are both truthy.
        expected = bool(snap.title and snap.price)
        assert snap.is_complete() is expected

    def test_parse_b0d4dx8ph3_json_serialization(self, store, fixture_b0d4dx8ph3):
        """Fixture B0D4DX8PH3: the snapshot must serialise to JSON."""
        snap = store.parse(fixture_b0d4dx8ph3, "https://www.amazon.fr/dp/B0D4DX8PH3")
        # Serialisation must succeed and produce non-empty output.
        payload = snap.to_json()
        assert payload is not None
        assert len(payload) > 0
        # Accept both the compact form and the form with a space after the colon.
        assert '"source":"amazon"' in payload or '"source": "amazon"' in payload
        assert 'B0D4DX8PH3' in payload

    def test_parse_fixtures_preserve_asin(self, store, fixture_b0d4dx8ph3, fixture_b0f6mwnj6j):
        """The ASIN found in the URL must be kept as the snapshot reference."""
        cases = (
            ("https://www.amazon.fr/dp/B0D4DX8PH3", fixture_b0d4dx8ph3, "B0D4DX8PH3"),
            ("https://www.amazon.fr/dp/B0F6MWNJ6J", fixture_b0f6mwnj6j, "B0F6MWNJ6J"),
        )
        for product_url, html, asin in cases:
            assert store.parse(html, product_url).reference == asin

259
tests/stores/test_backmarket.py Executable file
View File

@@ -0,0 +1,259 @@
#!/usr/bin/env python3
"""Tests pour le store Backmarket."""
import pytest
from pathlib import Path
from pricewatch.app.stores.backmarket.store import BackmarketStore
class TestBackmarketStore:
    """Tests for BackmarketStore: match(), canonicalize(), extract_reference(), parse().

    The HTML literals below are reproduced byte-for-byte, which is why their
    lines start at column 0 instead of following the code indentation.
    """

    @pytest.fixture
    def store(self):
        """Fixture: a new BackmarketStore instance."""
        return BackmarketStore()

    # ========== match() tests ==========

    def test_match_backmarket_fr(self, store):
        """A backmarket.fr URL is recognised with score 0.9."""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
        assert store.match(url) == 0.9

    def test_match_backmarket_com(self, store):
        """A backmarket.com URL (other countries) is recognised with score 0.8."""
        url = "https://www.backmarket.com/en-us/p/iphone-15-pro"
        assert store.match(url) == 0.8

    def test_match_other_site(self, store):
        """URLs of other sites, and the empty URL, are not recognised."""
        # The original list also held None but skipped it with an
        # `is not None` guard, so it was never exercised; the dead entry and
        # its guard are gone and the set of checked URLs is unchanged.
        urls = [
            "https://www.amazon.fr/dp/ASIN",
            "https://www.cdiscount.com/f-123-abc.html",
            "https://www.fnac.com/produit",
            "",
        ]
        for url in urls:
            assert store.match(url) == 0.0

    def test_match_case_insensitive(self, store):
        """Matching ignores the letter case of the URL."""
        url = "https://WWW.BACKMARKET.FR/FR-FR/P/IPHONE"
        assert store.match(url) == 0.9

    # ========== canonicalize() tests ==========

    def test_canonicalize_remove_query_params(self, store):
        """canonicalize() strips the query parameters."""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro?color=black"
        canonical = store.canonicalize(url)
        assert canonical == "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"

    def test_canonicalize_remove_fragment(self, store):
        """canonicalize() strips the fragment (#)."""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro#specs"
        canonical = store.canonicalize(url)
        assert canonical == "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"

    def test_canonicalize_keep_path(self, store):
        """canonicalize() keeps the full path."""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
        canonical = store.canonicalize(url)
        assert canonical == "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"

    def test_canonicalize_empty_url(self, store):
        """canonicalize() returns an empty or None URL unchanged."""
        assert store.canonicalize("") == ""
        assert store.canonicalize(None) is None

    # ========== extract_reference() tests ==========

    def test_extract_reference_standard_format(self, store):
        """The SKU is extracted from the standard /p/{slug} format."""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
        assert store.extract_reference(url) == "iphone-15-pro"

    def test_extract_reference_with_query_params(self, store):
        """SKU extraction ignores the query parameters."""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro?color=black"
        assert store.extract_reference(url) == "iphone-15-pro"

    def test_extract_reference_different_locale(self, store):
        """SKU extraction works with other locales."""
        url = "https://www.backmarket.com/en-us/p/macbook-air-m2"
        assert store.extract_reference(url) == "macbook-air-m2"

    def test_extract_reference_with_numbers(self, store):
        """SKU extraction works when the slug contains digits."""
        url = "https://www.backmarket.fr/fr-fr/p/samsung-galaxy-s23"
        assert store.extract_reference(url) == "samsung-galaxy-s23"

    def test_extract_reference_invalid_url(self, store):
        """SKU extraction from a non-product, empty or None URL returns None."""
        urls = [
            "https://www.backmarket.fr/fr-fr/collections/smartphones",
            "https://www.backmarket.fr/",
            "",
            None,
        ]
        for url in urls:
            assert store.extract_reference(url) is None

    # ========== parse() tests ==========

    def test_parse_basic_html(self, store):
        """Basic HTML carrying a JSON-LD product block is parsed completely."""
        html = """
<html>
<head>
<script type="application/ld+json">
{
"@type": "Product",
"name": "iPhone 15 Pro",
"offers": {
"@type": "Offer",
"price": "571.00",
"priceCurrency": "EUR"
},
"image": "https://example.com/image.jpg"
}
</script>
</head>
<body>
<h1 class="heading-1">iPhone 15 Pro</h1>
</body>
</html>
"""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
        snapshot = store.parse(html, url)
        assert snapshot.source == "backmarket"
        assert snapshot.url == "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
        assert snapshot.title == "iPhone 15 Pro"
        assert snapshot.price == 571.0
        assert snapshot.currency == "EUR"
        assert snapshot.reference == "iphone-15-pro"
        assert len(snapshot.images) == 1
        assert snapshot.is_complete()

    def test_parse_without_json_ld(self, store):
        """Without JSON-LD, title and price are still extracted (CSS selectors per the original notes)."""
        html = """
<html>
<body>
<h1 class="heading-1">MacBook Air M2</h1>
<div data-test="price">799,99 €</div>
</body>
</html>
"""
        url = "https://www.backmarket.fr/fr-fr/p/macbook-air-m2"
        snapshot = store.parse(html, url)
        assert snapshot.title == "MacBook Air M2"
        assert snapshot.price == 799.99
        assert snapshot.currency == "EUR"
        assert snapshot.reference == "macbook-air-m2"

    def test_parse_with_condition(self, store):
        """parse() extracts the refurbished condition into the specs."""
        html = """
<html>
<head>
<script type="application/ld+json">
{
"@type": "Product",
"name": "iPhone 15",
"offers": {"price": "500", "priceCurrency": "EUR"}
}
</script>
</head>
<body>
<h1>iPhone 15</h1>
<button data-test="condition-button">Excellent</button>
</body>
</html>
"""
        url = "https://www.backmarket.fr/fr-fr/p/iphone-15"
        snapshot = store.parse(html, url)
        assert "Condition" in snapshot.specs
        assert snapshot.specs["Condition"] == "Excellent"
        # A debug note must mention the refurbished ("reconditionné") nature.
        assert any("reconditionné" in note.lower() for note in snapshot.debug.notes)

    def test_parse_missing_title_and_price(self, store):
        """With title and price both missing, the status is 'partial'."""
        html = "<html><body><p>Contenu vide</p></body></html>"
        url = "https://www.backmarket.fr/fr-fr/p/test"
        snapshot = store.parse(html, url)
        assert snapshot.title is None
        assert snapshot.price is None
        assert not snapshot.is_complete()
        assert snapshot.debug.status == "partial"

    def test_parse_stock_status_detection(self, store):
        """parse() reports in_stock when an add-to-cart button is present."""
        html = """
<html>
<head>
<script type="application/ld+json">
{
"@type": "Product",
"name": "Test Product",
"offers": {"price": "100", "priceCurrency": "EUR"}
}
</script>
</head>
<body>
<button data-test="add-to-cart">Ajouter au panier</button>
</body>
</html>
"""
        url = "https://www.backmarket.fr/fr-fr/p/test-product"
        snapshot = store.parse(html, url)
        assert snapshot.stock_status == "in_stock"

    def test_parse_specs_from_definition_list(self, store):
        """parse() extracts specs from <dl> definition lists."""
        html = """
<html>
<head>
<script type="application/ld+json">
{
"@type": "Product",
"name": "Test",
"offers": {"price": "100", "priceCurrency": "EUR"}
}
</script>
</head>
<body>
<dl>
<dt>Mémoire</dt>
<dd>256 Go</dd>
<dt>Couleur</dt>
<dd>Noir</dd>
</dl>
</body>
</html>
"""
        url = "https://www.backmarket.fr/fr-fr/p/test"
        snapshot = store.parse(html, url)
        assert "Mémoire" in snapshot.specs
        assert snapshot.specs["Mémoire"] == "256 Go"
        assert "Couleur" in snapshot.specs
        assert snapshot.specs["Couleur"] == "Noir"

View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""Tests fixtures réelles pour le store Backmarket."""
import pytest
from pathlib import Path
from pricewatch.app.stores.backmarket.store import BackmarketStore
class TestBackmarketFixtures:
"""Tests avec fixtures HTML réelles de Backmarket."""
@pytest.fixture
def store(self):
"""Fixture du store Backmarket."""
return BackmarketStore()
@pytest.fixture
def fixture_iphone15pro(self):
"""Fixture HTML iPhone 15 Pro."""
fixture_path = (
Path(__file__).parent.parent.parent
/ "pricewatch/app/stores/backmarket/fixtures/backmarket_iphone15pro.html"
)
with open(fixture_path, "r", encoding="utf-8") as f:
return f.read()
# ========== Tests de parsing complet ==========
def test_parse_iphone15pro_complete(self, store, fixture_iphone15pro):
"""Parse fixture iPhone 15 Pro - doit extraire toutes les données essentielles."""
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
snapshot = store.parse(fixture_iphone15pro, url)
# Identité
assert snapshot.source == "backmarket"
assert snapshot.url == "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
assert snapshot.reference == "iphone-15-pro"
# Contenu essentiel
assert snapshot.title == "iPhone 15 Pro"
assert snapshot.price is not None
assert snapshot.price > 0
assert snapshot.currency == "EUR"
# Complet
assert snapshot.is_complete()
def test_parse_iphone15pro_title(self, store, fixture_iphone15pro):
"""Parse fixture - vérifier le titre exact."""
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
snapshot = store.parse(fixture_iphone15pro, url)
assert snapshot.title == "iPhone 15 Pro"
assert len(snapshot.title) > 0
def test_parse_iphone15pro_price(self, store, fixture_iphone15pro):
"""Parse fixture - vérifier le prix."""
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
snapshot = store.parse(fixture_iphone15pro, url)
# Prix extrait du JSON-LD
assert snapshot.price == 571.0
assert snapshot.currency == "EUR"
def test_parse_iphone15pro_reference(self, store, fixture_iphone15pro):
"""Parse fixture - vérifier la référence (SKU)."""
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
snapshot = store.parse(fixture_iphone15pro, url)
assert snapshot.reference == "iphone-15-pro"
def test_parse_iphone15pro_images(self, store, fixture_iphone15pro):
"""Parse fixture - vérifier les images."""
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
snapshot = store.parse(fixture_iphone15pro, url)
assert len(snapshot.images) >= 1
# Vérifier que les URLs sont valides
for img_url in snapshot.images:
assert img_url.startswith("http")
assert "cloudfront" in img_url or "backmarket" in img_url
def test_parse_iphone15pro_debug_success(self, store, fixture_iphone15pro):
"""Parse fixture - vérifier les infos de debug."""
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
snapshot = store.parse(fixture_iphone15pro, url)
assert snapshot.debug.status == "success"
assert snapshot.debug.method == "http" # Sera mis à jour par l'appelant
# ========== Tests de robustesse ==========
def test_parse_with_different_urls(self, store, fixture_iphone15pro):
"""Parse fixture fonctionne avec différentes formes d'URL."""
urls = [
"https://www.backmarket.fr/fr-fr/p/iphone-15-pro",
"https://www.backmarket.fr/fr-fr/p/iphone-15-pro?color=black",
"https://www.backmarket.fr/fr-fr/p/iphone-15-pro#specs",
]
for url in urls:
snapshot = store.parse(fixture_iphone15pro, url)
assert snapshot.title == "iPhone 15 Pro"
assert snapshot.price == 571.0
# URL canonicalisée (sans query params ni fragment)
assert snapshot.url == "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
def test_parse_extracts_data_from_json_ld(self, store, fixture_iphone15pro):
"""Parse fixture utilise le JSON-LD schema.org (source prioritaire)."""
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
snapshot = store.parse(fixture_iphone15pro, url)
# Les données doivent venir du JSON-LD
assert snapshot.title == "iPhone 15 Pro"
assert snapshot.price == 571.0
assert snapshot.currency == "EUR"
# Pas d'erreur dans le debug
assert len(snapshot.debug.errors) == 0
def test_parse_no_errors(self, store, fixture_iphone15pro):
"""Parse fixture ne génère pas d'erreurs."""
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
snapshot = store.parse(fixture_iphone15pro, url)
assert len(snapshot.debug.errors) == 0
# ========== Tests comparatifs ==========
def test_parse_consistent_results(self, store, fixture_iphone15pro):
    """Parsing the same fixture twice yields the same extracted data."""
    product_url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"
    first = store.parse(fixture_iphone15pro, product_url)
    second = store.parse(fixture_iphone15pro, product_url)
    # fetched_at is deliberately left out: it may differ between two runs.
    for field_name in ("title", "price", "currency", "reference", "images"):
        assert getattr(first, field_name) == getattr(second, field_name)
    assert first.is_complete() == second.is_complete()
def test_parse_json_export(self, store, fixture_iphone15pro):
    """A parsed snapshot can be exported to a dict holding the expected values."""
    parsed = store.parse(
        fixture_iphone15pro,
        "https://www.backmarket.fr/fr-fr/p/iphone-15-pro",
    )
    # Export the snapshot as a plain dict.
    exported = parsed.to_dict()
    expected_fields = {
        "source": "backmarket",
        "title": "iPhone 15 Pro",
        "price": 571.0,
        "currency": "EUR",
        "reference": "iphone-15-pro",
    }
    for key, expected_value in expected_fields.items():
        assert exported[key] == expected_value
    assert "debug" in exported

288
tests/stores/test_cdiscount.py Executable file
View File

@@ -0,0 +1,288 @@
"""
Tests pour pricewatch.app.stores.cdiscount.store
Vérifie match(), canonicalize(), extract_reference() et parse()
pour le store Cdiscount.
"""
import pytest
from pricewatch.app.stores.cdiscount.store import CdiscountStore
from pricewatch.app.core.schema import DebugStatus, StockStatus
class TestCdiscountMatch:
    """Checks on the score returned by CdiscountStore.match()."""

    @pytest.fixture
    def store(self) -> CdiscountStore:
        """Provide a fresh CdiscountStore instance."""
        return CdiscountStore()

    def test_match_cdiscount_com(self, store):
        """A cdiscount.com URL scores 0.9."""
        product_url = "https://www.cdiscount.com/informatique/example/f-123-sku.html"
        assert store.match(product_url) == 0.9

    def test_match_non_cdiscount(self, store):
        """A URL from another shop scores 0.0."""
        foreign_url = "https://www.amazon.fr/dp/B08N5WRWNW"
        assert store.match(foreign_url) == 0.0

    def test_match_empty_url(self, store):
        """An empty URL scores 0.0."""
        assert store.match("") == 0.0

    def test_match_case_insensitive(self, store):
        """Matching ignores the letter case of the host name."""
        shouting_url = "https://www.CDISCOUNT.COM/product/f-123-sku.html"
        assert store.match(shouting_url) == 0.9
class TestCdiscountCanonicalize:
    """Checks on the URL normalisation done by CdiscountStore.canonicalize()."""

    @pytest.fixture
    def store(self) -> CdiscountStore:
        """Provide a fresh CdiscountStore instance."""
        return CdiscountStore()

    def test_canonicalize_with_query_params(self, store):
        """Query parameters are removed from the URL."""
        result = store.canonicalize(
            "https://www.cdiscount.com/informatique/pc/product/f-10709-sku.html?idOffre=123&sw=abc"
        )
        assert result == "https://www.cdiscount.com/informatique/pc/product/f-10709-sku.html"
        assert "?" not in result

    def test_canonicalize_already_canonical(self, store):
        """An already canonical URL comes back unchanged."""
        clean_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku.html"
        assert store.canonicalize(clean_url) == clean_url

    def test_canonicalize_with_fragment(self, store):
        """The fragment is removed from the URL."""
        result = store.canonicalize(
            "https://www.cdiscount.com/informatique/pc/f-10709-sku.html#mpos=2"
        )
        assert result == "https://www.cdiscount.com/informatique/pc/f-10709-sku.html"
        assert "#" not in result

    def test_canonicalize_empty_url(self, store):
        """An empty URL stays empty."""
        assert store.canonicalize("") == ""
class TestCdiscountExtractReference:
    """Checks on the reference pulled out of a URL by extract_reference()."""

    @pytest.fixture
    def store(self) -> CdiscountStore:
        """Provide a fresh CdiscountStore instance."""
        return CdiscountStore()

    def test_extract_reference_standard_format(self, store):
        """The reference is read from the standard /f-{ID}-{SKU}.html form."""
        product_url = "https://www.cdiscount.com/informatique/pc/f-10709-tuf608umrv004.html"
        assert store.extract_reference(product_url) == "10709-tuf608umrv004"

    def test_extract_reference_long_url(self, store):
        """The reference is read from a URL carrying a long category path."""
        product_url = "https://www.cdiscount.com/informatique/ordinateurs-pc-portables/pc-portable-gamer-asus/f-10709-tuf608umrv004.html"
        assert store.extract_reference(product_url) == "10709-tuf608umrv004"

    def test_extract_reference_with_query_params(self, store):
        """The reference is read even when query parameters follow the path."""
        product_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku123.html?idOffre=456"
        assert store.extract_reference(product_url) == "10709-sku123"

    def test_extract_reference_invalid_url(self, store):
        """A URL carrying no SKU yields None."""
        category_url = "https://www.cdiscount.com/informatique/"
        assert store.extract_reference(category_url) is None

    def test_extract_reference_empty_url(self, store):
        """An empty URL yields None."""
        assert store.extract_reference("") is None
class TestCdiscountParse:
    """Checks on CdiscountStore.parse() using small hand-written HTML pages.

    The HTML literals are kept flush-left so their content stays exactly as
    written.
    """

    @pytest.fixture
    def store(self) -> CdiscountStore:
        """Provide a fresh CdiscountStore instance."""
        return CdiscountStore()

    @pytest.fixture
    def minimal_html(self) -> str:
        """Provide a bare Cdiscount page holding only a title and a price."""
        page = """
<html>
<head><title>Test Product</title></head>
<body>
<h1 data-e2e="title">PC Portable Test</h1>
<div class="sc-83lijy-0 kwssIa SecondaryPrice-price">899,99 €</div>
</body>
</html>
"""
        return page

    @pytest.fixture
    def complete_html(self) -> str:
        """Provide a fuller Cdiscount page that also carries two images."""
        page = """
<html>
<head><title>Test Product</title></head>
<body>
<h1 data-e2e="title">PC Portable Gamer ASUS</h1>
<div class="SecondaryPrice-price">1299,99 €</div>
<img alt="PC Portable Gamer ASUS" src="https://www.cdiscount.com/pdt2/0/0/4/1/700x700/sku123/image1.jpg" />
<img alt="PC Portable Gamer ASUS" src="https://www.cdiscount.com/pdt2/0/0/4/2/700x700/sku123/image2.jpg" />
</body>
</html>
"""
        return page

    @pytest.fixture
    def empty_html(self) -> str:
        """Provide a page with an empty body."""
        page = "<html><body></body></html>"
        return page

    def test_parse_minimal_html(self, store, minimal_html):
        """A page with just a title and a price yields a complete snapshot."""
        page_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku123.html"
        result = store.parse(minimal_html, page_url)
        assert result.source == "cdiscount"
        assert result.url == "https://www.cdiscount.com/informatique/pc/f-10709-sku123.html"
        assert result.title == "PC Portable Test"
        assert result.price == 899.99
        assert result.currency == "EUR"
        assert result.is_complete() is True

    def test_parse_complete_html(self, store, complete_html):
        """A fuller page yields title, price, reference and images."""
        page_url = "https://www.cdiscount.com/informatique/pc/f-10709-asus123.html"
        result = store.parse(complete_html, page_url)
        assert result.title == "PC Portable Gamer ASUS"
        assert result.price == 1299.99
        assert result.reference == "10709-asus123"
        assert len(result.images) >= 2
        assert result.is_complete() is True
        assert result.debug.status == DebugStatus.SUCCESS

    def test_parse_empty_html(self, store, empty_html):
        """An empty page yields a partial snapshot without title or price."""
        page_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku.html"
        result = store.parse(empty_html, page_url)
        assert result.source == "cdiscount"
        assert result.title is None
        assert result.price is None
        assert result.is_complete() is False
        assert result.debug.status == DebugStatus.PARTIAL

    def test_parse_canonicalizes_url(self, store, minimal_html):
        """The snapshot stores the URL without query string or fragment."""
        noisy_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku.html?idOffre=123#mpos=2"
        stored_url = store.parse(minimal_html, noisy_url).url
        assert stored_url == "https://www.cdiscount.com/informatique/pc/f-10709-sku.html"
        assert "?" not in stored_url
        assert "#" not in stored_url

    def test_parse_extracts_reference_from_url(self, store, minimal_html):
        """The snapshot reference is taken from the URL."""
        page_url = "https://www.cdiscount.com/informatique/pc/f-10709-tuf608umrv004.html"
        result = store.parse(minimal_html, page_url)
        assert result.reference == "10709-tuf608umrv004"

    def test_parse_sets_fetched_at(self, store, minimal_html):
        """The snapshot always carries a fetched_at value."""
        page_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku.html"
        result = store.parse(minimal_html, page_url)
        assert result.fetched_at is not None

    def test_parse_partial_status_without_title(self, store):
        """A page with a price but no title is reported as PARTIAL."""
        page = """
<html><body>
<div class="SecondaryPrice-price">299,99 €</div>
</body></html>
"""
        page_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku.html"
        result = store.parse(page, page_url)
        assert result.debug.status == DebugStatus.PARTIAL
        assert result.title is None
        assert result.price == 299.99

    def test_parse_partial_status_without_price(self, store):
        """A page with a title but no price is reported as PARTIAL."""
        page = """
<html><body>
<h1 data-e2e="title">Test Product</h1>
</body></html>
"""
        page_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku.html"
        result = store.parse(page, page_url)
        assert result.debug.status == DebugStatus.PARTIAL
        assert result.title == "Test Product"
        assert result.price is None

    def test_parse_price_with_comma_separator(self, store):
        """A price written with a decimal comma (French format) is parsed."""
        page = """
<html><body>
<h1 data-e2e="title">Test</h1>
<div class="price">1199,99 €</div>
</body></html>
"""
        page_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku.html"
        result = store.parse(page, page_url)
        assert result.price == 1199.99

    def test_parse_price_with_dot_separator(self, store):
        """A price written with a decimal point (international format) is parsed."""
        page = """
<html><body>
<h1 data-e2e="title">Test</h1>
<div class="price">1199.99 €</div>
</body></html>
"""
        page_url = "https://www.cdiscount.com/informatique/pc/f-10709-sku.html"
        result = store.parse(page, page_url)
        assert result.price == 1199.99
class TestCdiscountStoreInit:
    """Checks on a freshly constructed CdiscountStore."""

    def test_store_id(self):
        """The store identifies itself as 'cdiscount'."""
        assert CdiscountStore().store_id == "cdiscount"

    def test_selectors_loaded(self):
        """The store exposes a non-empty dict of selectors after construction."""
        loaded = CdiscountStore().selectors
        assert isinstance(loaded, dict)
        assert len(loaded) > 0

    def test_repr(self):
        """repr() mentions both the class name and the store id."""
        text = repr(CdiscountStore())
        assert "CdiscountStore" in text
        assert "cdiscount" in text

View File

@@ -0,0 +1,201 @@
"""
Tests pour pricewatch.app.stores.cdiscount.store avec fixtures HTML réels.
Teste le parsing de vraies pages HTML capturées depuis Cdiscount.com.
"""
import pytest
from pathlib import Path
from pricewatch.app.stores.cdiscount.store import CdiscountStore
from pricewatch.app.core.schema import DebugStatus, StockStatus
class TestCdiscountRealFixtures:
    """Checks on CdiscountStore.parse() using real pages captured from Cdiscount."""

    @staticmethod
    def _read_fixture(filename: str) -> str:
        """Return the UTF-8 text of a captured page from the fixtures directory.

        Args:
            filename: Name of the HTML file inside the Cdiscount fixtures folder.
        """
        fixtures_dir = (
            Path(__file__).parent.parent.parent
            / "pricewatch/app/stores/cdiscount/fixtures"
        )
        return (fixtures_dir / filename).read_text(encoding="utf-8")

    @pytest.fixture
    def store(self) -> CdiscountStore:
        """Provide a fresh CdiscountStore instance."""
        return CdiscountStore()

    @pytest.fixture
    def fixture_tuf608umrv004(self) -> str:
        """Provide the captured page for tuf608umrv004 (ASUS gaming laptop)."""
        return self._read_fixture("cdiscount_tuf608umrv004_pw.html")

    @pytest.fixture
    def fixture_a128902(self) -> str:
        """Provide the captured page for a128902 (NIRVANA corner sofa)."""
        return self._read_fixture("cdiscount_a128902_pw.html")

    def test_parse_tuf608umrv004_complete(self, store, fixture_tuf608umrv004):
        """The tuf608umrv004 page yields all the essential data."""
        url = "https://www.cdiscount.com/informatique/ordinateurs-pc-portables/pc-portable-gamer-asus-tuf-gaming-a16-sans-windo/f-10709-tuf608umrv004.html"
        snapshot = store.parse(fixture_tuf608umrv004, url)
        # Metadata
        assert snapshot.source == "cdiscount"
        assert snapshot.url == url
        assert snapshot.reference == "10709-tuf608umrv004"
        assert snapshot.fetched_at is not None
        # Title (must mention "ASUS" or "TUF")
        assert snapshot.title is not None
        assert len(snapshot.title) > 0
        assert "ASUS" in snapshot.title or "TUF" in snapshot.title
        # Price
        assert snapshot.price is not None
        assert snapshot.price > 0
        assert snapshot.currency == "EUR"
        # Status
        assert snapshot.debug.status in [DebugStatus.SUCCESS, DebugStatus.PARTIAL]

    def test_parse_a128902_complete(self, store, fixture_a128902):
        """The a128902 page yields all the essential data."""
        url = "https://www.cdiscount.com/maison/canape-canapes/canape-d-angle-convertible-reversible-nirvana-4-5/f-11701-a128902.html"
        snapshot = store.parse(fixture_a128902, url)
        # Metadata
        assert snapshot.source == "cdiscount"
        assert snapshot.url == url
        assert snapshot.reference == "11701-a128902"
        # Title
        assert snapshot.title is not None
        assert "Canapé" in snapshot.title or "NIRVANA" in snapshot.title.upper()
        # Price
        assert snapshot.price is not None
        assert snapshot.price > 0
        assert snapshot.currency == "EUR"
        # Status
        assert snapshot.debug.status in [DebugStatus.SUCCESS, DebugStatus.PARTIAL]

    def test_parse_tuf608umrv004_images(self, store, fixture_tuf608umrv004):
        """The tuf608umrv004 page yields at least one image hosted by Cdiscount."""
        url = "https://www.cdiscount.com/informatique/.../f-10709-tuf608umrv004.html"
        snapshot = store.parse(fixture_tuf608umrv004, url)
        # At least one image is required.
        assert len(snapshot.images) > 0
        # Every image must be an absolute URL on a Cdiscount host.
        for img_url in snapshot.images:
            assert img_url.startswith("http")
            assert "cdiscount.com" in img_url.lower() or "cdscdn.com" in img_url.lower()

    def test_parse_a128902_images(self, store, fixture_a128902):
        """The a128902 page yields at least one image."""
        url = "https://www.cdiscount.com/maison/.../f-11701-a128902.html"
        snapshot = store.parse(fixture_a128902, url)
        # At least one image is required.
        assert len(snapshot.images) > 0
        # Every image must be an absolute URL.
        for img_url in snapshot.images:
            assert img_url.startswith("http")

    def test_parse_tuf608umrv004_completeness(self, store, fixture_tuf608umrv004):
        """is_complete() agrees with the presence of title and price (tuf608umrv004)."""
        url = "https://www.cdiscount.com/informatique/.../f-10709-tuf608umrv004.html"
        snapshot = store.parse(fixture_tuf608umrv004, url)
        # The snapshot is complete exactly when both title and price are set.
        expected = bool(snapshot.title and snapshot.price)
        assert snapshot.is_complete() is expected

    def test_parse_a128902_completeness(self, store, fixture_a128902):
        """is_complete() agrees with the presence of title and price (a128902)."""
        url = "https://www.cdiscount.com/maison/.../f-11701-a128902.html"
        snapshot = store.parse(fixture_a128902, url)
        # The snapshot is complete exactly when both title and price are set.
        expected = bool(snapshot.title and snapshot.price)
        assert snapshot.is_complete() is expected

    def test_parse_tuf608umrv004_json_serialization(self, store, fixture_tuf608umrv004):
        """The tuf608umrv004 snapshot serialises to JSON."""
        url = "https://www.cdiscount.com/informatique/.../f-10709-tuf608umrv004.html"
        snapshot = store.parse(fixture_tuf608umrv004, url)
        # Serialisation must succeed and mention the store and the SKU.
        json_str = snapshot.to_json()
        assert json_str is not None
        assert len(json_str) > 0
        assert 'cdiscount' in json_str
        assert 'tuf608umrv004' in json_str.lower()

    def test_parse_a128902_json_serialization(self, store, fixture_a128902):
        """The a128902 snapshot serialises to JSON."""
        url = "https://www.cdiscount.com/maison/.../f-11701-a128902.html"
        snapshot = store.parse(fixture_a128902, url)
        # Serialisation must succeed and mention the store and the SKU.
        json_str = snapshot.to_json()
        assert json_str is not None
        assert len(json_str) > 0
        assert 'cdiscount' in json_str
        assert 'a128902' in json_str

    def test_parse_fixtures_preserve_sku(self, store, fixture_tuf608umrv004, fixture_a128902):
        """The SKU found in the URL ends up in the snapshot reference."""
        # tuf608umrv004
        url1 = "https://www.cdiscount.com/informatique/.../f-10709-tuf608umrv004.html"
        snapshot1 = store.parse(fixture_tuf608umrv004, url1)
        assert snapshot1.reference == "10709-tuf608umrv004"
        # a128902
        url2 = "https://www.cdiscount.com/maison/.../f-11701-a128902.html"
        snapshot2 = store.parse(fixture_a128902, url2)
        assert snapshot2.reference == "11701-a128902"

    def test_parse_tuf608umrv004_price_format(self, store, fixture_tuf608umrv004):
        """The tuf608umrv004 price is a positive float with at most 2 decimals."""
        url = "https://www.cdiscount.com/informatique/.../f-10709-tuf608umrv004.html"
        snapshot = store.parse(fixture_tuf608umrv004, url)
        # Fail loudly when no price was parsed instead of skipping every check.
        assert snapshot.price is not None
        assert isinstance(snapshot.price, float)
        assert snapshot.price > 0
        # The price must carry at most two decimals.
        assert snapshot.price == round(snapshot.price, 2)

    def test_parse_a128902_price_format(self, store, fixture_a128902):
        """The a128902 price is a positive float with at most 2 decimals."""
        url = "https://www.cdiscount.com/maison/.../f-11701-a128902.html"
        snapshot = store.parse(fixture_a128902, url)
        # Fail loudly when no price was parsed instead of skipping every check.
        assert snapshot.price is not None
        assert isinstance(snapshot.price, float)
        assert snapshot.price > 0
        # The price must carry at most two decimals.
        assert snapshot.price == round(snapshot.price, 2)

    def test_parse_different_categories(self, store, fixture_tuf608umrv004, fixture_a128902):
        """Pages from two different categories both parse to complete snapshots."""
        # Laptop (informatique category)
        url1 = "https://www.cdiscount.com/informatique/.../f-10709-tuf608umrv004.html"
        snapshot1 = store.parse(fixture_tuf608umrv004, url1)
        assert snapshot1.is_complete()
        # Sofa (maison category)
        url2 = "https://www.cdiscount.com/maison/.../f-11701-a128902.html"
        snapshot2 = store.parse(fixture_a128902, url2)
        assert snapshot2.is_complete()
        # Two different products must not share price or title.
        assert snapshot1.price != snapshot2.price
        assert snapshot1.title != snapshot2.title