# Source file: scrap/tests/stores/test_amazon_fixtures.py
# Last modified: 2026-01-13 19:49:04 +01:00
# Size: 199 lines, 8.4 KiB (Python, executable file)

"""
Tests for pricewatch.app.stores.amazon.store using real HTML fixtures.
Exercises the parsing of real HTML pages captured from Amazon.fr.
"""
import pytest
from pathlib import Path
from pricewatch.app.stores.amazon.store import AmazonStore
from pricewatch.app.core.schema import DebugStatus, StockStatus
class TestAmazonRealFixtures:
    """Tests that run ``AmazonStore.parse`` against captured Amazon HTML pages."""

    @staticmethod
    def _read_fixture(filename: str) -> str:
        """Return the UTF-8 text of one HTML fixture file.

        Args:
            filename: File name inside ``pricewatch/app/stores/amazon/fixtures``.

        Returns:
            The full content of the fixture file.

        Raises:
            OSError: If the fixture file cannot be opened.
        """
        # The fixtures directory is resolved three levels above this test file.
        fixture_path = (
            Path(__file__).parent.parent.parent
            / "pricewatch/app/stores/amazon/fixtures"
            / filename
        )
        return fixture_path.read_text(encoding="utf-8")

    @pytest.fixture
    def store(self) -> AmazonStore:
        """Fixture: a fresh AmazonStore instance."""
        return AmazonStore()

    @pytest.fixture
    def fixture_b0d4dx8ph3(self) -> str:
        """Fixture: Amazon HTML for B0D4DX8PH3 (UGREEN Uno Qi2 induction charger)."""
        return self._read_fixture("amazon_B0D4DX8PH3.html")

    @pytest.fixture
    def fixture_b0f6mwnj6j(self) -> str:
        """Fixture: Amazon HTML for B0F6MWNJ6J (Baseus Docking Station)."""
        return self._read_fixture("amazon_B0F6MWNJ6J.html")

    @pytest.fixture
    def fixture_captcha(self) -> str:
        """Fixture: HTML of an Amazon captcha page."""
        return self._read_fixture("captcha.html")

    def test_parse_b0d4dx8ph3_complete(
        self, store: AmazonStore, fixture_b0d4dx8ph3: str
    ) -> None:
        """Parse fixture B0D4DX8PH3 - all essential data must be extracted."""
        url = "https://www.amazon.fr/dp/B0D4DX8PH3"
        snapshot = store.parse(fixture_b0d4dx8ph3, url)

        # Metadata
        assert snapshot.source == "amazon"
        assert snapshot.url == url
        assert snapshot.reference == "B0D4DX8PH3"
        assert snapshot.fetched_at is not None

        # Title (must mention "UGREEN" in any case, or "Chargeur")
        assert snapshot.title is not None
        assert len(snapshot.title) > 0
        assert "UGREEN" in snapshot.title.upper() or "Chargeur" in snapshot.title

        # Price
        assert snapshot.price is not None
        assert snapshot.price > 0
        assert snapshot.currency == "EUR"

        # Status (success, or partial if parsing was incomplete)
        assert snapshot.debug.status in (DebugStatus.SUCCESS, DebugStatus.PARTIAL)

    def test_parse_b0f6mwnj6j_complete(
        self, store: AmazonStore, fixture_b0f6mwnj6j: str
    ) -> None:
        """Parse fixture B0F6MWNJ6J - all essential data must be extracted."""
        url = "https://www.amazon.fr/dp/B0F6MWNJ6J"
        snapshot = store.parse(fixture_b0f6mwnj6j, url)

        # Metadata
        assert snapshot.source == "amazon"
        assert snapshot.url == url
        assert snapshot.reference == "B0F6MWNJ6J"

        # Title
        assert snapshot.title is not None
        assert "Baseus" in snapshot.title or "Docking Station" in snapshot.title

        # Price
        assert snapshot.price is not None
        assert snapshot.price > 0
        assert snapshot.currency == "EUR"

        # Status
        assert snapshot.debug.status in (DebugStatus.SUCCESS, DebugStatus.PARTIAL)

    def test_parse_b0d4dx8ph3_images(
        self, store: AmazonStore, fixture_b0d4dx8ph3: str
    ) -> None:
        """Parse fixture B0D4DX8PH3 - at least one image must be extracted."""
        url = "https://www.amazon.fr/dp/B0D4DX8PH3"
        snapshot = store.parse(fixture_b0d4dx8ph3, url)

        # At least one image is expected
        assert len(snapshot.images) > 0

        # Every image must look like an Amazon-hosted http(s) URL
        for img_url in snapshot.images:
            assert img_url.startswith("http")
            assert "amazon" in img_url.lower()

    def test_parse_b0f6mwnj6j_specs(
        self, store: AmazonStore, fixture_b0f6mwnj6j: str
    ) -> None:
        """Parse fixture B0F6MWNJ6J - specs, when present, must be a dict."""
        url = "https://www.amazon.fr/dp/B0F6MWNJ6J"
        snapshot = store.parse(fixture_b0f6mwnj6j, url)

        # Specs are optional; only their shape is checked when extracted
        if snapshot.specs:
            assert isinstance(snapshot.specs, dict)
            assert len(snapshot.specs) > 0

    def test_parse_b0d4dx8ph3_category(
        self, store: AmazonStore, fixture_b0d4dx8ph3: str
    ) -> None:
        """Parse fixture B0D4DX8PH3 - the category, when present, is non-empty."""
        url = "https://www.amazon.fr/dp/B0D4DX8PH3"
        snapshot = store.parse(fixture_b0d4dx8ph3, url)

        # The category is optional; when extracted it must not be empty
        if snapshot.category:
            assert len(snapshot.category) > 0

    def test_parse_captcha_fixture(
        self, store: AmazonStore, fixture_captcha: str
    ) -> None:
        """Parse the captcha fixture - the captcha must be reported as an error."""
        url = "https://www.amazon.fr/dp/B0DFWRHZ7L"
        snapshot = store.parse(fixture_captcha, url)

        # Parsing must end with a FAILED or PARTIAL status
        assert snapshot.debug.status in (DebugStatus.FAILED, DebugStatus.PARTIAL)

        # At least one reported error must mention the captcha
        assert len(snapshot.debug.errors) > 0
        assert any("captcha" in err.lower() for err in snapshot.debug.errors)

        # No product data may be extracted from a captcha page
        assert snapshot.title is None
        assert snapshot.price is None
        assert snapshot.is_complete() is False

    def test_parse_b0d4dx8ph3_stock_status(
        self, store: AmazonStore, fixture_b0d4dx8ph3: str
    ) -> None:
        """Parse fixture B0D4DX8PH3 - the stock status must be set."""
        url = "https://www.amazon.fr/dp/B0D4DX8PH3"
        snapshot = store.parse(fixture_b0d4dx8ph3, url)

        # Stock status must be one of the three values checked here
        assert snapshot.stock_status in (
            StockStatus.IN_STOCK,
            StockStatus.OUT_OF_STOCK,
            StockStatus.UNKNOWN,
        )

    def test_parse_b0f6mwnj6j_stock_status(
        self, store: AmazonStore, fixture_b0f6mwnj6j: str
    ) -> None:
        """Parse fixture B0F6MWNJ6J - the stock status must be set."""
        url = "https://www.amazon.fr/dp/B0F6MWNJ6J"
        snapshot = store.parse(fixture_b0f6mwnj6j, url)

        # Stock status must be one of the three values checked here
        assert snapshot.stock_status in (
            StockStatus.IN_STOCK,
            StockStatus.OUT_OF_STOCK,
            StockStatus.UNKNOWN,
        )

    def test_parse_b0d4dx8ph3_completeness(
        self, store: AmazonStore, fixture_b0d4dx8ph3: str
    ) -> None:
        """Parse fixture B0D4DX8PH3 - check is_complete()."""
        url = "https://www.amazon.fr/dp/B0D4DX8PH3"
        snapshot = store.parse(fixture_b0d4dx8ph3, url)

        # is_complete() must be True exactly when both title and price are truthy
        expected_complete = bool(snapshot.title and snapshot.price)
        assert snapshot.is_complete() is expected_complete

    def test_parse_b0f6mwnj6j_completeness(
        self, store: AmazonStore, fixture_b0f6mwnj6j: str
    ) -> None:
        """Parse fixture B0F6MWNJ6J - check is_complete()."""
        url = "https://www.amazon.fr/dp/B0F6MWNJ6J"
        snapshot = store.parse(fixture_b0f6mwnj6j, url)

        # is_complete() must be True exactly when both title and price are truthy
        expected_complete = bool(snapshot.title and snapshot.price)
        assert snapshot.is_complete() is expected_complete

    def test_parse_b0d4dx8ph3_json_serialization(
        self, store: AmazonStore, fixture_b0d4dx8ph3: str
    ) -> None:
        """Parse fixture B0D4DX8PH3 - check JSON serialization."""
        url = "https://www.amazon.fr/dp/B0D4DX8PH3"
        snapshot = store.parse(fixture_b0d4dx8ph3, url)

        # Serialization must succeed and produce a non-empty result
        json_str = snapshot.to_json()
        assert json_str is not None
        assert len(json_str) > 0

        # Both compact and spaced key/value separators are accepted
        assert '"source":"amazon"' in json_str or '"source": "amazon"' in json_str
        assert 'B0D4DX8PH3' in json_str

    def test_parse_fixtures_preserve_asin(
        self,
        store: AmazonStore,
        fixture_b0d4dx8ph3: str,
        fixture_b0f6mwnj6j: str,
    ) -> None:
        """Parse fixtures - the ASIN from the URL must be kept in reference."""
        # B0D4DX8PH3
        url1 = "https://www.amazon.fr/dp/B0D4DX8PH3"
        snapshot1 = store.parse(fixture_b0d4dx8ph3, url1)
        assert snapshot1.reference == "B0D4DX8PH3"

        # B0F6MWNJ6J
        url2 = "https://www.amazon.fr/dp/B0F6MWNJ6J"
        snapshot2 = store.parse(fixture_b0f6mwnj6j, url2)
        assert snapshot2.reference == "B0F6MWNJ6J"