Files
scrap/tests/stores/test_aliexpress.py
2026-01-13 19:49:04 +01:00

272 lines
9.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""Tests for the AliExpress store."""
import pytest
from pathlib import Path
from pricewatch.app.stores.aliexpress.store import AliexpressStore
class TestAliexpressStore:
    """Tests for AliexpressStore.

    Covers the four store entry points exercised below: ``match()``,
    ``canonicalize()``, ``extract_reference()`` and ``parse()``.
    """

    @pytest.fixture
    def store(self):
        """Provide an AliexpressStore instance to each test."""
        return AliexpressStore()

    # ========== match() tests ==========

    def test_match_aliexpress_com_product(self, store):
        """An aliexpress.com/item/ URL is recognized as a product."""
        url = "https://www.aliexpress.com/item/1005007187023722.html"
        score = store.match(url)
        assert score == 0.9

    def test_match_aliexpress_fr_product(self, store):
        """A fr.aliexpress.com/item/ URL is recognized as a product."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        score = store.match(url)
        assert score == 0.9

    def test_match_aliexpress_non_product(self, store):
        """An aliexpress.com URL without /item/ gets a reduced score."""
        url = "https://www.aliexpress.com/category/electronics"
        score = store.match(url)
        assert score == 0.5

    def test_match_other_site(self, store):
        """Other sites and the empty string are not recognized (score 0.0)."""
        # Only string inputs are exercised here; match(None) is not covered
        # by this test.
        urls = [
            "https://www.amazon.fr/dp/ASIN",
            "https://www.cdiscount.com/f-123-abc.html",
            "",
        ]
        for url in urls:
            score = store.match(url)
            # Name the offending URL so a failure inside the loop is traceable.
            assert score == 0.0, f"unexpected match score {score!r} for {url!r}"

    def test_match_case_insensitive(self, store):
        """Matching is case-insensitive."""
        url = "https://FR.ALIEXPRESS.COM/ITEM/1234567890.HTML"
        score = store.match(url)
        assert score == 0.9

    # ========== canonicalize() tests ==========

    def test_canonicalize_remove_query_params(self, store):
        """Canonicalize strips the query parameters."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html?spm=a2g0o.detail.0.0"
        canonical = store.canonicalize(url)
        assert canonical == "https://fr.aliexpress.com/item/1005007187023722.html"

    def test_canonicalize_remove_fragment(self, store):
        """Canonicalize strips the fragment (#)."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html#reviews"
        canonical = store.canonicalize(url)
        assert canonical == "https://fr.aliexpress.com/item/1005007187023722.html"

    def test_canonicalize_keep_item_path(self, store):
        """Canonicalize keeps the /item/{ID}.html path."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        canonical = store.canonicalize(url)
        assert canonical == "https://fr.aliexpress.com/item/1005007187023722.html"

    def test_canonicalize_empty_url(self, store):
        """Canonicalize returns an empty or None URL unchanged."""
        assert store.canonicalize("") == ""
        assert store.canonicalize(None) is None

    # ========== extract_reference() tests ==========

    def test_extract_reference_standard_format(self, store):
        """The SKU is extracted from the standard /item/{ID}.html format."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        ref = store.extract_reference(url)
        assert ref == "1005007187023722"

    def test_extract_reference_with_query_params(self, store):
        """SKU extraction ignores the query parameters."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html?param=value"
        ref = store.extract_reference(url)
        assert ref == "1005007187023722"

    def test_extract_reference_different_domain(self, store):
        """SKU extraction works across different domains."""
        url = "https://www.aliexpress.com/item/9876543210987.html"
        ref = store.extract_reference(url)
        assert ref == "9876543210987"

    def test_extract_reference_invalid_url(self, store):
        """SKU extraction from an invalid URL returns None."""
        urls = [
            "https://www.aliexpress.com/category/electronics",
            "https://www.aliexpress.com/",
            "",
            None,
        ]
        for url in urls:
            ref = store.extract_reference(url)
            # Name the offending URL so a failure inside the loop is traceable.
            assert ref is None, f"expected no reference for {url!r}, got {ref!r}"

    # ========== parse() tests ==========

    def test_parse_basic_html_with_title(self, store):
        """Parse basic HTML that has an h1 title."""
        html = """
<html>
<head>
<meta property="og:title" content="Samsung DDR4 RAM - AliExpress">
</head>
<body>
<h1>Samsung DDR4 RAM Server Memory</h1>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        snapshot = store.parse(html, url)
        assert snapshot.source == "aliexpress"
        assert snapshot.url == "https://fr.aliexpress.com/item/1005007187023722.html"
        assert snapshot.title == "Samsung DDR4 RAM Server Memory"
        assert snapshot.reference == "1005007187023722"
        assert snapshot.currency == "EUR"  # fr.aliexpress → EUR

    def test_parse_title_from_meta_og(self, store):
        """Parse the title from og:title when there is no h1."""
        html = """
<html>
<head>
<meta property="og:title" content="Product Name - AliExpress">
</head>
<body>
</body>
</html>
"""
        url = "https://www.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.title == "Product Name"  # "- AliExpress" removed
        assert snapshot.currency == "USD"  # .com → USD

    def test_parse_price_from_regex(self, store):
        """Parse the price from text found in the HTML."""
        html = """
<html>
<head>
<meta property="og:title" content="Test Product - AliExpress">
</head>
<body>
<h1>Test Product</h1>
<div class="price-container">
<span>Prix: 99,99 €</span>
</div>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.price == 99.99
        assert snapshot.currency == "EUR"

    def test_parse_price_euro_before(self, store):
        """Parse a price where € precedes the number."""
        html = """
<html>
<head><meta property="og:title" content="Test - AliExpress"></head>
<body>
<h1>Test</h1>
<span>€ 125.50</span>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.price == 125.50

    def test_parse_images_from_dcdata(self, store):
        """Parse images from window._d_c_.DCData."""
        html = """
<html>
<head><meta property="og:title" content="Test - AliExpress"></head>
<body>
<h1>Test</h1>
<script>
window._d_c_ = window._d_c_ || {};
window._d_c_.DCData = {
"imagePathList": [
"https://ae01.alicdn.com/kf/image1.jpg",
"https://ae01.alicdn.com/kf/image2.jpg"
]
};
</script>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert len(snapshot.images) == 2
        assert snapshot.images[0] == "https://ae01.alicdn.com/kf/image1.jpg"
        assert snapshot.images[1] == "https://ae01.alicdn.com/kf/image2.jpg"
        # The parser is expected to leave a debug note mentioning DCData.
        assert any("DCData" in note for note in snapshot.debug.notes)

    def test_parse_images_from_og_fallback(self, store):
        """Parse images from og:image as a fallback."""
        html = """
<html>
<head>
<meta property="og:title" content="Test - AliExpress">
<meta property="og:image" content="https://ae01.alicdn.com/kf/product.jpg">
</head>
<body>
<h1>Test</h1>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert len(snapshot.images) == 1
        assert snapshot.images[0] == "https://ae01.alicdn.com/kf/product.jpg"

    def test_parse_missing_title_and_price(self, store):
        """Parse with title and price missing yields a "partial" status."""
        html = "<html><body><p>Empty content</p></body></html>"
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.title is None
        assert snapshot.price is None
        assert not snapshot.is_complete()
        assert snapshot.debug.status == "partial"

    def test_parse_small_html_warning(self, store):
        """Parsing a small HTML document produces a warning note."""
        html = "<html><head><title>Test</title></head><body></body></html>"
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        # HTML < 200KB should produce a note
        assert any("non rendu" in note.lower() for note in snapshot.debug.notes)

    def test_parse_stock_status_in_stock(self, store):
        """Parse detects in_stock from the add-to-cart button."""
        html = """
<html>
<head><meta property="og:title" content="Test - AliExpress"></head>
<body>
<h1>Test</h1>
<button class="add-to-cart-btn">Add to Cart</button>
</body>
</html>
"""
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)
        assert snapshot.stock_status == "in_stock"