272 lines
9.6 KiB
Python
Executable File
272 lines
9.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Tests pour le store AliExpress."""
|
|
|
|
import pytest
|
|
from pathlib import Path
|
|
|
|
from pricewatch.app.stores.aliexpress.store import AliexpressStore
|
|
|
|
|
|
class TestAliexpressStore:
    """Tests for AliexpressStore: match, canonicalize, extract_reference, parse."""

    @pytest.fixture
    def store(self):
        """Return an AliexpressStore instance for the test to use."""
        return AliexpressStore()

    # ========== match() tests ==========

    def test_match_aliexpress_com_product(self, store):
        """An aliexpress.com/item/ URL is recognised as a product page."""
        url = "https://www.aliexpress.com/item/1005007187023722.html"
        assert store.match(url) == 0.9

    def test_match_aliexpress_fr_product(self, store):
        """A fr.aliexpress.com/item/ URL is recognised as a product page."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        assert store.match(url) == 0.9

    def test_match_aliexpress_non_product(self, store):
        """An aliexpress.com URL without /item/ gets a reduced score."""
        url = "https://www.aliexpress.com/category/electronics"
        assert store.match(url) == 0.5

    def test_match_other_site(self, store):
        """URLs from other sites, and the empty string, score 0.0."""
        # None is intentionally not part of this list: match(None) is not
        # exercised by this test, so listing it would only suggest coverage
        # that does not exist.
        urls = [
            "https://www.amazon.fr/dp/ASIN",
            "https://www.cdiscount.com/f-123-abc.html",
            "",
        ]
        for url in urls:
            # The message identifies which URL broke the expectation.
            assert store.match(url) == 0.0, f"unexpected match score for {url!r}"

    def test_match_case_insensitive(self, store):
        """Matching ignores the case of the URL."""
        url = "https://FR.ALIEXPRESS.COM/ITEM/1234567890.HTML"
        assert store.match(url) == 0.9

    # ========== canonicalize() tests ==========

    def test_canonicalize_remove_query_params(self, store):
        """canonicalize() drops the query string."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html?spm=a2g0o.detail.0.0"
        canonical = store.canonicalize(url)
        assert canonical == "https://fr.aliexpress.com/item/1005007187023722.html"

    def test_canonicalize_remove_fragment(self, store):
        """canonicalize() drops the fragment (#...)."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html#reviews"
        canonical = store.canonicalize(url)
        assert canonical == "https://fr.aliexpress.com/item/1005007187023722.html"

    def test_canonicalize_keep_item_path(self, store):
        """canonicalize() keeps the /item/{ID}.html path untouched."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        canonical = store.canonicalize(url)
        assert canonical == "https://fr.aliexpress.com/item/1005007187023722.html"

    def test_canonicalize_empty_url(self, store):
        """canonicalize() returns empty or None input unchanged."""
        assert store.canonicalize("") == ""
        assert store.canonicalize(None) is None

    # ========== extract_reference() tests ==========

    def test_extract_reference_standard_format(self, store):
        """The SKU is extracted from the standard /item/{ID}.html form."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        assert store.extract_reference(url) == "1005007187023722"

    def test_extract_reference_with_query_params(self, store):
        """SKU extraction ignores query parameters."""
        url = "https://fr.aliexpress.com/item/1005007187023722.html?param=value"
        assert store.extract_reference(url) == "1005007187023722"

    def test_extract_reference_different_domain(self, store):
        """SKU extraction works on a different AliExpress domain."""
        url = "https://www.aliexpress.com/item/9876543210987.html"
        assert store.extract_reference(url) == "9876543210987"

    def test_extract_reference_invalid_url(self, store):
        """SKU extraction returns None for URLs without an item reference."""
        urls = [
            "https://www.aliexpress.com/category/electronics",
            "https://www.aliexpress.com/",
            "",
            None,
        ]
        for url in urls:
            ref = store.extract_reference(url)
            # The message identifies which input produced a reference.
            assert ref is None, f"expected no reference for {url!r}, got {ref!r}"

    # ========== parse() tests ==========

    def test_parse_basic_html_with_title(self, store):
        """Parse basic HTML that carries an h1 title."""
        html = """
        <html>
        <head>
            <meta property="og:title" content="Samsung DDR4 RAM - AliExpress">
        </head>
        <body>
            <h1>Samsung DDR4 RAM Server Memory</h1>
        </body>
        </html>
        """
        url = "https://fr.aliexpress.com/item/1005007187023722.html"
        snapshot = store.parse(html, url)

        assert snapshot.source == "aliexpress"
        assert snapshot.url == "https://fr.aliexpress.com/item/1005007187023722.html"
        assert snapshot.title == "Samsung DDR4 RAM Server Memory"
        assert snapshot.reference == "1005007187023722"
        assert snapshot.currency == "EUR"  # fr.aliexpress -> EUR

    def test_parse_title_from_meta_og(self, store):
        """The title falls back to og:title when there is no h1."""
        html = """
        <html>
        <head>
            <meta property="og:title" content="Product Name - AliExpress">
        </head>
        <body>
        </body>
        </html>
        """
        url = "https://www.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)

        assert snapshot.title == "Product Name"  # "- AliExpress" suffix removed
        assert snapshot.currency == "USD"  # .com -> USD

    def test_parse_price_from_regex(self, store):
        """The price is picked up from the HTML text (amount before the euro sign)."""
        html = """
        <html>
        <head>
            <meta property="og:title" content="Test Product - AliExpress">
        </head>
        <body>
            <h1>Test Product</h1>
            <div class="price-container">
                <span>Prix: 99,99 €</span>
            </div>
        </body>
        </html>
        """
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)

        assert snapshot.price == 99.99
        assert snapshot.currency == "EUR"

    def test_parse_price_euro_before(self, store):
        """The price is parsed when the euro sign precedes the number."""
        html = """
        <html>
        <head><meta property="og:title" content="Test - AliExpress"></head>
        <body>
            <h1>Test</h1>
            <span>€ 125.50</span>
        </body>
        </html>
        """
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)

        assert snapshot.price == 125.50

    def test_parse_images_from_dcdata(self, store):
        """Images are read from window._d_c_.DCData."""
        html = """
        <html>
        <head><meta property="og:title" content="Test - AliExpress"></head>
        <body>
            <h1>Test</h1>
            <script>
                window._d_c_ = window._d_c_ || {};
                window._d_c_.DCData = {
                    "imagePathList": [
                        "https://ae01.alicdn.com/kf/image1.jpg",
                        "https://ae01.alicdn.com/kf/image2.jpg"
                    ]
                };
            </script>
        </body>
        </html>
        """
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)

        assert len(snapshot.images) == 2
        assert snapshot.images[0] == "https://ae01.alicdn.com/kf/image1.jpg"
        assert snapshot.images[1] == "https://ae01.alicdn.com/kf/image2.jpg"
        # The parser is expected to leave a debug note mentioning DCData.
        assert any("DCData" in note for note in snapshot.debug.notes)

    def test_parse_images_from_og_fallback(self, store):
        """Images fall back to og:image."""
        html = """
        <html>
        <head>
            <meta property="og:title" content="Test - AliExpress">
            <meta property="og:image" content="https://ae01.alicdn.com/kf/product.jpg">
        </head>
        <body>
            <h1>Test</h1>
        </body>
        </html>
        """
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)

        assert len(snapshot.images) == 1
        assert snapshot.images[0] == "https://ae01.alicdn.com/kf/product.jpg"

    def test_parse_missing_title_and_price(self, store):
        """A page with neither title nor price yields a partial snapshot."""
        html = "<html><body><p>Empty content</p></body></html>"
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)

        assert snapshot.title is None
        assert snapshot.price is None
        assert not snapshot.is_complete()
        assert snapshot.debug.status == "partial"

    def test_parse_small_html_warning(self, store):
        """Parsing a small HTML document records a warning note."""
        html = "<html><head><title>Test</title></head><body></body></html>"
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)

        # HTML under 200KB is expected to produce a note.
        assert any("non rendu" in note.lower() for note in snapshot.debug.notes)

    def test_parse_stock_status_in_stock(self, store):
        """in_stock is detected from the add-to-cart button."""
        html = """
        <html>
        <head><meta property="og:title" content="Test - AliExpress"></head>
        <body>
            <h1>Test</h1>
            <button class="add-to-cart-btn">Add to Cart</button>
        </body>
        </html>
        """
        url = "https://fr.aliexpress.com/item/1234567890.html"
        snapshot = store.parse(html, url)

        assert snapshot.stock_status == "in_stock"
|