This commit is contained in:
Gilles Soulier
2026-01-14 21:54:55 +01:00
parent c91c0f1fc9
commit d0b73b9319
140 changed files with 5822 additions and 161 deletions

View File

@@ -23,6 +23,7 @@ from pricewatch.app.core.schema import (
StockStatus,
)
from pricewatch.app.stores.base import BaseStore
from pricewatch.app.stores.price_parser import parse_price_text
logger = get_logger("stores.aliexpress")
@@ -126,6 +127,8 @@ class AliexpressStore(BaseStore):
images = self._extract_images(html, soup, debug_info)
category = self._extract_category(soup, debug_info)
specs = self._extract_specs(soup, debug_info)
description = self._extract_description(soup, debug_info)
msrp = self._extract_msrp(html, debug_info)
reference = self.extract_reference(url)
# Note sur le rendu client-side
@@ -150,8 +153,10 @@ class AliexpressStore(BaseStore):
stock_status=stock_status,
reference=reference,
category=category,
description=description,
images=images,
specs=specs,
msrp=msrp,
debug=debug_info,
)
@@ -183,6 +188,17 @@ class AliexpressStore(BaseStore):
debug.errors.append("Titre non trouvé")
return None
def _extract_description(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
    """Extract the product description from the page's meta tags.

    The Open Graph tag (``<meta property="og:description">``) is tried
    first, then the standard ``<meta name="description">`` tag. The first
    tag whose ``content`` attribute is non-blank wins, so an empty Open
    Graph tag no longer hides a usable standard description.

    Args:
        soup: Parsed HTML document to search.
        debug: Debug collector; not used by this method.

    Returns:
        The stripped description text, or ``None`` when no candidate tag
        carries non-blank content.
    """
    candidates = (
        {"property": "og:description"},
        {"name": "description"},
    )
    for attrs in candidates:
        meta = soup.find("meta", attrs=attrs)
        if not meta:
            continue
        # `content` may be missing entirely; treat that like an empty value.
        description = (meta.get("content") or "").strip()
        if description:
            return description
    return None
def _extract_price(
    self, html: str, soup: BeautifulSoup, debug: DebugInfo
) -> Optional[float]:
    """Extract the product price.

    Regular expressions are run against the raw HTML, with a meta tag as
    the last resort. Candidates are tried in this order, and the first one
    that ``parse_price_text`` can convert is returned:

    1. An amount followed by the euro sign (e.g. ``"136,69 €"``).
    2. The euro sign followed by an amount (e.g. ``"€ 136.69"``).
    3. The ``og:price:amount`` meta tag (less reliable).

    Args:
        html: Raw HTML of the page.
        soup: Parsed HTML document, used for the meta-tag fallback.
        debug: Debug collector; an error message is appended when no price
            can be extracted.

    Returns:
        The parsed price, or ``None`` when no candidate yields a value.
    """
    # NOTE: these are raw strings, so escapes use a single backslash.
    # A doubled backslash would match a literal "\" instead of whitespace.
    # The character class admits digits, decimal/thousands separators and
    # the whitespace variants used for digit grouping (regular space,
    # no-break space, narrow no-break space, thin space).

    # Pattern 1: amount before the euro sign (e.g. "136,69 €").
    match = re.search(r"([0-9][0-9\s.,\u00a0\u202f\u2009]*)\s*€", html)
    if match:
        price = parse_price_text(match.group(1))
        if price is not None:
            return price

    # Pattern 2: euro sign before the amount (e.g. "€ 136.69").
    # Anchored on "€" so it cannot match an arbitrary number in the page.
    match = re.search(r"€\s*([0-9][0-9\s.,\u00a0\u202f\u2009]*)", html)
    if match:
        price = parse_price_text(match.group(1))
        if price is not None:
            return price

    # Pattern 3: meta tag fallback (less reliable).
    og_price = soup.find("meta", property="og:price:amount")
    if og_price:
        price = parse_price_text(og_price.get("content", ""))
        if price is not None:
            return price

    debug.errors.append("Prix non trouvé")
    return None
def _extract_msrp(self, html: str, debug: DebugInfo) -> Optional[float]:
    """Extract the list price (MSRP) when the page exposes one.

    Searches the raw HTML for a quoted ``originalPrice`` key followed by a
    quoted numeric value (e.g. ``"originalPrice": "199,99"``) and converts
    the captured text with ``parse_price_text``.

    Args:
        html: Raw HTML of the page.
        debug: Debug collector; not used by this method.

    Returns:
        The parsed list price, or ``None`` when the key is absent or its
        value cannot be parsed.
    """
    # Raw string with single backslashes: `\s` must mean "whitespace".
    # The doubled form matched a literal backslash and never hit real data.
    match = re.search(r'originalPrice"\s*:\s*"([0-9\s.,]+)"', html)
    if match is None:
        return None
    # parse_price_text already yields None for unparseable text.
    return parse_price_text(match.group(1))
def _extract_currency(
self, url: str, soup: BeautifulSoup, debug: DebugInfo
) -> str: