codex2

2026-01-14 21:54:55 +01:00
parent c91c0f1fc9
commit d0b73b9319
140 changed files with 5822 additions and 161 deletions
--- a/pricewatch/app/stores/aliexpress/store.py
+++ b/pricewatch/app/stores/aliexpress/store.py
@@ -23,6 +23,7 @@ from pricewatch.app.core.schema import (
    StockStatus,
 )
 from pricewatch.app.stores.base import BaseStore
+from pricewatch.app.stores.price_parser import parse_price_text

 logger = get_logger("stores.aliexpress")

@@ -126,6 +127,8 @@ class AliexpressStore(BaseStore):
        images = self._extract_images(html, soup, debug_info)
        category = self._extract_category(soup, debug_info)
        specs = self._extract_specs(soup, debug_info)
+        description = self._extract_description(soup, debug_info)
+        msrp = self._extract_msrp(html, debug_info)
        reference = self.extract_reference(url)

        # Note sur le rendu client-side
@@ -150,8 +153,10 @@ class AliexpressStore(BaseStore):
            stock_status=stock_status,
            reference=reference,
            category=category,
+            description=description,
            images=images,
            specs=specs,
+            msrp=msrp,
            debug=debug_info,
        )

@@ -183,6 +188,17 @@ class AliexpressStore(BaseStore):
        debug.errors.append("Titre non trouvé")
        return None

+    def _extract_description(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
+        """Return the description taken from the page's meta tags, or None."""
+        tag = soup.find("meta", property="og:description")
+        if not tag:
+            # No Open Graph description: try the plain "description" meta tag.
+            tag = soup.find("meta", attrs={"name": "description"})
+        if not tag:
+            return None
+        text = tag.get("content", "").strip()
+        return text or None
+
    def _extract_price(
        self, html: str, soup: BeautifulSoup, debug: DebugInfo
    ) -> Optional[float]:
@@ -193,35 +209,39 @@ class AliexpressStore(BaseStore):
        On utilise regex sur le HTML brut.
        """
        # Pattern 1: Prix avant € (ex: "136,69 €")
-        match = re.search(r"([0-9]+[.,][0-9]{2})\s*€", html)
+        match = re.search(r"([0-9][0-9\s.,\u00a0\u202f\u2009]*)\s*€", html)
        if match:
-            price_str = match.group(1).replace(",", ".")
-            try:
-                return float(price_str)
-            except ValueError:
-                pass
+            price = parse_price_text(match.group(1))
+            if price is not None:
+                return price

        # Pattern 2: € avant prix (ex: "€ 136.69")
-        match = re.search(r"€\s*([0-9]+[.,][0-9]{2})", html)
+        match = re.search(r"€\s*([0-9][0-9\s.,\u00a0\u202f\u2009]*)", html)
        if match:
-            price_str = match.group(1).replace(",", ".")
-            try:
-                return float(price_str)
-            except ValueError:
-                pass
+            price = parse_price_text(match.group(1))
+            if price is not None:
+                return price

        # Pattern 3: Chercher dans meta tags (moins fiable)
        og_price = soup.find("meta", property="og:price:amount")
        if og_price:
            price_str = og_price.get("content", "")
-            try:
-                return float(price_str)
-            except ValueError:
-                pass
+            price = parse_price_text(price_str)
+            if price is not None:
+                return price

        debug.errors.append("Prix non trouvé")
        return None

+    def _extract_msrp(self, html: str, debug: DebugInfo) -> Optional[float]:
+        """Return the price found in the page's "originalPrice" field, or None."""
+        # The escapes use a single backslash: in a raw string, "\\s" would make
+        # the regex require a literal backslash and the pattern would never match.
+        match = re.search(r'originalPrice"\s*:\s*"([0-9\s.,]+)"', html)
+        if match is None:
+            return None
+        return parse_price_text(match.group(1))
+
    def _extract_currency(
        self, url: str, soup: BeautifulSoup, debug: DebugInfo
    ) -> str: