before claude

2026-01-18 06:26:17 +01:00
parent dc19315e5d
commit 740c3d7516
60 changed files with 3815 additions and 354 deletions
--- a/pricewatch/app/stores/amazon/__pycache__/store.cpython-313.pyc
+++ b/pricewatch/app/stores/amazon/__pycache__/store.cpython-313.pyc
--- a/pricewatch/app/stores/amazon/selectors.yml
+++ b/pricewatch/app/stores/amazon/selectors.yml
@@ -15,6 +15,13 @@ price:
  - "#priceblock_dealprice"
  - ".a-price-range .a-price .a-offscreen"

+# Texte de réduction explicite
+discount_text:
+  - "#regularprice_savings"
+  - "#dealprice_savings"
+  - "#savingsPercentage"
+  - "span.savingsPercentage"
+
 # Devise (généralement dans le symbole)
 currency:
  - "span.a-price-symbol"
@@ -32,6 +39,24 @@ stock_status:
  - "#availability"
  - ".a-declarative .a-size-medium"

+# Note moyenne
+rating_value:
+  - "#acrPopover"
+  - "#averageCustomerReviews .a-icon-alt"
+  - "#averageCustomerReviews span.a-icon-alt"
+
+# Nombre d'évaluations
+rating_count:
+  - "#acrCustomerReviewText"
+  - "#acrCustomerReviewLink"
+
+# Badge Choix d'Amazon
+amazon_choice:
+  - "#acBadge_feature_div"
+  - "#acBadge_feature_div .ac-badge"
+  - "#acBadge_feature_div .ac-badge-rectangle"
+  - "#acBadge_feature_div .ac-badge-rectangle-icon"
+
 # Images produit
 images:
  - "#landingImage"
@@ -44,6 +69,13 @@ category:
  - "#wayfinding-breadcrumbs_feature_div"
  - ".a-breadcrumb"

+# Description (détails de l'article)
+description:
+  - "#detailBullets_feature_div"
+  - "#detailBulletsWrapper_feature_div"
+  - "#productDetails_detailBullets_sections1"
+  - "#feature-bullets"
+
 # Caractéristiques techniques (table specs)
 specs_table:
  - "#productDetails_techSpec_section_1"
--- a/pricewatch/app/stores/amazon/store.py
+++ b/pricewatch/app/stores/amazon/store.py
@@ -130,13 +130,19 @@ class AmazonStore(BaseStore):
        title = self._extract_title(soup, debug_info)
        price = self._extract_price(soup, debug_info)
        currency = self._extract_currency(soup, debug_info)
-        stock_status = self._extract_stock(soup, debug_info)
-        images = self._extract_images(soup, debug_info)
+        stock_status, stock_text, in_stock = self._extract_stock_details(soup, debug_info)
+        main_image, gallery_images, images = self._extract_images(soup, debug_info)
        category = self._extract_category(soup, debug_info)
        specs = self._extract_specs(soup, debug_info)
        description = self._extract_description(soup, debug_info)
        msrp = self._extract_msrp(soup, debug_info)
        reference = self.extract_reference(url) or self._extract_asin_from_html(soup)
+        rating_value = self._extract_rating_value(soup, debug_info)
+        rating_count = self._extract_rating_count(soup, debug_info)
+        amazon_choice, amazon_choice_label = self._extract_amazon_choice(soup, debug_info)
+        discount_text = self._extract_discount_text(soup, debug_info)
+        model_number, model_name = self._extract_model_details(specs)
+        asin = reference

        # Déterminer le statut final (ne pas écraser FAILED)
        if debug_info.status != DebugStatus.FAILED:
@@ -153,12 +159,24 @@ class AmazonStore(BaseStore):
            currency=currency or "EUR",
            shipping_cost=None,  # Difficile à extraire
            stock_status=stock_status,
+            stock_text=stock_text,
+            in_stock=in_stock,
            reference=reference,
+            asin=asin,
            category=category,
            description=description,
            images=images,
+            main_image=main_image,
+            gallery_images=gallery_images,
            specs=specs,
            msrp=msrp,
+            rating_value=rating_value,
+            rating_count=rating_count,
+            amazon_choice=amazon_choice,
+            amazon_choice_label=amazon_choice_label,
+            discount_text=discount_text,
+            model_number=model_number,
+            model_name=model_name,
            debug=debug_info,
        )

@@ -203,14 +221,26 @@ class AmazonStore(BaseStore):
        return None

    def _extract_description(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
-        """Extrait la description (meta tags)."""
-        meta = soup.find("meta", property="og:description") or soup.find(
-            "meta", attrs={"name": "description"}
-        )
-        if meta:
-            description = meta.get("content", "").strip()
-            if description:
-                return description
+        """Extrait la description depuis les détails de l'article."""
+        selectors = self.get_selector("description", [])
+        if isinstance(selectors, str):
+            selectors = [selectors]
+
+        for selector in selectors:
+            element = soup.select_one(selector)
+            if not element:
+                continue
+            items = [
+                item.get_text(" ", strip=True)
+                for item in element.select("li")
+                if item.get_text(strip=True)
+            ]
+            if items:
+                return "\n".join(items)
+            text = " ".join(element.stripped_strings)
+            if text:
+                return text
+
        return None

    def _extract_price(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]:
@@ -271,8 +301,10 @@ class AmazonStore(BaseStore):
        # Défaut basé sur le domaine
        return "EUR"

-    def _extract_stock(self, soup: BeautifulSoup, debug: DebugInfo) -> StockStatus:
-        """Extrait le statut de stock."""
+    def _extract_stock_details(
+        self, soup: BeautifulSoup, debug: DebugInfo
+    ) -> tuple[StockStatus, Optional[str], Optional[bool]]:
+        """Extrait le statut de stock avec texte brut."""
        selectors = self.get_selector("stock_status", [])
        if isinstance(selectors, str):
            selectors = [selectors]
@@ -280,22 +312,27 @@ class AmazonStore(BaseStore):
        for selector in selectors:
            element = soup.select_one(selector)
            if element:
-                text = element.get_text(strip=True).lower()
-                if "en stock" in text or "available" in text or "in stock" in text:
-                    return StockStatus.IN_STOCK
+                text = element.get_text(strip=True)
+                normalized = text.lower()
+                if "en stock" in normalized or "available" in normalized or "in stock" in normalized:
+                    return StockStatus.IN_STOCK, text, True
                elif (
-                    "rupture" in text
-                    or "indisponible" in text
-                    or "out of stock" in text
+                    "rupture" in normalized
+                    or "indisponible" in normalized
+                    or "out of stock" in normalized
                ):
-                    return StockStatus.OUT_OF_STOCK
+                    return StockStatus.OUT_OF_STOCK, text, False

-        return StockStatus.UNKNOWN
+        return StockStatus.UNKNOWN, None, None

-    def _extract_images(self, soup: BeautifulSoup, debug: DebugInfo) -> list[str]:
-        """Extrait les URLs d'images."""
-        images = []
-        seen = set()
+    def _extract_images(
+        self, soup: BeautifulSoup, debug: DebugInfo
+    ) -> tuple[Optional[str], list[str], list[str]]:
+        """Extrait l'image principale et la galerie."""
+        images: list[str] = []
+        seen: set[str] = set()
+        main_image: Optional[str] = None
+        max_gallery = 15
        selectors = self.get_selector("images", [])
        if isinstance(selectors, str):
            selectors = [selectors]
@@ -309,6 +346,8 @@ class AmazonStore(BaseStore):
                    if self._is_product_image(url) and url not in seen:
                        images.append(url)
                        seen.add(url)
+                        if main_image is None:
+                            main_image = url
                dynamic = element.get("data-a-dynamic-image")
                if dynamic:
                    urls = self._extract_dynamic_images(dynamic)
@@ -316,6 +355,8 @@ class AmazonStore(BaseStore):
                        if self._is_product_image(dyn_url) and dyn_url not in seen:
                            images.append(dyn_url)
                            seen.add(dyn_url)
+                            if main_image is None:
+                                main_image = dyn_url

        # Fallback: chercher tous les img tags si aucune image trouvée
        if not images:
@@ -326,8 +367,15 @@ class AmazonStore(BaseStore):
                    if url not in seen:
                        images.append(url)
                        seen.add(url)
+                        if main_image is None:
+                            main_image = url

-        return images
+        if main_image is None and images:
+            main_image = images[0]
+        gallery_images = [url for url in images if url != main_image]
+        gallery_images = gallery_images[:max_gallery]
+        final_images = [main_image] + gallery_images if main_image else gallery_images
+        return main_image, gallery_images, final_images

    def _extract_dynamic_images(self, raw: str) -> list[str]:
        """Extrait les URLs du JSON data-a-dynamic-image."""
@@ -393,8 +441,111 @@ class AmazonStore(BaseStore):
                        if key and value:
                            specs[key] = value

+        # Détails de l'article sous forme de liste
+        detail_list = soup.select("#detailBullets_feature_div li")
+        for item in detail_list:
+            text = item.get_text(" ", strip=True)
+            if ":" not in text:
+                continue
+            key, value = text.split(":", 1)
+            key = key.strip()
+            value = value.strip()
+            if key and value and key not in specs:
+                specs[key] = value
+
        return specs

+    def _extract_rating_value(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]:
+        """Extract the average customer rating.
+
+        Tries each CSS selector configured under ``rating_value`` in order and
+        returns the first decimal number found in the matched element's text
+        or, failing that, in its ``title`` attribute. Both ``4.5`` and ``4,5``
+        notations are accepted.
+
+        Args:
+            soup: Parsed product page.
+            debug: Debug collector; not used by this extractor.
+
+        Returns:
+            The rating as a float, or None when no selector yields a number.
+        """
+        selectors = self.get_selector("rating_value", [])
+        if isinstance(selectors, str):
+            selectors = [selectors]
+
+        for selector in selectors:
+            element = soup.select_one(selector)
+            if not element:
+                continue
+            # The title attribute is consulted even when the visible text is
+            # non-empty, in case that text carries no number at all.
+            candidates = (
+                element.get_text(" ", strip=True),
+                element.get("title") or "",
+            )
+            for text in candidates:
+                # Anchor on a digit so stray punctuation ("." or ",") ahead of
+                # the rating can never be mistaken for the number itself.
+                match = re.search(r"\d+(?:[.,]\d+)?", text)
+                if match:
+                    return float(match.group(0).replace(",", "."))
+        return None
+
+    def _extract_rating_count(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[int]:
+        """Extract the number of customer ratings.
+
+        Tries each CSS selector configured under ``rating_count`` in order and
+        parses the first integer found in the matched element's text.
+        Thousands groups separated by whitespace (including no-break spaces),
+        a dot or a comma are joined, so ``1 234``, ``1.234`` and ``1,234`` all
+        yield ``1234``.
+
+        Args:
+            soup: Parsed product page.
+            debug: Debug collector; not used by this extractor.
+
+        Returns:
+            The rating count, or None when no selector yields a number.
+        """
+        selectors = self.get_selector("rating_count", [])
+        if isinstance(selectors, str):
+            selectors = [selectors]
+
+        for selector in selectors:
+            element = soup.select_one(selector)
+            if not element:
+                continue
+            text = element.get_text(" ", strip=True)
+            # Anchor on a digit: a pattern that also accepts whitespace would
+            # match the first space of the text and hide the actual number.
+            # Each separator must be followed by exactly three digits so that
+            # unrelated neighbouring numbers are not glued together.
+            match = re.search(r"\d+(?:[\s.,]\d{3}(?!\d))*", text)
+            if match:
+                return int(re.sub(r"\D", "", match.group(0)))
+        return None
+
+    def _extract_amazon_choice(
+        self, soup: BeautifulSoup, debug: DebugInfo
+    ) -> tuple[Optional[bool], Optional[str]]:
+        """Detect the "Amazon's Choice" badge and read its label.
+
+        The first configured ``amazon_choice`` selector that matches an element
+        is taken as the badge. Its label is the element's text or, when that is
+        empty, the first non-empty value among its ``aria-label``, ``title``
+        and ``data-a-badge-label`` attributes.
+
+        Args:
+            soup: Parsed product page.
+            debug: Debug collector; not used by this extractor.
+
+        Returns:
+            ``(True, label)`` when a matching element carries a label,
+            ``(True, None)`` when it matches without one, and
+            ``(None, None)`` when no selector matches.
+        """
+        configured = self.get_selector("amazon_choice", [])
+        css_queries = [configured] if isinstance(configured, str) else configured
+
+        for css in css_queries:
+            badge = soup.select_one(css)
+            if not badge:
+                continue
+            # Every candidate is computed up front; visible text has priority.
+            texts = [badge.get_text(" ", strip=True)]
+            texts += [
+                badge.get(attribute, "").strip()
+                for attribute in ("aria-label", "title", "data-a-badge-label")
+            ]
+            found = next(filter(None, texts), "")
+            # NOTE(review): a matched element with no label at all still counts
+            # as a badge - confirm an empty container cannot appear on pages
+            # that have no badge.
+            return True, found or None
+        return None, None
+
+    def _extract_discount_text(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
+        """Return the explicit discount text shown on the page, if any.
+
+        Walks the CSS selectors configured under ``discount_text`` in order and
+        returns the text of the first matching element whose text is not empty.
+
+        Args:
+            soup: Parsed product page.
+            debug: Debug collector; not used by this extractor.
+
+        Returns:
+            The discount text, or None when nothing usable is found.
+        """
+        configured = self.get_selector("discount_text", [])
+        css_queries = [configured] if isinstance(configured, str) else configured
+
+        for css in css_queries:
+            node = soup.select_one(css)
+            label = node.get_text(" ", strip=True) if node else ""
+            if label:
+                return label
+        return None
+
+    def _extract_model_details(self, specs: dict[str, str]) -> tuple[Optional[str], Optional[str]]:
+        """Pull the model number and the model name out of the specs mapping.
+
+        Keys are compared case-, accent- and apostrophe-insensitively against
+        the French labels "numéro du modèle de l'article" and "nom du modèle",
+        so decomposed accents, a typographic apostrophe or a no-break space in
+        the page text do not defeat the lookup.
+
+        Args:
+            specs: Spec labels mapped to their values.
+
+        Returns:
+            ``(model_number, model_name)``; each item is None when no key
+            matches. When several keys match, the last one in iteration order
+            wins.
+        """
+        import unicodedata  # local import: only this helper needs it
+
+        def fold(label: str) -> str:
+            # NFKD splits accented letters into base letter + combining mark
+            # (and turns no-break spaces into plain spaces); dropping the
+            # marks leaves an unaccented string.
+            decomposed = unicodedata.normalize("NFKD", label)
+            bare = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
+            # Treat the typographic apostrophe like the ASCII one.
+            return bare.replace("\u2019", "'").casefold()
+
+        model_number = None
+        model_name = None
+        for key, value in specs.items():
+            folded = fold(key)
+            if "numero du modele de l'article" in folded:
+                model_number = value
+            if "nom du modele" in folded:
+                model_name = value
+        return model_number, model_name
+
    def _extract_asin_from_html(self, soup: BeautifulSoup) -> Optional[str]:
        """Extrait l'ASIN depuis le HTML (fallback)."""
        selectors = self.get_selector("asin", [])