#!/usr/bin/env python3
"""Analyze a scraped Backmarket HTML page to identify scraping selectors.

Reads a saved product-page snapshot (scraped/backmarket_pw.html) and prints
candidate sources for each field of interest: title (h1 tags), price
(JSON-LD, class names, raw-HTML regex), images, condition/grade, SKU,
breadcrumb/category, technical specs and Open Graph meta tags. Output is
diagnostic text meant to be read by a developer picking selectors.
"""

import json
import re

from bs4 import BeautifulSoup

# Read the locally saved HTML snapshot (produced earlier by the scraper).
with open("scraped/backmarket_pw.html", "r", encoding="utf-8") as f:
    html = f.read()

soup = BeautifulSoup(html, "lxml")

print("=" * 80)
print("ANALYSE HTML BACKMARKET.FR")
print("=" * 80)

# 1. Title candidates: list every <h1> with its classes and a text preview.
print("\n1. TITRE")
print("-" * 80)
h1_tags = soup.find_all("h1")
print(f"Nombre de h1: {len(h1_tags)}")
for i, h1 in enumerate(h1_tags[:3]):
    print(f" [{i+1}] Classes: {h1.get('class')}")
    print(f" Texte: {h1.get_text().strip()[:100]}")

# 2. Price candidates.
print("\n2. PRIX")
print("-" * 80)
# Structured data first: JSON-LD Product/Offer blocks are the most reliable
# source when present.
json_ld_scripts = soup.find_all("script", {"type": "application/ld+json"})
print(f"Scripts JSON-LD trouvés: {len(json_ld_scripts)}")
for i, script in enumerate(json_ld_scripts[:3]):
    try:
        # script.string is None for empty tags; "" makes json.loads raise
        # JSONDecodeError, which is reported below instead of crashing.
        data = json.loads(script.string or "")
        if isinstance(data, dict):
            print(f"\n Script [{i+1}] @type: {data.get('@type')}")
            if data.get("@type") == "Product":
                print(f" name: {data.get('name')}")
                offers = data.get('offers', {})
                # 'offers' may be a dict or a list per schema.org; only the
                # dict shape is inspected here.
                if isinstance(offers, dict):
                    print(f" price: {offers.get('price')}")
                    print(f" priceCurrency: {offers.get('priceCurrency')}")
    except (json.JSONDecodeError, TypeError) as e:
        # Narrowed from a blanket Exception: only parse failures are expected.
        print(f" Script [{i+1}] Erreur parsing JSON: {e}")

# Fallback: elements whose CSS class mentions "price".
price_elements = soup.find_all(["div", "span", "p"], class_=lambda x: x and ("price" in str(x).lower()))
print(f"\nÉléments avec 'price' dans la classe: {len(price_elements)}")
for i, elem in enumerate(price_elements[:5]):
    text = elem.get_text().strip()[:80]
    print(f" [{i+1}] {elem.name} {elem.get('class')} → {text}")

# Last resort: regex scan of the raw HTML for euro amounts like "1234,56 €".
matches = re.findall(r'(\d{2,4})[,\s]?(\d{2})\s*€', html)
print(f"\nPrix trouvés par regex: {len(matches)} matches")
# sorted() makes the printed order deterministic (plain set iteration order
# varies across runs because of string-hash randomization).
unique_prices = sorted(set(f"{m[0]},{m[1]} €" for m in matches[:10]))
for price in unique_prices[:5]:
    print(f" - {price}")

# 3. Product images: among the first few <img alt=...>, keep only alts that
# look like the product (contain "iphone" or "apple").
print("\n3. IMAGES")
print("-" * 80)
img_product = soup.find_all("img", alt=True)
print(f"Images avec alt: {len(img_product)}")
for i, img in enumerate(img_product[:5]):
    alt = img.get("alt", "")
    src = img.get("src", "")
    if "iphone" in alt.lower() or "apple" in alt.lower():
        print(f" [{i+1}] alt: {alt[:60]}")
        print(f" src: {src[:80]}")

# 4. Condition / grade selectors (class mentions condition, grade or état).
print("\n4. ÉTAT / CONDITION")
print("-" * 80)
condition_elements = soup.find_all(["div", "span", "button"], class_=lambda x: x and ("condition" in str(x).lower() or "grade" in str(x).lower() or "état" in str(x).lower()))
print(f"Éléments avec condition/grade/état: {len(condition_elements)}")
for i, elem in enumerate(condition_elements[:5]):
    text = elem.get_text().strip()[:80]
    print(f" [{i+1}] {elem.name} {elem.get('class')} → {text}")

# 5. SKU / product reference.
print("\n5. SKU / RÉFÉRENCE PRODUIT")
print("-" * 80)
# The URL slug itself is a usable identifier.
print("Dans l'URL: /fr-fr/p/iphone-15-pro")
print("Possible SKU: iphone-15-pro")

# JSON-LD Product blocks may also expose sku / mpn / productID.
for script in json_ld_scripts:
    try:
        data = json.loads(script.string or "")
        if isinstance(data, dict) and data.get("@type") == "Product":
            print("\nDans JSON-LD:")
            print(f" sku: {data.get('sku')}")
            print(f" mpn: {data.get('mpn')}")
            print(f" productID: {data.get('productID')}")
    except (json.JSONDecodeError, TypeError):
        # Non-JSON or empty script tags are expected here; skip them.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
        pass

# 6. Breadcrumb / category navigation.
print("\n6. CATÉGORIE / BREADCRUMB")
print("-" * 80)
breadcrumbs = soup.find_all(["nav", "ol", "ul"], class_=lambda x: x and "breadcrumb" in str(x).lower())
for bc in breadcrumbs[:2]:
    print(f"Tag: {bc.name}, Classes: {bc.get('class')}")
    links = bc.find_all("a")
    for link in links[:5]:
        print(f" - {link.get_text().strip()}")

# 7. Technical-specification sections.
print("\n7. CARACTÉRISTIQUES TECHNIQUES")
print("-" * 80)
specs_sections = soup.find_all(["div", "dl", "table"], class_=lambda x: x and ("spec" in str(x).lower() or "characteristic" in str(x).lower() or "feature" in str(x).lower()))
print(f"Sections de specs: {len(specs_sections)}")
for i, section in enumerate(specs_sections[:3]):
    print(f" [{i+1}] {section.name} {section.get('class')}")
    text = section.get_text().strip()[:150]
    print(f" {text}")

# 8. Open Graph meta tags (title / price amount).
print("\n8. META TAGS")
print("-" * 80)
og_title = soup.find("meta", property="og:title")
og_price = soup.find("meta", property="og:price:amount")
if og_title:
    print(f"og:title: {og_title.get('content')}")
if og_price:
    print(f"og:price:amount: {og_price.get('content')}")

print("\n" + "=" * 80)
print("FIN DE L'ANALYSE")
print("=" * 80)
|
||||