chore: sync project files

2026-01-13 19:49:04 +01:00
parent 53f8227941
commit ecda149a4b
149 changed files with 65272 additions and 1 deletions
--- a/analyze_aliexpress_data.py
+++ b/analyze_aliexpress_data.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""Analyse des données JavaScript d'AliExpress."""
+
+import re
+import json
+from pathlib import Path
+
+# Load the HTML file once; every section below searches this text.
+html_file = Path("scraped/aliexpress_pw.html")
+html = html_file.read_text(encoding="utf-8")
+
+# Opening banner: 80-column rule, title, rule.
+print("=" * 80, "EXTRACTION DES DONNÉES ALIEXPRESS", "=" * 80, sep="\n")
+
+# 1. Extract window.runParams
+print("\n[1] window.runParams")
+print("-" * 80)
+# NOTE: [^;]* cannot cross a ';', so an object whose text contains ';' is not matched.
+match = re.search(r'window\.runParams\s*=\s*(\{[^;]*\});', html, re.DOTALL)
+if match:
+    try:
+        data = json.loads(match.group(1))
+        print(f"✓ Trouvé et parsé: {len(json.dumps(data))} chars")
+        print(f"Keys: {list(data.keys())}")
+        # Save for inspection; explicit UTF-8 because ensure_ascii=False writes raw non-ASCII.
+        with open("scraped/aliexpress_runParams.json", "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2, ensure_ascii=False)
+        print("✓ Sauvegardé: scraped/aliexpress_runParams.json")
+    except Exception as e:
+        print(f"✗ Erreur parsing: {e}")
+else:
+    print("✗ Non trouvé")
+
+# 2. Extract DCData
+print("\n[2] window._d_c_.DCData")
+print("-" * 80)
+match = re.search(r'window\._d_c_\.DCData\s*=\s*(\{[^;]*\});', html, re.DOTALL)
+if match:
+    try:
+        data = json.loads(match.group(1))
+        print("✓ Trouvé et parsé")
+        print(f"Keys: {list(data.keys())}")
+        # Preview at most three image entries, each cut to 70 characters.
+        if "imagePathList" in data:
+            print(f"  → {len(data['imagePathList'])} images")
+            for i, img in enumerate(data['imagePathList'][:3], 1):
+                print(f"    [{i}] {img[:70]}...")
+
+        # Save; explicit UTF-8 because ensure_ascii=False writes raw non-ASCII.
+        with open("scraped/aliexpress_dcdata.json", "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2, ensure_ascii=False)
+        print("✓ Sauvegardé: scraped/aliexpress_dcdata.json")
+    except Exception as e:
+        print(f"✗ Erreur: {e}")
+else:
+    print("✗ Non trouvé")
+
+# 3. Search for any object with product/price/title keys
+print("\n[3] Recherche de données produit dans tous les scripts")
+print("-" * 80)
+
+# Candidate objects: flat {...} spans (no nested braces) that mention a product-like key.
+json_pattern = re.compile(r'\{[^{}]*(?:"(?:title|price|product|sku|id)"[^{}]*)+\}', re.IGNORECASE)
+matches = json_pattern.findall(html)
+
+interesting = {"title", "price", "product", "sku", "id", "name"}
+found_data = []
+for match_text in matches[:50]:  # Limit to first 50
+    try:
+        data = json.loads(match_text)
+    except ValueError:
+        # Brace-delimited text that is not valid JSON (json.JSONDecodeError): skip it.
+        continue
+
+    # Keep only objects exposing at least one interesting top-level key.
+    hits = data.keys() & interesting
+    if hits:
+        found_data.append((hits, data))
+
+if found_data:
+    print(f"✓ Trouvé {len(found_data)} objet(s) avec données intéressantes:")
+    for i, (keys, data) in enumerate(found_data[:5], 1):
+        print(f"\n  Objet {i}: keys = {keys}")
+        for key in keys:
+            value = str(data.get(key, ""))[:60]
+            print(f"    → {key}: {value}")
+else:
+    print("✗ Aucun objet intéressant trouvé")
+
+# 4. Search for price patterns in text
+print("\n[4] Recherche de patterns de prix")
+print("-" * 80)
+price_patterns = [
+    (r'"price":\s*"?([0-9]+\.?[0-9]*)"?', "price key"),
+    (r'"minPrice":\s*"?([0-9]+\.?[0-9]*)"?', "minPrice key"),
+    (r'"maxPrice":\s*"?([0-9]+\.?[0-9]*)"?', "maxPrice key"),
+    (r'€\s*([0-9]+[.,][0-9]{2})', "Euro symbol"),
+    (r'([0-9]+[.,][0-9]{2})\s*€', "Price before Euro"),
+]
+
+for pattern, desc in price_patterns:
+    matches = re.findall(pattern, html)
+    if matches:
+        print(f"✓ {desc}: {len(matches)} match(es)")
+        # De-duplicate the first five hits in first-seen order (a set would print in varying order).
+        for price in dict.fromkeys(matches[:5]):
+            print(f"  → {price}")
+    else:
+        print(f"✗ {desc}: Aucun")
+
+# Closing banner: blank line, rule, title, rule.
+footer_rule = "=" * 80
+print(f"\n{footer_rule}", "FIN DE L'EXTRACTION", footer_rule, sep="\n")