chore: sync project files

This commit is contained in:
Gilles Soulier
2026-01-13 19:49:04 +01:00
parent 53f8227941
commit ecda149a4b
149 changed files with 65272 additions and 1 deletions

68
fetch_backmarket.py Executable file
View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""Script pour tester le scraping Backmarket - HTTP vs Playwright."""
from pathlib import Path

from pricewatch.app.scraping.http_fetch import fetch_http
from pricewatch.app.scraping.pw_fetch import fetch_playwright
# Product page requested by the fetch calls in this script.
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"

# Title banner framed by two 80-character rules, emitted in a single call.
print("=" * 80, "TEST BACKMARKET.FR - HTTP vs PLAYWRIGHT", "=" * 80, sep="\n")
# Test 1: fetch the page with fetch_http and report the outcome.
print("\n1. Test avec HTTP")
print("-" * 80)
result_http = fetch_http(url, timeout=30)
if result_http.success and result_http.html:
    print("✓ HTTP fonctionne!")
    print(f" Taille: {len(result_http.html)} chars")
    print(f" Status: {result_http.status_code}")
    print(f" Durée: {result_http.duration_ms}ms")
    # Save the HTML for later inspection. The output directory is created
    # first so a checkout without "scraped/" does not fail with
    # FileNotFoundError on open().
    Path("scraped").mkdir(parents=True, exist_ok=True)
    with open("scraped/backmarket_http.html", "w", encoding="utf-8") as f:
        f.write(result_http.html)
    print(" Sauvegardé: scraped/backmarket_http.html")
    # Check whether the page mentions both "iphone" and "pro"
    # (case-insensitive); otherwise flag the content as suspect.
    html_lower = result_http.html.lower()
    if "iphone" in html_lower and "pro" in html_lower:
        print(" ✓ Contenu valide détecté")
    else:
        print(" ⚠ Contenu suspect (pas de mention iPhone/Pro)")
else:
    print(f"✗ HTTP a échoué: {result_http.error}")
# Test 2: fetch the same page with fetch_playwright (headless, 60 s timeout).
print("\n2. Test avec Playwright")
print("-" * 80)
result_pw = fetch_playwright(url, headless=True, timeout_ms=60000)
if result_pw.success and result_pw.html:
    print("✓ Playwright fonctionne!")
    print(f" Taille: {len(result_pw.html)} chars")
    print(f" Durée: {result_pw.duration_ms}ms")
    # Save the HTML for later inspection. The output directory is created
    # first so a checkout without "scraped/" does not fail with
    # FileNotFoundError on open().
    Path("scraped").mkdir(parents=True, exist_ok=True)
    with open("scraped/backmarket_pw.html", "w", encoding="utf-8") as f:
        f.write(result_pw.html)
    print(" Sauvegardé: scraped/backmarket_pw.html")
else:
    print(f"✗ Playwright a échoué: {result_pw.error}")
# Comparison: contrast the HTML sizes returned by the two fetch methods.
print("\n3. Comparaison")
print("-" * 80)
# Require non-empty HTML on both sides, not just the success flags. If a
# result reports success with html missing or empty, len() would raise
# TypeError or the percentage below would divide by zero.
if (
    result_http.success
    and result_pw.success
    and result_http.html
    and result_pw.html
):
    http_size = len(result_http.html)
    size_diff = len(result_pw.html) - http_size
    print(f"Différence de taille: {size_diff:,} chars ({size_diff/http_size*100:.1f}%)")
    # More than 10,000 extra characters is treated as a significant gain.
    if size_diff > 10000:
        print("→ Playwright récupère beaucoup plus de contenu")
        print("→ Recommandation: Utiliser Playwright")
    else:
        print("→ HTTP et Playwright donnent des résultats similaires")
        print("→ Recommandation: HTTP (plus rapide)")
print("\n" + "=" * 80)