69 lines
2.3 KiB
Python
Executable File
69 lines
2.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Script pour tester le scraping Backmarket - HTTP vs Playwright."""
|
|
|
|
from pathlib import Path

from pricewatch.app.scraping.http_fetch import fetch_http
from pricewatch.app.scraping.pw_fetch import fetch_playwright
|
|
|
|
# Product page fetched by both strategies in the tests that follow.
url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"

# Title banner: the heading framed by two 80-column rules, emitted in one call.
print(
    "=" * 80,
    "TEST BACKMARKET.FR - HTTP vs PLAYWRIGHT",
    "=" * 80,
    sep="\n",
)
|
|
|
|
# Test 1: fetch the page with the plain HTTP fetcher and report the outcome.
print("\n1. Test avec HTTP")
print("-" * 80)
result_http = fetch_http(url, timeout=30)

if result_http.success and result_http.html:
    print("✓ HTTP fonctionne!")
    print(f" Taille: {len(result_http.html)} chars")
    print(f" Status: {result_http.status_code}")
    print(f" Durée: {result_http.duration_ms}ms")

    # Save the page for manual inspection. The output directory is created
    # on demand so a fresh checkout does not fail with FileNotFoundError.
    Path("scraped").mkdir(parents=True, exist_ok=True)
    with open("scraped/backmarket_http.html", "w", encoding="utf-8") as f:
        f.write(result_http.html)
    print(" Sauvegardé: scraped/backmarket_http.html")

    # Heuristic sanity check: the product name should appear somewhere in
    # the returned markup. Lowercase once and reuse for both lookups.
    html_lower = result_http.html.lower()
    if "iphone" in html_lower and "pro" in html_lower:
        print(" ✓ Contenu valide détecté")
    else:
        print(" ⚠ Contenu suspect (pas de mention iPhone/Pro)")
else:
    print(f"✗ HTTP a échoué: {result_http.error}")
|
|
|
|
# Test 2: fetch the same page with the Playwright fetcher (headless) and
# report the outcome.
print("\n2. Test avec Playwright")
print("-" * 80)
result_pw = fetch_playwright(url, headless=True, timeout_ms=60000)

if result_pw.success and result_pw.html:
    print("✓ Playwright fonctionne!")
    print(f" Taille: {len(result_pw.html)} chars")
    print(f" Durée: {result_pw.duration_ms}ms")

    # Save the page for manual inspection. The output directory is created
    # on demand so a fresh checkout does not fail with FileNotFoundError.
    Path("scraped").mkdir(parents=True, exist_ok=True)
    with open("scraped/backmarket_pw.html", "w", encoding="utf-8") as f:
        f.write(result_pw.html)
    print(" Sauvegardé: scraped/backmarket_pw.html")
else:
    print(f"✗ Playwright a échoué: {result_pw.error}")
|
|
|
|
# Comparison: contrast the size of the HTML returned by each fetcher and
# print a recommendation.
print("\n3. Comparaison")
print("-" * 80)
if result_http.success and result_pw.success:
    # A result can report success without carrying any HTML (the tests above
    # check `.html` separately for that reason), so treat a missing body as
    # empty instead of crashing on len(None).
    http_size = len(result_http.html or "")
    pw_size = len(result_pw.html or "")
    size_diff = pw_size - http_size

    if http_size:
        print(f"Différence de taille: {size_diff:,} chars ({size_diff / http_size * 100:.1f}%)")
    else:
        # No baseline to compute a percentage against; avoids ZeroDivisionError.
        print(f"Différence de taille: {size_diff:,} chars")

    # More than 10,000 extra characters from Playwright is treated as
    # "significantly more content".
    if size_diff > 10000:
        print("→ Playwright récupère beaucoup plus de contenu")
        print("→ Recommandation: Utiliser Playwright")
    else:
        print("→ HTTP et Playwright donnent des résultats similaires")
        print("→ Recommandation: HTTP (plus rapide)")

print("\n" + "=" * 80)
|