chore: sync project files
This commit is contained in:
68
fetch_backmarket.py
Executable file
68
fetch_backmarket.py
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
"""Test script for Backmarket scraping - plain HTTP vs Playwright.

Fetches one Backmarket product page with both fetchers, saves each HTML
payload under ``scraped/``, and prints a size comparison with a
recommendation of which fetcher to use.
"""

from pathlib import Path

from pricewatch.app.scraping.http_fetch import fetch_http
from pricewatch.app.scraping.pw_fetch import fetch_playwright

url = "https://www.backmarket.fr/fr-fr/p/iphone-15-pro"

# Create the output directory up front so saving the HTML cannot fail with
# FileNotFoundError on a fresh checkout.
output_dir = Path("scraped")
output_dir.mkdir(parents=True, exist_ok=True)

print("=" * 80)
print("TEST BACKMARKET.FR - HTTP vs PLAYWRIGHT")
print("=" * 80)

# Test 1: HTTP
print("\n1. Test avec HTTP")
print("-" * 80)
result_http = fetch_http(url, timeout=30)

if result_http.success and result_http.html:
    print("✓ HTTP fonctionne!")
    print(f" Taille: {len(result_http.html)} chars")
    print(f" Status: {result_http.status_code}")
    print(f" Durée: {result_http.duration_ms}ms")

    # Save the payload for manual inspection.
    http_path = output_dir / "backmarket_http.html"
    http_path.write_text(result_http.html, encoding="utf-8")
    print(f" Sauvegardé: {http_path.as_posix()}")

    # Check whether this looks like real product content: the page should
    # mention the product name somewhere.
    html_lower = result_http.html.lower()
    if "iphone" in html_lower and "pro" in html_lower:
        print(" ✓ Contenu valide détecté")
    else:
        print(" ⚠ Contenu suspect (pas de mention iPhone/Pro)")
else:
    print(f"✗ HTTP a échoué: {result_http.error}")

# Test 2: Playwright
print("\n2. Test avec Playwright")
print("-" * 80)
result_pw = fetch_playwright(url, headless=True, timeout_ms=60000)

if result_pw.success and result_pw.html:
    print("✓ Playwright fonctionne!")
    print(f" Taille: {len(result_pw.html)} chars")
    print(f" Durée: {result_pw.duration_ms}ms")

    # Save the payload for manual inspection.
    pw_path = output_dir / "backmarket_pw.html"
    pw_path.write_text(result_pw.html, encoding="utf-8")
    print(f" Sauvegardé: {pw_path.as_posix()}")
else:
    print(f"✗ Playwright a échoué: {result_pw.error}")

# Comparison
print("\n3. Comparaison")
print("-" * 80)
if result_http.success and result_pw.success:
    # A fetch may report success with an empty or missing body (the checks
    # above test `.html` separately), so treat that as length 0 rather than
    # crashing on len(None).
    http_len = len(result_http.html or "")
    pw_len = len(result_pw.html or "")
    size_diff = pw_len - http_len

    # The relative difference is undefined when the HTTP body is empty.
    relative = f"{size_diff / http_len * 100:.1f}%" if http_len else "n/a"
    print(f"Différence de taille: {size_diff:,} chars ({relative})")

    if size_diff > 10000:
        print("→ Playwright récupère beaucoup plus de contenu")
        print("→ Recommandation: Utiliser Playwright")
    else:
        print("→ HTTP et Playwright donnent des résultats similaires")
        print("→ Recommandation: HTTP (plus rapide)")

print("\n" + "=" * 80)
|
||||
Reference in New Issue
Block a user