Files
scrap/fetch_cdiscount.py
2026-01-13 19:49:04 +01:00

52 lines
1.7 KiB
Python
Executable File

#!/usr/bin/env python3
"""Temporary script to fetch a Cdiscount product page's HTML with Playwright."""
from pricewatch.app.scraping.pw_fetch import fetch_playwright
from pricewatch.app.stores.cdiscount.store import CdiscountStore
# Product page fetched by this script and the file its raw HTML is saved to.
PRODUCT_URL = "https://www.cdiscount.com/informatique/ecrans-informatiques/ecran-pc-gamer-philips-27-fhd-180hz-dal/f-10732-phi1721524349346.html"
OUTPUT_PATH = "scraped/cdiscount_phi1721524349346_pw.html"

# Maximum number of title characters shown in the summary.
TITLE_PREVIEW_LEN = 80


def _print_snapshot(snapshot):
    """Print a human-readable summary of a parsed product snapshot.

    Args:
        snapshot: Object returned by ``CdiscountStore.parse``. Only the
            attributes printed below are read.
    """
    title = snapshot.title
    if title and len(title) > TITLE_PREVIEW_LEN:
        # Add the ellipsis only when the title was really truncated, so a
        # short or missing title is not shown as "…..." / "None...".
        title = f"{title[:TITLE_PREVIEW_LEN]}..."

    print(f"\nSource: {snapshot.source}")
    print(f"URL: {snapshot.url}")
    print(f"Reference: {snapshot.reference}")
    print(f"Title: {title if title else None}")
    print(f"Price: {snapshot.price} {snapshot.currency}")
    print(f"Stock: {snapshot.stock_status}")
    print(f"Images: {len(snapshot.images)} images")
    print(f"Category: {snapshot.category}")
    print(f"Specs: {len(snapshot.specs)} specs")
    print(f"\nDebug status: {snapshot.debug.status}")
    if snapshot.debug.errors:
        print(f"Debug errors: {len(snapshot.debug.errors)}")
        for err in snapshot.debug.errors:
            print(f" - {err}")
    print(f"\nIs complete: {snapshot.is_complete()}")


def main():
    """Fetch the product page, save its HTML to disk and print the parse result.

    On fetch failure (unsuccessful result or empty HTML) only the error
    reported by ``fetch_playwright`` is printed and nothing is written.
    """
    # Local import keeps this block self-contained.
    from pathlib import Path

    print(f"Récupération de {PRODUCT_URL}")
    print("=" * 80)

    result = fetch_playwright(
        PRODUCT_URL,
        headless=True,
        timeout_ms=60000,
        save_screenshot=False,
    )

    if not (result.success and result.html):
        print(f"✗ Erreur: {result.error}")
        return

    # Create the output directory first: without it, open() raises
    # FileNotFoundError and the HTML that was just fetched is lost.
    Path(OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        f.write(result.html)
    print(f"✓ HTML sauvegardé: {OUTPUT_PATH} ({len(result.html)} chars)")

    # Parse the HTML that was just saved.
    print("\n" + "=" * 80)
    print("PARSING")
    print("=" * 80)

    store = CdiscountStore()
    snapshot = store.parse(result.html, PRODUCT_URL)
    _print_snapshot(snapshot)


if __name__ == "__main__":
    main()