# scrap/fetch_cdiscount.py

#!/usr/bin/env python3
"""Script temporaire pour récupérer HTML Cdiscount avec Playwright."""

from pathlib import Path

from pricewatch.app.scraping.pw_fetch import fetch_playwright
from pricewatch.app.stores.cdiscount.store import CdiscountStore

# One-off script body: fetch a single Cdiscount product page, save the
# returned HTML to disk, then run the store parser on it and print a summary.
url = "https://www.cdiscount.com/informatique/ecrans-informatiques/ecran-pc-gamer-philips-27-fhd-180hz-dal/f-10732-phi1721524349346.html"

print(f"Récupération de {url}")
print("=" * 80)

result = fetch_playwright(
    url,
    headless=True,
    timeout_ms=60000,
    save_screenshot=False,
)

if result.success and result.html:
    output_path = "scraped/cdiscount_phi1721524349346_pw.html"
    # The output directory may be missing (e.g. fresh checkout); create it so
    # open() below does not fail with FileNotFoundError.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result.html)
    print(f"✓ HTML sauvegardé: {output_path} ({len(result.html)} chars)")

    # Parse the HTML that was just fetched.
    print("\n" + "=" * 80)
    print("PARSING")
    print("=" * 80)

    store = CdiscountStore()
    snapshot = store.parse(result.html, url)

    # Show at most 80 characters of the title, and only add an ellipsis when
    # the title was actually truncated (a missing title prints as None).
    title_display = snapshot.title or None
    if title_display is not None and len(title_display) > 80:
        title_display = f"{title_display[:80]}..."

    print(f"\nSource: {snapshot.source}")
    print(f"URL: {snapshot.url}")
    print(f"Reference: {snapshot.reference}")
    print(f"Title: {title_display}")
    print(f"Price: {snapshot.price} {snapshot.currency}")
    print(f"Stock: {snapshot.stock_status}")
    print(f"Images: {len(snapshot.images)} images")
    print(f"Category: {snapshot.category}")
    print(f"Specs: {len(snapshot.specs)} specs")

    print(f"\nDebug status: {snapshot.debug.status}")
    if snapshot.debug.errors:
        print(f"Debug errors: {len(snapshot.debug.errors)}")
        for err in snapshot.debug.errors:
            print(f"  - {err}")

    print(f"\nIs complete: {snapshot.is_complete()}")
else:
    # Fetch failed or returned no HTML: report the error carried by the result.
    print(f"✗ Erreur: {result.error}")