52 lines
1.7 KiB
Python
Executable File
52 lines
1.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Script temporaire pour récupérer HTML Cdiscount avec Playwright."""
|
|
|
|
from pricewatch.app.scraping.pw_fetch import fetch_playwright
|
|
from pricewatch.app.stores.cdiscount.store import CdiscountStore
|
|
|
|
# Product page fetched by this script.
url = "https://www.cdiscount.com/informatique/ecrans-informatiques/ecran-pc-gamer-philips-27-fhd-180hz-dal/f-10732-phi1721524349346.html"


def main():
    """Fetch the Cdiscount page with Playwright, save its HTML, and print the parse result.

    The page at ``url`` is fetched through ``fetch_playwright`` (headless,
    60000 ms timeout, no screenshot). On success the HTML is written to
    ``scraped/cdiscount_phi1721524349346_pw.html`` and then handed to
    ``CdiscountStore.parse``; the fields of the resulting snapshot are
    printed to stdout. On failure only ``result.error`` is printed.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    from pathlib import Path

    print(f"Récupération de {url}")
    print("=" * 80)

    result = fetch_playwright(
        url,
        headless=True,
        timeout_ms=60000,
        save_screenshot=False,
    )

    # Guard clause: nothing to save or parse without a successful fetch
    # that actually returned HTML.
    if not (result.success and result.html):
        print(f"✗ Erreur: {result.error}")
        return

    output_path = "scraped/cdiscount_phi1721524349346_pw.html"
    # The output directory may not exist yet (e.g. fresh checkout); without
    # this, open() raises FileNotFoundError after the fetch has already run.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(result.html)
    print(f"✓ HTML sauvegardé: {output_path} ({len(result.html)} chars)")

    # Parse the HTML
    print("\n" + "=" * 80)
    print("PARSING")
    print("=" * 80)

    store = CdiscountStore()
    snapshot = store.parse(result.html, url)

    print(f"\nSource: {snapshot.source}")
    print(f"URL: {snapshot.url}")
    print(f"Reference: {snapshot.reference}")
    # Title is cut to 80 characters to keep the report on one line.
    print(f"Title: {snapshot.title[:80] if snapshot.title else None}...")
    print(f"Price: {snapshot.price} {snapshot.currency}")
    print(f"Stock: {snapshot.stock_status}")
    print(f"Images: {len(snapshot.images)} images")
    print(f"Category: {snapshot.category}")
    print(f"Specs: {len(snapshot.specs)} specs")

    print(f"\nDebug status: {snapshot.debug.status}")
    if snapshot.debug.errors:
        print(f"Debug errors: {len(snapshot.debug.errors)}")
        for err in snapshot.debug.errors:
            print(f" - {err}")

    print(f"\nIs complete: {snapshot.is_complete()}")


if __name__ == "__main__":
    main()