before claude

This commit is contained in:
Gilles Soulier
2026-01-18 06:26:17 +01:00
parent dc19315e5d
commit 740c3d7516
60 changed files with 3815 additions and 354 deletions

View File

@@ -157,6 +157,36 @@ def scrape_product(
)
success = False
fetch_error = str(exc)
# If a captcha was detected during the HTTP fetch, force a Playwright attempt.
if (
fetch_method == FetchMethod.HTTP
and use_playwright
and snapshot.debug.errors
and any("captcha" in error.lower() for error in snapshot.debug.errors)
):
logger.info("[FETCH] Captcha detecte, tentative Playwright")
pw_result = fetch_playwright(
canonical_url,
headless=not headful,
timeout_ms=timeout_ms,
save_screenshot=save_screenshot,
)
if pw_result.success and pw_result.html:
try:
snapshot = store.parse(pw_result.html, canonical_url)
snapshot.debug.method = FetchMethod.PLAYWRIGHT
snapshot.debug.duration_ms = pw_result.duration_ms
snapshot.debug.html_size_bytes = len(pw_result.html.encode("utf-8"))
snapshot.add_note("Captcha detecte via HTTP, fallback Playwright")
success = snapshot.debug.status != DebugStatus.FAILED
except Exception as exc:
snapshot.add_note(f"Fallback Playwright echoue: {exc}")
logger.error(f"[PARSE] Exception fallback Playwright: {exc}")
fetch_error = str(exc)
else:
error = pw_result.error or "Erreur Playwright"
snapshot.add_note(f"Fallback Playwright echoue: {error}")
fetch_error = error
else:
snapshot = ProductSnapshot(
source=store.store_id,