claude fix: improve product scraping and debugging features

This commit is contained in:
2026-01-19 21:26:45 +01:00
parent dcb25e0163
commit 20bdc7ff70
131 changed files with 544285 additions and 59 deletions

View File

@@ -244,6 +244,55 @@ def scrape_product(product_id: int) -> None:
session.close()
def scrape_preview(url: str) -> dict:
    """Scrape an Amazon URL without saving anything to the database.

    Opens a Playwright browser context, navigates to ``url``, runs
    ``extract_product_data`` on the loaded page and returns the outcome so
    the caller can preview it.

    Args:
        url: URL of the product page to load.

    Returns:
        A dict with the keys:

        * ``"url"``: the URL that was passed in.
        * ``"success"``: ``True`` only when the extracted data contains a
          non-empty ``"titre"`` entry.
        * ``"data"``: the extracted data (``{}`` if extraction never ran).
        * ``"error"``: ``None`` on success, otherwise an error message.

    Exceptions raised while driving the browser are logged and reported
    through ``"error"`` instead of being propagated. Errors raised by
    ``load_config()`` itself are not caught here.
    """
    logger.info("Prévisualisation scrape pour URL: {}", url)
    config = load_config()
    result = {
        "url": url,
        "success": False,
        "data": {},
        "error": None,
    }

    try:
        with sync_playwright() as playwright:
            browser, context = _create_browser_context(playwright, config)
            # Everything that uses the context lives inside the try so that
            # a failure while creating or configuring the page still goes
            # through the cleanup below.
            try:
                page = context.new_page()
                page.set_default_timeout(config.scrape.timeout_ms)
                page.goto(
                    url,
                    wait_until="domcontentloaded",
                    timeout=config.scrape.timeout_ms,
                )

                # Extract the data; it is returned to the caller even when
                # the page turns out to be unusable, to help debugging.
                data = extract_product_data(page, url)
                result["data"] = data

                # A missing title is treated as "blocked or product not found".
                if data.get("titre"):
                    result["success"] = True
                else:
                    result["error"] = "Blocage Amazon détecté ou produit introuvable"

                # Save the session.
                # NOTE(review): the original indentation was lost in this view;
                # confirm whether the session should be saved unconditionally
                # (as here) or only when extraction succeeded.
                _save_storage_state(context)
            finally:
                # Close the browser even if closing the context raises.
                try:
                    context.close()
                finally:
                    browser.close()
    except Exception as e:
        logger.exception("Erreur prévisualisation scrape: {}", e)
        result["error"] = str(e)

    return result
def scrape_all(product_ids: Iterable[int] | None = None) -> None:
logger.info("Déclenchement du scraping global")
session = database.SessionLocal()