claude fix: improve product scraping and debugging features
This commit is contained in:
@@ -244,6 +244,55 @@ def scrape_product(product_id: int) -> None:
|
||||
session.close()
|
||||
|
||||
|
||||
def scrape_preview(url: str) -> dict:
    """Scrape an Amazon URL without saving anything to the database.

    The page is opened in a browser context obtained from
    ``_create_browser_context``, product data is extracted with
    ``extract_product_data`` and handed back to the caller for preview.

    Args:
        url: URL of the product page to scrape.

    Returns:
        A dict with the keys:

        * ``url``: the URL that was passed in.
        * ``success``: True when the extracted data has a non-empty
          ``"titre"`` entry.
        * ``data``: the extracted data (may be partial when no title
          was found), or ``{}`` if extraction never completed.
        * ``error``: an error message, or None.

    Exceptions raised while driving the browser are caught, logged and
    reported through ``error``. Exceptions raised by ``load_config`` are
    not caught here and propagate to the caller.
    """
    logger.info("Prévisualisation scrape pour URL: {}", url)
    config = load_config()

    result = {
        "url": url,
        "success": False,
        "data": {},
        "error": None,
    }

    try:
        with sync_playwright() as playwright:
            browser, context = _create_browser_context(playwright, config)
            # Everything that uses the context lives inside the try so the
            # context and browser are closed even if page creation fails.
            try:
                page = context.new_page()
                page.set_default_timeout(config.scrape.timeout_ms)
                page.goto(url, wait_until="domcontentloaded", timeout=config.scrape.timeout_ms)

                # Extract the product data.
                data = extract_product_data(page, url)

                # A missing title is treated as "blocked or product not found";
                # whatever was extracted is still returned for inspection.
                has_title = bool(data.get("titre"))
                result["data"] = data
                if has_title:
                    result["success"] = True
                else:
                    result["error"] = "Blocage Amazon détecté ou produit introuvable"

                # Persist the browser session state.
                _save_storage_state(context)
            finally:
                # Nested finally: a failure while closing the context must
                # not prevent the browser from being closed.
                try:
                    context.close()
                finally:
                    browser.close()
    except Exception as e:
        # NOTE(review): if the failure happens after a successful extraction
        # (e.g. while saving the session), the result keeps success=True
        # together with a non-None error -- confirm callers expect that.
        logger.exception("Erreur prévisualisation scrape: {}", e)
        result["error"] = str(e)

    return result
|
||||
|
||||
|
||||
def scrape_all(product_ids: Iterable[int] | None = None) -> None:
|
||||
logger.info("Déclenchement du scraping global")
|
||||
session = database.SessionLocal()
|
||||
|
||||
Reference in New Issue
Block a user