1
This commit is contained in:
@@ -214,15 +214,17 @@ def extract_product_data_from_html(html: str, url: str) -> dict[str, Any]:
|
||||
if not price_text:
|
||||
price_text = _safe_attr_soup(soup, "#twister-plus-price-data-price", "value")
|
||||
|
||||
price_list_text = _safe_text_soup(
|
||||
soup, "#corePriceDisplay_desktop_feature_div .a-text-price span.a-offscreen"
|
||||
)
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text_soup(soup, "#priceblock_strikeprice")
|
||||
# prix conseillé (srpPriceBlock = "Prix conseillé : XXX €")
|
||||
price_list_text = _safe_text_soup(soup, ".srpPriceBlock .srpPriceBlockAUI .a-offscreen")
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text_soup(soup, ".srpPriceBlock .a-offscreen")
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text_soup(soup, ".srpPriceBlockAUI .a-offscreen")
|
||||
price_list_text = _safe_text_soup(soup, "#priceblock_strikeprice")
|
||||
# fallback sur corePriceDisplay (prix barré) si pas de srpPriceBlock
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text_soup(
|
||||
soup, "#corePriceDisplay_desktop_feature_div .a-text-price span.a-offscreen"
|
||||
)
|
||||
|
||||
stock_text = _safe_text_soup(soup, "#availability span")
|
||||
if not stock_text:
|
||||
@@ -252,25 +254,33 @@ def extract_product_data_from_html(html: str, url: str) -> dict[str, Any]:
|
||||
prime_eligible = True
|
||||
amazon_exclusive = "Exclusivité Amazon" if "Exclusivité Amazon" in soup.get_text() else None
|
||||
|
||||
# prix plus bas 30 jours (basisPrice avec mention "30 jours")
|
||||
lowest_30d_text = _extract_lowest_30d_text_soup(soup)
|
||||
lowest_30d_price = None
|
||||
if lowest_30d_text:
|
||||
lowest_30d_price = parse_price_fr(lowest_30d_text)
|
||||
if lowest_30d_price is not None:
|
||||
candidate_list = parse_price_fr(price_list_text)
|
||||
if candidate_list == lowest_30d_price:
|
||||
price_list_text = None
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text_soup(soup, ".srpPriceBlock .a-offscreen")
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text_soup(soup, ".srpPriceBlockAUI .a-offscreen")
|
||||
|
||||
# si le prix conseillé == prix min 30j, c'est une erreur de détection
|
||||
# (le prix barré dans corePriceDisplay est en fait le prix min 30j, pas le conseillé)
|
||||
price_list_value = parse_price_fr(price_list_text)
|
||||
if price_list_value is not None and lowest_30d_price is not None and price_list_value == lowest_30d_price:
|
||||
price_list_text = None
|
||||
price_list_value = None
|
||||
|
||||
# réductions
|
||||
reduction_savings_text = _safe_text_soup(
|
||||
soup, "#corePriceDisplay_desktop_feature_div .savingsPercentage"
|
||||
)
|
||||
reduction_conseille_text = _safe_text_soup(soup, ".srpSavingsPercentageBlock")
|
||||
reduction_min_30j = _parse_percent(reduction_savings_text)
|
||||
reduction_conseille = _parse_percent(reduction_conseille_text)
|
||||
|
||||
# attribuer correctement les réductions selon ce qui est présent
|
||||
# - si prix min 30j présent, savingsPercentage = réduction par rapport au min 30j
|
||||
# - si prix conseillé présent (srpPriceBlock), srpSavingsPercentageBlock = réduction par rapport au conseillé
|
||||
reduction_min_30j = _parse_percent(reduction_savings_text) if lowest_30d_price is not None else None
|
||||
reduction_conseille = _parse_percent(reduction_conseille_text) if price_list_value is not None else None
|
||||
# si pas de srpSavingsPercentageBlock mais un savingsPercentage et un prix conseillé (sans min 30j)
|
||||
if reduction_conseille is None and price_list_value is not None and lowest_30d_price is None:
|
||||
reduction_conseille = _parse_percent(reduction_savings_text)
|
||||
|
||||
a_propos = _extract_about_bullets(soup)
|
||||
description = _extract_description(soup)
|
||||
@@ -285,7 +295,7 @@ def extract_product_data_from_html(html: str, url: str) -> dict[str, Any]:
|
||||
"titre": title,
|
||||
"url_image_principale": image_main_url,
|
||||
"prix_actuel": parse_price_fr(price_text),
|
||||
"prix_conseille": parse_price_fr(price_list_text),
|
||||
"prix_conseille": price_list_value,
|
||||
"prix_min_30j": lowest_30d_price,
|
||||
"prix_conseille_reduction": reduction_conseille,
|
||||
"prix_min_30j_reduction": reduction_min_30j,
|
||||
@@ -333,14 +343,15 @@ def extract_product_data(page: Page, url: str) -> dict[str, Any]:
|
||||
if not price_text:
|
||||
price_text = _safe_attr(page, "#twister-plus-price-data-price", "value")
|
||||
|
||||
# prix barré / conseillé
|
||||
price_list_text = _safe_text(page, "#corePriceDisplay_desktop_feature_div .a-text-price span.a-offscreen")
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text(page, "#priceblock_strikeprice")
|
||||
# prix conseillé (srpPriceBlock = "Prix conseillé : XXX €")
|
||||
price_list_text = _safe_text(page, ".srpPriceBlock .srpPriceBlockAUI .a-offscreen")
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text(page, ".srpPriceBlock .a-offscreen")
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text(page, ".srpPriceBlockAUI .a-offscreen")
|
||||
price_list_text = _safe_text(page, "#priceblock_strikeprice")
|
||||
# fallback sur corePriceDisplay (prix barré) si pas de srpPriceBlock
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text(page, "#corePriceDisplay_desktop_feature_div .a-text-price span.a-offscreen")
|
||||
|
||||
# stock
|
||||
stock_text = _safe_text(page, "#availability span")
|
||||
@@ -374,34 +385,44 @@ def extract_product_data(page: Page, url: str) -> dict[str, Any]:
|
||||
|
||||
amazon_exclusive = _safe_text(page, "text=Exclusivité Amazon")
|
||||
|
||||
# prix plus bas 30 jours
|
||||
# prix plus bas 30 jours (basisPrice ou corePriceDisplay avec mention "30 jours")
|
||||
lowest_30d_text = None
|
||||
lowest_30d_price = None
|
||||
if page.locator(".basisPrice").count() > 0:
|
||||
basis_text = page.locator(".basisPrice").first.inner_text()
|
||||
if basis_text and re.search(r"prix.+(30|trente).+jour", basis_text.lower()):
|
||||
lowest_30d_text = _safe_text(page, ".basisPrice .a-offscreen") or basis_text
|
||||
if not lowest_30d_text and page.locator("#priceBadging_feature_div").count() > 0:
|
||||
lowest_30d_text = _safe_text(page, ".basisPrice .a-price .a-offscreen") or basis_text
|
||||
lowest_30d_price = parse_price_fr(lowest_30d_text)
|
||||
# fallback sur corePriceDisplay si contient mention 30 jours
|
||||
if lowest_30d_price is None and page.locator("#corePriceDisplay_desktop_feature_div .a-text-price").count() > 0:
|
||||
core_text = page.locator("#corePriceDisplay_desktop_feature_div").first.inner_text()
|
||||
if core_text and re.search(r"prix.+(30|trente).+jour", core_text.lower()):
|
||||
lowest_30d_text = _safe_text(page, "#corePriceDisplay_desktop_feature_div .a-text-price .a-offscreen")
|
||||
lowest_30d_price = parse_price_fr(lowest_30d_text)
|
||||
if not lowest_30d_price and page.locator("#priceBadging_feature_div").count() > 0:
|
||||
badging_text = page.locator("#priceBadging_feature_div").first.inner_text()
|
||||
if badging_text and re.search(r"prix.+(30|trente).+jour", badging_text.lower()):
|
||||
lowest_30d_text = _safe_text(page, "#priceBadging_feature_div .a-offscreen") or badging_text
|
||||
if lowest_30d_text and not re.search(r"prix.+(30|trente).+jour", lowest_30d_text.lower()):
|
||||
lowest_30d_text = None
|
||||
lowest_30d_price = None
|
||||
if lowest_30d_text and "prix" in lowest_30d_text.lower():
|
||||
lowest_30d_price = parse_price_fr(lowest_30d_text)
|
||||
if lowest_30d_price is not None:
|
||||
candidate_list = parse_price_fr(price_list_text)
|
||||
if candidate_list == lowest_30d_price:
|
||||
price_list_text = None
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text(page, ".srpPriceBlock .a-offscreen")
|
||||
if not price_list_text:
|
||||
price_list_text = _safe_text(page, ".srpPriceBlockAUI .a-offscreen")
|
||||
lowest_30d_price = parse_price_fr(lowest_30d_text)
|
||||
|
||||
# si le prix conseillé == prix min 30j, c'est une erreur de détection
|
||||
price_list_value = parse_price_fr(price_list_text)
|
||||
if price_list_value is not None and lowest_30d_price is not None and price_list_value == lowest_30d_price:
|
||||
price_list_text = None
|
||||
price_list_value = None
|
||||
|
||||
# réductions
|
||||
# savingsPercentage dans corePriceDisplay = réduction par rapport au prix min 30j (si présent)
|
||||
# srpSavingsPercentageBlock = réduction par rapport au prix conseillé
|
||||
reduction_savings_text = _safe_text(page, "#corePriceDisplay_desktop_feature_div .savingsPercentage")
|
||||
reduction_conseille_text = _safe_text(page, ".srpSavingsPercentageBlock")
|
||||
reduction_min_30j = _parse_percent(reduction_savings_text)
|
||||
reduction_conseille = _parse_percent(reduction_conseille_text)
|
||||
|
||||
# attribuer correctement les réductions selon ce qui est présent
|
||||
reduction_min_30j = _parse_percent(reduction_savings_text) if lowest_30d_price is not None else None
|
||||
reduction_conseille = _parse_percent(reduction_conseille_text) if price_list_value is not None else None
|
||||
# si pas de srpSavingsPercentageBlock mais un savingsPercentage et un prix conseillé (sans min 30j)
|
||||
if reduction_conseille is None and price_list_value is not None and lowest_30d_price is None:
|
||||
reduction_conseille = _parse_percent(reduction_savings_text)
|
||||
|
||||
asin = _safe_attr(page, "input#ASIN", "value") or _extract_asin_from_url(url)
|
||||
|
||||
@@ -417,7 +438,7 @@ def extract_product_data(page: Page, url: str) -> dict[str, Any]:
|
||||
"titre": title,
|
||||
"url_image_principale": image_main_url,
|
||||
"prix_actuel": parse_price_fr(price_text),
|
||||
"prix_conseille": parse_price_fr(price_list_text),
|
||||
"prix_conseille": price_list_value,
|
||||
"prix_min_30j": lowest_30d_price,
|
||||
"prix_conseille_reduction": reduction_conseille,
|
||||
"prix_min_30j_reduction": reduction_min_30j,
|
||||
|
||||
Reference in New Issue
Block a user