Files
suivi_produit/backend/app/scraper/normalize.py
2026-01-18 12:23:01 +01:00

62 lines
1.7 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import re
from typing import Optional
def parse_price_fr(text: str | None) -> Optional[float]:
if not text:
return None
# Exemple: "1249,99 €" -> 1249.99 (gère espaces insécables)
match = re.search(r"([0-9][0-9\s\.\u00a0\u202f]*(?:[,.][0-9]{2})?)", text)
if not match:
return None
cleaned = match.group(1).replace(" ", "").replace("\u00a0", "").replace("\u202f", "")
if "," in cleaned:
cleaned = cleaned.replace(".", "").replace(",", ".")
elif cleaned.count(".") == 1 and len(cleaned.split(".")[-1]) == 2:
# conserve le point comme séparateur décimal
pass
else:
cleaned = cleaned.replace(".", "")
try:
return float(cleaned)
except ValueError:
return None
def parse_rating_value(text: str | None) -> Optional[float]:
if not text:
return None
match = re.search(r"([0-9]+(?:[\.,][0-9]+)?)", text)
if not match:
return None
try:
return float(match.group(1).replace(",", "."))
except ValueError:
return None
def parse_rating_count(text: str | None) -> Optional[int]:
if not text:
return None
digits = re.sub(r"[^0-9]", "", text)
if not digits:
return None
try:
return int(digits)
except ValueError:
return None
def parse_stock_status(text: str | None) -> tuple[Optional[bool], Optional[str]]:
if not text:
return None, None
cleaned = " ".join(text.split())
lowered = cleaned.lower()
if "en stock" in lowered or "disponible" in lowered:
return True, cleaned
if "indisponible" in lowered or "rupture" in lowered:
return False, cleaned
return None, cleaned