61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
"""
|
||
Helpers for parsing price strings that contain thousands separators.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
from typing import Optional
|
||
|
||
|
||
def _resolve_single_separator(number: str, sep: str) -> str:
    """Rewrite *number*, whose only separator is *sep*, into ``float()`` syntax."""
    groups = number.split(sep)
    # A separator repeated between 3-digit groups ("1.234.567") can only be a
    # thousands separator: a number has at most one decimal mark.
    if (
        len(groups) > 2
        and 1 <= len(groups[0]) <= 3
        and all(len(group) == 3 for group in groups[1:])
    ):
        return "".join(groups)
    # Otherwise the last separator is taken as the decimal mark and any
    # earlier ones are dropped; a trailing separator ("12,") is ignored.
    *integer_groups, fraction = groups
    integer = "".join(integer_groups)
    return f"{integer}.{fraction}" if fraction else integer


def parse_price_text(text: str) -> Optional[float]:
    """Parse a price written as free text into a float.

    Handles spaces (including non-breaking, narrow non-breaking and thin
    spaces), dots and commas used as thousands or decimal separators, and
    the ``12€50`` notation where the cents follow the euro sign.

    Args:
        text: Raw text containing a price, e.g. ``"1 234,56 €"``.

    Returns:
        The parsed amount, or ``None`` when *text* is empty, contains no
        digits, or cannot be converted to a float.
    """
    if not text:
        return None

    # Normalise exotic spaces up front: the character class of the
    # euro-suffix pattern below only accepts the plain ASCII space, so
    # "1\u00a0234€50" would otherwise lose its thousands group.
    text = text.replace("\u00a0", " ").replace("\u202f", " ").replace("\u2009", " ")

    # "12€50" style: digits, euro sign, then exactly two digits of cents.
    euro_suffix = re.search(r"([0-9 .,]+)\s*€\s*(\d{2})\b", text)
    if euro_suffix:
        integer_clean = re.sub(r"[^\d]", "", euro_suffix.group(1))
        if integer_clean:
            try:
                return float(f"{integer_clean}.{euro_suffix.group(2)}")
            except ValueError:
                # Deliberate best effort: fall through to the generic parser.
                pass

    # Generic fallback: a euro sign sitting between two digits acts as the
    # decimal mark ("12€5" -> "12,5").
    text = re.sub(r"(\d)\s*€\s*(\d)", r"\1,\2", text)

    # Keep only what float() can digest. isdecimal() (unlike isdigit()) rejects
    # characters such as the superscript in "m²", which float() cannot parse.
    cleaned = "".join(ch for ch in text if ch.isdecimal() or ch in ".,")
    if not cleaned:
        return None

    has_comma = "," in cleaned
    has_dot = "." in cleaned
    if has_comma and has_dot:
        # Both kinds present: whichever appears last is the decimal mark,
        # the other one is a thousands separator.
        if cleaned.rfind(",") > cleaned.rfind("."):
            cleaned = cleaned.replace(".", "").replace(",", ".")
        else:
            cleaned = cleaned.replace(",", "")
    elif has_comma:
        cleaned = _resolve_single_separator(cleaned, ",")
    elif has_dot:
        cleaned = _resolve_single_separator(cleaned, ".")

    try:
        return float(cleaned)
    except ValueError:
        return None
|