Files
scrap/pricewatch/app/stores/price_parser.py
Gilles Soulier cf7c415e22 before claude
2026-01-17 13:40:26 +01:00

61 lines
1.9 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Helpers for parsing price strings that contain thousands separators.
"""
from __future__ import annotations
import re
from typing import Optional
def parse_price_text(text: str) -> Optional[float]:
    """
    Parse a human-readable price string into a float.

    Handles space, dot and comma separators, including non-breaking,
    narrow non-breaking and thin spaces used as thousands separators.
    Supported shapes include ``"1 299,99 €"``, ``"1.299,99"``,
    ``"1,299.99"``, ``"1 299€99"`` (cents written after the euro sign)
    and ``"49€"``.

    Args:
        text: Raw text containing a price.

    Returns:
        The parsed price, or ``None`` when ``text`` is empty, contains no
        digits, or cannot be interpreted as a number.
    """
    if not text:
        return None

    # Normalise exotic spaces *before* any pattern matching so that a
    # thousands separator such as U+00A0 is treated like a plain space by
    # the character class below (otherwise "1\u00a0299€99" would lose its
    # leading "1").
    text = text.replace("\u00a0", " ").replace("\u202f", " ").replace("\u2009", " ")

    # "euros € cents" notation, e.g. "1 299€99": everything before the euro
    # sign is the integer part, the two digits after it are the cents.
    euro_suffix = re.search(r"([0-9 .,]+)\s*€\s*(\d{2})\b", text)
    if euro_suffix:
        integer_clean = re.sub(r"\D", "", euro_suffix.group(1))
        if integer_clean:
            return float(f"{integer_clean}.{euro_suffix.group(2)}")

    # Fallback: turn a euro sign sitting between two digits into a decimal
    # comma ("12€5" -> "12,5") and continue with the generic parsing.
    text = re.sub(r"(\d)\s*€\s*(\d)", r"\1,\2", text)

    # Keep only decimal digits and separators. isdecimal() is used rather
    # than isdigit() because the latter also accepts characters such as
    # "²" (as in "€/m²") that float() cannot parse.
    cleaned = "".join(ch for ch in text if ch.isdecimal() or ch in ".,")
    if not cleaned:
        return None

    has_comma = "," in cleaned
    has_dot = "." in cleaned
    if has_comma and has_dot:
        # Both separators present: the right-most one is the decimal mark,
        # the other one is a thousands separator.
        if cleaned.rfind(",") > cleaned.rfind("."):
            cleaned = cleaned.replace(".", "").replace(",", ".")
        else:
            cleaned = cleaned.replace(",", "")
    elif has_comma or has_dot:
        separator = "," if has_comma else "."
        *groups, last = cleaned.split(separator)
        if len(groups) > 1 and len(last) == 3:
            # A separator that repeats and is followed by exactly three
            # digits can only be grouping ("1.299.000"), not a decimal mark.
            cleaned = "".join(groups) + last
        else:
            # Otherwise the last group is the decimal part and any earlier
            # separators are thousands separators.
            integer = "".join(groups)
            cleaned = f"{integer}.{last}" if last else integer

    try:
        return float(cleaned)
    except ValueError:
        return None