feat(service): scraper station WeeWX (RSS current + NOAA yesterday)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-22 14:42:18 +01:00
parent cc69d0d5ad
commit 8a7a2c7c6d

View File

@@ -0,0 +1,121 @@
"""Service de collecte des données de la station météo locale WeeWX."""
import logging
import math
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta, timezone

import httpx

from app.config import STATION_URL
logger = logging.getLogger(__name__)
def _safe_float(text: str | None) -> float | None:
if text is None:
return None
try:
cleaned = text.strip().replace(",", ".")
# Retirer unités courantes
for unit in [" °C", " %", " hPa", " km/h", " W/m²", "°C", "%", "hPa"]:
cleaned = cleaned.replace(unit, "")
return float(cleaned.strip())
except (ValueError, AttributeError):
return None
def _direction_to_abbr(deg: float | None) -> str | None:
if deg is None:
return None
dirs = ["N", "NE", "E", "SE", "S", "SO", "O", "NO"]
return dirs[round(deg / 45) % 8]
# Label patterns for the numeric readings searched in the RSS item
# description. Each captures one number (decimal point or comma) that follows
# an English or French label.
_CURRENT_PATTERNS = {
    "temp_ext": r"(?:Outside|Ext(?:erieur)?)\s*Temp(?:erature)?\s*[:\s]+(-?\d+(?:[.,]\d+)?)",
    "temp_int": r"(?:Inside|Int(?:erieur)?)\s*Temp(?:erature)?\s*[:\s]+(-?\d+(?:[.,]\d+)?)",
    "humidite": r"(?:Outside\s*)?Hum(?:idity)?\s*[:\s]+(\d+(?:[.,]\d+)?)",
    "pression": r"(?:Bar(?:ometer)?|Pression)\s*[:\s]+(\d+(?:[.,]\d+)?)",
    "pluie_mm": r"(?:Rain(?:fall)?|Pluie)\s*[:\s]+(\d+(?:[.,]\d+)?)",
    "vent_kmh": r"(?:Wind\s*Speed|Vent)\s*[:\s]+(\d+(?:[.,]\d+)?)",
    "uv": r"UV\s*[:\s]+(\d+(?:[.,]\d+)?)",
    "solaire": r"(?:Solar\s*Radiation|Solaire)\s*[:\s]+(\d+(?:[.,]\d+)?)",
}

# Captures the whole direction token (a bearing in degrees or a compass
# name) so it can be normalised afterwards. Matching a one/two-letter
# character class first would truncate words: "Nord" would be read as "NO".
_WIND_DIR_PATTERN = re.compile(
    r"(?:Wind\s*Dir(?:ection)?)\s*[:\s]+(\d+(?:[.,]\d+)?|[A-Za-z/]+)",
    re.IGNORECASE,
)

# Compass names (French and English, full words and abbreviations) mapped to
# the French 8-point abbreviations. Three-letter 16-point names are folded
# onto the intercardinal point they end with (e.g. SSW -> SO).
_WIND_DIR_ABBR = {
    "N": "N", "NORD": "N", "NORTH": "N",
    "NE": "NE", "NNE": "NE", "ENE": "NE",
    "E": "E", "EST": "E", "EAST": "E",
    "SE": "SE", "ESE": "SE", "SSE": "SE",
    "S": "S", "SUD": "S", "SOUTH": "S",
    "SO": "SO", "SW": "SO", "SSO": "SO", "SSW": "SO", "OSO": "SO", "WSW": "SO",
    "O": "O", "W": "O", "OUEST": "O", "WEST": "O",
    "NO": "NO", "NW": "NO", "NNO": "NO", "NNW": "NO", "ONO": "NO", "WNW": "NO",
}


def _normalize_wind_dir(token: str) -> str | None:
    """Map a captured direction token (degrees or compass name) to an abbreviation."""
    if token[0].isdigit():
        return _direction_to_abbr(float(token.replace(",", ".")))
    # Unknown tokens such as "N/A" yield None rather than a bogus direction.
    return _WIND_DIR_ABBR.get(token.upper())


def fetch_current(base_url: str = STATION_URL) -> dict | None:
    """Scrape the current readings from the WeeWX station's RSS feed.

    Downloads ``<base_url>/rss.xml`` and searches the description of the
    first ``<item>`` of the channel for labelled values.

    Args:
        base_url: Root URL of the station site; trailing slashes are ignored.

    Returns:
        A dict with the keys ``temp_ext``, ``temp_int``, ``humidite``,
        ``pression``, ``pluie_mm``, ``vent_kmh``, ``uv`` and ``solaire``
        (float or None each) plus ``vent_dir`` (compass abbreviation such as
        ``"SO"``, or None). Returns None when the request or parsing fails,
        when the feed has no channel/item, or when no value at all was found.
    """
    try:
        url = base_url.rstrip("/") + "/rss.xml"
        r = httpx.get(url, timeout=10)
        r.raise_for_status()
        root = ET.fromstring(r.text)
        channel = root.find("channel")
        item = channel.find("item") if channel is not None else None
        if item is None:
            return None
        desc = item.findtext("description") or ""
        result: dict = {}
        for key, pattern in _CURRENT_PATTERNS.items():
            m = re.search(pattern, desc, re.IGNORECASE)
            result[key] = _safe_float(m.group(1)) if m else None
        dir_match = _WIND_DIR_PATTERN.search(desc)
        result["vent_dir"] = (
            _normalize_wind_dir(dir_match.group(1)) if dir_match else None
        )
        # A feed in which nothing was recognised is reported as unavailable.
        return result if any(v is not None for v in result.values()) else None
    except Exception as e:
        # Best-effort scraper: any network or parsing failure degrades to None.
        logger.warning("Station fetch_current error: %s", e)
        return None
def fetch_yesterday_summary(base_url: str = STATION_URL) -> dict | None:
    """Fetch yesterday's summary from the station's monthly NOAA text report.

    Downloads ``<base_url>/NOAA/NOAA-<YYYY>-<MM>.txt`` for the month that
    contains yesterday's date and reads the first row whose leading
    whitespace-separated field equals yesterday's day of the month.

    Args:
        base_url: Root URL of the station site; trailing slashes are ignored.

    Returns:
        A dict with the keys ``t_max``, ``t_min``, ``temp_ext``, ``pluie_mm``
        and ``vent_kmh`` (float or None each), or None when the request fails
        or no row for yesterday is found.
    """
    # Naive local time: "yesterday" is relative to the clock of the host
    # running this code.
    yesterday = (datetime.now() - timedelta(days=1)).date()
    year = yesterday.strftime("%Y")
    month = yesterday.strftime("%m")
    day = yesterday.day
    try:
        url = f"{base_url.rstrip('/')}/NOAA/NOAA-{year}-{month}.txt"
        r = httpx.get(url, timeout=15)
        r.raise_for_status()
        for line in r.text.splitlines():
            parts = line.split()
            # isdecimal() only accepts characters int() can convert, so a
            # stray token such as "²" cannot abort the scan with ValueError.
            if len(parts) >= 7 and parts[0].isdecimal() and int(parts[0]) == day:
                # NOTE(review): the original comment described the row as
                # "day tmax tmin tmean precip ...", which would put
                # precipitation at index 4, yet index 5 is read for pluie_mm
                # and index 6 for vent_kmh. Confirm these indices against the
                # station's actual NOAA report layout.
                return {
                    "t_max": _safe_float(parts[1]),
                    "t_min": _safe_float(parts[2]),
                    "temp_ext": _safe_float(parts[3]),
                    "pluie_mm": _safe_float(parts[5]),
                    # The length check above guarantees index 6 exists.
                    "vent_kmh": _safe_float(parts[6]),
                }
    except Exception as e:
        # Best-effort scraper: any network or parsing failure degrades to None.
        logger.warning("Station fetch_yesterday_summary error: %s", e)
    return None