#!/usr/bin/env python3
"""Merge a saints JSON file and a dictons text file into one JSON calendar.

The output is a list of rows ``{"date", "saints", "dictons"}`` sorted by
date.  Dates are keyed as ``MM-DD`` or, when ``--year`` is given, as
``YYYY-MM-DD``.
"""
from __future__ import annotations

import argparse
import calendar
import json
import re
import sys
from datetime import date
from pathlib import Path

# Two-digit month / day fragments shared by every date pattern below.
_MONTH = r"(?:0[1-9]|1[0-2])"
_DAY = r"(?:0[1-9]|[12][0-9]|3[01])"
# Accepted spellings: MM-DD / MM/DD, DD-MM / DD/MM, or compact MMDD.
_DATE_TOKEN = rf"{_MONTH}[-/]{_DAY}|{_DAY}[-/]{_MONTH}|{_MONTH}{_DAY}"

# A line holding only a date, optionally written as a markdown heading.
DATE_HEADING_RE = re.compile(rf"^\s{{0,3}}(?:#{{1,6}}\s*)?({_DATE_TOKEN})\s*$")
# A date followed by ":" or "-" and the dicton text on the same line.
INLINE_DATE_DIC_RE = re.compile(rf"^\s*({_DATE_TOKEN})\s*[:\-]\s*(.+)$")

_TOKEN_PARTS_RE = re.compile(r"([0-9]{2})([-/]?)([0-9]{2})")
_SAINTS_PREFIX_RE = re.compile(r"^(saints?\s*[:\-]\s*)", re.I)
_SAINTS_SPLIT_RE = re.compile(r"\s*(?:,|;|\||\set\s)\s*", re.I)
_SAINTS_LINE_RE = re.compile(r"^saints?\s*[:\-]", re.I)
_DICTONS_PREFIX_RE = re.compile(r"^(dictons?\s*[:\-]\s*)", re.I)
_DICTONS_SPLIT_RE = re.compile(r"\s*(?:\||;)\s*")
_BULLET_RE = re.compile(r"^\s*[-*]\s*")

# Any leap year works here: it lets 02-29 pass calendar validation.
_REFERENCE_LEAP_YEAR = 2000
_LEAP_DAY = "02-29"


def _is_calendar_day(month: int, day: int) -> bool:
    """Return True when month/day exists in a leap-year calendar."""
    try:
        date(_REFERENCE_LEAP_YEAR, month, day)
    except ValueError:
        return False
    return True


def _normalize_mmdd(token: str) -> str | None:
    """Convert a date token to canonical ``MM-DD``.

    Accepts ``MMDD``, ``MM-DD``/``DD-MM`` and ``DD/MM``/``MM/DD``.  When both
    readings of a separated token are possible, a slash is read as day/month
    and a dash as month-day; the other order is used only when the preferred
    one is not a real calendar day.

    Returns:
        The normalized ``MM-DD`` string, or ``None`` when the token is not a
        recognised date or names a day that does not exist (e.g. ``02-30``).
    """
    parts = _TOKEN_PARTS_RE.fullmatch(token.strip())
    if parts is None:
        return None
    first, separator, second = parts.groups()

    # Candidate (month, day) readings, preferred one first.
    if separator == "/":
        candidates = ((second, first), (first, second))
    elif separator == "-":
        candidates = ((first, second), (second, first))
    else:
        candidates = ((first, second),)

    for month, day in candidates:
        if _is_calendar_day(int(month), int(day)):
            return f"{month}-{day}"
    return None


def _unique(values: list[str]) -> list[str]:
    """Strip values, drop empty ones and remove duplicates, keeping order."""
    stripped = (value.strip() for value in values)
    return list(dict.fromkeys(value for value in stripped if value))


def _split_saints(text: str) -> list[str]:
    """Split a free-text saints field into individual saint names.

    A leading ``saint:``/``saints:`` label and surrounding periods are
    removed; names are separated by ``,``, ``;``, ``|`` or the word ``et``.
    """
    cleaned = text.strip().strip(".")
    cleaned = _SAINTS_PREFIX_RE.sub("", cleaned).strip()
    if not cleaned:
        return []
    return _unique(_SAINTS_SPLIT_RE.split(cleaned))


def _split_dictons(text: str) -> list[str]:
    """Split a line of text into dictons.

    A leading ``dicton:``/``dictons:`` label is removed.  Only the explicit
    separators ``|`` and ``;`` split the text, so commas inside a sentence
    are left alone.
    """
    cleaned = _DICTONS_PREFIX_RE.sub("", text.strip()).strip()
    if not cleaned:
        return []
    return _unique(_DICTONS_SPLIT_RE.split(cleaned))


def load_saints(path: Path) -> dict[str, list[str]]:
    """Load the saints JSON file into a ``MM-DD -> [saint, ...]`` mapping.

    The file must hold a JSON object keyed by date tokens.  Each value is
    either a list of names or a single string that is split into names.
    Keys that are not valid dates are skipped, ``null`` values contribute no
    names, and keys that normalize to the same day are merged.

    Returns:
        The mapping, or an empty dict when the file does not exist.

    Raises:
        ValueError: If the JSON root is not an object.
    """
    if not path.exists():
        return {}
    # "utf-8-sig" also reads plain UTF-8 and tolerates a leading BOM.
    raw = json.loads(path.read_text(encoding="utf-8-sig"))
    if not isinstance(raw, dict):
        raise ValueError(
            f"{path}: expected a JSON object keyed by date, got {type(raw).__name__}"
        )

    merged: dict[str, list[str]] = {}
    for token, saint_value in raw.items():
        key = _normalize_mmdd(token)
        if key is None:
            continue
        if saint_value is None:
            saints: list[str] = []
        elif isinstance(saint_value, list):
            saints = [str(item).strip() for item in saint_value if item is not None]
        else:
            saints = _split_saints(str(saint_value))
        merged.setdefault(key, []).extend(saints)
    return {key: _unique(names) for key, names in merged.items()}


def parse_dictons_text(path: Path) -> dict[str, list[str]]:
    """Parse a text/markdown dictons file into ``MM-DD -> [dicton, ...]``.

    Three line shapes are recognised:

    * a date heading (``# 01-02`` or just ``01-02``) opening a block;
    * an inline entry (``01-02: text``, optionally bulleted) which also
      becomes the current block;
    * any other line, which is added to the current block after its bullet
      marker is removed.  ``Saints:`` lines are ignored because saints come
      from the saints JSON file.

    Lines that appear before any date, or under a heading whose date does
    not exist, are dropped.

    Returns:
        The mapping, or an empty dict when the file does not exist.
    """
    if not path.exists():
        return {}

    out: dict[str, list[str]] = {}
    current_date: str | None = None
    # "utf-8-sig" keeps a BOM from hiding the first date of the file.
    for raw in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw.strip()
        if not line:
            continue

        # Date heading block.
        m_head = DATE_HEADING_RE.match(line)
        if m_head:
            current_date = _normalize_mmdd(m_head.group(1))
            if current_date:
                out.setdefault(current_date, [])
            continue

        # Remove the list marker first so "- 12-25: ..." is seen as inline.
        text = _BULLET_RE.sub("", line).strip()
        if not text:
            continue

        # Inline date + dicton.
        m_inline = INLINE_DATE_DIC_RE.match(text)
        if m_inline:
            mmdd = _normalize_mmdd(m_inline.group(1))
            if mmdd:
                out.setdefault(mmdd, []).extend(_split_dictons(m_inline.group(2)))
                current_date = mmdd
                continue
            # An impossible date falls through and is kept as ordinary text.

        if current_date is None:
            continue
        if _SAINTS_LINE_RE.match(text):
            continue
        out.setdefault(current_date, []).extend(_split_dictons(text))

    return {key: _unique(dictons) for key, dictons in out.items()}


def _as_iso(year: int, mmdd: str) -> str:
    """Return ``YYYY-MM-DD`` for a ``MM-DD`` key in the given year.

    Raises:
        ValueError: If the resulting date does not exist.
    """
    month, day = mmdd.split("-")
    return date(year, int(month), int(day)).isoformat()


def build_output(
    saints_by_date: dict[str, list[str]],
    dictons_by_date: dict[str, list[str]],
    year: int | None,
) -> list[dict]:
    """Combine both mappings into a date-sorted list of rows.

    Each row is ``{"date", "saints", "dictons"}``.  With a year, dates are
    rendered as ``YYYY-MM-DD`` and ``02-29`` is left out when that year is
    not a leap year; without one they stay as ``MM-DD``.
    """
    skip_leap_day = bool(year) and not calendar.isleap(year)
    rows: list[dict] = []
    for mmdd in sorted(set(saints_by_date) | set(dictons_by_date)):
        if skip_leap_day and mmdd == _LEAP_DAY:
            continue
        rows.append(
            {
                "date": _as_iso(year, mmdd) if year else mmdd,
                "saints": saints_by_date.get(mmdd, []),
                "dictons": dictons_by_date.get(mmdd, []),
            }
        )
    return rows


def main() -> int:
    """Command-line entry point: read both inputs and write the JSON file.

    Returns:
        The process exit code (always 0 on normal completion).
    """
    parser = argparse.ArgumentParser(
        description="Génère un JSON saints+dictons: date, saints[], dictons[]"
    )
    parser.add_argument(
        "--saints-json",
        default="calendrier_lunaire/saints_dictons/saints_france.json",
        help="Fichier JSON des saints (clé MM-DD)",
    )
    parser.add_argument(
        "--dictons-file",
        required=True,
        help="Fichier texte/markdown des dictons (avec dates MM-DD, DD/MM ou MMDD)",
    )
    parser.add_argument(
        "--output",
        default="calendrier_lunaire/saints_dictons/saints_dictons.json",
        help="Fichier JSON de sortie",
    )
    parser.add_argument(
        "--year",
        type=int,
        help="Optionnel: convertit MM-DD en YYYY-MM-DD",
    )
    args = parser.parse_args()

    saints_path = Path(args.saints_json)
    dictons_path = Path(args.dictons_file)
    output_path = Path(args.output)

    # A missing input is treated as empty, but say so instead of staying silent.
    for label, input_path in (("saints", saints_path), ("dictons", dictons_path)):
        if not input_path.exists():
            print(
                f"Avertissement: fichier {label} introuvable: {input_path}",
                file=sys.stderr,
            )

    saints_by_date = load_saints(saints_path)
    dictons_by_date = parse_dictons_text(dictons_path)
    rows = build_output(saints_by_date, dictons_by_date, args.year)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(rows, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"JSON généré: {output_path} ({len(rows)} dates)")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())