avant codex
This commit is contained in:
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# One MM-DD date token in any accepted layout: "MM-DD"/"MM/DD" (month first),
# "DD-MM"/"DD/MM" (day first), or bare "MMDD".  Factored out so the two
# patterns below are guaranteed to accept exactly the same tokens.
_DATE_TOKEN = (
    r"(?:0[1-9]|1[0-2])[-/](?:0[1-9]|[12][0-9]|3[01])"
    r"|(?:0[1-9]|[12][0-9]|3[01])[-/](?:0[1-9]|1[0-2])"
    r"|(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01])"
)

# A line that contains only a date, optionally preceded by a markdown
# heading marker ("#" .. "######") and up to 3 leading spaces.
DATE_HEADING_RE = re.compile(rf"^\s{{0,3}}(?:#{{1,6}}\s*)?({_DATE_TOKEN})\s*$")

# A line of the form "<date>: <text>" or "<date> - <text>"; group(1) is the
# date token, group(2) the dicton text.
INLINE_DATE_DIC_RE = re.compile(rf"^\s*({_DATE_TOKEN})\s*[:\-]\s*(.+)$")
def _normalize_mmdd(token: str) -> str | None:
|
||||
token = token.strip()
|
||||
if re.fullmatch(r"(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])", token):
|
||||
month = token[:2]
|
||||
day = token[2:]
|
||||
elif re.fullmatch(r"(0[1-9]|1[0-2])[-/](0[1-9]|[12][0-9]|3[01])", token):
|
||||
month, day = re.split(r"[-/]", token)
|
||||
elif re.fullmatch(r"(0[1-9]|[12][0-9]|3[01])[-/](0[1-9]|1[0-2])", token):
|
||||
day, month = re.split(r"[-/]", token)
|
||||
else:
|
||||
return None
|
||||
return f"{month}-{day}"
|
||||
|
||||
|
||||
def _unique(values: list[str]) -> list[str]:
|
||||
out: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for value in values:
|
||||
v = value.strip()
|
||||
if not v:
|
||||
continue
|
||||
if v not in seen:
|
||||
seen.add(v)
|
||||
out.append(v)
|
||||
return out
|
||||
|
||||
|
||||
def _split_saints(text: str) -> list[str]:
|
||||
# Normalize separators and preserve saint labels.
|
||||
cleaned = text.strip().strip(".")
|
||||
cleaned = re.sub(r"^(saints?\s*[:\-]\s*)", "", cleaned, flags=re.I).strip()
|
||||
if not cleaned:
|
||||
return []
|
||||
parts = re.split(r"\s*(?:,|;|\||\set\s)\s*", cleaned, flags=re.I)
|
||||
return _unique(parts)
|
||||
|
||||
|
||||
def _split_dictons(text: str) -> list[str]:
|
||||
cleaned = text.strip()
|
||||
cleaned = re.sub(r"^(dictons?\s*[:\-]\s*)", "", cleaned, flags=re.I).strip()
|
||||
if not cleaned:
|
||||
return []
|
||||
# Keep sentences readable; split on explicit separators first.
|
||||
if "|" in cleaned or ";" in cleaned:
|
||||
parts = re.split(r"\s*(?:\||;)\s*", cleaned)
|
||||
else:
|
||||
parts = [cleaned]
|
||||
return _unique(parts)
|
||||
|
||||
|
||||
def load_saints(path: Path) -> dict[str, list[str]]:
    """Load the saints JSON file and return a {"MM-DD": [saint, ...]} map.

    A missing file yields an empty mapping.  Keys that cannot be normalized
    to MM-DD are skipped.  Values may be a list of names or a single
    free-form string, which is split via _split_saints.
    """
    if not path.exists():
        return {}
    raw = json.loads(path.read_text(encoding="utf-8"))
    result: dict[str, list[str]] = {}
    for raw_key, value in raw.items():
        mmdd = _normalize_mmdd(raw_key)
        if mmdd is None:
            continue  # unparseable key — skip rather than fail
        if isinstance(value, list):
            names = [str(item).strip() for item in value]
        else:
            names = _split_saints(str(value))
        result[mmdd] = _unique(names)
    return result
def parse_dictons_text(path: Path) -> dict[str, list[str]]:
    """Parse a text/markdown dictons file into {"MM-DD": [dicton, ...]}.

    The file may mix three line shapes, handled in priority order:
    a date-only heading (opens a date block), an inline "date: dicton"
    line, and bullet/plain lines attached to the current date block.
    A missing file yields an empty mapping.
    """
    if not path.exists():
        return {}
    lines = path.read_text(encoding="utf-8").splitlines()
    out: dict[str, list[str]] = {}
    # MM-DD of the most recent heading/inline date; plain lines before any
    # date are ignored because current_date is still None.
    current_date: str | None = None

    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        # Date heading block: switch current_date and pre-register the key
        # so a heading with no dictons still appears in the output.
        m_head = DATE_HEADING_RE.match(line)
        if m_head:
            current_date = _normalize_mmdd(m_head.group(1))
            if current_date and current_date not in out:
                out[current_date] = []
            continue

        # Inline date + dicton ("MM-DD: saying"); also moves current_date
        # so following bullet lines attach to this date.
        m_inline = INLINE_DATE_DIC_RE.match(line)
        if m_inline:
            mmdd = _normalize_mmdd(m_inline.group(1))
            if mmdd:
                out.setdefault(mmdd, []).extend(_split_dictons(m_inline.group(2)))
                current_date = mmdd
            continue

        # Bullets or plain lines inside current date block
        if current_date:
            # Strip a leading "-" or "*" bullet marker.
            line = re.sub(r"^\s*[-*]\s*", "", line).strip()
            if not line:
                continue
            if re.match(r"^saints?\s*[:\-]", line, flags=re.I):
                # Saints line is ignored here; saints come from saints_json.
                continue
            out.setdefault(current_date, []).extend(_split_dictons(line))

    # Final pass dedupes dictons collected from multiple lines per date.
    return {k: _unique(v) for k, v in out.items()}
def _as_iso(year: int, mmdd: str) -> str:
|
||||
month, day = mmdd.split("-")
|
||||
return date(year, int(month), int(day)).isoformat()
|
||||
|
||||
|
||||
def build_output(
    saints_by_date: dict[str, list[str]],
    dictons_by_date: dict[str, list[str]],
    year: int | None,
) -> list[dict]:
    """Merge saints and dictons into one row per date, sorted by MM-DD key.

    Each row has "date" (MM-DD, or full ISO date when *year* is given),
    "saints", and "dictons"; dates present in only one source get an empty
    list for the other field.
    """
    merged_keys = sorted(set(saints_by_date) | set(dictons_by_date))
    return [
        {
            "date": _as_iso(year, mmdd) if year else mmdd,
            "saints": saints_by_date.get(mmdd, []),
            "dictons": dictons_by_date.get(mmdd, []),
        }
        for mmdd in merged_keys
    ]
def main() -> int:
    """CLI entry point: merge a saints JSON and a dictons text file into JSON.

    Returns 0 on success.  Exits via parser.error (status 2) when the
    required dictons file does not exist — previously that case silently
    produced an empty output file with exit status 0.
    """
    parser = argparse.ArgumentParser(
        description="Génère un JSON saints+dictons: date, saints[], dictons[]"
    )
    parser.add_argument(
        "--saints-json",
        default="calendrier_lunaire/saints_dictons/saints_france.json",
        help="Fichier JSON des saints (clé MM-DD)",
    )
    parser.add_argument(
        "--dictons-file",
        required=True,
        help="Fichier texte/markdown des dictons (avec dates MM-DD, DD/MM ou MMDD)",
    )
    parser.add_argument(
        "--output",
        default="calendrier_lunaire/saints_dictons/saints_dictons.json",
        help="Fichier JSON de sortie",
    )
    parser.add_argument(
        "--year",
        type=int,
        help="Optionnel: convertit MM-DD en YYYY-MM-DD",
    )
    args = parser.parse_args()

    saints_path = Path(args.saints_json)
    dictons_path = Path(args.dictons_file)
    output_path = Path(args.output)

    # Fail loudly when the mandatory dictons file is absent: without this
    # check parse_dictons_text returns {} and the script writes an empty
    # dataset while reporting success.
    if not dictons_path.exists():
        parser.error(f"fichier dictons introuvable: {dictons_path}")

    saints_by_date = load_saints(saints_path)
    dictons_by_date = parse_dictons_text(dictons_path)
    rows = build_output(saints_by_date, dictons_by_date, args.year)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(
        json.dumps(rows, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    print(f"JSON généré: {output_path} ({len(rows)} dates)")
    return 0
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Reference in New Issue
Block a user