avant codex
This commit is contained in:
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# One MM-DD date token in any accepted layout: "MM-DD"/"MM/DD" (month first),
# "DD-MM"/"DD/MM" (day first), or bare "MMDD".  Factored out so the two
# patterns below are guaranteed to accept exactly the same tokens.
_DATE_TOKEN = (
    r"(?:0[1-9]|1[0-2])[-/](?:0[1-9]|[12][0-9]|3[01])"
    r"|(?:0[1-9]|[12][0-9]|3[01])[-/](?:0[1-9]|1[0-2])"
    r"|(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01])"
)

# A line that contains only a date, optionally preceded by a markdown
# heading marker ("#" .. "######") and up to 3 leading spaces.
DATE_HEADING_RE = re.compile(rf"^\s{{0,3}}(?:#{{1,6}}\s*)?({_DATE_TOKEN})\s*$")

# A line of the form "<date>: <text>" or "<date> - <text>"; group(1) is the
# date token, group(2) the dicton text.
INLINE_DATE_DIC_RE = re.compile(rf"^\s*({_DATE_TOKEN})\s*[:\-]\s*(.+)$")
def _normalize_mmdd(token: str) -> str | None:
|
||||
token = token.strip()
|
||||
if re.fullmatch(r"(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])", token):
|
||||
month = token[:2]
|
||||
day = token[2:]
|
||||
elif re.fullmatch(r"(0[1-9]|1[0-2])[-/](0[1-9]|[12][0-9]|3[01])", token):
|
||||
month, day = re.split(r"[-/]", token)
|
||||
elif re.fullmatch(r"(0[1-9]|[12][0-9]|3[01])[-/](0[1-9]|1[0-2])", token):
|
||||
day, month = re.split(r"[-/]", token)
|
||||
else:
|
||||
return None
|
||||
return f"{month}-{day}"
|
||||
|
||||
|
||||
def _unique(values: list[str]) -> list[str]:
|
||||
out: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for value in values:
|
||||
v = value.strip()
|
||||
if not v:
|
||||
continue
|
||||
if v not in seen:
|
||||
seen.add(v)
|
||||
out.append(v)
|
||||
return out
|
||||
|
||||
|
||||
def _split_saints(text: str) -> list[str]:
|
||||
# Normalize separators and preserve saint labels.
|
||||
cleaned = text.strip().strip(".")
|
||||
cleaned = re.sub(r"^(saints?\s*[:\-]\s*)", "", cleaned, flags=re.I).strip()
|
||||
if not cleaned:
|
||||
return []
|
||||
parts = re.split(r"\s*(?:,|;|\||\set\s)\s*", cleaned, flags=re.I)
|
||||
return _unique(parts)
|
||||
|
||||
|
||||
def _split_dictons(text: str) -> list[str]:
|
||||
cleaned = text.strip()
|
||||
cleaned = re.sub(r"^(dictons?\s*[:\-]\s*)", "", cleaned, flags=re.I).strip()
|
||||
if not cleaned:
|
||||
return []
|
||||
# Keep sentences readable; split on explicit separators first.
|
||||
if "|" in cleaned or ";" in cleaned:
|
||||
parts = re.split(r"\s*(?:\||;)\s*", cleaned)
|
||||
else:
|
||||
parts = [cleaned]
|
||||
return _unique(parts)
|
||||
|
||||
|
||||
def load_saints(path: Path) -> dict[str, list[str]]:
    """Load the saints JSON file and return a {"MM-DD": [saint, ...]} map.

    A missing file yields an empty mapping.  Keys that cannot be normalized
    to MM-DD are skipped.  Values may be a list of names or a single
    free-form string, which is split via _split_saints.
    """
    if not path.exists():
        return {}
    raw = json.loads(path.read_text(encoding="utf-8"))
    result: dict[str, list[str]] = {}
    for raw_key, value in raw.items():
        mmdd = _normalize_mmdd(raw_key)
        if mmdd is None:
            continue  # unparseable key — skip rather than fail
        if isinstance(value, list):
            names = [str(item).strip() for item in value]
        else:
            names = _split_saints(str(value))
        result[mmdd] = _unique(names)
    return result
def parse_dictons_text(path: Path) -> dict[str, list[str]]:
    """Parse a text/markdown dictons file into {"MM-DD": [dicton, ...]}.

    The file may mix three line shapes, handled in priority order:
    a date-only heading (opens a date block), an inline "date: dicton"
    line, and bullet/plain lines attached to the current date block.
    A missing file yields an empty mapping.
    """
    if not path.exists():
        return {}
    lines = path.read_text(encoding="utf-8").splitlines()
    out: dict[str, list[str]] = {}
    # MM-DD of the most recent heading/inline date; plain lines before any
    # date are ignored because current_date is still None.
    current_date: str | None = None

    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        # Date heading block: switch current_date and pre-register the key
        # so a heading with no dictons still appears in the output.
        m_head = DATE_HEADING_RE.match(line)
        if m_head:
            current_date = _normalize_mmdd(m_head.group(1))
            if current_date and current_date not in out:
                out[current_date] = []
            continue

        # Inline date + dicton ("MM-DD: saying"); also moves current_date
        # so following bullet lines attach to this date.
        m_inline = INLINE_DATE_DIC_RE.match(line)
        if m_inline:
            mmdd = _normalize_mmdd(m_inline.group(1))
            if mmdd:
                out.setdefault(mmdd, []).extend(_split_dictons(m_inline.group(2)))
                current_date = mmdd
            continue

        # Bullets or plain lines inside current date block
        if current_date:
            # Strip a leading "-" or "*" bullet marker.
            line = re.sub(r"^\s*[-*]\s*", "", line).strip()
            if not line:
                continue
            if re.match(r"^saints?\s*[:\-]", line, flags=re.I):
                # Saints line is ignored here; saints come from saints_json.
                continue
            out.setdefault(current_date, []).extend(_split_dictons(line))

    # Final pass dedupes dictons collected from multiple lines per date.
    return {k: _unique(v) for k, v in out.items()}
def _as_iso(year: int, mmdd: str) -> str:
|
||||
month, day = mmdd.split("-")
|
||||
return date(year, int(month), int(day)).isoformat()
|
||||
|
||||
|
||||
def build_output(
    saints_by_date: dict[str, list[str]],
    dictons_by_date: dict[str, list[str]],
    year: int | None,
) -> list[dict]:
    """Merge saints and dictons into one row per date, sorted by MM-DD key.

    Each row has "date" (MM-DD, or full ISO date when *year* is given),
    "saints", and "dictons"; dates present in only one source get an empty
    list for the other field.
    """
    merged_keys = sorted(set(saints_by_date) | set(dictons_by_date))
    return [
        {
            "date": _as_iso(year, mmdd) if year else mmdd,
            "saints": saints_by_date.get(mmdd, []),
            "dictons": dictons_by_date.get(mmdd, []),
        }
        for mmdd in merged_keys
    ]
def main() -> int:
    """CLI entry point: merge a saints JSON and a dictons text file into JSON.

    Returns 0 on success.  Exits via parser.error (status 2) when the
    required dictons file does not exist — previously that case silently
    produced an empty output file with exit status 0.
    """
    parser = argparse.ArgumentParser(
        description="Génère un JSON saints+dictons: date, saints[], dictons[]"
    )
    parser.add_argument(
        "--saints-json",
        default="calendrier_lunaire/saints_dictons/saints_france.json",
        help="Fichier JSON des saints (clé MM-DD)",
    )
    parser.add_argument(
        "--dictons-file",
        required=True,
        help="Fichier texte/markdown des dictons (avec dates MM-DD, DD/MM ou MMDD)",
    )
    parser.add_argument(
        "--output",
        default="calendrier_lunaire/saints_dictons/saints_dictons.json",
        help="Fichier JSON de sortie",
    )
    parser.add_argument(
        "--year",
        type=int,
        help="Optionnel: convertit MM-DD en YYYY-MM-DD",
    )
    args = parser.parse_args()

    saints_path = Path(args.saints_json)
    dictons_path = Path(args.dictons_file)
    output_path = Path(args.output)

    # Fail loudly when the mandatory dictons file is absent: without this
    # check parse_dictons_text returns {} and the script writes an empty
    # dataset while reporting success.
    if not dictons_path.exists():
        parser.error(f"fichier dictons introuvable: {dictons_path}")

    saints_by_date = load_saints(saints_path)
    dictons_by_date = parse_dictons_text(dictons_path)
    rows = build_output(saints_by_date, dictons_by_date, args.year)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(
        json.dumps(rows, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    print(f"JSON généré: {output_path} ({len(rows)} dates)")
    return 0
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Reference in New Issue
Block a user