Files
webcarto/backend/app/routes/datasets.py
2026-02-09 00:01:29 +01:00

297 lines
9.0 KiB
Python

import json
import shutil
import xml.etree.ElementTree as ET
import re
import base64
import logging
from pathlib import Path
from fastapi import APIRouter, Depends, UploadFile, File, Form, HTTPException
from sqlmodel import Session, select
from ..database import get_session
from ..models import Dataset, Feature, FeatureVersion
from ..config import DATA_DIR, MAX_UPLOAD_SIZE
from .images import extract_and_save_images, IMAGES_DIR
# Module-level logger for this routes module.
logger = logging.getLogger(__name__)
# All endpoints below are mounted under the /datasets prefix.
router = APIRouter(prefix="/datasets", tags=["datasets"])
@router.get("")
def list_datasets(session: Session = Depends(get_session)):
    """Return a lightweight summary (no feature bodies) of every dataset."""
    return [
        {
            "id": ds.id,
            "name": ds.name,
            "feature_count": ds.feature_count,
            "created_at": ds.created_at.isoformat(),
            # bbox is stored as a JSON string column; decode it when present.
            "bbox": json.loads(ds.bbox_json) if ds.bbox_json else None,
        }
        for ds in session.exec(select(Dataset)).all()
    ]
@router.get("/{dataset_id}")
def get_dataset(dataset_id: int, session: Session = Depends(get_session)):
    """Return one dataset with its full feature list, or 404 if absent."""
    ds = session.get(Dataset, dataset_id)
    if ds is None:
        raise HTTPException(404, "Dataset non trouvé")
    rows = session.exec(
        select(Feature).where(Feature.dataset_id == dataset_id)
    ).all()
    feature_payload = []
    for row in rows:
        feature_payload.append({
            "id": row.id,
            "geometry": json.loads(row.geometry_json),
            "properties": json.loads(row.properties_json),
        })
    return {
        "id": ds.id,
        "name": ds.name,
        "feature_count": ds.feature_count,
        "created_at": ds.created_at.isoformat(),
        "bbox": json.loads(ds.bbox_json) if ds.bbox_json else None,
        "raw_filename": ds.raw_filename,
        "features": feature_payload,
    }
@router.delete("/{dataset_id}")
def delete_dataset(dataset_id: int, session: Session = Depends(get_session)):
    """Delete a dataset along with its features, feature versions,
    image directory, and stored raw upload. 404 if the dataset is unknown."""
    ds = session.get(Dataset, dataset_id)
    if ds is None:
        raise HTTPException(404, "Dataset non trouvé")
    # Remove each feature together with its version history.
    for feature in session.exec(
        select(Feature).where(Feature.dataset_id == dataset_id)
    ).all():
        for version in session.exec(
            select(FeatureVersion).where(FeatureVersion.feature_id == feature.id)
        ).all():
            session.delete(version)
        session.delete(feature)
    # Drop the per-dataset image directory, if one was ever created.
    img_dir = IMAGES_DIR / str(dataset_id)
    if img_dir.exists():
        shutil.rmtree(img_dir)
    # Drop the stored raw upload, if still present on disk.
    raw_path = DATA_DIR / "raw" / ds.raw_filename
    if raw_path.exists():
        raw_path.unlink()
    session.delete(ds)
    session.commit()
    return {"ok": True}
@router.post("/import")
async def import_dataset(
    file: UploadFile = File(...),
    geojson: str = Form(...),
    session: Session = Depends(get_session),
):
    """Import a dataset: store the raw upload under DATA_DIR/raw, parse the
    client-provided GeoJSON FeatureCollection, and create the Dataset and
    Feature rows (extracting inline base64 images from properties).

    Raises:
        HTTPException 413: upload larger than MAX_UPLOAD_SIZE.
        HTTPException 400: invalid JSON or not a FeatureCollection.
    """
    content = await file.read()
    # Fix: MAX_UPLOAD_SIZE was imported but never enforced.
    if MAX_UPLOAD_SIZE and len(content) > MAX_UPLOAD_SIZE:
        raise HTTPException(413, "Fichier trop volumineux")
    # Fix: keep only the basename of the client-supplied filename so a
    # crafted name (e.g. "../../x") cannot escape the raw directory.
    safe_name = Path(file.filename or "upload").name or "upload"
    raw_dir = DATA_DIR / "raw"
    raw_dir.mkdir(parents=True, exist_ok=True)
    stem = Path(safe_name).stem
    suffix = Path(safe_name).suffix
    # Avoid clobbering an earlier upload with the same name.
    raw_path = raw_dir / safe_name
    counter = 1
    while raw_path.exists():
        raw_path = raw_dir / f"{stem}_{counter}{suffix}"
        counter += 1
    raw_path.write_bytes(content)
    # Parse the GeoJSON payload (converted client-side from the raw file).
    try:
        fc = json.loads(geojson)
    except json.JSONDecodeError:
        raise HTTPException(400, "GeoJSON invalide")
    if fc.get("type") != "FeatureCollection":
        raise HTTPException(400, "Le JSON doit être un FeatureCollection")
    features_data = fc.get("features", [])
    bbox = _compute_bbox(features_data)
    ds = Dataset(
        name=stem,
        raw_filename=raw_path.name,
        feature_count=len(features_data),
        bbox_json=json.dumps(bbox) if bbox else None,
    )
    session.add(ds)
    session.commit()
    session.refresh(ds)  # populate ds.id for the feature rows below
    for i, f_data in enumerate(features_data):
        # GeoJSON allows "properties": null — normalize to a dict.
        properties = extract_and_save_images(
            f_data.get("properties") or {}, ds.id, i
        )
        session.add(Feature(
            dataset_id=ds.id,
            geometry_json=json.dumps(f_data.get("geometry", {})),
            properties_json=json.dumps(properties),
        ))
    session.commit()
    # KML files may embed base64 images; harvest them from the raw file.
    if safe_name.lower().endswith(".kml"):
        _extract_kml_images(raw_path, ds.id, session)
    return {
        "id": ds.id,
        "name": ds.name,
        "feature_count": ds.feature_count,
        "created_at": ds.created_at.isoformat(),
        "bbox": bbox,
    }
@router.post("/{dataset_id}/export")
def export_dataset(dataset_id: int, format: str = "geojson", session: Session = Depends(get_session)):
    """Export a dataset as a downloadable GeoJSON attachment.

    NOTE(review): the `format` parameter is accepted but currently ignored —
    every export is GeoJSON regardless of its value; confirm intent.
    """
    from fastapi.responses import Response

    ds = session.get(Dataset, dataset_id)
    if ds is None:
        raise HTTPException(404, "Dataset non trouvé")
    rows = session.exec(
        select(Feature).where(Feature.dataset_id == dataset_id)
    ).all()
    collection = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "geometry": json.loads(row.geometry_json),
                "properties": json.loads(row.properties_json),
            }
            for row in rows
        ],
    }
    body = json.dumps(collection, ensure_ascii=False, indent=2)
    return Response(
        content=body,
        media_type="application/geo+json",
        headers={"Content-Disposition": f'attachment; filename="{ds.name}.geojson"'},
    )
def _compute_bbox(features: list) -> list | None:
    """Compute [min_lng, min_lat, max_lng, max_lat] over every coordinate
    of every feature geometry, or None when no coordinates are found."""
    all_coords: list = []
    for feature in features:
        _extract_coords(feature.get("geometry", {}), all_coords)
    if not all_coords:
        return None
    min_lng = min(c[0] for c in all_coords)
    max_lng = max(c[0] for c in all_coords)
    min_lat = min(c[1] for c in all_coords)
    max_lat = max(c[1] for c in all_coords)
    return [min_lng, min_lat, max_lng, max_lat]
def _extract_coords(geometry: dict, coords: list):
gtype = geometry.get("type", "")
coordinates = geometry.get("coordinates")
if not coordinates:
return
if gtype == "Point":
coords.append(coordinates)
elif gtype in ("MultiPoint", "LineString"):
coords.extend(coordinates)
elif gtype in ("MultiLineString", "Polygon"):
for ring in coordinates:
coords.extend(ring)
elif gtype == "MultiPolygon":
for polygon in coordinates:
for ring in polygon:
coords.extend(ring)
elif gtype == "GeometryCollection":
for g in geometry.get("geometries", []):
_extract_coords(g, coords)
def _extract_kml_images(kml_path: Path, dataset_id: int, session: Session):
    """Extract base64-encoded images from gx:imageUrl elements in the raw KML
    file, save them under the dataset's image directory, and append their
    served URLs to the matching features' "_images" property.

    Placemarks are paired with features by document index, so the extraction
    is skipped with a warning when the counts disagree.
    """
    try:
        tree = ET.parse(kml_path)
    except ET.ParseError as e:
        logger.warning(f"Impossible de parser le KML {kml_path}: {e}")
        return
    root = tree.getroot()
    ns = {
        "kml": "http://www.opengis.net/kml/2.2",
        "gx": "http://www.google.com/kml/ext/2.2",
    }
    placemarks = root.findall(".//kml:Placemark", ns)
    # NOTE(review): pairing relies on Feature rows coming back in insertion
    # order, matching Placemark document order — consider an explicit
    # ORDER BY id to make that guarantee hard.
    features = session.exec(
        select(Feature).where(Feature.dataset_id == dataset_id)
    ).all()
    if len(placemarks) != len(features):
        logger.warning(
            f"KML {kml_path}: {len(placemarks)} placemarks vs {len(features)} features, "
            "extraction images par index impossible"
        )
        return
    img_dir = IMAGES_DIR / str(dataset_id)
    img_dir.mkdir(parents=True, exist_ok=True)
    data_uri_re = re.compile(r"data:image/(\w+);base64,(.+)", re.DOTALL)
    for i, (pm, feature) in enumerate(zip(placemarks, features)):
        image_urls = pm.findall(".//gx:imageUrl", ns)
        if not image_urls:
            continue
        saved = []
        for j, img_el in enumerate(image_urls):
            data_uri = (img_el.text or "").strip()
            match = data_uri_re.match(data_uri)
            if not match:
                continue  # not an inline data: URI (e.g. a plain URL) — skip
            ext = match.group(1)
            if ext == "jpeg":
                ext = "jpg"  # normalize the extension
            try:
                raw = base64.b64decode(match.group(2))
                filename = f"{i}_{j}.{ext}"
                (img_dir / filename).write_bytes(raw)
                # Bug fix: the URL must reference the file just written;
                # previously a literal "(unknown)" placeholder was appended,
                # producing dead image links.
                saved.append(f"/api/images/{dataset_id}/{filename}")
            except Exception as e:
                logger.warning(f"Erreur décodage image placemark {i} img {j}: {e}")
                continue
        if saved:
            props = json.loads(feature.properties_json)
            props["_images"] = props.get("_images", []) + saved
            feature.properties_json = json.dumps(props)
            session.add(feature)
    session.commit()