Importer des plannings PDF
+Glissez vos PDFs ici
+ou
+ + +Historique
+Aucun traitement précédent
diff --git a/.gitignore b/.gitignore index 408b7d7..7cc05f8 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ cache/ __pycache__/ *.pyc .env +webapp/data/ diff --git a/webapp/Dockerfile b/webapp/Dockerfile new file mode 100644 index 0000000..99588ae --- /dev/null +++ b/webapp/Dockerfile @@ -0,0 +1,26 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Dépendances système minimales +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Dépendances Python +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Code applicatif +COPY app.py core.py ./ +COPY static/ ./static/ + +# Répertoires de données (sera écrasé par le volume en production) +RUN mkdir -p /app/data/cache /app/data/jobs /app/data/uploads + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=10s --start-period=20s --retries=3 \ + CMD curl -sf http://localhost:8000/api/health || exit 1 + +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/webapp/app.py b/webapp/app.py new file mode 100644 index 0000000..fce9311 --- /dev/null +++ b/webapp/app.py @@ -0,0 +1,287 @@ +""" +app.py - Backend FastAPI pour planning2ics web app. +""" + +import asyncio +import json +import secrets +import uuid +from datetime import datetime +from pathlib import Path +from typing import Optional + +from fastapi import FastAPI, Depends, File, HTTPException, Request, Response, UploadFile, Cookie +from fastapi.responses import FileResponse, StreamingResponse +from fastapi.staticfiles import StaticFiles + +import core + +# ── Chemins ─────────────────────────────────────────────────────────────────── +CONFIG_PATH = Path("/app/config.json") +DATA_DIR = Path("/app/data") + +def load_config() -> dict: + with open(CONFIG_PATH) as f: + return json.load(f) + + +# ── App ─────────────────────────────────────────────────────────────────────── +from contextlib import asynccontextmanager + +@asynccontextmanager +async def lifespan(app): + # Création des répertoires de données au démarrage + for _d in ["cache", "jobs", "uploads"]: + (DATA_DIR / _d).mkdir(parents=True, exist_ok=True) + yield + +app = FastAPI(title="planning2ics", docs_url=None, redoc_url=None, lifespan=lifespan) +app.mount("/static", StaticFiles(directory="/app/static"), name="static") + +# ── Auth ────────────────────────────────────────────────────────────────────── +sessions: dict[str, str] = {} # token → username + + +def get_current_user(session: Optional[str] = Cookie(default=None)) -> str: + if not session or session not in sessions: + raise HTTPException(status_code=401, detail="Non authentifié") + return sessions[session] + + +# ── Pages ───────────────────────────────────────────────────────────────────── +@app.get("/") +async def root(): + return FileResponse("/app/static/index.html") + + +@app.get("/api/health") +async def health(): + return {"status": "ok"} + + +# ── Auth endpoints ──────────────────────────────────────────────────────────── +@app.post("/api/auth/login") +async def login(request: Request, response: Response): + data = await request.json() + config = load_config() + username = data.get("username", "") + password = data.get("password", "") + + for user in config["auth"]["users"]: + if user["username"] == username and user["password"] == password: + token = secrets.token_hex(32) + sessions[token] = username + response.set_cookie( + key="session", value=token, + httponly=True, samesite="lax", max_age=86400 * 7 + ) + return {"ok": True, "username": username} + + raise HTTPException(status_code=401, detail="Identifiants incorrects") + + +@app.post("/api/auth/logout") +async def logout(response: Response, session: Optional[str] = Cookie(default=None)): + if session and session in sessions: + del sessions[session] + response.delete_cookie("session") + return {"ok": True} + + +@app.get("/api/auth/me") +async def me(user: str = Depends(get_current_user)): + return {"username": user} + + +# ── Config publique ─────────────────────────────────────────────────────────── +@app.get("/api/config") +async def public_config(user: str = Depends(get_current_user)): + cfg = load_config() + return { + "ollama_url": cfg["ollama"]["url"], + "cluster_model": cfg["ollama"]["cluster_model"], + "local_model": cfg["ollama"]["local_model"], + } + + +# ── Traitement PDF ──────────────────────────────────────────────────────────── +jobs: dict[str, dict] = {} + + +@app.post("/api/process") +async def start_processing( + files: list[UploadFile], + user: str = Depends(get_current_user), +): + job_id = str(uuid.uuid4()) + queue = asyncio.Queue() + upload_dir = DATA_DIR / "uploads" / job_id + upload_dir.mkdir(parents=True) + + saved_paths = [] + pdf_names = [] + for file in files: + if not file.filename.lower().endswith('.pdf'): + continue + dest = upload_dir / file.filename + dest.write_bytes(await file.read()) + saved_paths.append(dest) + pdf_names.append(file.filename) + + if not saved_paths: + raise HTTPException(400, "Aucun fichier PDF valide fourni") + + jobs[job_id] = { + "status": "running", + "queue": queue, + "result": None, + "created_at": datetime.now().isoformat(), + "pdf_names": pdf_names, + "user": user, + } + + asyncio.create_task(_run_processing(job_id, saved_paths, queue)) + return {"job_id": job_id} + + +async def _run_processing(job_id: str, pdf_paths: list, queue: asyncio.Queue): + loop = asyncio.get_running_loop() + config = load_config() + + def log(msg: str): + asyncio.run_coroutine_threadsafe( + queue.put({"type": "progress", "message": msg}), loop + ) + + try: + result = await loop.run_in_executor( + None, lambda: core.process_pdfs(pdf_paths, config, DATA_DIR, log) + ) + + # Sauvegarder les ICS + output_dir = DATA_DIR / "jobs" / job_id + output_dir.mkdir(parents=True, exist_ok=True) + + series_list = [] + for series_title, data in result.items(): + (output_dir / data['filename']).write_bytes(data['bytes']) + series_list.append({ + "name": series_title, + "filename": data['filename'], + "event_count": data['event_count'], + }) + + meta = { + "job_id": job_id, + "created_at": jobs[job_id]["created_at"], + "pdf_names": jobs[job_id]["pdf_names"], + "series": series_list, + } + (output_dir / "metadata.json").write_text( + json.dumps(meta, ensure_ascii=False, indent=2) + ) + + jobs[job_id]["status"] = "done" + jobs[job_id]["result"] = series_list + await queue.put({"type": "done", "series": series_list}) + + except Exception as e: + import traceback + traceback.print_exc() + jobs[job_id]["status"] = "error" + await queue.put({"type": "error", "message": str(e)}) + + +@app.get("/api/progress/{job_id}") +async def progress_stream(job_id: str, user: str = Depends(get_current_user)): + if job_id not in jobs: + raise HTTPException(404, "Job introuvable") + + async def event_stream(): + q = jobs[job_id]["queue"] + while True: + try: + msg = await asyncio.wait_for(q.get(), timeout=30) + yield f"data: {json.dumps(msg, ensure_ascii=False)}\n\n" + if msg["type"] in ("done", "error"): + break + except asyncio.TimeoutError: + yield "data: {\"type\":\"ping\"}\n\n" + + return StreamingResponse( + event_stream(), + media_type="text/event-stream", + headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}, + ) + + +# ── Jobs ────────────────────────────────────────────────────────────────────── +@app.get("/api/jobs") +async def list_jobs(user: str = Depends(get_current_user)): + result = [] + jobs_dir = DATA_DIR / "jobs" + if jobs_dir.exists(): + for d in sorted(jobs_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True): + meta = d / "metadata.json" + if meta.exists(): + result.append(json.loads(meta.read_text())) + return result + + +@app.get("/api/jobs/{job_id}") +async def get_job(job_id: str, user: str = Depends(get_current_user)): + if job_id in jobs: + j = jobs[job_id] + return { + "job_id": job_id, + "status": j["status"], + "pdf_names": j["pdf_names"], + "series": j.get("result"), + "created_at": j["created_at"], + } + meta = DATA_DIR / "jobs" / job_id / "metadata.json" + if meta.exists(): + return json.loads(meta.read_text()) + raise HTTPException(404, "Job introuvable") + + +@app.get("/api/download/{job_id}/{filename}") +async def download_ics( + job_id: str, filename: str, user: str = Depends(get_current_user) +): + # Sécurité : empêcher path traversal + filename = Path(filename).name + ics_path = DATA_DIR / "jobs" / job_id / filename + if not ics_path.exists(): + raise HTTPException(404, "Fichier introuvable") + return FileResponse( + ics_path, media_type="text/calendar", filename=filename, + headers={"Content-Disposition": f'attachment; filename="{filename}"'}, + ) + + +# ── Cache ───────────────────────────────────────────────────────────────────── +@app.get("/api/cache/status") +async def cache_status(user: str = Depends(get_current_user)): + cache_dir = DATA_DIR / "cache" + return { + "website_cached": (cache_dir / "website_catalog.json").exists(), + "series_cached": (cache_dir / "series_mapping.json").exists(), + } + + +@app.delete("/api/cache") +async def clear_cache(user: str = Depends(get_current_user)): + cache_dir = DATA_DIR / "cache" + deleted = [] + for name in ["website_catalog.json", "series_mapping.json"]: + p = cache_dir / name + if p.exists(): + p.unlink() + deleted.append(name) + return {"deleted": deleted} + + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/webapp/config.json b/webapp/config.json new file mode 100644 index 0000000..d9e4ac1 --- /dev/null +++ b/webapp/config.json @@ -0,0 +1,17 @@ +{ + "ollama": { + "url": "http://192.168.7.119:11434", + "cluster_model": "qwen3.5:cloud", + "local_model": "qwen3:8b" + }, + "site": { + "calendar_url": "https://www.opera-orchestre-montpellier.fr/calendrier/?saisons=32669", + "base_url": "https://www.opera-orchestre-montpellier.fr" + }, + "auth": { + "session_secret": "changez-cette-cle-secrete-en-production", + "users": [ + {"username": "admin", "password": "changeme"} + ] + } +} diff --git a/webapp/core.py b/webapp/core.py new file mode 100644 index 0000000..c046bc0 --- /dev/null +++ b/webapp/core.py @@ -0,0 +1,470 @@ +""" +core.py - Logique métier pour planning2ics web app. +Adapté de planning2ics.py pour usage web (config injectable, callback de progression). +""" + +import re +import json +import time as time_module +from pathlib import Path +from datetime import datetime, date, time, timedelta +from typing import Callable, Optional + +import pdfplumber +import requests +from bs4 import BeautifulSoup +from icalendar import Calendar, Event +from uuid import uuid4 + +MONTH_MAP = { + "JANV": 1, "JAN": 1, "JANVIER": 1, + "FEV": 2, "FEVR": 2, "FEVRIER": 2, + "MARS": 3, "MAR": 3, + "AVRIL": 4, "AVR": 4, + "MAI": 5, "JUIN": 6, + "JUIL": 7, "JUILLET": 7, + "AOUT": 8, "AOÛT": 8, + "SEPT": 9, "SEP": 9, "SEPTEMBRE": 9, + "OCT": 10, "OCTOBRE": 10, + "NOV": 11, "NOVEMBRE": 11, + "DEC": 12, "DÉC": 12, "DECEMBRE": 12, "DÉCEMBRE": 12, +} + +CONCERT_KEYWORDS = { + 'concert', 'représentation', 'générale publique', + 'raccord', 'italienne', 'scène orch' +} + + +# ── Utilitaires ─────────────────────────────────────────────────────────────── + +def normalize_note(note: str) -> str: + return re.sub(r'\s+', ' ', note).strip() + +def is_public_event(titre: str) -> bool: + t = titre.lower() + return any(k in t for k in CONCERT_KEYWORDS) + +def sanitize_filename(name: str) -> str: + clean = re.sub(r'[^\w\s\-éèêàùûîôç]', '', name, flags=re.UNICODE) + return clean.strip().replace(' ', '_')[:80] or 'SERIE_INCONNUE' + +def extract_year_month_from_filename(filename: str): + year_match = re.search(r'(\d{4})', filename) + year = int(year_match.group(1)) if year_match else 2026 + stem = Path(filename).stem.upper() + main_month = 1 + for key, val in MONTH_MAP.items(): + if key in stem: + main_month = val + break + return year, main_month + +def parse_date(date_str: str, main_year: int, main_month: int) -> Optional[date]: + try: + day, month = map(int, date_str.strip().split('/')) + if month > main_month + 3: + year = main_year - 1 + elif month < main_month - 3: + year = main_year + 1 + else: + year = main_year + return date(year, month, day) + except Exception: + return None + +def parse_time(s: str) -> Optional[time]: + m = re.match(r'(\d{1,2}):(\d{2})', s.strip()) + return time(int(m.group(1)), int(m.group(2))) if m else None + +def parse_horaires(s: str): + s = s.strip() + m = re.match(r'(\d{1,2}:\d{2})\s*[-–]\s*(\d{1,2}:\d{2})', s) + if m: + return parse_time(m.group(1)), parse_time(m.group(2)) + m = re.match(r'(\d{1,2}:\d{2})', s) + if m: + return parse_time(m.group(1)), None + return None, None + + +# ── Extraction PDF ──────────────────────────────────────────────────────────── + +def extract_events_from_pdf(pdf_path: Path) -> list: + events = [] + main_year, main_month = extract_year_month_from_filename(pdf_path.name) + current_date = None + + with pdfplumber.open(pdf_path) as pdf: + for page in pdf.pages: + for table in (page.extract_tables() or []): + for row in table: + if not row: + continue + cells = [str(c).strip() if c else '' for c in row] + if cells[0].lower() == 'jour' or len(cells) < 5: + continue + + date_str = cells[1] + horaires = cells[2] + titre = cells[3] + lieu = cells[4] + note = cells[5] if len(cells) > 5 else '' + dec = cells[6] if len(cells) > 6 else '' + voy = cells[7] if len(cells) > 7 else '' + + if date_str and re.match(r'\d{1,2}/\d{2}', date_str): + parsed = parse_date(date_str, main_year, main_month) + if parsed: + current_date = parsed + + if not current_date: + continue + if 'repos' in horaires.lower(): + continue + if not re.search(r'\d{1,2}:\d{2}', horaires): + continue + + start_time, end_time = parse_horaires(horaires) + if not start_time: + continue + + events.append({ + 'date': current_date, + 'horaires': horaires, + 'start_time': start_time, + 'end_time': end_time, + 'titre': titre, + 'lieu': lieu, + 'note': normalize_note(note), + 'dec': dec, + 'voy': voy, + 'source_file': pdf_path.name, + }) + return events + + +# ── Scraping site web ───────────────────────────────────────────────────────── + +def scrape_catalog(config: dict, cache_dir: Path, + log: Callable = None, force: bool = False) -> dict: + cache_file = cache_dir / "website_catalog.json" + cache_dir.mkdir(parents=True, exist_ok=True) + + if not force and cache_file.exists(): + if log: + log("Catalogue site web chargé depuis le cache") + with open(cache_file) as f: + return json.load(f) + + headers = {'User-Agent': 'Mozilla/5.0 (compatible; planning2ics/1.0)'} + calendar_url = config['site']['calendar_url'] + site_base = config['site']['base_url'] + + if log: + log("Scraping du site web de l'opéra...") + + resp = requests.get(calendar_url, headers=headers, timeout=30) + resp.raise_for_status() + soup = BeautifulSoup(resp.text, 'html.parser') + + event_links = {} + for a in soup.find_all('a', href=True): + href = a['href'] + if '/evenements/' in href and href.rstrip('/') != f'{site_base}/evenements': + full_url = href if href.startswith('http') else site_base + href + h3 = a.find('h3') + cat_tag = a.find('p') + title = h3.get_text(strip=True) if h3 else a.get_text(strip=True) + category = cat_tag.get_text(strip=True) if cat_tag else '' + if title and len(title) > 3: + event_links[title] = {'url': full_url, 'category': category} + + catalog = {} + total = len(event_links) + if log: + log(f"{total} événements trouvés sur le site, récupération des descriptions...") + + for i, (title, info) in enumerate(event_links.items()): + if log and i % 20 == 0: + log(f"Descriptions : {i}/{total}") + try: + r = requests.get(info['url'], headers=headers, timeout=20) + r.raise_for_status() + page_soup = BeautifulSoup(r.text, 'html.parser') + catalog[title] = { + 'url': info['url'], + 'description': _extract_description(page_soup), + 'category': info['category'], + } + time_module.sleep(0.2) + except Exception: + catalog[title] = { + 'url': info['url'], 'description': '', 'category': info['category'] + } + + with open(cache_file, 'w') as f: + json.dump(catalog, f, ensure_ascii=False, indent=2) + + if log: + log(f"Catalogue mis en cache : {len(catalog)} événements") + return catalog + + +def _extract_description(soup: BeautifulSoup) -> str: + for selector in ['div.wp-block-group', 'div.entry-content', 'article', 'main']: + container = soup.select_one(selector) + if container: + for tag in container.find_all(['nav', 'header', 'footer', 'button', 'form']): + tag.decompose() + lines = [ + l.strip() for l in container.get_text('\n', strip=True).splitlines() + if l.strip() and len(l.strip()) > 15 + ][:40] + if lines: + return '\n'.join(lines) + return soup.get_text('\n', strip=True)[:2000] + + +# ── LLM ─────────────────────────────────────────────────────────────────────── + +def _llm_call(prompt: str, ollama_url: str, model: str) -> str: + resp = requests.post( + f"{ollama_url}/api/chat", + json={ + "model": model, + "messages": [{"role": "user", "content": prompt}], + "stream": True, + "options": {"temperature": 0.05, "num_predict": 16384}, + "think": False, + }, + stream=True, + timeout=600, + ) + resp.raise_for_status() + content = "" + for line in resp.iter_lines(): + if line: + chunk = json.loads(line) + content += chunk.get('message', {}).get('content', '') + if chunk.get('done'): + break + return content + + +def _apply_parallel_heuristic(note: str, catalog: dict) -> Optional[str]: + m = re.match(r"^\([AB]'?\)\s*:\s*[\"']?(.+?)[\"']?\s*$", note, re.IGNORECASE) + if not m: + return None + inner = m.group(1).strip().lower() + for title in catalog: + if inner in title.lower() or title.lower() in inner: + return title + return m.group(1).strip().strip('"\'') + + +def cluster_notes_global(unique_notes: set, catalog: dict, config: dict, + cache_dir: Path, log: Callable = None, + force: bool = False) -> dict: + cache_file = cache_dir / "series_mapping.json" + cache_dir.mkdir(parents=True, exist_ok=True) + + if not force and cache_file.exists(): + if log: + log("Mapping des séries chargé depuis le cache") + with open(cache_file) as f: + return json.load(f) + + catalog_titles = sorted(catalog.keys()) + titles_list = '\n'.join(f'- "{t}"' for t in catalog_titles) + notes_list = '\n'.join(f'- {repr(n)}' for n in sorted(unique_notes) if n.strip()) + + prompt = f"""Tu analyses le planning interne de l'Opéra Orchestre National Montpellier. + +Voici les titres OFFICIELS des événements de la saison (depuis le site web) : +{titles_list} + +Voici toutes les notes du planning interne (certaines sont des variantes de la même série) : +{notes_list} + +Ta tâche : associer CHAQUE note à UN titre officiel. +Règles IMPORTANTES : +1. Les notes listant les mêmes compositeurs (ordre ou sous-titres différents) → MÊME série +2. Les préfixes "(A) :", "(B) :", "(A') :", "(B') :" → séries PARALLÈLES DIFFÉRENTES + Ex: '(A) : "Magdalena"' → "Magdalena" ; '(B) : "Élémentaire"' → "Élémentaire, mon cher !" +3. Les annotations entre parenthèses (captation, présence de...) ne changent PAS la série +4. Les répétitions partielles (Cordes, Vents...) = même série que le Tutti + +Réponds UNIQUEMENT avec un JSON valide, sans texte autour : +{{ + "matches": {{ + "note exacte telle quelle": "Titre Officiel du Site", + ... + }} +}}""" + + model = config['ollama']['cluster_model'] + if log: + log(f"Identification des séries avec l'IA ({model})...") + + content = _llm_call(prompt, config['ollama']['url'], model) + + json_match = re.search(r'\{[\s\S]*\}', content) + if not json_match: + raise ValueError("Pas de JSON dans la réponse LLM") + + raw = json_match.group() + try: + result = json.loads(raw).get('matches', {}) + except json.JSONDecodeError: + result = {} + for m in re.finditer(r'"((?:[^"\\]|\\.)*)"\s*:\s*"((?:[^"\\]|\\.)*)"', raw): + result[m.group(1)] = m.group(2) + + with open(cache_file, 'w') as f: + json.dump(result, f, ensure_ascii=False, indent=2) + + if log: + log(f"{len(result)} notes associées à des séries") + return result + + +def match_notes_to_series(unique_notes: set, catalog: dict, config: dict, + cache_dir: Path, log: Callable = None, + force_series: bool = False) -> dict: + note_to_series = cluster_notes_global( + unique_notes, catalog, config, cache_dir, log, force_series + ) + # Heuristique (A)/(B) pour les non-assignés + for note in unique_notes: + if note not in note_to_series and note.strip(): + r = _apply_parallel_heuristic(note, catalog) + if r: + note_to_series[note] = r + + # Retry local pour les notes restantes + still_missing = [n for n in unique_notes if n.strip() and n not in note_to_series] + if still_missing: + if log: + log(f"Retry pour {len(still_missing)} notes non assignées...") + titles_str = '\n'.join(f'- "{t}"' for t in sorted(catalog.keys())) + notes_str = '\n'.join(f'- {repr(n)}' for n in still_missing) + prompt = ( + f"Associe ces notes à des titres officiels.\n" + f"Titres:\n{titles_str}\nNotes:\n{notes_str}\n" + f'Réponds UNIQUEMENT avec JSON: {{"matches": {{"note": "Titre"}}}}' + ) + content = _llm_call(prompt, config['ollama']['url'], config['ollama']['local_model']) + j = re.search(r'\{[\s\S]*\}', content) + if j: + try: + note_to_series.update(json.loads(j.group()).get('matches', {})) + except Exception: + pass + + return note_to_series + + +# ── Génération ICS ──────────────────────────────────────────────────────────── + +def _build_description(evt: dict, series_title: str, catalog: dict) -> str: + lines = [] + if is_public_event(evt['titre']): + desc = catalog.get(series_title, {}).get('description', '') + lines.append(desc[:1500] if desc else f"Programme : {evt['note']}") + else: + if evt['note']: + lines.append(f"Œuvres : {evt['note']}") + lines.append(f"Type : {evt['titre']}") + if evt['dec']: + lines.append(f"Durée déclarée : {evt['dec']}") + if evt['voy']: + lines.append(f"Déplacement : {evt['voy']}h de trajet") + lines.append(f"Source : {evt['source_file']}") + return '\n'.join(lines) + + +def _create_ics_bytes(series_title: str, events: list, catalog: dict) -> bytes: + cal = Calendar() + cal.add('prodid', '-//Opéra Orchestre National Montpellier//planning2ics//FR') + cal.add('version', '2.0') + cal.add('x-wr-calname', series_title) + cal.add('x-wr-timezone', 'Europe/Paris') + + for evt in sorted(events, key=lambda e: (e['date'], e['start_time'])): + vevent = Event() + start_dt = datetime.combine(evt['date'], evt['start_time']) + vevent.add('dtstart', start_dt) + + if evt['end_time']: + end_dt = datetime.combine(evt['date'], evt['end_time']) + else: + dec_m = re.match(r'(\d{1,2}):(\d{2})', evt['dec']) + duration = ( + timedelta(hours=int(dec_m.group(1)), minutes=int(dec_m.group(2))) + if dec_m else timedelta(hours=2) + ) + end_dt = start_dt + duration + + vevent.add('dtend', end_dt) + vevent.add('summary', f"{evt['titre']} – {series_title}") + if evt['lieu']: + vevent.add('location', evt['lieu']) + vevent.add('description', _build_description(evt, series_title, catalog)) + vevent.add('uid', str(uuid4()) + '@planning-orchestre') + cal.add_component(vevent) + + return cal.to_ical() + + +# ── Point d'entrée principal ────────────────────────────────────────────────── + +def process_pdfs(pdf_paths: list, config: dict, data_dir: Path, + log: Callable = None) -> dict: + """ + Traite une liste de PDFs. + Retourne {series_title: {filename, bytes, event_count}}. + """ + cache_dir = data_dir / "cache" + + # 1. Extraction + if log: + log(f"Extraction de {len(pdf_paths)} PDF(s)...") + all_events = [] + for i, pdf_path in enumerate(pdf_paths): + if log: + log(f"Extraction {i+1}/{len(pdf_paths)} : {pdf_path.name}") + all_events.extend(extract_events_from_pdf(pdf_path)) + if log: + log(f"{len(all_events)} événements extraits au total") + + # 2. Catalogue site web + catalog = scrape_catalog(config, cache_dir, log) + + # 3. Identification des séries + unique_notes = {e['note'] for e in all_events} + if log: + log(f"{len(unique_notes)} notes uniques à analyser...") + note_to_series = match_notes_to_series(unique_notes, catalog, config, cache_dir, log) + + # 4. Groupement et génération ICS + series_events: dict[str, list] = {} + for evt in all_events: + s = note_to_series.get(evt['note']) + if s: + series_events.setdefault(s, []).append(evt) + + if log: + log(f"Génération de {len(series_events)} fichiers ICS...") + + result = {} + for series_title, events in series_events.items(): + result[series_title] = { + 'filename': sanitize_filename(series_title) + '.ics', + 'bytes': _create_ics_bytes(series_title, events, catalog), + 'event_count': len(events), + } + + if log: + log(f"Terminé : {len(result)} séries générées") + return result diff --git a/webapp/docker-compose.yml b/webapp/docker-compose.yml new file mode 100644 index 0000000..39036ca --- /dev/null +++ b/webapp/docker-compose.yml @@ -0,0 +1,35 @@ +version: '3.8' + +# Déploiement Docker Swarm +# 1. Construire l'image : docker build -t planning2ics:latest ./webapp +# 2. Déployer : docker stack deploy -c webapp/docker-compose.yml planning2ics + +services: + app: + image: planning2ics:latest + ports: + - "8080:8000" + volumes: + # Données persistantes (cache, jobs, uploads) + - planning_data:/app/data + # Config montée en lecture seule — éditez config.json sur l'hôte + - ./config.json:/app/config.json:ro + environment: + - TZ=Europe/Paris + deploy: + replicas: 1 + restart_policy: + condition: on-failure + delay: 5s + max_attempts: 3 + update_config: + parallelism: 1 + delay: 10s + failure_action: rollback + resources: + limits: + memory: 512M + +volumes: + planning_data: + driver: local diff --git a/webapp/requirements.txt b/webapp/requirements.txt new file mode 100644 index 0000000..dfed138 --- /dev/null +++ b/webapp/requirements.txt @@ -0,0 +1,7 @@ +fastapi==0.115.5 +uvicorn[standard]==0.32.1 +python-multipart==0.0.12 +pdfplumber==0.11.4 +icalendar==6.0.1 +requests==2.32.3 +beautifulsoup4==4.12.3 diff --git a/webapp/static/app.js b/webapp/static/app.js new file mode 100644 index 0000000..54c3fa8 --- /dev/null +++ b/webapp/static/app.js @@ -0,0 +1,315 @@ +'use strict'; + +// ── Helpers API ─────────────────────────────────────────────────────────────── +async function api(method, path, body = null) { + const isForm = body instanceof FormData; + const res = await fetch(path, { + method, + credentials: 'include', + headers: (!isForm && body) ? {'Content-Type': 'application/json'} : {}, + body: body ? (isForm ? body : JSON.stringify(body)) : undefined, + }); + if (res.status === 401) { showPage('login'); return null; } + if (!res.ok) { + const err = await res.json().catch(() => ({ detail: res.statusText })); + throw new Error(err.detail || 'Erreur serveur'); + } + return res.json(); +} + +function esc(s) { + return String(s) + .replace(/&/g, '&').replace(//g, '>').replace(/"/g, '"'); +} + +function fmtDate(iso) { + try { + return new Date(iso).toLocaleString('fr-FR', { + day: '2-digit', month: '2-digit', year: 'numeric', + hour: '2-digit', minute: '2-digit', + }); + } catch { return iso; } +} + +// ── Pages ───────────────────────────────────────────────────────────────────── +function showPage(name) { + document.getElementById('page-login').classList.toggle('hidden', name !== 'login'); + document.getElementById('page-app').classList.toggle('hidden', name !== 'app'); +} + +// ── Auth ────────────────────────────────────────────────────────────────────── +async function checkAuth() { + const me = await api('GET', '/api/auth/me'); + if (!me) return; + document.getElementById('header-user').textContent = me.username; + showPage('app'); + loadAll(); +} + +document.getElementById('form-login').addEventListener('submit', async e => { + e.preventDefault(); + const errEl = document.getElementById('login-error'); + errEl.classList.add('hidden'); + try { + const res = await fetch('/api/auth/login', { + method: 'POST', credentials: 'include', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + username: document.getElementById('username').value, + password: document.getElementById('password').value, + }), + }); + if (!res.ok) { + const err = await res.json(); + errEl.textContent = err.detail || 'Identifiants incorrects'; + errEl.classList.remove('hidden'); + return; + } + const data = await res.json(); + document.getElementById('header-user').textContent = data.username; + showPage('app'); + loadAll(); + } catch (err) { + errEl.textContent = err.message; + errEl.classList.remove('hidden'); + } +}); + +document.getElementById('btn-logout').addEventListener('click', async () => { + await api('POST', '/api/auth/logout'); + showPage('login'); +}); + +// ── Init globale ────────────────────────────────────────────────────────────── +async function loadAll() { + await Promise.all([loadConfig(), loadHistory(), loadCacheStatus()]); +} + +// ── Config ──────────────────────────────────────────────────────────────────── +async function loadConfig() { + const cfg = await api('GET', '/api/config'); + if (!cfg) return; + document.getElementById('cfg-ollama-url').textContent = cfg.ollama_url; + document.getElementById('cfg-cluster-model').textContent = cfg.cluster_model; +} + +// ── Upload ──────────────────────────────────────────────────────────────────── +let selectedFiles = []; + +const dropZone = document.getElementById('drop-zone'); +const fileInput = document.getElementById('file-input'); + +dropZone.addEventListener('click', () => fileInput.click()); +dropZone.addEventListener('dragover', e => { + e.preventDefault(); dropZone.classList.add('drag-over'); +}); +dropZone.addEventListener('dragleave', () => dropZone.classList.remove('drag-over')); +dropZone.addEventListener('drop', e => { + e.preventDefault(); dropZone.classList.remove('drag-over'); + addFiles(Array.from(e.dataTransfer.files).filter(f => f.name.toLowerCase().endsWith('.pdf'))); +}); +fileInput.addEventListener('change', e => { + addFiles(Array.from(e.target.files)); + fileInput.value = ''; +}); + +function addFiles(newFiles) { + const existing = new Set(selectedFiles.map(f => f.name)); + newFiles.forEach(f => { if (!existing.has(f.name)) selectedFiles.push(f); }); + renderFileList(); +} + +function renderFileList() { + const listEl = document.getElementById('file-list'); + const actionsEl = document.getElementById('upload-actions'); + if (!selectedFiles.length) { + listEl.classList.add('hidden'); + actionsEl.classList.add('hidden'); + return; + } + listEl.classList.remove('hidden'); + actionsEl.classList.remove('hidden'); + listEl.innerHTML = selectedFiles.map((f, i) => ` +
Aucun traitement pr\u00e9c\u00e9dent
'; + return; + } + listEl.innerHTML = jobs.slice(0, 15).map(j => ` +Opéra Orchestre National Montpellier
+ +