Rapports et gestion des erreurs

- skills/reporting.py : REPORT: / REPORT_ERRORS: avec historique SQLite
- skills/delegate.py : log des exécutions + détection erreurs + notification MQTT
- skills/schedule_tasks.py : log des tâches planifiées
- agent1.py : abonnement agents/errors + agents/scheduler/notifications → alerte XMPP
- cli.py : commandes /report et /errors
- system_prompt.txt : REPORT: et REPORT_ERRORS: ajoutés

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 13:20:32 +00:00
parent 305999d8bf
commit 144f481320
6 changed files with 245 additions and 23 deletions
+41 -3
View File
@@ -102,6 +102,37 @@ def on_mqtt_message(client, userdata, msg):
mqtt_publish(reply_to, reply)
print("[MQTT] Réponse envoyée sur {}".format(reply_to))
def on_mqtt_error(client, userdata, msg):
    """Relay an agent error received over MQTT to the administrator via XMPP.

    The payload is decoded as JSON and its ``source``, ``agent``, ``task`` and
    ``error`` entries are formatted into one alert. The alert is always
    printed, and is sent to ``ADMIN_JID`` when ``xmpp_bot`` is set.

    Args:
        client: MQTT client that received the message (unused).
        userdata: User data attached to the client (unused).
        msg: Received message; only ``msg.payload`` is read.
    """
    try:
        data = json.loads(msg.payload.decode(errors="replace"))

        def text(key):
            # A missing key or a JSON null falls back to "?"; any other value
            # is stringified so that slicing and .upper() cannot raise and
            # silently drop the alert.
            value = data.get(key)
            return "?" if value is None else str(value)

        notif = "[ERREUR][{}] Agent : {}\nTâche : {}\nErreur : {}".format(
            text("source").upper(), text("agent"),
            text("task")[:100], text("error")[:300])
        print(notif)
        if xmpp_bot:
            xmpp_bot.send_message(mto=ADMIN_JID, mbody=notif, mtype='chat')
    except Exception as e:
        # Best effort: a malformed payload must never kill the MQTT loop.
        print("[MQTT] Erreur parsing notification : {}".format(e))
def on_mqtt_notification(client, userdata, msg):
    """Handle a scheduler notification received over MQTT.

    The payload is decoded as JSON. Only notifications whose ``status`` is
    ``"error"`` are forwarded to ``ADMIN_JID`` over XMPP, and only when
    ``xmpp_bot`` is set; everything else is ignored.

    Args:
        client: MQTT client that received the message (unused).
        userdata: User data attached to the client (unused).
        msg: Received message; only ``msg.payload`` is read.
    """
    try:
        data = json.loads(msg.payload.decode(errors="replace"))

        def text(key):
            # A missing key or a JSON null falls back to "?"; other values are
            # stringified so that slicing cannot raise on non-string fields.
            value = data.get(key)
            return "?" if value is None else str(value)

        status = text("status")
        # Only failed scheduled tasks are forwarded to XMPP.
        if status != "error" or not xmpp_bot:
            return
        notif = "[PLANIF ERREUR] {} | {}{}\nStatut : {}".format(
            text("timestamp"), text("agent"), text("task")[:80], status)
        xmpp_bot.send_message(mto=ADMIN_JID, mbody=notif, mtype='chat')
    except Exception as e:
        # Best effort: a malformed payload must never kill the MQTT loop.
        print("[MQTT] Erreur parsing notification scheduler : {}".format(e))
def start_mqtt_listener():
global mqtt_pub_client
@@ -111,10 +142,17 @@ def start_mqtt_listener():
mqtt_pub_client.loop_start()
sub = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2, client_id="agent1_sub")
sub.on_message = on_mqtt_message
sub.message_callback_add("agents/agent1/inbox", on_mqtt_message)
sub.message_callback_add("agents/errors", on_mqtt_error)
sub.message_callback_add("agents/scheduler/notifications", on_mqtt_notification)
sub.on_message = on_mqtt_message # fallback
sub.connect(MQTT_HOST, MQTT_PORT)
sub.subscribe(MQTT_INBOX)
print("[MQTT] Agent1 écoute sur {}".format(MQTT_INBOX))
sub.subscribe([
(MQTT_INBOX, 0),
("agents/errors", 0),
("agents/scheduler/notifications", 0),
])
print("[MQTT] Agent1 écoute sur {}, agents/errors, agents/scheduler/notifications".format(MQTT_INBOX))
sub.loop_forever()
# ── BOT XMPP ─────────────────────────────────────────────────────────────
+13 -1
View File
@@ -115,7 +115,7 @@ def main_loop(agent: str):
connect_mqtt()
console.print(Rule("[bold blue]Agent CLI[/bold blue]"))
console.print("[dim]Agent cible : [bold]{}[/bold] | /reset | /plans | /quit[/dim]\n".format(agent))
console.print("[dim]Agent cible : [bold]{}[/bold] | /reset | /plans | /report | /errors | /agent <nom> | /quit[/dim]\n".format(agent))
while True:
try:
@@ -142,6 +142,18 @@ def main_loop(agent: str):
print_response("plans", result)
continue
if user_input == "/report":
with console.status("[bold yellow]Génération rapport...[/bold yellow]"):
result = send_and_wait("agent1", "REPORT:", timeout=30)
print_response("rapport", result)
continue
if user_input == "/errors":
with console.status("[bold yellow]Récupération erreurs...[/bold yellow]"):
result = send_and_wait("agent1", "REPORT_ERRORS:", timeout=30)
print_response("erreurs", result)
continue
if user_input.startswith("/agent "):
agent = user_input.split(" ", 1)[1].strip()
console.print("[dim]Agent changé : [bold]{}[/bold][/dim]".format(agent))
+12
View File
@@ -27,6 +27,18 @@ PLAN_LIST:
PLAN_CANCEL: <job_id>
→ Annuler une tâche planifiée
REPORT:
→ Rapport complet des 20 dernières exécutions de tous les agents
REPORT: <agent>
→ Rapport filtré pour un agent spécifique
REPORT_ERRORS:
→ Afficher uniquement les erreurs récentes
REPORT_ERRORS: <agent>
→ Erreurs d'un agent spécifique
SEARCH: <requête>
→ Recherche web DuckDuckGo
+49 -10
View File
@@ -1,12 +1,10 @@
"""
Skill : DELEGATE
Délègue une tâche à un agent spécialisé via MQTT et attend sa réponse.
Log le résultat et notifie agent1 en cas d'erreur.
Commande :
DELEGATE: <agent_name> | <tâche>
Exemple :
DELEGATE: agent2_debian13 | Comment installer Docker sur Debian 13 ?
"""
import json
import time
@@ -19,13 +17,39 @@ TRIGGER = "DELEGATE:"
CONFIG_FILE = Path("/opt/agent/config/config.json")
REGISTRY_FILE = Path("/opt/agent/config/agents_registry.json")
TIMEOUT = 120 # secondes max d'attente
TIMEOUT = 120
# Mots-clés indiquant une erreur dans la réponse d'un agent
ERROR_KEYWORDS = ["erreur", "error", "timeout", "échec", "failed", "cannot", "permission denied",
"command not found", "no such file", "connexion refusée"]
def _load():
    """Read and parse the two JSON configuration files.

    Returns:
        A ``(cfg, registry)`` tuple: the parsed content of ``CONFIG_FILE``
        followed by the parsed content of ``REGISTRY_FILE``.
    """
    # The config file is read first, then the registry.
    return tuple(
        json.loads(path.read_text())
        for path in (CONFIG_FILE, REGISTRY_FILE)
    )
def _is_error(result: str) -> bool:
    """Tell whether an agent response looks like a failure.

    The check is a case-insensitive substring search for ``"[erreur"``,
    ``"exit code"`` or any entry of ``ERROR_KEYWORDS``.

    Args:
        result: Raw text returned by the agent.

    Returns:
        True when at least one marker occurs in ``result``.
    """
    haystack = result.lower()
    markers = ("[erreur", "exit code", *ERROR_KEYWORDS)
    return any(marker in haystack for marker in markers)
def _notify_error(host: str, port: int, agent: str, task: str, result: str):
    """Publish an error report on the ``agents/errors`` MQTT topic.

    The report is a JSON object with the keys ``agent``, ``task`` (first 200
    characters), ``error`` (first 500 characters) and ``source`` (always
    ``"delegate"``).

    This is best effort: any failure is logged and swallowed so that the
    caller's own result is never replaced by a notification problem.

    Args:
        host: MQTT broker host name.
        port: MQTT broker port.
        agent: Name of the agent the task was delegated to.
        task: Text of the delegated task.
        result: Response (or timeout message) considered an error.
    """
    try:
        payload = json.dumps({
            "agent": agent,
            "task": task[:200],
            "error": result[:500],
            "source": "delegate",
        })
        pub = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2, client_id="delegate_err_pub")
        pub.connect(host, port)
        try:
            pub.publish("agents/errors", payload)
        finally:
            # Always release the broker connection, even if publish() raised.
            pub.disconnect()
    except Exception as e:
        # Do not fail silently: a broken error channel must be visible in logs.
        print("[DELEGATE] Notification d'erreur impossible : {}".format(e))
def execute(args: str) -> str:
if "|" not in args:
return "Erreur : format attendu → DELEGATE: <agent> | <tâche>"
@@ -44,21 +68,19 @@ def execute(args: str) -> str:
host = cfg.get("mqtt_host", "localhost")
port = int(cfg.get("mqtt_port", 1883))
response_received = threading.Event()
response_received = threading.Event()
response_container = []
def on_message(client, userdata, msg):
response_container.append(msg.payload.decode(errors="replace"))
response_received.set()
# Souscription à la réponse
sub = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2, client_id="agent1_delegate_sub")
sub.on_message = on_message
sub.connect(host, port)
sub.subscribe(outbox)
sub.loop_start()
# Envoi de la tâche
pub = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2, client_id="agent1_delegate_pub")
pub.connect(host, port)
pub.publish(inbox, task)
@@ -66,11 +88,28 @@ def execute(args: str) -> str:
print("[DELEGATE] Tâche envoyée à {} : {}".format(agent_name, task[:80]))
# Attente de la réponse
start = time.time()
received = response_received.wait(timeout=TIMEOUT)
duration = time.time() - start
sub.loop_stop()
sub.disconnect()
from skills.reporting import log_execution
if received and response_container:
return "[{}] {}".format(agent_name, response_container[0])
return "Timeout : {} n'a pas répondu dans les {}s.".format(agent_name, TIMEOUT)
result = response_container[0]
status = "error" if _is_error(result) else "success"
log_execution("delegate", agent_name, task, status, result, duration)
if status == "error":
_notify_error(host, port, agent_name, task, result)
print("[DELEGATE] Erreur détectée dans la réponse de {}".format(agent_name))
return "[{}] {}".format(agent_name, result)
# Timeout
timeout_msg = "Timeout : {} n'a pas répondu dans les {}s.".format(agent_name, TIMEOUT)
log_execution("delegate", agent_name, task, "timeout", timeout_msg, duration)
_notify_error(host, port, agent_name, task, timeout_msg)
return timeout_msg
+114
View File
@@ -0,0 +1,114 @@
"""
Skill : REPORT / REPORT_ERRORS
Historique des exécutions et des erreurs de tous les agents.
Commandes :
REPORT: → rapport complet des 20 dernières exécutions
REPORT: <agent> → rapport filtré par agent
REPORT_ERRORS: → uniquement les erreurs récentes
REPORT_ERRORS: <agent> → erreurs d'un agent spécifique
"""
import sqlite3
from pathlib import Path
from datetime import datetime
SKILL_NAME = "reporting"
TRIGGER = None
TRIGGERS = {
"REPORT:": "report",
"REPORT_ERRORS:": "report_errors",
}
DB_PATH = Path("/opt/agent/executions.db")
def _get_conn():
    """Open the executions database and make sure its table exists.

    Connects to ``DB_PATH``, runs ``CREATE TABLE IF NOT EXISTS`` for the
    ``executions`` table, commits, and hands the open connection back.

    Returns:
        An open ``sqlite3.Connection``; closing it is up to the caller.
    """
    schema = """
CREATE TABLE IF NOT EXISTS executions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TEXT NOT NULL,
source TEXT NOT NULL,
agent TEXT NOT NULL,
task TEXT NOT NULL,
status TEXT NOT NULL,
result TEXT,
duration_s REAL
)
"""
    db = sqlite3.connect(DB_PATH)
    db.execute(schema)
    db.commit()
    return db
def log_execution(source: str, agent: str, task: str,
                  status: str, result: str, duration_s: float = 0.0):
    """Record one execution in the ``executions`` table.

    Never raises: any failure is printed and swallowed so that logging cannot
    break the caller.

    Args:
        source: Origin of the execution (callers in this file set pass
            ``"delegate"`` or ``"schedule"``).
        agent: Name of the agent that ran the task.
        task: Task text; only the first 200 characters are stored.
        status: Outcome label, e.g. ``"success"``, ``"error"``, ``"timeout"``.
        result: Agent output; only the first 1000 characters are stored and a
            falsy value is stored as an empty string.
        duration_s: Elapsed time in seconds, rounded to 2 decimals.
    """
    from contextlib import closing

    try:
        # sqlite3's own context manager only commits/rolls back; closing()
        # is what actually releases the connection after each call.
        with closing(_get_conn()) as conn:
            with conn:
                conn.execute("""
INSERT INTO executions (timestamp, source, agent, task, status, result, duration_s)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", (
                    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    source,
                    agent,
                    # Same falsy guard as for result, so a None task does not
                    # lose the whole log row.
                    task[:200] if task else "",
                    status,
                    result[:1000] if result else "",
                    round(duration_s, 2),
                ))
    except Exception as e:
        print("[Reporting] Erreur log : {}".format(e))
def _format_rows(rows) -> str:
if not rows:
return "Aucune exécution trouvée."
lines = []
for r in rows:
id_, ts, source, agent, task, status, result, dur = r
icon = "" if status == "success" else ""
lines.append("{} [{}] {} | {}{} ({:.1f}s)".format(
icon, ts, agent, task[:50], status, dur or 0))
if status != "success" and result:
lines.append("{}".format(result[:150]))
return "\n".join(lines)
def report(args: str) -> str:
    """Build the report of the 20 most recent executions.

    Args:
        args: Optional agent name. When non-blank, only rows for that agent
            are returned; otherwise rows of all agents are returned.

    Returns:
        A header line followed by the formatted rows, or a message starting
        with ``"Erreur REPORT :"`` if the query fails.
    """
    from contextlib import closing

    agent_filter = (args or "").strip() or None
    try:
        # sqlite3's own context manager does not close the connection;
        # closing() releases it once the rows have been fetched.
        with closing(_get_conn()) as conn:
            if agent_filter:
                rows = conn.execute("""
SELECT id, timestamp, source, agent, task, status, result, duration_s
FROM executions WHERE agent = ? ORDER BY id DESC LIMIT 20
""", (agent_filter,)).fetchall()
            else:
                rows = conn.execute("""
SELECT id, timestamp, source, agent, task, status, result, duration_s
FROM executions ORDER BY id DESC LIMIT 20
""").fetchall()
        header = "Rapport d'exécution{} (20 dernières) :".format(
            " [{}]".format(agent_filter) if agent_filter else "")
        return header + "\n" + _format_rows(rows)
    except Exception as e:
        return "Erreur REPORT : {}".format(e)
def report_errors(args: str) -> str:
    """Build the report of the 20 most recent non-successful executions.

    Args:
        args: Optional agent name. When non-blank, only rows for that agent
            are returned; otherwise rows of all agents are returned.

    Returns:
        A header line followed by the formatted rows whose status is not
        ``'success'``, or a message starting with ``"Erreur REPORT_ERRORS :"``
        if the query fails.
    """
    from contextlib import closing

    agent_filter = (args or "").strip() or None
    try:
        # sqlite3's own context manager does not close the connection;
        # closing() releases it once the rows have been fetched.
        with closing(_get_conn()) as conn:
            if agent_filter:
                rows = conn.execute("""
SELECT id, timestamp, source, agent, task, status, result, duration_s
FROM executions WHERE status != 'success' AND agent = ?
ORDER BY id DESC LIMIT 20
""", (agent_filter,)).fetchall()
            else:
                rows = conn.execute("""
SELECT id, timestamp, source, agent, task, status, result, duration_s
FROM executions WHERE status != 'success'
ORDER BY id DESC LIMIT 20
""").fetchall()
        header = "Erreurs{} (20 dernières) :".format(
            " [{}]".format(agent_filter) if agent_filter else "")
        return header + "\n" + _format_rows(rows)
    except Exception as e:
        return "Erreur REPORT_ERRORS : {}".format(e)
+16 -9
View File
@@ -47,23 +47,30 @@ def _get_scheduler():
def _run_delegated_task(agent: str, task: str):
"""Exécutée par le scheduler : délègue la tâche à l'agent."""
import time as _time
from skills.delegate import execute as delegate_exec
from skills.reporting import log_execution
import paho.mqtt.publish as publish
import json as _json
start = _time.time()
result = delegate_exec("{} | {}".format(agent, task))
print("[SCHEDULE] Tâche exécutée [{}{}] : {}".format(
datetime.now().strftime("%Y-%m-%d %H:%M"), agent, task[:60]))
duration = _time.time() - start
ts = datetime.now().strftime("%Y-%m-%d %H:%M")
# Notifier via MQTT sur le topic de notification
status = "error" if "erreur" in result.lower() or "timeout" in result.lower() else "success"
log_execution("schedule", agent, task, status, result, duration)
print("[SCHEDULE] Tâche exécutée [{}{}] statut={} : {}".format(ts, agent, status, task[:60]))
# Notifier via MQTT
try:
cfg = _json.loads(Path("/opt/agent/config/config.json").read_text())
publish.single(
"agents/scheduler/notifications",
payload="[{}] {}\n{}".format(agent, task, result),
hostname=cfg.get("mqtt_host", "localhost"),
port=int(cfg.get("mqtt_port", 1883))
)
host = cfg.get("mqtt_host", "localhost")
port = int(cfg.get("mqtt_port", 1883))
payload = _json.dumps({"agent": agent, "task": task, "status": status,
"result": result[:500], "timestamp": ts})
publish.single("agents/scheduler/notifications", payload=payload,
hostname=host, port=port)
except Exception:
pass