diff --git a/__pycache__/agent_logwatch.cpython-313.pyc b/__pycache__/agent_logwatch.cpython-313.pyc index ed5b8ec..ee74424 100644 Binary files a/__pycache__/agent_logwatch.cpython-313.pyc and b/__pycache__/agent_logwatch.cpython-313.pyc differ diff --git a/agent_logwatch.py b/agent_logwatch.py index 335c285..f2529fe 100644 --- a/agent_logwatch.py +++ b/agent_logwatch.py @@ -21,33 +21,63 @@ from agents_core import BaseAgent, AgentContext, Message, MessageType logger = logging.getLogger(__name__) # ─── Pré-filtres sans LLM ──────────────────────────────────────────────────── +# Tier 1 — signaux critiques (mots-clés uppercase exacts, très peu de faux positifs) +# Tier 2 — patterns contextuels précis (évite les faux positifs du lowercase générique) FILTER_PATTERNS = [ - re.compile(r'\b(ERROR|CRITICAL|FATAL|PANIC|EMERG|ALERT|CRIT)\b'), - re.compile(r'\bException\b|\bTraceback\b|\bTraceback \(most recent'), + # Tier 1 : mots-clés uppercase — très fiables + re.compile(r'\b(EMERG|ALERT|CRIT|CRITICAL|FATAL|PANIC)\b'), + re.compile(r'\bERROR\b'), # uppercase uniquement + re.compile(r'\bException\b|\bTraceback\b'), # Python/Java + re.compile(r'<[0-3]>'), # syslog prio 0-3 + + # Tier 2 : patterns précis avec contexte re.compile(r'\bsegfault\b|\bSegmentation fault\b', re.IGNORECASE), - re.compile(r'\bout of memory\b|\bOOM killer\b|\bOOM-killer\b', re.IGNORECASE), - re.compile(r'\b(failed|failure)\b', re.IGNORECASE), - re.compile(r'\bkilled\b', re.IGNORECASE), - re.compile(r'\b(BUG|Oops):\s'), - re.compile(r'<[0-3]>'), # syslog priorities 0=emerg, 1=alert, 2=crit, 3=err + re.compile(r'\bout of memory\b|\bOOM[ -]killer\b', re.IGNORECASE), re.compile(r'\bcore dumped\b', re.IGNORECASE), - re.compile(r'\bpanic\b', re.IGNORECASE), - re.compile(r'\bdenied\b.*\bpermission\b|\bpermission\b.*\bdenied\b', re.IGNORECASE), - re.compile(r'\bauthentication failure\b|\bfailed login\b|\bfailed password\b', re.IGNORECASE), - re.compile(r'\bdisk full\b|\bno space left\b', re.IGNORECASE), - re.compile(r'\bconnection refused\b|\bconnection timed out\b', re.IGNORECASE), - re.compile(r'\bssh.*invalid user\b|\binvalid user.*ssh\b', re.IGNORECASE), + re.compile(r'\b(BUG|Oops):\s'), # kernel bugs + + # systemd : "Failed to start X" ou "failed with result" + re.compile(r'systemd.*:\s+Failed\b', re.IGNORECASE), + re.compile(r'\bfailed with result\b', re.IGNORECASE), + re.compile(r'\.service.*failed\b', re.IGNORECASE), + + # kernel : OOM kill, panic noyau + re.compile(r'kernel:.*[Kk]ill\b.*\bprocess\b'), + re.compile(r'kernel:.*[Pp]anic\b'), + + # Authentification : patterns précis, pas juste "failed" + re.compile(r'\bauthentication failure\b', re.IGNORECASE), + re.compile(r'\bFailed password\b|\bFailed publickey\b'), # sshd exact + re.compile(r'\bInvalid user\b'), # sshd exact + + # Disque / espace + re.compile(r'\bno space left on device\b', re.IGNORECASE), + re.compile(r'\bdisk full\b', re.IGNORECASE), + + # Réseau : refus explicite (pas les retries normaux) + re.compile(r'\bconnection refused\b', re.IGNORECASE), +] + +# Lignes à exclure même si un pattern matche (bruit connu) +EXCLUDE_PATTERNS = [ + re.compile(r'\bsystemd\b.*\bStarted\b', re.IGNORECASE), + re.compile(r'\bLogWatch\b', re.IGNORECASE), # éviter de s'auto-analyser + re.compile(r'^\s*$'), ] SEVERITY_RANK = { 'EMERG': 0, 'ALERT': 1, 'CRIT': 2, 'CRITICAL': 2, 'FATAL': 2, 'PANIC': 2, - 'ERROR': 3, 'ERR': 3, - 'FAILED': 4, 'FAILURE': 4, 'DENIED': 4, + 'ERROR': 3, + 'FAILED': 4, 'FAILURE': 4, 'EXCEPTION': 5, 'TRACEBACK': 5, - 'KILLED': 6, 'OOM': 6, 'SEGFAULT': 6, 'CORE': 6, + 'OOM': 6, 'SEGFAULT': 6, 'CORE': 6, + 'INVALID USER': 7, 'AUTH': 7, } +# Déduplication : max N occurrences de la même signature par session de filtrage +MAX_DUPLICATES = 5 + CHUNK_SIZE = 150 # lignes envoyées au LLM par appel @@ -155,6 +185,16 @@ class LogWatchAgent(BaseAgent): INSERT OR IGNORE INTO agent_config VALUES ('enabled', '1'); INSERT OR IGNORE INTO agent_config VALUES ('log_retention_days', '7'); INSERT OR IGNORE INTO agent_config VALUES ('local_collect_time', ''); + + CREATE TABLE IF NOT EXISTS reports ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + machine_id INTEGER NOT NULL, + report_date TEXT NOT NULL, + content TEXT NOT NULL, + logs_count INTEGER DEFAULT 0, + created_at TEXT NOT NULL, + FOREIGN KEY (machine_id) REFERENCES machines(id) + ); """) def _cfg(self, key: str, default: str = '') -> str: @@ -255,16 +295,47 @@ class LogWatchAgent(BaseAgent): logger.error(f"[_on_log_received] {e}", exc_info=True) def _prefilter(self, lines: list) -> list: - """Filtre les lignes, retourne [(line, severity)].""" - result = [] + """ + Filtre les lignes, retourne [(line, severity)]. + - Applique les patterns d'exclusion en premier + - Déduplique les lignes similaires (même signature, max MAX_DUPLICATES) + """ + result = [] + seen_sigs = {} # signature → count + for line in lines: line = str(line).strip() if not line: continue + + # Exclusions d'abord + if any(ex.search(line) for ex in EXCLUDE_PATTERNS): + continue + + # Test des patterns d'inclusion + matched = False for pat in FILTER_PATTERNS: if pat.search(line): - result.append((line, _detect_severity(line))) + matched = True break + if not matched: + continue + + # Déduplication : signature = partie fixe de la ligne (sans timestamp/PID) + sig = re.sub(r'\b\d+\b', 'N', line) # remplace les nombres + sig = re.sub(r'\[[\w/]+\]', '[X]', sig) # remplace les identifiants entre [] + sig = sig[:120] + + count = seen_sigs.get(sig, 0) + if count >= MAX_DUPLICATES: + continue + seen_sigs[sig] = count + 1 + + # Annoter si répétition + sev = _detect_severity(line) + entry = line if count == 0 else f"{line} [×{count+1}]" + result.append((entry, sev)) + return result def _register_machine(self, hostname: str) -> int: @@ -644,6 +715,13 @@ class LogWatchAgent(BaseAgent): ) report += '\n\n'.join(all_reports) self._notify_admin(report) + # Stocker le rapport en DB + with self._get_db() as conn: + conn.execute( + "INSERT INTO reports (machine_id, report_date, content, logs_count, created_at) " + "VALUES (?,?,?,?,?)", + (machine_id, today, report, len(logs_list), datetime.now().isoformat()) + ) else: self._notify_admin(f"ℹ️ LogWatch: **{hostname}** — LLM n'a pas retourné de rapport.") diff --git a/config/system_prompt.txt b/config/system_prompt.txt index 99fcc30..df4a126 100644 --- a/config/system_prompt.txt +++ b/config/system_prompt.txt @@ -25,6 +25,7 @@ Tu reçois des instructions via MQTT (depuis Nexus) ou XMPP (directement). - `retention ` : durée de conservation des logs filtrés - `analyze ` : lancer l'analyse d'une machine spécifique maintenant - `analyze_all` : lancer l'analyse complète de toutes les machines + - `report [hostname] [YYYY-MM-DD]` : relire un rapport stocké (sans hostname = liste tous) - `collect [since]` : collecter maintenant les logs locaux (ex: collect "1 hour ago") - `logs [N]` : voir les N derniers logs filtrés d'une machine - `reset ` : réinitialiser l'analyse d'une machine diff --git a/data/logwatch.db b/data/logwatch.db index 087a83c..90d5950 100644 Binary files a/data/logwatch.db and b/data/logwatch.db differ diff --git a/data/omemo.db b/data/omemo.db index 9776b30..2b119a4 100644 Binary files a/data/omemo.db and b/data/omemo.db differ diff --git a/data/queue.db b/data/queue.db index 9c95e65..cd9b228 100644 Binary files a/data/queue.db and b/data/queue.db differ diff --git a/skills/__pycache__/logwatch.cpython-313.pyc b/skills/__pycache__/logwatch.cpython-313.pyc index 9b4ddba..164ecd2 100644 Binary files a/skills/__pycache__/logwatch.cpython-313.pyc and b/skills/__pycache__/logwatch.cpython-313.pyc differ diff --git a/skills/logwatch.py b/skills/logwatch.py index 44439e4..250421b 100644 --- a/skills/logwatch.py +++ b/skills/logwatch.py @@ -26,6 +26,7 @@ USAGE = ( "SKILL:logwatch ARGS:overage \n" "SKILL:logwatch ARGS:analyze \n" "SKILL:logwatch ARGS:analyze_all\n" + "SKILL:logwatch ARGS:report [hostname] [YYYY-MM-DD]\n" "SKILL:logwatch ARGS:collect [since]\n" "SKILL:logwatch ARGS:retention \n" "SKILL:logwatch ARGS:logs [N]\n" @@ -260,6 +261,54 @@ def run(args: str, context) -> str: ) return "\n".join(lines) + # ── report [date] ────────────────────────────────────────────── + if action == 'report': + p = rest.split(None, 1) + hostname = p[0].strip() if p else '' + date_str = p[1].strip() if len(p) > 1 else '' + + if not hostname: + # Sans hostname : liste les derniers rapports toutes machines + with _db(context) as conn: + rows = conn.execute( + "SELECT m.hostname, r.report_date, r.logs_count, r.created_at " + "FROM reports r JOIN machines m ON m.id=r.machine_id " + "ORDER BY r.id DESC LIMIT 20" + ).fetchall() + if not rows: + return "Aucun rapport stocké." + lines = ["── Rapports disponibles ──────────────────────"] + for r in rows: + lines.append( + f" {r['report_date']} | {r['hostname']:<30s} | {r['logs_count']} erreurs" + ) + lines.append("\nUtilise : logwatch report [YYYY-MM-DD]") + return "\n".join(lines) + + with _db(context) as conn: + m = conn.execute( + "SELECT id FROM machines WHERE hostname=?", (hostname,) + ).fetchone() + if not m: + return f"Machine '{hostname}' introuvable." + + if date_str: + row = conn.execute( + "SELECT content, report_date, logs_count FROM reports " + "WHERE machine_id=? AND report_date=? ORDER BY id DESC LIMIT 1", + (m['id'], date_str) + ).fetchone() + else: + row = conn.execute( + "SELECT content, report_date, logs_count FROM reports " + "WHERE machine_id=? ORDER BY id DESC LIMIT 1", + (m['id'],) + ).fetchone() + + if not row: + return f"Aucun rapport trouvé pour '{hostname}'" + (f" le {date_str}" if date_str else "") + "." + return f"[{row['report_date']} — {row['logs_count']} erreurs]\n\n{row['content']}" + # ── collect [since] ─────────────────────────────────────────────────────── if action == 'collect': since = rest.strip() or 'yesterday'