Initial commit — Agent HAL v1.0
Agent système complet remplaçant agent_debian : - 20 skills : apt, systemd, cron, process, network, user, sysinfo, journal, container, shell, filesystem (enhanced), git, ssh, web_fetch, todo, script, mqtt_send, mqtt_subscribe, muc_send, agents_status - filesystem : read avec numéros de lignes, edit, multiedit (style SHAI) - git : status, log, diff, add, commit, push, pull, clone, branch, checkout - ssh : exécution distante + SCP (password ou clé) - web_fetch : GET/HEAD/POST avec nettoyage HTML - todo : liste de tâches en mémoire
This commit is contained in:
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Skill WEB_FETCH — récupérer le contenu d'une URL HTTP/HTTPS.
|
||||
|
||||
Usage LLM :
|
||||
SKILL:web_fetch ARGS:get <url>
|
||||
SKILL:web_fetch ARGS:head <url>
|
||||
SKILL:web_fetch ARGS:post <url> | <body_json>
|
||||
"""
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import json
|
||||
import re
|
||||
|
||||
# Short human-readable summary of the skill (runtime string, intentionally
# left in French) — presumably surfaced by the skill registry; verify there.
DESCRIPTION = "Récupérer le contenu d'une URL HTTP/HTTPS (GET, HEAD, POST)"
# Invocation syntax for the three supported actions (runtime string).
USAGE = "SKILL:web_fetch ARGS:get <url> | head <url> | post <url>|<body_json>"

# Upper bound, in characters, on the response text returned by ``run``; also
# used there to size how many bytes are read from the network.
MAX_SIZE = 8000
|
||||
|
||||
|
||||
def _strip_html(html: str) -> str:
    """Reduce an HTML document to plain, single-spaced text.

    Steps, in order:
      1. remove ``<script>`` and ``<style>`` elements together with their content;
      2. replace every remaining tag with a space;
      3. decode HTML character references (named and numeric);
      4. collapse runs of whitespace to one space and trim both ends.

    Args:
        html: Raw HTML markup.

    Returns:
        The visible text of the document on a single line.
    """
    # Local import: the ``html`` parameter shadows the stdlib module of the
    # same name, so the function is imported directly under its own name.
    from html import unescape

    # Remove scripts and styles, including everything between the tags.
    html = re.sub(r'<(script|style)[^>]*>.*?</\1>', ' ', html, flags=re.DOTALL | re.IGNORECASE)
    # Remove the remaining tags; a space keeps adjacent words separated.
    html = re.sub(r'<[^>]+>', ' ', html)
    # Decode entities AFTER tag removal so that escaped markup such as
    # "&lt;b&gt;" survives as literal text.  A single unescape pass also
    # avoids double-decoding "&amp;lt;" into "<".
    html = unescape(html)
    # Collapse whitespace; ``\s`` also matches the U+00A0 produced by "&nbsp;".
    html = re.sub(r'\s+', ' ', html).strip()
    return html
|
||||
|
||||
|
||||
# Only these URL schemes may be fetched.  urllib would otherwise also accept
# "file://" and "ftp://", letting a caller read local files through this skill.
_ALLOWED_SCHEMES = ("http", "https")


def _validate_url(url: str) -> str:
    """Return an error message if *url* must not be fetched, else ``""``."""
    if not url:
        return "Précise une URL."
    try:
        scheme = urllib.parse.urlsplit(url).scheme.lower()
    except ValueError as e:
        # urlsplit rejects some malformed URLs (e.g. unbalanced IPv6 brackets).
        return f"Erreur : {e}"
    if scheme not in _ALLOWED_SCHEMES:
        return f"URL non supportée (http/https uniquement) : {url}"
    return ""


def _format_error(exc: Exception, url: str) -> str:
    """Turn an exception raised while fetching *url* into a user-facing message."""
    # HTTPError is a subclass of URLError, so it has to be tested first.
    if isinstance(exc, urllib.error.HTTPError):
        return f"Erreur HTTP {exc.code} : {exc.reason} — {url}"
    if isinstance(exc, urllib.error.URLError):
        return f"Erreur URL : {exc.reason} — {url}"
    return f"Erreur : {exc}"


def _decode_body(raw: bytes, headers) -> str:
    """Decode *raw* with the charset declared in *headers*, defaulting to UTF-8."""
    # get_content_charset() parses the Content-Type parameter properly
    # (case-insensitive, quotes removed) and returns None when it is absent.
    charset = headers.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The server announced a codec Python does not know.
        return raw.decode("utf-8", errors="replace")


def _http_get(url: str) -> str:
    """GET *url* and return its status line followed by (cleaned) body text."""
    try:
        req = urllib.request.Request(
            url,
            headers={
                "User-Agent": "HAL-Agent/1.0 (compatible; Python urllib)",
                "Accept": "text/html,text/plain,application/json,*/*",
            },
        )
        with urllib.request.urlopen(req, timeout=15) as resp:
            content_type = resp.headers.get("Content-Type", "")
            # Read more than MAX_SIZE so text is left after tag stripping.
            raw = resp.read(MAX_SIZE * 3)
            text = _decode_body(raw, resp.headers)

            # HTML responses are reduced to their visible text.
            if "html" in content_type.lower():
                text = _strip_html(text)

            if len(text) > MAX_SIZE:
                text = text[:MAX_SIZE] + f"\n... (tronqué à {MAX_SIZE} caractères)"
            return f"[{resp.status} {url}]\n{text}"
    except Exception as e:  # skill boundary: always answer with a string
        return _format_error(e, url)


def _http_head(url: str) -> str:
    """Send a HEAD request to *url* and list the response headers."""
    try:
        req = urllib.request.Request(
            url,
            method="HEAD",
            headers={"User-Agent": "HAL-Agent/1.0 (compatible; Python urllib)"},
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            lines = [f"[{resp.status} {url}]"]
            # items() keeps repeated headers (e.g. Set-Cookie) that a dict
            # conversion would collapse into one entry.
            lines.extend(f" {k}: {v}" for k, v in resp.headers.items())
            return "\n".join(lines)
    except Exception as e:  # skill boundary: always answer with a string
        return _format_error(e, url)


def _http_post(url: str, body: str) -> str:
    """POST *body* (sent as ``application/json``) to *url* and return the reply."""
    try:
        req = urllib.request.Request(
            url,
            data=body.encode("utf-8"),
            method="POST",
            headers={
                "User-Agent": "HAL-Agent/1.0",
                "Content-Type": "application/json",
            },
        )
        with urllib.request.urlopen(req, timeout=15) as resp:
            text = _decode_body(resp.read(MAX_SIZE), resp.headers)
            return f"[{resp.status} {url}]\n{text}"
    except Exception as e:  # skill boundary: always answer with a string
        return _format_error(e, url)


def run(args: str, context) -> str:
    """Entry point of the web_fetch skill.

    Args:
        args: ``"get <url>"``, ``"head <url>"`` or ``"post <url> | <body_json>"``.
            The action name is case-insensitive; an empty string is treated
            as a ``get`` without URL.
        context: Unused by this skill; accepted to match the call signature.

    Returns:
        A text report: ``[<status> <url>]`` followed by the response body
        (GET/POST) or the response headers (HEAD), or an error message.
        Failures are reported in the returned string rather than raised.
    """
    parts = args.strip().split(None, 1)
    action = parts[0].lower() if parts else "get"
    rest = parts[1] if len(parts) > 1 else ""

    if action in ("get", "head"):
        url = rest.strip()
        problem = _validate_url(url)
        if problem:
            return problem
        return _http_get(url) if action == "get" else _http_head(url)

    if action == "post":
        if "|" not in rest:
            return "Format : post <url> | <body_json>"
        # Split on the first "|" only, so the JSON body may itself contain "|".
        url, body = rest.split("|", 1)
        url = url.strip()
        problem = _validate_url(url)
        if problem:
            return problem
        return _http_post(url, body.strip())

    return "Action inconnue. Disponible : get, head, post"
|
||||
Reference in New Issue
Block a user