# agent2_ansible/skills/web_read.py

"""
Skill : READ
Télécharge une page web et la convertit en texte lisible.
"""
import urllib.request
from bs4 import BeautifulSoup

# Skill identity: name and trigger prefix.
# NOTE(review): presumably consumed by the skill loader/dispatcher — confirm.
SKILL_NAME = "read"
TRIGGER = "READ:"

# Maximum number of characters of extracted page text that execute() returns
# before the content is cut and marked as truncated.
MAX_CHARS = 4000

def execute(args: str) -> str:
    """Fetch a web page and return its visible text.

    The page is downloaded, its ``script``, ``style``, ``nav``, ``footer``
    and ``header`` elements are removed, the remaining text is reduced to its
    non-blank lines, and the result is truncated to ``MAX_CHARS`` characters.

    Args:
        args: The URL to read; surrounding whitespace is ignored. Only
            ``http`` and ``https`` URLs are accepted.

    Returns:
        The extracted text prefixed with the URL, or a message starting with
        ``"Erreur"`` when the URL is empty, uses an unsupported scheme, or
        the download/parsing fails. Failures during download or parsing are
        returned as a message rather than raised.
    """
    url = args.strip()
    if not url:
        return "Erreur : URL vide."

    # urlopen() also handles file:// and ftp:// URLs, which would let a
    # caller read local files through this skill; allow web schemes only.
    if not url.lower().startswith(("http://", "https://")):
        return (
            "Erreur : schéma d'URL non supporté pour {} "
            "(http ou https uniquement).".format(url)
        )

    try:
        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(req, timeout=15) as response:
            html = response.read()
            # Charset announced in the Content-Type header (None if absent).
            charset = response.headers.get_content_charset()

        # Hand the declared charset to the parser so it does not have to
        # guess the encoding from the raw bytes alone.
        soup = BeautifulSoup(html, "html.parser", from_encoding=charset)

        # Drop elements that carry no readable page content.
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()

        text = soup.get_text(separator="\n")
        # Strip each line and discard the ones left empty.
        stripped = (line.strip() for line in text.splitlines())
        content = "\n".join(line for line in stripped if line)

        if len(content) > MAX_CHARS:
            content = content[:MAX_CHARS] + "\n...[tronqué]"

        return "Contenu de {} :\n{}".format(url, content)

    except Exception as e:
        # Skill boundary: report any failure to the caller as text.
        return "Erreur lors de la lecture de {} : {}".format(url, e)