65 lines
2.3 KiB
Python
65 lines
2.3 KiB
Python
"""Web tools: search, fetch URL, scrape."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import httpx
|
|
from bs4 import BeautifulSoup
|
|
|
|
from . import tool
|
|
|
|
|
|
def _unwrap_ddg_redirect(href: str) -> str:
    """Return a directly usable URL for a DuckDuckGo result link.

    Result anchors may be redirect links of the form
    ``//duckduckgo.com/l/?uddg=<percent-encoded target>&...``. When *href*
    has that shape, the decoded ``uddg`` target is returned. A remaining
    scheme-less ``//host/...`` link gets ``https:`` prepended so it can be
    requested. Anything else is returned unchanged.
    """
    from urllib.parse import parse_qs, urlparse

    try:
        parts = urlparse(href)
        host = (parts.hostname or "").lower()
    except ValueError:
        # Malformed link (e.g. bad IPv6 literal): hand it back untouched.
        return href

    is_ddg = host == "duckduckgo.com" or host.endswith(".duckduckgo.com")
    if is_ddg and parts.path.startswith("/l/"):
        # parse_qs percent-decodes the value for us.
        targets = parse_qs(parts.query).get("uddg")
        if targets:
            return targets[0]
    if href.startswith("//"):
        return "https:" + href
    return href


@tool("web_search", "Search the web using DuckDuckGo", category="web")
def web_search(query: str, max_results: int = 5) -> str:
    """Search DuckDuckGo's HTML endpoint and return the top results as text.

    Args:
        query: Search terms, sent as the ``q`` query parameter.
        max_results: Maximum number of results to return. Values below
            zero are treated as zero.

    Returns:
        Results formatted as ``**title**``, URL and snippet on separate
        lines, with a blank line between results; ``"No results found."``
        when nothing matched; or ``"Search error: <reason>"`` if the
        request or parsing failed (including HTTP 4xx/5xx responses).
    """
    try:
        # A negative count would otherwise be interpreted as "drop from the end".
        limit = max(max_results, 0)

        # Use DuckDuckGo HTML search (no API key needed)
        r = httpx.get(
            "https://html.duckduckgo.com/html/",
            params={"q": query},
            headers={"User-Agent": "Mozilla/5.0 (compatible; CheddahBot/1.0)"},
            timeout=15,
            follow_redirects=True,
        )
        # Surface HTTP failures instead of reporting them as "No results found."
        r.raise_for_status()

        soup = BeautifulSoup(r.text, "html.parser")
        results = []
        for item in soup.select(".result"):
            # Count only entries that actually produce output, so title-less
            # items do not eat into the requested number of results.
            if len(results) >= limit:
                break
            title_el = item.select_one(".result__title a")
            if not title_el:
                continue
            snippet_el = item.select_one(".result__snippet")
            title = title_el.get_text(strip=True)
            url = _unwrap_ddg_redirect(title_el.get("href", ""))
            snippet = snippet_el.get_text(strip=True) if snippet_el else ""
            results.append(f"**{title}**\n{url}\n{snippet}")
        return "\n\n".join(results) if results else "No results found."
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Search error: {e}"
|
|
|
|
|
|
def _truncate_text(text: str, limit: int) -> str:
    """Cut *text* to *limit* characters, appending a marker if it was shortened."""
    if len(text) <= limit:
        return text
    return text[:limit] + "\n... (truncated)"


@tool("fetch_url", "Fetch and extract text content from a URL", category="web")
def fetch_url(url: str) -> str:
    """Fetch *url* and return its content as plain text.

    HTML responses are reduced to their visible text (scripts, styles and
    page chrome removed, blank lines dropped) and capped at 15000
    characters. JSON responses are returned raw, capped at 15000 characters.
    Any other content type is returned raw, capped at 5000 characters. A
    ``... (truncated)`` marker is appended whenever content was cut.

    Args:
        url: Address to request; redirects are followed.

    Returns:
        The extracted text, or ``"Fetch error: <reason>"`` if the request
        failed (including HTTP 4xx/5xx responses) or parsing raised.
    """
    try:
        r = httpx.get(
            url,
            headers={"User-Agent": "Mozilla/5.0 (compatible; CheddahBot/1.0)"},
            timeout=20,
            follow_redirects=True,
        )
        # Report 4xx/5xx as an error instead of returning the error page body.
        r.raise_for_status()

        # Lower-case so values such as "text/HTML" are matched as well.
        content_type = r.headers.get("content-type", "").lower()

        if "html" in content_type:
            soup = BeautifulSoup(r.text, "html.parser")
            # Remove non-content elements: scripts, styles and page chrome
            for tag in soup(["script", "style", "nav", "footer", "header"]):
                tag.decompose()
            text = soup.get_text(separator="\n", strip=True)
            # Collapse whitespace: strip each line and drop empty ones
            lines = [line.strip() for line in text.split("\n") if line.strip()]
            return _truncate_text("\n".join(lines), 15000)

        if "json" in content_type:
            return _truncate_text(r.text, 15000)

        return _truncate_text(r.text, 5000)
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Fetch error: {e}"
|