"""Web tools: search, fetch URL, scrape.""" from __future__ import annotations import httpx from bs4 import BeautifulSoup from . import tool @tool("web_search", "Search the web using DuckDuckGo", category="web") def web_search(query: str, max_results: int = 5) -> str: try: # Use DuckDuckGo HTML search (no API key needed) r = httpx.get( "https://html.duckduckgo.com/html/", params={"q": query}, headers={"User-Agent": "Mozilla/5.0 (compatible; CheddahBot/1.0)"}, timeout=15, follow_redirects=True, ) soup = BeautifulSoup(r.text, "html.parser") results = [] for item in soup.select(".result")[:max_results]: title_el = item.select_one(".result__title a") snippet_el = item.select_one(".result__snippet") if title_el: title = title_el.get_text(strip=True) url = title_el.get("href", "") snippet = snippet_el.get_text(strip=True) if snippet_el else "" results.append(f"**{title}**\n{url}\n{snippet}") return "\n\n".join(results) if results else "No results found." except Exception as e: return f"Search error: {e}" @tool("fetch_url", "Fetch and extract text content from a URL", category="web") def fetch_url(url: str) -> str: try: r = httpx.get( url, headers={"User-Agent": "Mozilla/5.0 (compatible; CheddahBot/1.0)"}, timeout=20, follow_redirects=True, ) content_type = r.headers.get("content-type", "") if "html" in content_type: soup = BeautifulSoup(r.text, "html.parser") # Remove script/style elements for tag in soup(["script", "style", "nav", "footer", "header"]): tag.decompose() text = soup.get_text(separator="\n", strip=True) # Collapse whitespace lines = [line.strip() for line in text.split("\n") if line.strip()] text = "\n".join(lines) if len(text) > 15000: text = text[:15000] + "\n... (truncated)" return text elif "json" in content_type: return r.text[:15000] else: return r.text[:5000] except Exception as e: return f"Fetch error: {e}"