07. Building a Web Search Tool
Chapter 7 of 16 · 20 min
A web search tool lets the agent fetch real-time information from the internet. This is the highest-value tool for research agents because it gives the model access to data that changes constantly.
Tool design
import requests
from typing import Optional
class WebSearchTool(Tool):
    """Agent tool that queries an HTTP search endpoint and returns the hits as text.

    The endpoint is called with ``q`` and ``num`` query parameters. The
    response is expected to be a JSON object with a ``results`` list whose
    items carry ``title``, ``url`` and (optionally) ``snippet`` keys.
    """

    def __init__(self, api_key: Optional[str] = None, base_url: str = "https://api.example.com/search"):
        """Register the tool's name, description and input schema.

        Args:
            api_key: Optional token. When set, it is sent as a Bearer
                ``Authorization`` header on every search request.
            base_url: URL of the search endpoint.
        """
        super().__init__(
            name="web_search",
            description=(
                "Perform a web search and return relevant results with titles, URLs, "
                "and snippets. Use for factual questions, news, current events, product "
                "reviews, or any query requiring up-to-date external information."
            ),
            input_schema={
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search query. Use specific keywords rather than full questions."
                    },
                    "num_results": {
                        "type": "integer",
                        "description": "Number of results to return (1-10)",
                        "default": 5,
                        "minimum": 1,
                        "maximum": 10
                    }
                },
                "required": ["query"]
            },
        )
        self.api_key = api_key
        self.base_url = base_url

    def invoke(self, query: str, num_results: int = 5) -> str:
        """Run a search and return a human-readable result list.

        Args:
            query: The search string, sent as the ``q`` parameter.
            num_results: How many results to request and return. Values
                outside the 1-10 range declared in the input schema are
                clamped into it.

        Returns:
            One ``- title: url`` entry per result, each followed by its
            snippet on the next line and separated by blank lines;
            ``"No results found."`` when nothing usable comes back; or a
            string starting with ``"Error:"`` when the request fails.
            Request failures are reported in the return value rather than
            raised.
        """
        # The schema advertises 1-10, but the caller may not have validated
        # the arguments against it, so enforce the bounds here as well.
        num_results = max(1, min(10, num_results))

        params = {"q": query, "num": num_results}
        headers = {}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        # Only the network round-trip and body decoding live in the try block,
        # so formatting problems below are never misreported as request errors.
        try:
            response = requests.get(self.base_url, params=params, headers=headers, timeout=10)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.Timeout:
            return "Error: Search request timed out. Try a simpler query."
        except requests.exceptions.RequestException as e:
            return f"Error: Search failed - {e}"
        except ValueError:
            # Older requests releases raise a plain ValueError (not a
            # RequestException) when the body is not valid JSON.
            return "Error: Search failed - response was not valid JSON."

        # Tolerate payloads that are not the expected {"results": [...]} shape.
        raw_items = data.get("results") if isinstance(data, dict) else None
        if not isinstance(raw_items, list):
            raw_items = []

        # Missing title/url fields fall back to empty strings, mirroring how
        # a missing snippet is handled, instead of raising KeyError.
        results = [
            f"- {item.get('title', '')}: {item.get('url', '')}\n {item.get('snippet', '')}"
            for item in raw_items[:num_results]
            if isinstance(item, dict)
        ]
        if not results:
            return "No results found."
        return "\n\n".join(results)
Handling search API variants
The code above assumes a generic search API. Common options include:
- SerpAPI for Google search results
- DuckDuckGo for free, no-authentication search
- Tavily for AI-optimized search
- Custom API for company-internal search engines
For local development, the `duckduckgo_search` Python library is the easiest to integrate, since it needs no API key:
from duckduckgo_search import DDGS
def local_web_search(query: str, num_results: int = 5) -> str:
    """Search DuckDuckGo through ``DDGS`` and render the hits as plain text.

    Args:
        query: The search string passed to ``DDGS.text``.
        num_results: Forwarded to ``DDGS.text`` as ``max_results``.

    Returns:
        One ``- title: href`` entry per hit with its body on the following
        line, entries separated by blank lines, or ``"No results found."``
        when the search yields nothing.
    """
    with DDGS() as client:
        hits = list(client.text(query, max_results=num_results))

    # Guard clause: nothing to format.
    if len(hits) == 0:
        return "No results found."

    return "\n\n".join(
        f"- {hit['title']}: {hit['href']}\n {hit['body']}" for hit in hits
    )
Rate limiting
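Search APIs impose rate limits. Implement a simple throttle that enforces a minimum interval between calls (a simpler alternative to a full token bucket, which would also allow short bursts):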
import time
class RateLimitedSearchTool:
    """Search wrapper that keeps consecutive calls a minimum interval apart.

    The interval is ``60 / calls_per_minute`` seconds. A call that arrives
    too early blocks (sleeps) until the interval has elapsed and then
    delegates to ``local_web_search``. No locking is performed here.
    """

    def __init__(self, calls_per_minute: int = 10):
        """Configure the throttle.

        Args:
            calls_per_minute: Maximum number of searches per minute; must be
                greater than zero.

        Raises:
            ValueError: If ``calls_per_minute`` is zero or negative.
        """
        # Zero would divide by zero below, and a negative rate would yield a
        # negative interval that silently disables throttling.
        if calls_per_minute <= 0:
            raise ValueError("calls_per_minute must be greater than zero")
        self.calls_per_minute = calls_per_minute
        self.interval = 60.0 / calls_per_minute
        # -inf means "never called": the first invoke never waits, whatever
        # the monotonic clock's arbitrary starting point happens to be.
        self.last_call = float("-inf")

    def invoke(self, query: str, num_results: int = 5) -> str:
        """Wait out the remaining interval if needed, then run the search.

        Args:
            query: The search string forwarded to ``local_web_search``.
            num_results: Result count forwarded to ``local_web_search``.

        Returns:
            Whatever ``local_web_search`` returns for these arguments.
        """
        # Use the monotonic clock: wall-clock time can jump (NTP sync, manual
        # changes), which would make the computed wait too long or too short.
        elapsed = time.monotonic() - self.last_call
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)
        # Stamp after sleeping so the next interval starts at the real call time.
        self.last_call = time.monotonic()
        return local_web_search(query, num_results)
EXERCISE
Build a web search tool that uses the DuckDuckGo library. Run five consecutive searches with a rate limiter and verify the timing. Then simulate an API failure and check that the tool returns a safe error message.