10. Adaptive Retrieval
Chapter 10 of 22 · 25 min
Not all queries deserve the same retrieval strategy. Adaptive retrieval selects retrieval tactics based on query characteristics, balancing precision, recall, and latency against query complexity.
Query Classification
The first step in adaptive retrieval is classifying incoming queries:
class QueryClassifier:
    """Assign an incoming query to one of a fixed set of retrieval categories.

    Classification is delegated to ``model``, which must expose an
    ``invoke(prompt)`` method. The reply may be either a plain string or an
    object carrying the text in a ``content`` attribute.
    """

    def __init__(self, model):
        """Store the model and the list of category labels it may return."""
        self.model = model
        self.categories = [
            'factual_simple',   # Single entity lookup
            'factual_complex',  # Multi-entity comparison
            'procedural',       # How-to questions
            'explanatory',      # Why/how questions
            'exploratory',      # Open-ended searches
        ]

    def classify(self, query):
        """Classify query into retrieval strategy type.

        Args:
            query: The user query text; it is interpolated into the prompt.

        Returns:
            One of ``self.categories``. When the reply mentions several
            labels, the one mentioned first wins. When it mentions none,
            ``'factual_simple'`` is returned as the default.
        """
        prompt = f"""Classify this query into one of these categories:
- factual_simple: Direct lookup of a specific fact or entity
- factual_complex: Comparison or aggregation across entities
- procedural: Questions about how to do something
- explanatory: Questions asking why something is the way it is
- exploratory: Broad research into a topic
Query: {query}
Category:"""
        response = self.model.invoke(prompt)

        # Accept both reply shapes: an object with ``.content`` or a bare str.
        text = getattr(response, 'content', response)

        # Models often answer "Factual-Complex" or "factual simple"; fold
        # case, hyphens and whitespace so those still match the labels.
        normalized = '_'.join(str(text).lower().replace('-', ' ').split())

        # Prefer the label that appears earliest in the reply rather than the
        # one that happens to come first in self.categories.
        mentions = [
            (normalized.find(label), label)
            for label in self.categories
            if label in normalized
        ]
        if mentions:
            return min(mentions, key=lambda hit: hit[0])[1]
        return 'factual_simple'  # Default fallback
Mapping Strategy to Tactics
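Different query types benefit from different retrieval tactics. In the table below the dense and sparse weights sum to 1, so the code that follows stores only the dense weight, as `alpha`: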
| Query Type | Dense Weights | Sparse Weights | Expansion | k Value |
|---|---|---|---|---|
| factual_simple | 0.7 | 0.3 | None | 10 |
| factual_complex | 0.5 | 0.5 | Decompose | 30 |
| procedural | 0.6 | 0.4 | Multi-query | 20 |
| explanatory | 0.8 | 0.2 | Step-back | 15 |
| exploratory | 0.3 | 0.7 | High expansion | 100 |
# Per-category retrieval settings, keyed by the label the classifier returns.
#   alpha         - presumably the dense-vs-sparse weight; the values match
#                   the "Dense Weights" column of the table above
#                   (NOTE(review): confirm against the hybrid retriever).
#   use_reranking - whether results are passed through the reranker.
#   expansion     - name of the query-expansion tactic, or None for none.
#   k             - number of candidates requested from the retriever.
RETRIEVAL_STRATEGIES = {
    'factual_simple': dict(
        alpha=0.7, use_reranking=True, expansion=None, k=10,
    ),
    'factual_complex': dict(
        alpha=0.5, use_reranking=True, expansion='decompose', k=30,
    ),
    'procedural': dict(
        alpha=0.6, use_reranking=True, expansion='multi_query', k=20,
    ),
    'explanatory': dict(
        alpha=0.8, use_reranking=True, expansion='step_back', k=15,
    ),
    'exploratory': dict(
        alpha=0.3, use_reranking=False, expansion='high', k=100,
    ),
}
class AdaptiveRetriever:
    """Retrieve documents using a strategy chosen per query category.

    The classifier supplies a category label, ``strategies`` maps that label
    to a settings dict with the keys ``'expansion'``, ``'k'`` and
    ``'use_reranking'``, and the hybrid retriever and reranker do the work.
    """

    def __init__(self, classifier, strategies, hybrid_retriever, reranker):
        """Store the collaborators; nothing is called at construction time."""
        self.classifier = classifier
        self.strategies = strategies
        self.hybrid_retriever = hybrid_retriever
        self.reranker = reranker

    def retrieve(self, query):
        """Classify ``query``, retrieve with the matching strategy, then rerank.

        Args:
            query: The user query text.

        Returns:
            The retrieved results, reranked to at most 10 items when the
            strategy enables reranking and there is something to rerank.

        Raises:
            KeyError: If the classifier returns a label that is missing from
                ``self.strategies``.
        """
        # Classify query
        category = self.classifier.classify(query)
        strategy = self.strategies[category]
        expansion = strategy['expansion']
        k = strategy['k']

        # Retrieve based on strategy
        if expansion == 'decompose':
            # decompose_query / deduplicate_and_merge are helpers defined
            # outside this class; presumably they return a list of sub-query
            # strings and a merged result list respectively.
            sub_queries = decompose_query(query)
            if sub_queries:
                # Split the budget across sub-queries, but never ask for zero
                # results when there are more sub-queries than k.
                per_query_k = max(1, k // len(sub_queries))
                all_results = []
                for sq in sub_queries:
                    all_results.extend(
                        self.hybrid_retriever.retrieve(sq, k=per_query_k)
                    )
                results = deduplicate_and_merge(all_results)
            else:
                # Nothing to decompose into: retrieve for the query itself
                # instead of returning an empty result set.
                results = self.hybrid_retriever.retrieve(query, k=k)
        elif expansion == 'step_back':
            step_back_q = generate_step_back(query)
            step_back_results = self.hybrid_retriever.retrieve(step_back_q, k=5)
            direct_results = self.hybrid_retriever.retrieve(query, k=k)
            results = fuse_results(step_back_results, direct_results)
        else:
            # No expansion, or an expansion without an implementation here.
            # TODO: 'multi_query' should fan out over query variants in the
            # same way 'decompose' does. Until then it retrieves directly,
            # so the method always produces a result list.
            results = self.hybrid_retriever.retrieve(query, k=k)

        # Apply reranking if configured; skip it when there is nothing to rank.
        if strategy['use_reranking'] and len(results) > 0:
            results = self.reranker.rerank(query, results, top_k=min(10, len(results)))
        return results
Cost-Latency Budgeting
Strategy selection can also take resource constraints, such as a latency budget, into account:
def adaptive_retrieval_with_budget(query, budget_ms=500, available_compute='cpu'):
    """
    Adaptive retrieval with latency and compute constraints.

    Picks a retrieval pipeline from the latency budget alone, consulting the
    query type only in the 300-500 ms tier where it changes the outcome.

    Args:
        query: The user query text.
        budget_ms: Latency budget in milliseconds. The tiers are half-open:
            ``< 100``, ``< 300``, ``< 500``, and everything else.
            NOTE(review): the default of 500 therefore lands in the final
            "full expansion" tier; confirm that is intended.
        available_compute: Accepted for interface compatibility; this
            function does not read it.

    Returns:
        Whatever the selected retrieval helper returns.
    """
    if budget_ms < 100:
        # Minimal retrieval: just dense, no reranking
        return dense_only_retrieval(query, k=5)

    if budget_ms < 300:
        # Standard retrieval: dense + sparse + light reranking
        return hybrid_retrieval(query, k=20, reranker='light')

    if budget_ms < 500:
        # Fast classification (keyword-based). Done only here so the tighter
        # tiers above spend none of their budget on a result they ignore.
        query_type = fast_classify(query)
        if query_type in ('factual_complex', 'exploratory'):
            # Complex query, sufficient time: expand + full reranking
            return expanded_with_reranking(query, RETRIEVAL_STRATEGIES[query_type])
        # Simple query: standard hybrid with reranking
        return hybrid_retrieval(query, k=20, reranker='full')

    # Unlimited time: full expansion pipeline
    return maximum_effort_retrieval(query)
Measuring Adaptation Quality
Track retrieval quality metrics stratified by query type:
def evaluate_adaptive_retrieval(retriever, eval_queries):
    """
    Evaluate retrieval quality, broken down by query type.

    Args:
        retriever: Object with ``retrieve(query)`` and a ``classifier``
            attribute exposing ``classify(query)``. Each retrieved item is
            indexed with ``['document']`` to obtain the value that is
            compared against the relevant documents.
        eval_queries: Iterable of ``(query, relevant_docs)`` pairs.

    Returns:
        Dict mapping each observed query type to ``avg_recall``, ``avg_mrr``
        and ``avg_latency_ms``. Query types with no scored queries are absent.
    """
    results_by_type = defaultdict(lambda: {'recalls': [], 'mrrs': [], 'latencies': []})

    for query, relevant_docs in eval_queries:
        relevant = set(relevant_docs)
        if not relevant:
            # Recall is undefined without any relevant document; skip the
            # query rather than divide by zero.
            continue

        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments.
        start = time.perf_counter()
        retrieved = retriever.retrieve(query)
        latency = time.perf_counter() - start

        # NOTE(review): this classifies the query a second time, since
        # retrieve() classifies internally. With a non-deterministic
        # classifier the label recorded here can differ from the strategy
        # that was actually used.
        query_type = retriever.classifier.classify(query)

        retrieved_ids = [doc['document'] for doc in retrieved]
        recall = len(set(retrieved_ids) & relevant) / len(relevant_docs)
        mrr = calculate_mrr(retrieved_ids, relevant_docs)

        bucket = results_by_type[query_type]
        bucket['recalls'].append(recall)
        bucket['mrrs'].append(mrr)
        bucket['latencies'].append(latency)

    return {
        qtype: {
            'avg_recall': np.mean(metrics['recalls']),
            'avg_mrr': np.mean(metrics['mrrs']),
            # Latencies are collected in seconds; report milliseconds.
            'avg_latency_ms': np.mean(metrics['latencies']) * 1000,
        }
        for qtype, metrics in results_by_type.items()
    }
EXERCISE
Implement a simple adaptive router that classifies queries as factoid, multi-document, or comparative. Assign a different retrieval strategy to each class: k=5 for factoid, k=20 with reranking for multi-document, and k=50 with reranking for comparative. Measure end-to-end accuracy on 30 test queries.