20. Performance Optimization
OpenCLaw runs on personal hardware with resource constraints. Performance optimization ensures responsive interaction despite limited compute budgets.
Profiling Framework
Optimization begins with measurement. OpenCLaw includes a profiling framework that identifies bottlenecks in message processing, context management, and model inference.
# performance.py
import time
from contextlib import contextmanager
from dataclasses import dataclass, field
from typing import Dict, List, Optional
@dataclass
class ProfilerStats:
    """Aggregated timing figures for one profiled operation.

    Instances are plain value records; the durations are whatever unit the
    producer measured in (``Profiler`` records ``time.perf_counter()``
    differences, i.e. fractional seconds).
    """

    # Label the timings were recorded under.
    operation: str
    # How many timings were recorded for this label.
    call_count: int
    # Sum of all recorded timings.
    total_duration: float
    # Arithmetic mean of the recorded timings.
    avg_duration: float
    # Shortest recorded timing.
    min_duration: float
    # Longest recorded timing.
    max_duration: float
class Profiler:
    """Record wall-clock timings per named operation and summarize them.

    Every timing is kept in ``stats`` (operation name -> list of durations
    in seconds) until ``reset()`` is called. Set ``enabled`` to ``False`` to
    turn ``measure`` into a no-op.
    """

    def __init__(self):
        self.enabled = True
        self.stats: Dict[str, List[float]] = {}

    @contextmanager
    def measure(self, operation: str):
        """Time the body of a ``with`` block and file it under *operation*.

        The ``enabled`` flag is read once, on entry. When profiling is on,
        the duration is recorded even if the body raises; the exception
        then propagates unchanged.
        """
        if self.enabled:
            started_at = time.perf_counter()
            try:
                yield
            finally:
                elapsed = time.perf_counter() - started_at
                self.stats.setdefault(operation, []).append(elapsed)
        else:
            yield

    def get_stats(self) -> List[ProfilerStats]:
        """Return one ``ProfilerStats`` per operation, in first-recorded order."""
        return [
            ProfilerStats(
                operation=name,
                call_count=len(samples),
                total_duration=sum(samples),
                avg_duration=sum(samples) / len(samples),
                min_duration=min(samples),
                max_duration=max(samples),
            )
            for name, samples in self.stats.items()
        ]

    def reset(self):
        """Discard all recorded timings by rebinding ``stats`` to a new dict."""
        self.stats = dict()
Caching Strategies
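Repeated operations benefit from caching. OpenCLaw implements multi-level caching: an in-memory LRU cache for frequent operations, a disk cache for intermediate results, and a semantic cache for similar queries. The semantic cache sketched below is a minimal skeleton: its `_similarity` method is an exact-match placeholder, so only queries that are identical after lowercasing and trimming surrounding whitespace hit the cache until a real similarity measure is plugged in.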
from functools import lru_cache
import hashlib
class SemanticCache:
    """Bounded query -> result cache with a pluggable similarity test.

    Queries are normalized (lowercased, surrounding whitespace stripped) and
    hashed into a key. ``_similarity`` decides whether a cached key is close
    enough to serve a lookup; the default implementation only recognizes
    identical keys, so out of the box this is an exact-match cache.

    Eviction is oldest-write-first: writing a key (new or existing) makes it
    the newest entry, and reads do not refresh an entry's position.

    Args:
        max_size: Maximum number of entries kept. A value of zero or less
            disables storing altogether.
        similarity_threshold: A cached entry is served only when its
            similarity to the query is strictly greater than this value.
            With the default ``_similarity`` (which returns 0.0 or 1.0), a
            threshold of 1.0 or more therefore never hits.
    """

    def __init__(self, max_size: int = 1000, similarity_threshold: float = 0.85):
        self.max_size = max_size
        self.threshold = similarity_threshold
        # Single source of truth for both contents and eviction order: dicts
        # preserve insertion order, so the first key is always the oldest
        # write. Keeping order in a separate list is what previously allowed
        # duplicates and a KeyError during eviction.
        self.cache: Dict[str, str] = {}

    @property
    def access_order(self) -> List[str]:
        """Cache keys from oldest write to newest (read-only snapshot)."""
        return list(self.cache)

    def _compute_key(self, query: str) -> str:
        """Return the cache key for *query*: MD5 hex digest of its normalized text."""
        # MD5 is used only to get a compact key, not for any security purpose.
        return hashlib.md5(query.lower().strip().encode()).hexdigest()

    def get(self, query: str) -> Optional[str]:
        """Return a cached result for *query*, or ``None`` when nothing matches.

        An entry stored under the identical key is preferred; otherwise the
        first entry (oldest write first) whose similarity exceeds the
        threshold is returned.
        """
        key = self._compute_key(query)
        # Fast path: an identical key is a dict lookup, no scan needed. The
        # similarity check is still applied so the threshold keeps its meaning.
        if key in self.cache and self._similarity(key, key) > self.threshold:
            return self.cache[key]
        for cached_key, cached_value in self.cache.items():
            if self._similarity(key, cached_key) > self.threshold:
                return cached_value
        return None

    def set(self, query: str, result: str):
        """Store *result* for *query*, evicting the oldest entries if full."""
        if self.max_size <= 0:
            # Nothing can be stored; previously this crashed on an empty pop.
            return
        key = self._compute_key(query)
        # Drop any existing entry first so the rewrite lands at the newest
        # position and an update never forces an unrelated eviction.
        self.cache.pop(key, None)
        # Loop rather than evict once so a max_size lowered at runtime is
        # honoured on the next write.
        while len(self.cache) >= self.max_size:
            del self.cache[next(iter(self.cache))]
        self.cache[key] = result

    def _similarity(self, key1: str, key2: str) -> float:
        """Return 1.0 for identical keys and 0.0 otherwise.

        Placeholder hook: override to plug in a real similarity measure.
        """
        return 1.0 if key1 == key2 else 0.0
Memory Management
Long-running OpenCLaw instances accumulate memory pressure. Proactive memory management releases unused context and compresses conversation history.
class MemoryManager:
    """Watch this process's resident memory and shed conversation state under pressure.

    Conversations to be trimmed are registered by adding them to
    ``active_conversations``; each must provide ``summarize_if_needed()`` and
    ``release_unused_context()``.

    Args:
        memory_limit_mb: Memory budget for the process, in mebibytes.
        cleanup_threshold: Fraction of the budget above which
            ``check_memory`` triggers a cleanup (default 0.8).
    """

    def __init__(self, memory_limit_mb: int = 512, cleanup_threshold: float = 0.8):
        self.limit = memory_limit_mb * 1024 * 1024  # budget in bytes
        self.cleanup_threshold = cleanup_threshold
        # Not read anywhere in this class; presumably the number of seconds
        # between periodic checks scheduled by the caller -- TODO confirm.
        self.gc_interval = 3600
        # Must exist before trigger_cleanup() runs; it was previously never
        # initialized, so the first cleanup raised AttributeError.
        self.active_conversations = []

    def check_memory(self):
        """Trigger a cleanup when resident memory exceeds the configured fraction of the budget.

        Requires the third-party ``psutil`` package, imported lazily so the
        rest of the class works without it.
        """
        import psutil

        used = psutil.Process().memory_info().rss
        if used > self.limit * self.cleanup_threshold:
            self.trigger_cleanup()

    def trigger_cleanup(self):
        """Ask every active conversation to shrink, then run the garbage collector."""
        import gc

        for conversation in self.active_conversations:
            conversation.summarize_if_needed()
            conversation.release_unused_context()
        # Collect after the conversations have dropped their references so
        # the objects they just released can be reclaimed in the same pass.
        gc.collect()
Exercise: Implement an adaptive caching system that adjusts its cache size based on available memory. Design the eviction policy to prioritize frequently accessed entries while preventing memory exhaustion.