20. Performance Optimization

Chapter 20 of 24 · 20 min

OpenCLaw runs on personal hardware with resource constraints. Performance optimization ensures responsive interaction despite limited compute budgets.

Profiling Framework

Optimization begins with measurement. OpenCLaw includes a profiling framework that identifies bottlenecks in message processing, context management, and model inference.

# performance.py
import time
from contextlib import contextmanager
from dataclasses import dataclass, field
from typing import Dict, List

@dataclass
class ProfilerStats:
    """Aggregated timing summary for one profiled operation.

    Instances are built by ``Profiler.get_stats`` from the raw durations
    recorded for a single operation name. Durations are differences of
    ``time.perf_counter()`` readings, i.e. fractional seconds.
    """

    # Name the operation was recorded under.
    operation: str
    # Number of recorded measurements.
    call_count: int
    # Sum of all recorded durations.
    total_duration: float
    # total_duration divided by call_count.
    avg_duration: float
    # Shortest recorded duration.
    min_duration: float
    # Longest recorded duration.
    max_duration: float

class Profiler:
    """Record wall-clock durations of named operations.

    Every measurement is kept as a raw sample in ``stats`` (operation name
    -> list of durations); ``get_stats`` condenses those samples into
    ``ProfilerStats`` summaries. Setting ``enabled`` to ``False`` turns
    ``measure`` into a no-op wrapper.
    """

    def __init__(self):
        self.stats: Dict[str, List[float]] = {}
        self.enabled = True

    @contextmanager
    def measure(self, operation: str):
        """Time the body of a ``with`` block and file it under *operation*.

        The sample is recorded even when the body raises; the exception
        still propagates. ``enabled`` is consulted once, on entry.
        """
        if self.enabled:
            began = time.perf_counter()
            try:
                yield
            finally:
                elapsed = time.perf_counter() - began
                # Create the sample list on first use of this operation name.
                self.stats.setdefault(operation, []).append(elapsed)
        else:
            yield

    def get_stats(self) -> List[ProfilerStats]:
        """Return one ``ProfilerStats`` per recorded operation, in insertion order."""
        return [
            ProfilerStats(
                operation=name,
                call_count=len(samples),
                total_duration=sum(samples),
                avg_duration=sum(samples) / len(samples),
                min_duration=min(samples),
                max_duration=max(samples),
            )
            for name, samples in self.stats.items()
        ]

    def reset(self):
        """Discard every recorded sample by rebinding ``stats`` to a new dict."""
        self.stats = dict()

Caching Strategies

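Repeated operations benefit from caching. OpenCLaw implements multi-level caching: an in-memory LRU cache for frequent operations, a disk cache for intermediate results, and a semantic cache for similar queries. The semantic cache shown below is a simplified sketch: its similarity function only recognizes queries that are identical after lowercasing and trimming whitespace, and it is meant to be replaced with a real similarity measure.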

import hashlib
from functools import lru_cache
from typing import Optional

class SemanticCache:
    """Bounded query -> result cache with least-recently-used eviction.

    Queries are normalized (lowercased, stripped) and fingerprinted with MD5;
    the fingerprint is the cache key. MD5 is used only as a cheap,
    non-cryptographic fingerprint here.

    ``_similarity`` is a placeholder that only recognizes identical keys.
    Subclasses may override it; note that it receives the *hashed* keys, so a
    genuinely semantic measure would also need access to the original text.

    Attributes:
        max_size: Maximum number of entries kept. Values <= 0 disable storage.
        threshold: A non-identical cached key matches when its similarity is
            strictly greater than this value.
        cache: Mapping of key -> cached result.
        access_order: Keys ordered from least to most recently used; each
            cached key appears exactly once.
    """

    def __init__(self, max_size: int = 1000, similarity_threshold: float = 0.85):
        self.max_size = max_size
        self.threshold = similarity_threshold
        self.cache = {}
        self.access_order = []

    def _compute_key(self, query: str) -> str:
        """Return the hex MD5 fingerprint of the normalized *query*."""
        return hashlib.md5(query.lower().strip().encode()).hexdigest()

    def _touch(self, key: str) -> None:
        """Mark *key* as most recently used, keeping it unique in the order."""
        try:
            self.access_order.remove(key)
        except ValueError:
            pass  # Key was not tracked yet.
        self.access_order.append(key)

    def get(self, query: str) -> Optional[str]:
        """Return the cached result for *query*, or ``None`` on a miss.

        An exact key match is answered by direct lookup, independent of the
        threshold. Otherwise the first cached key whose similarity exceeds
        the threshold wins. A hit refreshes the entry's recency.
        """
        key = self._compute_key(query)

        # Fast path: identical normalized query. Avoids the linear scan and
        # keeps exact hits working even when the threshold is >= 1.0.
        if key in self.cache:
            self._touch(key)
            return self.cache[key]

        for cached_key, cached_value in self.cache.items():
            if self._similarity(key, cached_key) > self.threshold:
                self._touch(cached_key)
                return cached_value

        return None

    def set(self, query: str, result: str):
        """Store *result* for *query*, evicting the least recently used entry if full.

        Overwriting an existing key updates its value and recency without
        evicting anything.
        """
        if self.max_size <= 0:
            return  # Nothing can be stored; avoid popping from an empty order.

        key = self._compute_key(query)

        # Only a genuinely new key can push the cache over its limit.
        if key not in self.cache:
            while len(self.cache) >= self.max_size and self.access_order:
                oldest = self.access_order.pop(0)
                # pop() tolerates an order entry whose value is already gone.
                self.cache.pop(oldest, None)

        self.cache[key] = result
        self._touch(key)

    def _similarity(self, key1: str, key2: str) -> float:
        """Return 1.0 for identical keys and 0.0 otherwise (placeholder)."""
        return 1.0 if key1 == key2 else 0.0

Memory Management

Long-running OpenCLaw instances build up memory pressure over time. Proactive memory management releases unused context and compresses conversation history before the process reaches its memory limit.

class MemoryManager:
    """Watch process memory and shed conversation state when it runs high.

    Args:
        memory_limit_mb: Memory budget for the process, in mebibytes.
        cleanup_threshold: Fraction of the budget above which
            ``check_memory`` triggers a cleanup. Defaults to 0.8.

    Attributes:
        limit: The memory budget converted to bytes.
        cleanup_threshold: See above.
        gc_interval: Not read by any method in this class.
            NOTE(review): presumably seconds between periodic checks - confirm.
        active_conversations: Conversations visited by ``trigger_cleanup``.
            Each must provide ``summarize_if_needed()`` and
            ``release_unused_context()``. Starts empty; callers register
            conversations by appending to it.
    """

    def __init__(self, memory_limit_mb: int = 512, cleanup_threshold: float = 0.8):
        self.limit = memory_limit_mb * 1024 * 1024
        self.cleanup_threshold = cleanup_threshold
        self.gc_interval = 3600
        # Initialized here so trigger_cleanup works before anything registers.
        self.active_conversations = []

    def check_memory(self):
        """Trigger a cleanup when resident memory exceeds the threshold.

        Requires the third-party ``psutil`` package; it is imported lazily so
        the class can be constructed without it.
        """
        import psutil

        used = psutil.Process().memory_info().rss

        if used > self.limit * self.cleanup_threshold:
            self.trigger_cleanup()

    def trigger_cleanup(self):
        """Summarize and release conversation context, then run the collector."""
        import gc

        # Iterate over a snapshot in case a conversation unregisters itself.
        for conversation in list(self.active_conversations):
            conversation.summarize_if_needed()
            conversation.release_unused_context()

        # Collect last so objects released above are reclaimed in this pass.
        gc.collect()
EXERCISE

Implement an adaptive caching system that adjusts cache size based on available memory. Design the eviction policy to prioritize frequently accessed entries while preventing memory exhaustion.