23. Testing and Debugging
Chapter 23 of 24 · 20 min
Reliable software requires thorough testing. OpenCLaw includes utilities for unit and integration testing, along with debugging tools for diagnosing production issues.
Testing Framework
The testing framework provides fixtures for common OpenCLaw components and utilities for simulating user interactions.
# testing.py
import pytest
from unittest.mock import Mock
from typing import Dict, Any
@pytest.fixture
def mock_llm():
    """Provide a stand-in LLM client whose ``generate()`` returns a canned reply.

    The reply is itself a ``Mock`` exposing ``content`` and ``confidence``.
    """
    canned_reply = Mock(content="Test response", confidence=0.9)
    llm = Mock()
    llm.generate.return_value = canned_reply
    return llm
@pytest.fixture
def mock_storage():
    """Provide a stand-in storage backend with canned results.

    ``insert()`` returns the id ``"msg_123"`` and ``query()`` returns an
    empty list.
    """
    storage = Mock()
    # Dotted keys configure attributes of child mocks in a single call.
    storage.configure_mock(
        **{
            "insert.return_value": "msg_123",
            "query.return_value": [],
        }
    )
    return storage
@pytest.fixture
def openclaw_instance(mock_llm, mock_storage):
    """Build an ``OpenCLawCore`` wired to the mocked LLM and storage fixtures.

    Args:
        mock_llm: LLM client double, passed to the config as ``llm_client``.
        mock_storage: Storage double, passed as ``storage_backend``.

    Returns:
        An ``OpenCLawCore`` constructed from a ``CoreConfig`` holding both
        doubles.
    """
    # CoreConfig was previously used without being imported, which raised
    # NameError as soon as the fixture ran.
    # NOTE(review): assumes CoreConfig is exported from openclaw.core
    # alongside OpenCLawCore -- confirm the module path.
    from openclaw.core import CoreConfig, OpenCLawCore

    config = CoreConfig(
        llm_client=mock_llm,
        storage_backend=mock_storage,
    )
    return OpenCLawCore(config)
class TestConversation:
    """Conversation-level checks run against the ``openclaw_instance`` fixture."""

    def test_simple_message(self, openclaw_instance):
        """A single message must produce a response with non-empty content."""
        reply = openclaw_instance.process_message("Hello")
        assert reply.content is not None
        assert len(reply.content) > 0

    def test_context_preservation(self, openclaw_instance):
        """A fact stated in one message should surface in a later answer.

        NOTE(review): whether "blue" appears in the answer depends on what
        the core does with the LLM double's reply -- confirm this assertion
        can pass when the LLM is mocked.
        """
        openclaw_instance.process_message("My favorite color is blue")
        follow_up = openclaw_instance.process_message("What color did I mention?")
        assert "blue" in follow_up.content.lower()
class TestFeedback:
    """Checks that feedback on an interaction reaches the storage backend."""

    def test_feedback_recording(self, openclaw_instance, mock_storage):
        """Recording a correction must trigger at least one storage insert."""
        first_reply = openclaw_instance.process_message("Test")
        feedback = {
            "interaction_id": first_reply.message_id,
            "feedback_type": "correction",
            "correction": "Better answer",
        }
        openclaw_instance.record_feedback(**feedback)
        mock_storage.insert.assert_called()
Integration Testing
Integration tests verify component interactions. The test harness simulates realistic usage patterns.
class IntegrationTestHarness:
    """Assemble a core with in-memory collaborators and drive conversations."""

    def __init__(self):
        # Registry of the objects created by setup_full_system, keyed by role.
        self.components = {}
        # Initialised empty; nothing in this class writes to it.
        self.test_data = {}

    def setup_full_system(self) -> OpenCLawCore:
        """Create storage, LLM and core, register all three, return the core."""
        storage_backend = InMemoryStorage()
        llm_client = MockLLM()
        system = OpenCLawCore(
            CoreConfig(llm_client=llm_client, storage_backend=storage_backend)
        )
        self.components.update(
            core=system,
            storage=storage_backend,
            llm=llm_client,
        )
        return system

    def simulate_conversation(self, core, messages: list):
        """Send each message to ``core`` in order and collect the responses."""
        return [core.process_message(text) for text in messages]
Debugging Tools
The debugging toolkit includes message inspection, state visualization, and performance tracing.
class DebugSession:
    """Debugging helper bound to one core instance.

    Collects breakpoint definitions, gathers what the core knows about a
    message, and runs callables under the standard-library line tracer.
    """

    def __init__(self, openclaw_core):
        self.core = openclaw_core
        # Each entry is a dict with 'type' and 'condition' keys.
        self.breakpoints = []

    def inspect_message(self, message_id: str) -> Dict[str, Any]:
        """Gather stored content, processing trace and context for a message.

        Args:
            message_id: Identifier handed to the core and its storage.

        Returns:
            A dict with keys ``'id'``, ``'content'``, ``'processing_steps'``
            and ``'context_snapshot'``.
        """
        return {
            'id': message_id,
            'content': self.core.storage.get_message(message_id),
            'processing_steps': self.core.get_processing_trace(message_id),
            'context_snapshot': self.core.get_context_snapshot(message_id),
        }

    def set_breakpoint(self, event_type: str, condition: callable):
        """Register a breakpoint definition.

        Args:
            event_type: Label stored under ``'type'``.
            condition: Callable stored under ``'condition'``.
        """
        self.breakpoints.append({
            'type': event_type,
            'condition': condition,
        })

    def trace_execution(self, operation: callable):
        """Run ``operation()`` under the stdlib tracer and return its result.

        The previous implementation used ``traceback.Trace``, which does not
        exist, so every call raised ``AttributeError``. The standard-library
        tracer lives in the ``trace`` module and is not a context manager;
        ``Trace.runfunc`` runs a callable with tracing on and returns its value.

        NOTE(review): registered breakpoints are not consulted here, because
        ``trace.Trace`` has no breakpoint hook -- confirm whether they should
        drive a custom ``sys.settrace`` callback instead.

        Args:
            operation: Zero-argument callable to execute.

        Returns:
            Whatever ``operation()`` returns.
        """
        import sys
        import trace

        tracer = trace.Trace(
            count=False,
            trace=True,
            # Keep interpreter/stdlib frames out of the printed trace.
            ignoredirs=[sys.prefix, sys.exec_prefix],
        )
        return tracer.runfunc(operation)
class PerformanceTracer:
    """Collect timed spans and summarise where the time went."""

    def __init__(self):
        # Objects exposing ``name`` and ``duration``; nothing in this class
        # appends to the list (presumably TracedSpan does -- confirm).
        self.spans = []

    def start_span(self, name: str):
        """Open a span called ``name`` that starts now.

        Returns:
            A ``TracedSpan`` built from the name, the current
            ``time.perf_counter()`` reading, and this tracer.
        """
        import time
        return TracedSpan(name, time.perf_counter(), self)

    def generate_report(self) -> str:
        """Render total time plus one line per span, slowest first.

        Each span line shows its duration and its share of the total. When
        the total is zero the share is reported as 0.0% rather than dividing
        by zero, which previously raised ``ZeroDivisionError``.

        Returns:
            The report text; every line ends with a newline.
        """
        total = sum(s.duration for s in self.spans)
        lines = [f"Total execution: {total:.2f}s\n"]
        for span in sorted(self.spans, key=lambda s: s.duration, reverse=True):
            # Guard the percentage: all-zero durations give total == 0.
            share = span.duration / total * 100 if total else 0.0
            lines.append(f" {span.name}: {span.duration:.2f}s ({share:.1f}%)\n")
        return "".join(lines)
EXERCISE
Design a regression test suite for the feedback learning system. Create test cases that verify feedback is correctly recorded, retrieved, and applied. Include edge cases like overlapping feedback and conflicting corrections.