23. Testing and Debugging

Chapter 23 of 24 · 20 min

Reliable software requires thorough testing. OpenCLaw includes utilities for unit and integration testing, as well as debugging tools for diagnosing production issues.

Testing Framework

The testing framework provides fixtures for common OpenCLaw components and utilities for simulating user interactions.

# testing.py
import pytest
from unittest.mock import Mock
from typing import Dict, Any

@pytest.fixture
def mock_llm():
    """Provide a stand-in LLM client for tests.

    The returned Mock's ``generate()`` always yields a Mock response whose
    ``content`` is "Test response" and whose ``confidence`` is 0.9.
    """
    canned_response = Mock(content="Test response", confidence=0.9)
    llm = Mock()
    llm.generate.return_value = canned_response
    return llm

@pytest.fixture
def mock_storage():
    """Provide a stand-in storage backend for tests.

    ``insert()`` always returns the id "msg_123" and ``query()`` always
    returns an empty list.
    """
    storage = Mock()
    storage.query.return_value = []
    storage.insert.return_value = "msg_123"
    return storage

@pytest.fixture
def openclaw_instance(mock_llm, mock_storage):
    """Build an OpenCLawCore wired to the mocked LLM and storage fixtures.

    Args:
        mock_llm: the mocked LLM client, passed as ``llm_client``.
        mock_storage: the mocked storage backend, passed as ``storage_backend``.

    Returns:
        An ``OpenCLawCore`` constructed from a ``CoreConfig`` holding both mocks.
    """
    # CoreConfig was previously used without being imported (NameError).
    # NOTE(review): assumes CoreConfig is exported by openclaw.core alongside
    # OpenCLawCore -- confirm the actual module path.
    from openclaw.core import CoreConfig, OpenCLawCore
    config = CoreConfig(
        llm_client=mock_llm,
        storage_backend=mock_storage,
    )
    return OpenCLawCore(config)

class TestConversation:
    """Conversation-level checks against the ``openclaw_instance`` fixture."""

    def test_simple_message(self, openclaw_instance):
        """A single message produces a non-None, non-empty response content."""
        reply = openclaw_instance.process_message("Hello")
        content = reply.content
        assert content is not None
        assert len(content) > 0

    def test_context_preservation(self, openclaw_instance):
        """A fact stated in one message is reflected in the next response."""
        # NOTE(review): the mock_llm fixture in this file always answers with
        # "Test response"; unless the core alters the reply, this assertion
        # cannot pass -- confirm how the response content is produced.
        openclaw_instance.process_message("My favorite color is blue")
        followup = openclaw_instance.process_message("What color did I mention?")
        lowered = followup.content.lower()
        assert "blue" in lowered

class TestFeedback:
    """Checks that feedback on an interaction reaches the storage backend."""

    def test_feedback_recording(self, openclaw_instance, mock_storage):
        """Recording a correction triggers a storage insert."""
        interaction_id = openclaw_instance.process_message("Test").message_id

        # Forget any insert made while processing the message, so the
        # assertion below can only be satisfied by record_feedback itself.
        # reset_mock() keeps the configured return_value by default.
        # NOTE(review): presumably process_message also inserts -- confirm.
        mock_storage.insert.reset_mock()

        openclaw_instance.record_feedback(
            interaction_id=interaction_id,
            feedback_type="correction",
            correction="Better answer",
        )

        mock_storage.insert.assert_called()

Integration Testing

Integration tests verify component interactions. The test harness simulates realistic usage patterns.

class IntegrationTestHarness:
    """Assembles a complete OpenCLaw system from test doubles and drives it."""

    def __init__(self):
        # Filled by setup_full_system under the keys 'core', 'storage', 'llm'.
        self.components = {}
        # Not read or written by any method of this class.
        self.test_data = {}

    # The return annotation is a string (forward reference) so that defining
    # this class does not raise NameError when OpenCLawCore is not imported
    # at module level.
    def setup_full_system(self) -> "OpenCLawCore":
        """Create an OpenCLawCore backed by InMemoryStorage and MockLLM.

        The created core, storage and llm objects are also stored in
        ``self.components``.

        Returns:
            The newly constructed core.
        """
        storage = InMemoryStorage()
        llm = MockLLM()

        core = OpenCLawCore(CoreConfig(
            llm_client=llm,
            storage_backend=storage,
        ))

        self.components.update(core=core, storage=storage, llm=llm)
        return core

    def simulate_conversation(self, core, messages: list):
        """Send each message to ``core.process_message`` in order.

        Args:
            core: object exposing ``process_message(msg)``.
            messages: the messages to send, in conversation order.

        Returns:
            A list with one response per message, in the same order.
        """
        return [core.process_message(msg) for msg in messages]

Debugging Tools

The debugging toolkit includes message inspection, state visualization, and performance tracing.

class DebugSession:
    """Inspection and tracing helpers bound to one OpenCLaw core."""

    def __init__(self, openclaw_core):
        self.core = openclaw_core
        # Dicts of the form {'type': event_type, 'condition': callable}.
        self.breakpoints = []
        # One record per breakpoint that fired during trace_execution.
        self.hits = []

    def inspect_message(self, message_id: str) -> Dict[str, Any]:
        """Collect what the core knows about one message.

        Returns:
            A dict with the message id, the stored message
            (``core.storage.get_message``), its processing trace
            (``core.get_processing_trace``) and its context snapshot
            (``core.get_context_snapshot``).
        """
        return {
            'id': message_id,
            'content': self.core.storage.get_message(message_id),
            'processing_steps': self.core.get_processing_trace(message_id),
            'context_snapshot': self.core.get_context_snapshot(message_id)
        }

    def set_breakpoint(self, event_type: str, condition: callable):
        """Register a breakpoint.

        Args:
            event_type: trace event name to match in trace_execution
                ('call', 'line', 'return' or 'exception').
            condition: called with the current frame; a truthy result
                marks the breakpoint as hit.
        """
        self.breakpoints.append({
            'type': event_type,
            'condition': condition
        })

    def trace_execution(self, operation: callable):
        """Run ``operation()`` under a trace hook and return its result.

        The standard ``traceback`` module has no ``Trace`` class, so the
        previous implementation always failed with AttributeError. Tracing is
        now done with ``sys.settrace``: for every trace event, each breakpoint
        whose type equals the event name and whose condition accepts the frame
        is recorded in ``self.hits``.

        Returns:
            Whatever ``operation()`` returns; its exceptions propagate.
        """
        import sys

        # Nothing to evaluate: skip the tracing overhead entirely.
        if not self.breakpoints:
            return operation()

        def _tracer(frame, event, arg):
            for breakpoint_spec in self.breakpoints:
                if breakpoint_spec['type'] == event and breakpoint_spec['condition'](frame):
                    self.hits.append({
                        'type': event,
                        'function': frame.f_code.co_name,
                        'line': frame.f_lineno,
                    })
            # Returning the tracer keeps line/return/exception events coming
            # for the frame that was just entered.
            return _tracer

        previous = sys.gettrace()
        sys.settrace(_tracer)
        try:
            return operation()
        finally:
            # Restore whatever hook (debugger, coverage) was active before.
            sys.settrace(previous)

class PerformanceTracer:
    """Holds timed spans and renders a duration report for them."""

    def __init__(self):
        # Span objects; generate_report reads ``name`` and ``duration`` from each.
        self.spans = []

    def start_span(self, name: str):
        """Create a TracedSpan for *name*, stamped with ``time.perf_counter()``.

        NOTE(review): TracedSpan is defined outside this block; presumably it
        adds itself to ``self.spans`` when it finishes -- confirm.
        """
        import time
        return TracedSpan(name, time.perf_counter(), self)

    def generate_report(self) -> str:
        """Render total time plus one line per span, longest span first.

        Each span line shows its duration and its share of the total. When
        the total is zero the share is reported as 0.0% instead of raising
        ZeroDivisionError.
        """
        total = sum(s.duration for s in self.spans)
        lines = [f"Total execution: {total:.2f}s\n"]
        for span in sorted(self.spans, key=lambda s: s.duration, reverse=True):
            share = span.duration / total * 100 if total else 0.0
            lines.append(f"  {span.name}: {span.duration:.2f}s ({share:.1f}%)\n")
        return "".join(lines)
EXERCISE

Design a regression test suite for the feedback learning system. Create test cases that verify feedback is correctly recorded, retrieved, and applied. Include edge cases like overlapping feedback and conflicting corrections.