14. User Preference Learning
Beyond task-level feedback, users develop consistent preferences across interaction styles, output formats, and response characteristics. OpenCLaw maintains user profiles that encode these preferences and apply them proactively.
Preference Taxonomy
User preferences divide into five categories: communication style (verbose vs. concise), format preferences (lists vs. paragraphs vs. code blocks), temporal patterns (active hours, response expectations), topic interests (domains of frequent queries), and trust thresholds (what the system is permitted to do autonomously).
# preference_learning.py
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Dict, List, Optional
def _naive_utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    This yields the same kind of value ``datetime.utcnow()`` did (UTC wall
    clock, ``tzinfo`` unset) without calling that deprecated API, so
    timestamps stay comparable with previously created naive values.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass
class PreferenceProfile:
    """Learned preferences for a single user.

    Every field except ``user_id`` has a default, so a profile can be
    created from the user id alone and filled in as observations arrive.
    """

    user_id: str
    # Score per communication style name (e.g. 'concise'); see get_style_score.
    communication_style: Dict[str, float] = field(default_factory=dict)
    # Weight per output format name; see get_format_weight.
    format_preferences: Dict[str, float] = field(default_factory=dict)
    # NOTE(review): presumably hours of the day the user is active -- confirm
    # the range and timezone convention with the code that fills this in.
    active_hours: List[int] = field(default_factory=list)
    # Score per topic name.
    topic_interests: Dict[str, float] = field(default_factory=dict)
    # NOTE(review): looks like the "trust threshold" for autonomous actions;
    # the valid range is not enforced here -- confirm against callers.
    autonomy_level: float = 0.5
    # Naive UTC timestamp, set at creation time.
    last_updated: datetime = field(default_factory=_naive_utc_now)

    def get_style_score(self, style: str) -> float:
        """Return the stored score for ``style``, or 0.5 when none is recorded."""
        return self.communication_style.get(style, 0.5)

    def get_format_weight(self, format_type: str) -> float:
        """Return the stored weight for ``format_type``, or 1.0 when none is recorded."""
        return self.format_preferences.get(format_type, 1.0)
class PreferenceLearner:
    """Buffer per-user interactions and fold them into stored profiles in batches.

    Interactions accumulate in ``observation_buffer`` (keyed by user id).
    Once a user's buffer reaches ``batch_size`` entries, the batch is applied
    to that user's profile and the buffer is reset.

    NOTE(review): ``process_batch`` calls ``update_communication_style``,
    ``update_format_preferences``, ``update_temporal_patterns`` and
    ``update_topic_interests``, none of which are defined in this excerpt;
    they must be supplied before a batch can actually be processed.
    """

    # Used when the caller does not choose a batch size. The value is a
    # reviewer-chosen default; the original code read ``self.batch_size``
    # without ever assigning it.
    DEFAULT_BATCH_SIZE = 10

    def __init__(self, profile_store, batch_size: int = DEFAULT_BATCH_SIZE):
        """Create a learner backed by ``profile_store``.

        Args:
            profile_store: Object providing ``get_or_create(user_id)`` and
                ``save(profile)``.
            batch_size: Number of buffered interactions per user that
                triggers processing. Must be at least 1.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        self.profiles = profile_store
        self.batch_size = batch_size
        self.observation_buffer = {}

    def observe_interaction(self, user_id: str, interaction: "Interaction") -> None:
        """Record one interaction and process the batch once the buffer is full."""
        pending = self.observation_buffer.setdefault(user_id, [])
        pending.append(interaction)
        if len(pending) >= self.batch_size:
            self.process_batch(user_id)

    def process_batch(self, user_id: str) -> None:
        """Apply the buffered interactions for ``user_id`` to the stored profile.

        Does nothing when the user has no buffered interactions, so no
        profile is created or re-saved without new evidence.
        """
        observations = self.observation_buffer.get(user_id)
        if not observations:
            return

        profile = self.profiles.get_or_create(user_id)
        self.update_communication_style(profile, observations)
        self.update_format_preferences(profile, observations)
        self.update_temporal_patterns(profile, observations)
        self.update_topic_interests(profile, observations)

        # Naive UTC, the same shape datetime.utcnow() produced, without the
        # deprecated call.
        profile.last_updated = datetime.now(timezone.utc).replace(tzinfo=None)
        self.profiles.save(profile)

        # Reset only after a successful save so a failed save keeps the
        # observations available for a retry.
        self.observation_buffer[user_id] = []
Bayesian Preference Estimation
Preference strength estimation uses Bayesian inference to handle uncertainty. Rather than claiming high confidence from few observations, the system maintains probability distributions over preference parameters and updates them incrementally.
class BayesianPreferenceEstimator:
    """Estimate a preference strength in [0, 1] with a Beta posterior.

    The prior is Beta(``prior_alpha``, ``prior_beta``). Each observation is
    a value in [0, 1]: its value counts as evidence for the preference and
    the remainder (``1 - value``) as evidence against it.
    """

    def __init__(self, prior_alpha: float = 1.0, prior_beta: float = 1.0):
        """Store the Beta prior parameters (the defaults give a uniform prior)."""
        self.prior_alpha = prior_alpha
        self.prior_beta = prior_beta

    def update(self, observations: List[float]) -> Dict[str, float]:
        """Return posterior statistics after seeing ``observations``.

        The stored prior is not modified; every call starts from it.

        Args:
            observations: Values in [0, 1]; 1.0 fully supports the
                preference and 0.0 fully contradicts it.

        Returns:
            A dict with the posterior ``'mean'``, the posterior
            ``'variance'``, and ``'confidence'`` computed as
            ``1.0 - min(variance * 10, 0.95)``.

        Raises:
            ValueError: If any observation lies outside [0, 1].
        """
        values = list(observations)
        if any(not 0.0 <= value <= 1.0 for value in values):
            raise ValueError("observations must lie in the range [0, 1]")

        evidence_for = sum(values)
        # Evidence against is the failure mass, not the observation count.
        # Adding len(values) to beta would pull an all-positive history
        # towards a mean of 0.5 instead of 1.
        evidence_against = len(values) - evidence_for

        alpha_posterior = self.prior_alpha + evidence_for
        beta_posterior = self.prior_beta + evidence_against
        total = alpha_posterior + beta_posterior

        mean_estimate = alpha_posterior / total
        variance_estimate = (alpha_posterior * beta_posterior) / (
            (total ** 2) * (total + 1)
        )
        return {
            'mean': mean_estimate,
            'variance': variance_estimate,
            'confidence': 1.0 - min(variance_estimate * 10, 0.95),
        }
Preference Application
Applying learned preferences requires balancing them against task requirements. A user who prefers concise responses should still receive thorough explanations for complex topics. The preference mixer weighs multiple factors.
class PreferenceMixer:
    """Blend a user's learned preferences with the demands of the task.

    NOTE(review): ``mix_response_config`` relies on ``self.interpolate`` and
    ``self.select_format``, which are not defined in this excerpt, and on a
    ``ResponseConfig`` type declared elsewhere.
    """

    def __init__(
        self,
        profile: "PreferenceProfile",
        *,
        task_complexity_weight: float = 0.6,
        preference_weight: float = 0.4,
        reasoning_threshold: float = 0.7,
    ):
        """Create a mixer for ``profile``.

        Args:
            profile: The user's preference profile.
            task_complexity_weight: Scale applied to the task-driven share
                of the blend.
            preference_weight: Scale applied to the preference-driven share
                of the blend.
            reasoning_threshold: Task complexity above which reasoning is
                included in the response.
        """
        self.profile = profile
        self.task_complexity_weight = task_complexity_weight
        self.preference_weight = preference_weight
        self.reasoning_threshold = reasoning_threshold

    def mix_response_config(self, base_config: "ResponseConfig",
                            task_complexity: float) -> "ResponseConfig":
        """Build the response configuration for one task.

        Args:
            base_config: Configuration to start from; its ``verbosity`` and
                ``suggested_format`` are read.
            task_complexity: Complexity of the task.
                NOTE(review): presumably in [0, 1]; nothing here enforces it.

        Returns:
            A new ``ResponseConfig`` carrying the blended verbosity, the
            selected format hint, and whether to include reasoning.
        """
        # The preference share shrinks as the task gets more complex while
        # the task share grows.
        pref_weight = self.preference_weight * (1.0 - task_complexity)
        task_weight = self.task_complexity_weight * task_complexity

        # NOTE(review): the 'concise' style score is used directly as the
        # preference-side verbosity value; confirm against interpolate()'s
        # argument semantics that a high 'concise' score does not end up
        # raising verbosity.
        verbosity = self.interpolate(
            self.profile.get_style_score('concise'),
            base_config.verbosity,
            pref_weight + task_weight,
        )
        format_hint = self.select_format(
            base_config.suggested_format,
            task_complexity,
        )
        return ResponseConfig(
            verbosity=verbosity,
            format_hint=format_hint,
            include_reasoning=task_complexity > self.reasoning_threshold,
        )
Implement a preference override mechanism that allows users to temporarily request different communication styles without permanently altering their profile. Design the temporal scope of these overrides.