21. User Feedback Loop

Chapter 21 of 24 · 20 min

KEY INSIGHT

Local AI product feedback loops must work offline and respect that users can't share their prompts. Your feedback mechanisms need alternative signals—output quality ratings, feature requests, model comparisons—that users CAN safely share. The challenge: cloud products get continuous feedback through user behavior. Local products need explicit feedback mechanisms—and users need to understand what they can safely share. Rating an output's quality (without sharing the content) is the primary feedback channel.

```python
# feedback_system.py
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Callable
from datetime import datetime
from enum import Enum
import json
import hashlib


class FeedbackType(Enum):
    OUTPUT_RATING = "rating"  # 1-5 quality rating
    MODEL_COMPARISON = "comparison"  # Which model performed better
    ERROR_REPORT = "error_report"  # Technical feedback (safe)
    FEATURE_REQUEST = "feature_request"  # What users want
    USAGE_PATTERN = "usage_pattern"  # How they're using the product


@dataclass
class AnonymousFeedback:
    feedback_id: str
    feedback_type: FeedbackType
    timestamp: datetime
    safe_to_share: bool = True

    # For output ratings
    rating: Optional[int] = None  # 1-5
    generation_context: Optional[str] = None  # "code generation", "summarization", etc.

    # For model comparisons
    preferred_model: Optional[str] = None
    alternative_model: Optional[str] = None

    # For error reports
    error_type: Optional[str] = None
    hardware_profile: Optional[Dict] = None  # Hardware spec only, no user data

    # For feature requests
    feature_description: Optional[str] = None

    # Metadata
    product_version: str = ""
    model_version: str = ""
    anonymized_hardware_bucket: str = ""


class LocalModelFeedbackSystem:
    def __init__(self, product_id: str, privacy_salt: str):
        self.product_id = product_id
        self.privacy_salt = privacy_salt
        self.feedback_queue: List[AnonymousFeedback] = []
        self.version_info = {}

    def create_hardware_bucket(self, hardware_info: dict) -> str:
        """Bucket hardware into general categories for categorization, not identification."""
        ram_bucket = f"{int(hardware_info.get('ram_gb', 0) / 4) * 4}-{int(hardware_info.get('ram_gb', 0) / 4) * 4 + 3}"
        gpu_bucket = hardware_info.get('has_gpu', False)
        return f"ram{ram_bucket}_gpu{gpu_bucket}"

    def record_output_rating(self, rating: int, task_category: str,
                             hardware_info: dict, model_id: str) -> str:
        """Record output quality rating. No prompt content captured."""
        if not 1 <= rating <= 5:
            raise ValueError("Rating must be 1-5")

        hardware_bucket = self.create_hardware_bucket(hardware_info)

        feedback = AnonymousFeedback(
            feedback_id=hashlib.sha256(f"{datetime.now().isoformat()}{self.privacy_salt}".encode()).hexdigest()[:16],
            feedback_type=FeedbackType.OUTPUT_RATING,
            timestamp=datetime.now(),
            safe_to_share=True,
            rating=rating,
            generation_context=task_category,
            model_version=model_id,
            anonymized_hardware_bucket=hardware_bucket,
            product_version=self.version_info.get('product', 'unknown')
        )

        self.feedback_queue.append(feedback)
        return feedback.feedback_id

    def record_model_comparison(self, preferred: str, alternative: str,
                                task_category: str, hardware_info: dict) -> str:
        """
        Record user preference between models for same task.
        This is gold for understanding model fit by use case.
        """
        feedback = AnonymousFeedback(
            feedback_id=hashlib.sha256(f"{datetime.now().isoformat()}{self.privacy_salt}".encode()).hexdigest()[:16],
            feedback_type=FeedbackType.MODEL_COMPARISON,
            timestamp=datetime.now(),
            safe_to_share=True,
            preferred_model=preferred,
            alternative_model=alternative,
            generation_context=task_category,
            anonymized_hardware_bucket=self.create_hardware_bucket(hardware_info),
            product_version=self.version_info.get('product', 'unknown')
        )

        self.feedback_queue.append(feedback)
        return feedback.feedback_id

    def record_error_report(self, error_type: str, error_message: str,
                            hardware_info: dict, model_id: str) -> str:
        """
        Record technical error. Only safe technical details included.
        Error messages are sanitized to remove any potentially sensitive content.
        """
        # Basic sanitization - remove any suspected sensitive patterns
        sanitized = error_message[:500] if len(error_message) > 500 else error_message

        feedback = AnonymousFeedback(
            feedback_id=hashlib.sha256(f"{datetime.now().isoformat()}{self.privacy_salt}".encode()).hexdigest()[:16],
            feedback_type=FeedbackType.ERROR_REPORT,
            timestamp=datetime.now(),
            safe_to_share=True,
            error_type=error_type,
            hardware_profile={"bucket": self.create_hardware_bucket(hardware_info)},
            model_version=model_id,
            product_version=self.version_info.get('product', 'unknown')
        )

        self.feedback_queue.append(feedback)
        return feedback.feedback_id

    def flush_feedback(self) -> List[dict]:
        """Flush queued feedback for transmission."""
        feedback_data = []
        for fb in self.feedback_queue:
            fb_dict = {
                "feedback_id": fb.feedback_id,
                "feedback_type": fb.feedback_type.value,
                "timestamp": fb.timestamp.isoformat(),
                "product_version": fb.product_version,
                "hardware_bucket": fb.anonymized_hardware_bucket,
                "safe_to_share": fb.safe_to_share
            }

            if fb.feedback_type == FeedbackType.OUTPUT_RATING:
                fb_dict.update({
                    "rating": fb.rating,
                    "task_category": fb.generation_context,
                    "model": fb.model_version
                })
            elif fb.feedback_type == FeedbackType.MODEL_COMPARISON:
                fb_dict.update({
                    "preferred": fb.preferred_model,
                    "alternative": fb.alternative_model,
                    "task_category": fb.generation_context
                })
            elif fb.feedback_type == FeedbackType.ERROR_REPORT:
                fb_dict.update({
                    "error_type": fb.error_type,
                    "model": fb.model_version
                })

            feedback_data.append(fb_dict)

        self.feedback_queue = []
        return feedback_data
```

Design feedback prompts that teach users what's shareable. "How would you rate this response?" (shareable) vs "What did you ask?" (not shareable). The interface should make this distinction clear.

EXERCISE

Build a feedback collection system that aggregates user ratings by task type and hardware bucket. Create analysis functions that identify which models perform best for specific use cases. Implement a feature request triage system that categorizes and prioritizes user requests.