KEY INSIGHT
Analytics for local AI products face a fundamental tension: meaningful product insights often require data whose collection would violate user privacy guarantees. Your analytics architecture must find the signal without compromising the privacy that local inference promises.
The local model constraint: you don't receive prompts. But you receive aggregated signals—generation lengths, model usage frequencies, feature adoption, error patterns, performance metrics. These tell you what's happening without revealing what was generated.
```python
# local_product_analytics.py
from dataclasses import dataclass, field
from typing import Dict, List, Optional
from datetime import datetime, timedelta
import hashlib
import json
@dataclass
class AnonymizedMetric:
    """A single telemetry data point that carries no prompt or output text.

    Attributes are documented inline below. ``metadata`` defaults to a fresh
    empty dict per instance, so callers that have nothing extra to attach can
    omit it.
    """

    # Category of the event, e.g. "generation" or "error".
    metric_type: str
    # Time bucket used for aggregation (callers in this file use "%Y-%m-%d-%H").
    bucket: str
    # Numeric payload of the event (e.g. output token count, or 1 per error).
    value: int
    # Only non-identifying metadata may be placed here.
    metadata: Dict = field(default_factory=dict)
@dataclass
class CohortMetrics:
    """Running aggregate of usage for one hardware cohort.

    Only ``cohort_id`` is required; every accumulator starts at an empty or
    zero value so a new cohort can be created with ``CohortMetrics(cohort_id)``.
    Mutable fields use ``default_factory`` so instances never share a dict.
    """

    # Hashed hardware profile, not a user ID.
    cohort_id: str
    # model_id -> number of generations recorded for that model.
    model_usage_counts: Dict[str, int] = field(default_factory=dict)
    # Mean output-token count across all recorded generations.
    avg_generation_tokens: float = 0.0
    # error_type -> number of occurrences.
    error_breakdown: Dict[str, int] = field(default_factory=dict)
    session_count: int = 0
    retention_days: int = 0
class LocalProductAnalytics:
    """Collect aggregate usage signals for a locally-run model product.

    The class never receives prompt or completion text. It records token
    counts, latency, model identifiers, error types and a salted hash of the
    hardware profile, buffers them in memory, and hands them back in hourly
    groups from :meth:`flush_metrics`.
    """

    # The only error-context keys that are allowed to leave the device.
    _ALLOWED_ERROR_CONTEXT_KEYS = ('memory_available_gb', 'model_loaded', 'gpu_utilization')

    def __init__(self, privacy_salt: str):
        """Create an empty collector.

        Args:
            privacy_salt: Secret mixed into every cohort hash so the raw
                hardware fingerprint cannot be recovered by a plain lookup.
        """
        self.privacy_salt = privacy_salt
        self.pending_metrics: List[AnonymizedMetric] = []
        self.cohort_buffer: Dict[str, CohortMetrics] = {}

    def hash_for_cohort(self, hardware_fingerprint: str) -> str:
        """
        Create cohort ID from hardware fingerprint, not user identity.

        Returns the first 16 hex characters of the SHA-256 digest of
        ``"<privacy_salt>:<hardware_fingerprint>"``.
        """
        raw = f"{self.privacy_salt}:{hardware_fingerprint}"
        return hashlib.sha256(raw.encode()).hexdigest()[:16]

    def _cohort_id_for_profile(self, hardware_profile: Dict) -> str:
        """Hash a hardware-profile dict independently of its key order."""
        try:
            # str(dict) follows insertion order, so two identical profiles
            # built in a different order would land in different cohorts.
            # Canonical JSON with sorted keys removes that dependence.
            fingerprint = json.dumps(hardware_profile, sort_keys=True, default=str)
        except (TypeError, ValueError):
            # Unsortable mixed-type keys or circular structures: fall back to
            # the plain string form rather than dropping the metric.
            fingerprint = str(hardware_profile)
        return self.hash_for_cohort(fingerprint)

    def _cohort_for(self, cohort_id: str) -> "CohortMetrics":
        """Return the buffered CohortMetrics for *cohort_id*, creating it if absent."""
        cohort = self.cohort_buffer.get(cohort_id)
        if cohort is None:
            cohort = CohortMetrics(
                cohort_id=cohort_id,
                model_usage_counts={},
                avg_generation_tokens=0,
                error_breakdown={},
                session_count=0,
                retention_days=0,
            )
            self.cohort_buffer[cohort_id] = cohort
        return cohort

    def record_generation(self, model_id: str, input_tokens: int,
                          output_tokens: int, latency_ms: int,
                          hardware_profile: Dict) -> None:
        """Record generation metrics without capturing prompt content.

        Appends one "generation" metric to ``pending_metrics`` and updates the
        cohort's per-model usage count and running mean of output tokens.

        Args:
            model_id: Identifier of the model that produced the generation.
            input_tokens: Number of prompt tokens (count only, no content).
            output_tokens: Number of generated tokens; stored as the metric value.
            latency_ms: Wall-clock generation time in milliseconds.
            hardware_profile: Description of the device; only its salted hash
                is stored.
        """
        cohort_id = self._cohort_id_for_profile(hardware_profile)

        # Guard against division by zero for instantaneous/unknown latency.
        if latency_ms > 0:
            throughput = output_tokens / (latency_ms / 1000)
        else:
            throughput = 0

        self.pending_metrics.append(
            AnonymizedMetric(
                metric_type="generation",
                # datetime.now() is naive local time, so buckets are local hours.
                bucket=datetime.now().strftime("%Y-%m-%d-%H"),
                value=output_tokens,
                metadata={
                    "model": model_id,
                    "input_tokens": input_tokens,
                    "latency_ms": latency_ms,
                    "throughput_tokens_per_sec": throughput,
                    "cohort": cohort_id,
                },
            )
        )

        cohort = self._cohort_for(cohort_id)
        cohort.model_usage_counts[model_id] = cohort.model_usage_counts.get(model_id, 0) + 1

        # Running average: rebuild the previous total from the old mean and
        # the number of generations seen before this one, then fold in the
        # new sample.
        generation_count = sum(cohort.model_usage_counts.values())
        total_so_far = cohort.avg_generation_tokens * (generation_count - 1)
        cohort.avg_generation_tokens = (total_so_far + output_tokens) / generation_count

    def record_error(self, error_type: str, error_context: Dict,
                     hardware_profile: Dict) -> None:
        """Record error patterns for product improvement.

        Appends one "error" metric to ``pending_metrics`` and increments the
        cohort's ``error_breakdown`` count for *error_type*.

        Args:
            error_type: Short category name of the failure.
            error_context: Arbitrary diagnostic dict; every key outside the
                allow-list is discarded before the metric is stored.
            hardware_profile: Description of the device; only its salted hash
                is stored.
        """
        cohort_id = self._cohort_id_for_profile(hardware_profile)

        # Only record error type and generic context
        sanitized_context = {
            k: v for k, v in error_context.items()
            if k in self._ALLOWED_ERROR_CONTEXT_KEYS
        }

        self.pending_metrics.append(
            AnonymizedMetric(
                metric_type="error",
                bucket=datetime.now().strftime("%Y-%m-%d-%H"),
                value=1,
                metadata={
                    "error_type": error_type,
                    "context_summary": sanitized_context,
                    "cohort": cohort_id,
                },
            )
        )

        # Keep the per-cohort summary in step with the raw error metrics so
        # the cohort summary can report which error types dominate.
        cohort = self._cohort_for(cohort_id)
        cohort.error_breakdown[error_type] = cohort.error_breakdown.get(error_type, 0) + 1

    def flush_metrics(self, destination: str = "analytics_endpoint") -> Dict:
        """Group pending metrics by type and hour, clear the queue, and return the payload.

        This method does not perform any network I/O itself; it only builds
        the payload labelled with *destination*.

        Returns:
            ``{"status": "no_pending"}`` when nothing is queued; otherwise a
            dict with ``destination``, ``aggregated_metrics`` (mapping
            ``"<metric_type>:<bucket>"`` to the list of metrics in that group)
            and ``cohort_summary`` (the live ``cohort_buffer``, which is not
            cleared by a flush).
        """
        if not self.pending_metrics:
            return {"status": "no_pending"}

        # Aggregate into hourly buckets
        hourly_aggregates: Dict[str, List[AnonymizedMetric]] = {}
        for metric in self.pending_metrics:
            key = f"{metric.metric_type}:{metric.bucket}"
            hourly_aggregates.setdefault(key, []).append(metric)

        self.pending_metrics = []  # Clear after reading
        return {
            "destination": destination,
            "aggregated_metrics": hourly_aggregates,
            "cohort_summary": self.cohort_buffer,
        }
```
Dashboard design for local products: show cohort behavior, not individual behavior. Which models do users prefer? Which error types dominate? How do memory constraints affect generation quality? These aggregate insights drive product decisions without compromising privacy.