KEY INSIGHT
Local AI products fail differently than cloud services, so their error states should be designed differently too. When a cloud model errors, the cause is usually transient infrastructure. When a local model fails, the cause is usually hardware constraints, model corruption, or resource exhaustion. Your error UX must guide users toward real solutions.
Common local model failures: OOM during inference, model file corruption, CUDA errors, disk space exhaustion, incompatible hardware. Each requires different user action. Generic "Something went wrong" errors abandon users at their moment of need.
```python
# error_handler.py
import errno
import traceback
from dataclasses import dataclass
from typing import List, Optional

import psutil
import torch
@dataclass
class LocalModelError:
    """Structured, user-presentable description of a local model failure.

    Instances are built by the error handlers in this module; the fields
    are positional in the order declared below.
    """

    # Short machine-readable identifier, e.g. "OUT_OF_MEMORY" or "UNKNOWN".
    error_code: str
    # Plain-language sentence intended to be shown to the end user.
    user_message: str
    # Raw diagnostic text (exception message or traceback) for logs/support.
    technical_details: str
    # Ordered diagnostics and suggestions the user can act on.
    resolution_steps: List[str]
class LocalModelErrorHandler:
    """Turn exceptions raised by local inference into ``LocalModelError`` reports.

    Each supported failure has an error code (the keys of ``error_registry``)
    and a handler method that builds a report with a user-facing message,
    technical details, and concrete resolution steps. Anything that cannot
    be classified is reported with the ``"UNKNOWN"`` code.
    """

    def __init__(self):
        # Error code -> bound method that builds the report for that code.
        self.error_registry = {
            'OUT_OF_MEMORY': self._handle_oom,
            'MODEL_CORRUPT': self._handle_corruption,
            'CUDA_UNAVAILABLE': self._handle_cuda,
            'DISK_FULL': self._handle_disk,
            'INFERENCE_TIMEOUT': self._handle_timeout,
            'CONTEXT_OVERFLOW': self._handle_context,
        }

    def diagnose_and_handle(self, exception: Exception, context: dict) -> LocalModelError:
        """Diagnose error and return actionable response.

        Args:
            exception: The exception raised while loading or running the model.
            context: Extra information about the failed operation. Optional
                keys read here and by the handlers: ``'error_code'`` (forces
                a specific registry entry when the caller already knows the
                failure kind) and ``'model_size_gb'``. ``None`` is treated
                as an empty dict.

        Returns:
            A ``LocalModelError`` built by the matching handler, or by the
            generic handler when the failure cannot be classified.
        """
        context = context if context is not None else {}

        # A caller that already knows what went wrong (e.g. it detected a
        # checksum mismatch or a prompt over the token limit) can say so.
        error_code = context.get('error_code')
        if error_code not in self.error_registry:
            error_code = self._classify(exception)

        error_handler = self.error_registry.get(error_code, self._handle_generic)
        return error_handler(exception, context)

    def _classify(self, exception: Exception) -> str:
        """Map an exception to an error code, or ``'UNKNOWN'`` if none fits."""
        name = type(exception).__name__
        # Exception classes named after a registry code map directly to it.
        if name in self.error_registry:
            return name

        message = str(exception).lower()

        # Checked before the CUDA test: "CUDA out of memory" is an OOM, not
        # a missing-GPU problem. The class-name test matches
        # torch's OutOfMemoryError without importing it here.
        if (
            isinstance(exception, MemoryError)
            or name == 'OutOfMemoryError'
            or 'out of memory' in message
        ):
            return 'OUT_OF_MEMORY'
        if isinstance(exception, OSError) and exception.errno == errno.ENOSPC:
            return 'DISK_FULL'
        if isinstance(exception, TimeoutError):
            return 'INFERENCE_TIMEOUT'
        if 'cuda' in message or 'nvidia driver' in message:
            return 'CUDA_UNAVAILABLE'
        return 'UNKNOWN'

    def _handle_oom(self, exc, context) -> LocalModelError:
        """Report an out-of-memory failure with current RAM/VRAM figures."""
        available = psutil.virtual_memory().available / (1024**3)
        vram_info = "N/A"
        if torch.cuda.is_available():
            # Shown as "allocated / reserved" by the CUDA caching allocator.
            vram_info = f"{torch.cuda.memory_allocated()/1024**3:.1f}GB / {torch.cuda.memory_reserved()/1024**3:.1f}GB"
        return LocalModelError(
            error_code="OUT_OF_MEMORY",
            user_message="Generation stopped—your system ran out of memory.",
            technical_details=str(exc),
            resolution_steps=[
                f"Available RAM: {available:.1f}GB",
                f"GPU memory: {vram_info}",
                "Try a smaller model (7B instead of 13B)",
                "Close other applications to free memory",
                "Reduce batch size or context length",
            ],
        )

    def _handle_corruption(self, exc, context) -> LocalModelError:
        """Report a model file that is damaged or incomplete."""
        return LocalModelError(
            error_code="MODEL_CORRUPT",
            user_message="The model file appears to be damaged or incomplete.",
            technical_details=str(exc),
            resolution_steps=[
                "Re-download the model",
                "Verify the file checksum against the published value",
                "Check that the download was not interrupted",
                "Check the disk for errors if corruption recurs",
            ],
        )

    def _handle_cuda(self, exc, context) -> LocalModelError:
        """Report that GPU acceleration could not be used."""
        return LocalModelError(
            error_code="CUDA_UNAVAILABLE",
            user_message="GPU acceleration not available on this system.",
            technical_details=str(exc),
            resolution_steps=[
                "Check CUDA installation: nvidia-smi",
                "Verify PyTorch CUDA support: python -c 'import torch; print(torch.cuda.is_available())'",
                "Ensure NVIDIA driver is installed",
                "Consider CPU-only mode for this device",
            ],
        )

    def _handle_disk(self, exc, context) -> LocalModelError:
        """Report insufficient disk space, including the current free space."""
        # NOTE(review): this measures the root filesystem only; a model kept
        # on a different volume would be reported with the wrong free space.
        disk = psutil.disk_usage('/')
        return LocalModelError(
            error_code="DISK_FULL",
            user_message="Not enough disk space to load the model.",
            technical_details=str(exc),
            resolution_steps=[
                f"Free disk: {disk.free / (1024**3):.1f}GB",
                f"Model size needed: ~{context.get('model_size_gb', 'unknown')}GB",
                "Free up disk space or move model to larger drive",
                "Consider downloading smaller model variant",
            ],
        )

    def _handle_timeout(self, exc, context) -> LocalModelError:
        """Report a generation that exceeded its time limit."""
        return LocalModelError(
            error_code="INFERENCE_TIMEOUT",
            user_message="Generation took too long and was stopped.",
            technical_details=str(exc),
            resolution_steps=[
                "Reduce the maximum output length",
                "Try a smaller or quantized model",
                "Close other applications competing for CPU or GPU",
                "Increase the timeout if your hardware needs more time",
            ],
        )

    def _handle_context(self, exc, context) -> LocalModelError:
        """Report input that does not fit in the model's context window."""
        return LocalModelError(
            error_code="CONTEXT_OVERFLOW",
            user_message="Your input is too long for this model's context window.",
            technical_details=str(exc),
            resolution_steps=[
                "Shorten the prompt or conversation history",
                "Start a new conversation",
                "Use a model with a larger context window",
            ],
        )

    def _handle_generic(self, exc, context) -> LocalModelError:
        """Report an unclassified failure with the exception's traceback."""
        # Format the exception that was passed in rather than relying on
        # traceback.format_exc(), which only works inside an active
        # ``except`` block and otherwise yields "NoneType: None".
        details = ''.join(
            traceback.format_exception(type(exc), exc, exc.__traceback__)
        )
        return LocalModelError(
            error_code="UNKNOWN",
            user_message="An unexpected error occurred.",
            technical_details=details,
            resolution_steps=[
                "Check system requirements",
                "Restart the application",
                "Re-download model if corruption is suspected",
            ],
        )
```
Error state copy matters. "Out of memory" sounds like a user problem. "Generation stopped—your system ran out of memory" acknowledges the situation without blame. Pair technical diagnostics with actionable resolution steps.