13. Offline Operation
Chapter 13 of 18 · 20 min
Offline operation requires storing models and data locally without cloud dependencies. This capability matters for IoT sensors in remote locations, mobile applications in areas with poor connectivity, and privacy-sensitive applications where data cannot leave the device.
Model storage sizing and management:
import os
import shutil
def model_storage_report(models_dir):
    """Report on-disk storage usage for the model files in a directory.

    Only regular files directly inside ``models_dir`` are counted;
    subdirectories and broken symlinks are skipped instead of being
    reported as models.

    Args:
        models_dir: Path of the directory that holds the model files.

    Returns:
        A dict with two keys:
          - ``"total_mb"``: combined size of all reported files, in MiB.
          - ``"models"``: one dict per file (``"name"``, ``"size_mb"``,
            ``"type"``), sorted from largest to smallest. ``"type"`` is the
            file extension without the leading dot, or ``""`` when the file
            has no extension.

    Raises:
        OSError: If ``models_dir`` does not exist or cannot be read.
    """
    bytes_per_mb = 1024 * 1024
    reports = []
    with os.scandir(models_dir) as entries:
        for entry in entries:
            # Directories have no meaningful model size, and a dangling
            # symlink would make stat() raise, so skip both.
            if not entry.is_file():
                continue
            reports.append({
                "name": entry.name,
                "size_mb": entry.stat().st_size / bytes_per_mb,
                # splitext yields "" for extensionless names rather than
                # echoing the whole filename back as the "type".
                "type": os.path.splitext(entry.name)[1].lstrip("."),
            })
    total_size = sum(report["size_mb"] for report in reports)
    reports.sort(key=lambda report: report["size_mb"], reverse=True)
    return {
        "total_mb": total_size,
        "models": reports,
    }
# Example output structure
# {'total_mb': 116.5, 'models': [
# {'name': 'yolov8.onnx', 'size_mb': 96.2, 'type': 'onnx'},
# {'name': 'movenet.tflite', 'size_mb': 12.1, 'type': 'tflite'},
# {'name': 'squeezeNet.mlmodel', 'size_mb': 8.2, 'type': 'mlmodel'}
# ]}
SQLite for local data persistence:
import sqlite3
import numpy as np
def init_local_database(db_path):
    """Open the local SQLite database and make sure its schema exists.

    Creates the ``prediction_cache`` and ``inference_metrics`` tables when
    they are missing. It is safe to call on every start-up: existing tables
    and their rows are left untouched.

    Args:
        db_path: Filesystem path of the SQLite database (or ``":memory:"``).

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for
        closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or initialised.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        # IF NOT EXISTS keeps re-initialisation of a persistent database
        # from failing with "table ... already exists".
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS prediction_cache (
                input_hash TEXT PRIMARY KEY,
                input_data BLOB,
                output_data BLOB,
                model_version TEXT,
                timestamp INTEGER
            )
        ''')
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS inference_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_name TEXT,
                latency_ms REAL,
                memory_mb REAL,
                timestamp INTEGER
            )
        ''')
        conn.commit()
    except Exception:
        # Do not leak the connection when schema creation fails.
        conn.close()
        raise
    return conn
def cache_prediction(conn, input_data, output_data, model_version):
    """Cache an inference result for offline reuse.

    The row is keyed by the SHA-256 hex digest of ``input_data.tobytes()``.
    Caching the same input again replaces the earlier row
    (``INSERT OR REPLACE``).

    Args:
        conn: Open ``sqlite3.Connection`` whose database contains the
            ``prediction_cache`` table.
        input_data: Model input; must provide ``tobytes()``
            (e.g. a numpy array).
        output_data: Model output; must provide ``tobytes()``.
        model_version: Version label stored alongside the result.
    """
    # Function-scope imports keep this snippet self-contained; the original
    # used time.time() without importing time.
    import hashlib
    import time

    # Serialise the input once and reuse it for both the key and the blob.
    input_bytes = input_data.tobytes()
    input_hash = hashlib.sha256(input_bytes).hexdigest()
    cursor = conn.cursor()
    cursor.execute('''
        INSERT OR REPLACE INTO prediction_cache
        (input_hash, input_data, output_data, model_version, timestamp)
        VALUES (?, ?, ?, ?, ?)
    ''', (
        input_hash,
        input_bytes,
        output_data.tobytes(),
        model_version,
        int(time.time()),
    ))
    conn.commit()
Android offline configuration:
// Bundle model in assets, install on first run
/**
 * Returns the on-device copy of the bundled model [modelFile], copying it
 * from the APK assets into [Context.getFilesDir] when it is not there yet.
 *
 * The asset is first written to a temporary sibling file and only renamed to
 * its final name once the copy has completed. An interrupted copy therefore
 * never leaves a truncated file at the final path that a later run would
 * mistake for a complete model.
 *
 * @param context used to resolve the assets and the private files directory
 * @param modelFile asset name of the model, also used as the local file name
 * @return the local model file
 * @throws java.io.IOException if the asset cannot be read, written or moved into place
 */
fun copyModelFromAssetsIfNeeded(context: Context, modelFile: String): File {
    val modelPath = File(context.filesDir, modelFile)
    if (modelPath.exists()) {
        return modelPath
    }

    // Asset names may contain sub-folders; make sure the target folder exists.
    modelPath.parentFile?.mkdirs()

    val tempPath = File(context.filesDir, "$modelFile.tmp")
    try {
        context.assets.open(modelFile).use { input ->
            FileOutputStream(tempPath).use { output ->
                input.copyTo(output)
            }
        }
        if (!tempPath.renameTo(modelPath)) {
            throw java.io.IOException("Failed to move $tempPath to $modelPath")
        }
    } finally {
        // Removes a partial copy on failure; does nothing after a successful rename.
        tempPath.delete()
    }
    return modelPath
}
// Network-aware inference with local fallback
/**
 * Classifies [image], choosing between cloud and on-device inference based
 * on current connectivity.
 *
 * - Without a network connection the on-device classifier is used directly.
 * - With a connection the cloud classifier is called and handed a fallback
 *   lambda that runs the on-device classifier (when that lambda is invoked
 *   is decided by the cloud classifier).
 * - If either path throws [MLKitException], cached predictions are returned.
 *
 * @param image the bitmap to classify
 * @return the classification result from whichever path succeeded
 */
fun classifyWithOfflineFallback(image: Bitmap): ClassificationResult {
    val localClassifier = ImageClassifier.create()
    try {
        if (!isNetworkAvailable()) {
            // Pure offline path.
            return localClassifier.classify(image)
        }
        // Online path: cloud inference, with the local model as fallback.
        return cloudClassifier.classify(image, onFallback = {
            localClassifier.classify(image)
        })
    } catch (e: MLKitException) {
        // Last resort: serve previously cached results.
        return getCachedPredictions()
    }
}
Offline-first architecture patterns:
# Sync strategy with conflict resolution
class OfflineInferenceEngine:
    """Run inference locally first and reconcile with the cloud later.

    Every local prediction is returned immediately and also queued on the
    sync manager, so it can be uploaded once a connection is available.
    """

    def __init__(self, local_model, sync_manager):
        """Store the collaborators.

        Args:
            local_model: Object exposing ``predict(input_data)``.
            sync_manager: Object exposing ``queue_inference_event``,
                ``is_connected``, ``get_pending_events``, ``upload_batch``
                and ``apply_correction``.
        """
        self.model = local_model
        self.sync = sync_manager

    @staticmethod
    def _input_key(input_data):
        """Return an identifier for ``input_data`` to attach to a sync event.

        Hashable inputs keep using the built-in ``hash()``. Unhashable
        array-like inputs (numpy arrays, ``array.array``, ...) would make
        ``hash()`` raise ``TypeError``; for those the SHA-256 hex digest of
        ``tobytes()`` is used instead, the same scheme ``cache_prediction``
        uses for its cache key.
        """
        import hashlib

        try:
            return hash(input_data)
        except TypeError:
            to_bytes = getattr(input_data, "tobytes", None)
            if to_bytes is None:
                raise
            return hashlib.sha256(to_bytes()).hexdigest()

    def infer_with_eventual_sync(self, input_data):
        """Predict locally and queue the result for background sync.

        Args:
            input_data: Input passed unchanged to the local model.

        Returns:
            Whatever ``local_model.predict(input_data)`` returns.
        """
        # Function-scope import: the snippet used time.time() without
        # importing time.
        import time

        # Immediate local inference
        local_result = self.model.predict(input_data)
        # Queue for background sync
        self.sync.queue_inference_event(
            input_hash=self._input_key(input_data),
            local_result=local_result,
            timestamp=time.time(),
        )
        return local_result

    def sync_when_online(self):
        """Sync pending events when a connection is available.

        Does nothing while the sync manager reports no connection. Otherwise
        uploads all pending events and applies the cloud result as a
        correction wherever its confidence is higher than the event's local
        confidence.
        """
        if not self.sync.is_connected():
            return
        pending = self.sync.get_pending_events()
        cloud_results = self.sync.upload_batch(pending)
        # Apply server corrections; results are paired with events by position.
        for event, cloud_result in zip(pending, cloud_results):
            if cloud_result.confidence > event.local_confidence:
                self.sync.apply_correction(event, cloud_result)
EXERCISE
Implement an offline-first inference system with SQLite prediction caching, measure cold-start time vs warm inference, and test graceful degradation after simulated network loss.