13. Offline Operation

Chapter 13 of 18 · 20 min

Offline operation requires storing models and data locally without cloud dependencies. This capability matters for IoT sensors in remote locations, mobile applications in areas with poor connectivity, and privacy-sensitive applications where data cannot leave the device.

Model storage sizing and management:

import os
import shutil

def model_storage_report(models_dir):
    """Report storage usage for all model files in a directory.

    Only regular files directly inside ``models_dir`` are counted.
    Subdirectories are skipped, because the size reported for a directory
    entry is not the size of its contents and would distort the total.

    Args:
        models_dir: Path of the directory that holds the model files.

    Returns:
        A dict with two keys:
        ``total_mb`` - combined size of all reported files, in units of
        1024 * 1024 bytes.
        ``models`` - one dict per file with ``name``, ``size_mb`` and
        ``type`` (the extension without the dot, or ``""`` when the file
        has no extension), sorted from largest to smallest.

    Raises:
        OSError: If ``models_dir`` cannot be listed.
    """
    bytes_per_mb = 1024 * 1024
    reports = []

    with os.scandir(models_dir) as entries:
        for entry in entries:
            if not entry.is_file():
                continue

            reports.append({
                "name": entry.name,
                "size_mb": entry.stat().st_size / bytes_per_mb,
                # splitext yields "" for extension-less names instead of
                # echoing the whole filename back as its "type".
                "type": os.path.splitext(entry.name)[1].lstrip("."),
            })

    reports.sort(key=lambda report: report["size_mb"], reverse=True)

    return {
        "total_mb": sum(report["size_mb"] for report in reports),
        "models": reports,
    }

# Example output structure
# {'total_mb': 116.5, 'models': [
#     {'name': 'yolov8.onnx', 'size_mb': 96.2, 'type': 'onnx'},
#     {'name': 'movenet.tflite', 'size_mb': 12.1, 'type': 'tflite'},
#     {'name': 'squeezeNet.mlmodel', 'size_mb': 8.2, 'type': 'mlmodel'}
# ]}

SQLite for local data persistence:

import sqlite3
import numpy as np

def init_local_database(db_path):
    """Open the local SQLite database and make sure its tables exist.

    The function is safe to call on every application start: tables are
    created with ``IF NOT EXISTS``, so an existing database file is reused
    and its rows are left untouched.

    Args:
        db_path: Filesystem path of the SQLite database (or ``":memory:"``).

    Returns:
        An open ``sqlite3.Connection``. The caller is responsible for
        closing it.

    Raises:
        sqlite3.Error: If the schema cannot be created. The connection is
            closed before the error propagates.
    """
    conn = sqlite3.connect(db_path)

    try:
        cursor = conn.cursor()

        # Cached inference results, keyed by a hash of the input.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS prediction_cache (
                input_hash TEXT PRIMARY KEY,
                input_data BLOB,
                output_data BLOB,
                model_version TEXT,
                timestamp INTEGER
            )
        ''')

        # Per-inference latency and memory measurements.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS inference_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_name TEXT,
                latency_ms REAL,
                memory_mb REAL,
                timestamp INTEGER
            )
        ''')

        conn.commit()
    except sqlite3.Error:
        # Do not leak the handle when initialization fails.
        conn.close()
        raise

    return conn

def cache_prediction(conn, input_data, output_data, model_version):
    """Cache an inference result for offline reuse.

    The row is keyed by the SHA-256 digest of the raw input bytes, so
    caching the same input again replaces the earlier entry.

    Args:
        conn: Open SQLite connection that contains the
            ``prediction_cache`` table.
        input_data: Model input; must provide ``tobytes()``.
        output_data: Model output; must provide ``tobytes()``.
        model_version: Version label stored alongside the result.
    """
    import hashlib
    import time

    # Serialize once and reuse the bytes for both the key and the blob.
    input_bytes = input_data.tobytes()
    input_hash = hashlib.sha256(input_bytes).hexdigest()
    cursor = conn.cursor()

    cursor.execute('''
        INSERT OR REPLACE INTO prediction_cache 
        (input_hash, input_data, output_data, model_version, timestamp)
        VALUES (?, ?, ?, ?, ?)
    ''', (
        input_hash,
        input_bytes,
        output_data.tobytes(),
        model_version,
        int(time.time())  # whole seconds since the epoch
    ))

    conn.commit()

Android offline configuration:

// Bundle model in assets, install on first run
/**
 * Returns the on-device copy of a bundled model, installing it from the APK
 * assets the first time it is requested.
 *
 * The asset is first copied to a temporary sibling file and only renamed to
 * its final name once the copy has completed. An interrupted copy therefore
 * never leaves a truncated file that later launches would mistake for a
 * complete model.
 *
 * @param context used to resolve the assets and the app's files directory
 * @param modelFile asset name of the model; also used as the local file name
 * @return the model file inside the app's files directory
 * @throws java.io.IOException if the asset cannot be read or the copied file
 *         cannot be moved into place
 */
fun copyModelFromAssetsIfNeeded(context: Context, modelFile: String): File {
    val modelPath = File(context.filesDir, modelFile)

    if (modelPath.exists()) {
        return modelPath
    }

    val tempPath = File(context.filesDir, "$modelFile.tmp")
    try {
        context.assets.open(modelFile).use { input ->
            FileOutputStream(tempPath).use { output ->
                input.copyTo(output)
            }
        }

        if (!tempPath.renameTo(modelPath)) {
            throw java.io.IOException("Could not move $tempPath to $modelPath")
        }
    } finally {
        // No-op after a successful rename; removes the partial copy otherwise.
        tempPath.delete()
    }

    return modelPath
}

// Network-aware inference with local fallback
/**
 * Classifies [image], using the cloud classifier when a network is available
 * and the on-device classifier otherwise. If classification throws an
 * [MLKitException], the cached predictions are returned instead.
 */
fun classifyWithOfflineFallback(image: Bitmap): ClassificationResult {
    val localClassifier = ImageClassifier.create()

    try {
        // No connectivity: run purely on-device.
        if (!isNetworkAvailable()) {
            return localClassifier.classify(image)
        }

        // Online: ask the cloud, handing it the local model as its fallback.
        return cloudClassifier.classify(image, onFallback = {
            localClassifier.classify(image)
        })
    } catch (e: MLKitException) {
        // Neither path produced a result; serve what was cached earlier.
        return getCachedPredictions()
    }
}

Offline-first architecture patterns:

# Sync strategy with conflict resolution
class OfflineInferenceEngine:
    """Run inference locally and reconcile results with the server later.

    Every prediction is answered from the local model immediately and
    queued with the sync manager; queued events are uploaded once a
    connection is available.
    """

    def __init__(self, local_model, sync_manager):
        """Store the collaborators.

        Args:
            local_model: Object exposing ``predict(input_data)``.
            sync_manager: Object exposing ``queue_inference_event``,
                ``is_connected``, ``get_pending_events``, ``upload_batch``
                and ``apply_correction``.
        """
        self.model = local_model
        self.sync = sync_manager

    @staticmethod
    def _input_key(input_data):
        """Return a key identifying ``input_data`` for the sync queue.

        Hashable inputs keep using the built-in ``hash``. Unhashable inputs
        that expose ``tobytes()`` (for example NumPy arrays) are keyed by
        the SHA-256 hex digest of their bytes. Any other unhashable input
        re-raises the original ``TypeError``.
        """
        import hashlib

        try:
            return hash(input_data)
        except TypeError:
            to_bytes = getattr(input_data, "tobytes", None)
            if to_bytes is None:
                raise
            return hashlib.sha256(to_bytes()).hexdigest()

    def infer_with_eventual_sync(self, input_data):
        """Predict locally, queue the event for later sync, return the result.

        Args:
            input_data: Input passed unchanged to the local model.

        Returns:
            Whatever ``local_model.predict`` returns.
        """
        import time

        # Immediate local inference
        local_result = self.model.predict(input_data)

        # Queue for background sync
        self.sync.queue_inference_event(
            input_hash=self._input_key(input_data),
            local_result=local_result,
            timestamp=time.time()
        )

        return local_result

    def sync_when_online(self):
        """Sync pending events when connection available.

        Does nothing while the sync manager reports no connection.
        Otherwise uploads all pending events and applies the server result
        for each event whose cloud confidence is higher than its local one.
        """
        if not self.sync.is_connected():
            return

        pending = self.sync.get_pending_events()
        cloud_results = self.sync.upload_batch(pending)

        # Apply server corrections
        # NOTE(review): events are queued above without a confidence value;
        # presumably the sync manager supplies ``local_confidence`` - confirm.
        for event, cloud_result in zip(pending, cloud_results):
            if cloud_result.confidence > event.local_confidence:
                self.sync.apply_correction(event, cloud_result)
EXERCISE

Implement an offline-first inference system with SQLite prediction caching, measure cold-start time vs warm inference, and test graceful degradation after simulated network loss.