Offline Operation — Edge AI: Mobile and IoT (Chapter 13)

Offline operation requires storing models and data locally without cloud dependencies. This capability matters for IoT sensors in remote locations, mobile applications in areas with poor connectivity, and privacy-sensitive applications where data cannot leave the device.

Model storage sizing and management:

import os
import shutil

def model_storage_report(models_dir):
    """Report storage usage for all model files in a directory.

    Only regular files directly inside ``models_dir`` are counted.
    Subdirectories are skipped: ``os.path.getsize`` on a directory returns
    the size of the directory entry, not of its contents, so including it
    would report a meaningless number as a "model".

    Args:
        models_dir: Path of the directory that holds the model files.

    Returns:
        A dict with two keys:
        ``"total_mb"`` - combined size of the listed files in MiB.
        ``"models"`` - list of ``{"name", "size_mb", "type"}`` dicts sorted
        by size, largest first. ``"type"`` is the file extension without
        the leading dot, or ``""`` when the file has no extension.

    Raises:
        OSError: If ``models_dir`` cannot be listed or a file cannot be
            stat'ed.
    """
    bytes_per_mb = 1024 * 1024
    total_size = 0
    reports = []

    for filename in os.listdir(models_dir):
        filepath = os.path.join(models_dir, filename)
        if not os.path.isfile(filepath):
            continue

        size_mb = os.path.getsize(filepath) / bytes_per_mb
        total_size += size_mb

        # splitext yields "" for extension-less names, whereas
        # split('.')[-1] would return the whole filename as the type.
        extension = os.path.splitext(filename)[1].lstrip('.')

        reports.append({
            "name": filename,
            "size_mb": size_mb,
            "type": extension,
        })

    reports.sort(key=lambda entry: entry["size_mb"], reverse=True)
    return {
        "total_mb": total_size,
        "models": reports,
    }

# Example output structure
# {'total_mb': 116.5, 'models': [
#     {'name': 'yolov8.onnx', 'size_mb': 96.2, 'type': 'onnx'},
#     {'name': 'movenet.tflite', 'size_mb': 12.1, 'type': 'tflite'},
#     {'name': 'squeezeNet.mlmodel', 'size_mb': 8.2, 'type': 'mlmodel'}
# ]}

SQLite for local data persistence:

import sqlite3
import numpy as np

def init_local_database(db_path):
    """Open the local SQLite database and make sure its schema exists.

    Safe to call on every application start: the tables are created with
    ``IF NOT EXISTS``, so reopening an existing database keeps its cached
    rows instead of failing with ``sqlite3.OperationalError``.

    Tables:
        prediction_cache: one row per input hash, holding the raw input
            and output blobs, the model version and a timestamp.
        inference_metrics: append-only latency and memory measurements
            per model.

    Args:
        db_path: Path of the SQLite database file (or ``":memory:"``).

    Returns:
        An open ``sqlite3.Connection``; the caller is responsible for
        closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or the schema
            cannot be created. The connection is closed before re-raising.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS prediction_cache (
                input_hash TEXT PRIMARY KEY,
                input_data BLOB,
                output_data BLOB,
                model_version TEXT,
                timestamp INTEGER
            )
        ''')

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS inference_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_name TEXT,
                latency_ms REAL,
                memory_mb REAL,
                timestamp INTEGER
            )
        ''')

        conn.commit()
    except sqlite3.Error:
        # Do not leak the handle when schema creation fails.
        conn.close()
        raise
    return conn

def cache_prediction(conn, input_data, output_data, model_version):
    """Cache an inference result for offline reuse.

    The row is keyed by the SHA-256 hex digest of ``input_data.tobytes()``.
    Caching the same input again replaces the previous row
    (``INSERT OR REPLACE``). The write is committed before returning.

    Args:
        conn: Open ``sqlite3.Connection`` whose database already has the
            ``prediction_cache`` table.
        input_data: Model input; must provide ``tobytes()`` (for example a
            numpy array).
        output_data: Model output; must provide ``tobytes()``.
        model_version: Version label stored alongside the result.

    Raises:
        sqlite3.Error: If the insert or commit fails.
    """
    import hashlib
    import time

    # Serialize once; the same bytes are both hashed and stored.
    # NOTE(review): tobytes() carries no dtype or shape, so inputs that
    # differ only in those would share a key - confirm that is acceptable.
    input_bytes = input_data.tobytes()
    input_hash = hashlib.sha256(input_bytes).hexdigest()
    cursor = conn.cursor()

    cursor.execute('''
        INSERT OR REPLACE INTO prediction_cache 
        (input_hash, input_data, output_data, model_version, timestamp)
        VALUES (?, ?, ?, ?, ?)
    ''', (
        input_hash,
        input_bytes,
        output_data.tobytes(),
        model_version,
        int(time.time()),
    ))

    conn.commit()

Android offline configuration:

// Bundle model in assets, install on first run
/**
 * Returns the on-device copy of a bundled model, installing it from the APK
 * assets on first use.
 *
 * The asset is first copied to a temporary sibling file and only renamed to
 * its final name once the copy has completed. An interrupted copy (process
 * kill, full disk) therefore never leaves a truncated file at the final path
 * that a later call would mistake for a valid model.
 *
 * @param context used to resolve `filesDir` and to open the asset
 * @param modelFile asset name; also used as the file name under `filesDir`
 * @return the model file inside `context.filesDir`
 * @throws java.io.IOException if the asset cannot be read, written, or moved
 */
fun copyModelFromAssetsIfNeeded(context: Context, modelFile: String): File {
    val modelPath = File(context.filesDir, modelFile)

    if (modelPath.exists()) {
        return modelPath
    }

    val tempPath = File(context.filesDir, "$modelFile.tmp")
    try {
        context.assets.open(modelFile).use { input ->
            FileOutputStream(tempPath).use { output ->
                input.copyTo(output)
            }
        }
        if (!tempPath.renameTo(modelPath)) {
            throw java.io.IOException("Could not move $tempPath to $modelPath")
        }
    } finally {
        // Removes a partial copy on failure; harmless no-op after a successful rename.
        tempPath.delete()
    }

    return modelPath
}

// Network-aware inference with local fallback
/**
 * Classifies [image], choosing between cloud and on-device inference.
 *
 * With a network connection the cloud classifier is called and handed the
 * local classifier as its `onFallback` callback; without one the local
 * classifier is used directly. If either path throws [MLKitException], the
 * result of `getCachedPredictions()` is returned instead.
 */
fun classifyWithOfflineFallback(image: Bitmap): ClassificationResult {
    val localClassifier = ImageClassifier.create()

    try {
        if (!isNetworkAvailable()) {
            // Pure offline
            return localClassifier.classify(image)
        }

        // Cloud inference with fallback
        return cloudClassifier.classify(image, onFallback = {
            localClassifier.classify(image)
        })
    } catch (e: MLKitException) {
        // Last resort: cached results
        return getCachedPredictions()
    }
}

Offline-first architecture patterns:

# Sync strategy with conflict resolution
class OfflineInferenceEngine:
    """Run inference locally and reconcile the results with the cloud later.

    Args:
        local_model: Object exposing ``predict(input_data)``.
        sync_manager: Object exposing ``queue_inference_event``,
            ``is_connected``, ``get_pending_events``, ``upload_batch`` and
            ``apply_correction``, as called by the methods below.
    """

    def __init__(self, local_model, sync_manager):
        self.model = local_model
        self.sync = sync_manager

    @staticmethod
    def _stable_input_hash(input_data):
        """Return an identifier for ``input_data`` that is stable across runs.

        The builtin ``hash()`` raises ``TypeError`` for numpy arrays and is
        salted per process for ``str``/``bytes``, so it cannot serve as a
        sync key. Byte-like and array-like inputs are hashed with SHA-256
        instead; any other object falls back to ``hash()``.
        """
        import hashlib

        if hasattr(input_data, "tobytes"):
            payload = input_data.tobytes()
        elif isinstance(input_data, (bytes, bytearray)):
            payload = bytes(input_data)
        elif isinstance(input_data, str):
            payload = input_data.encode("utf-8")
        else:
            return hash(input_data)
        return hashlib.sha256(payload).hexdigest()

    def infer_with_eventual_sync(self, input_data):
        """Predict locally, queue the event for later sync, return the result.

        Args:
            input_data: Input passed unchanged to ``self.model.predict``.

        Returns:
            Whatever ``self.model.predict(input_data)`` returns.
        """
        import time

        # Immediate local inference
        local_result = self.model.predict(input_data)

        # Queue for background sync
        self.sync.queue_inference_event(
            input_hash=self._stable_input_hash(input_data),
            local_result=local_result,
            timestamp=time.time(),
        )

        return local_result

    def sync_when_online(self):
        """Upload pending events and apply higher-confidence cloud results.

        Does nothing when the sync manager reports no connection.
        Events and cloud results are paired positionally.
        NOTE(review): assumes ``upload_batch`` returns one result per
        pending event, in the same order - confirm against the sync manager.
        """
        if not self.sync.is_connected():
            return

        pending = self.sync.get_pending_events()
        cloud_results = self.sync.upload_batch(pending)

        # Apply server corrections only where the cloud is more confident.
        for event, cloud_result in zip(pending, cloud_results):
            if cloud_result.confidence > event.local_confidence:
                self.sync.apply_correction(event, cloud_result)