Core ML for iOS — Edge AI: Mobile and IoT (Chapter 6)

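Apple's Core ML framework provides hardware-accelerated inference on iOS devices. Models are converted to Core ML with coremltools, directly from TensorFlow/Keras or PyTorch (ONNX is supported only by the legacy converter shipped with coremltools 5.x and earlier), and saved either as a neural-network model file (.mlmodel) or as an ML Program package (.mlpackage). The Neural Engine (ANE) in recent A-series and M-series chips delivers roughly 10-40 TOPS for compatible operator workloads.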

Prerequisites include Xcode 14+ and Python with coremltools. Conversion itself also runs on Linux, but macOS is needed to compile the converted model and run test predictions. Installation:

# Install the coremltools Python package used by the conversion snippets
pip install coremltools

Conversion from ONNX follows this pattern:

# NOTE: the unified ct.convert() API only accepts TensorFlow and PyTorch
# sources. ONNX models go through the legacy ONNX frontend, which exists only
# in coremltools 5.x and earlier:
#     pip install "coremltools<6" onnx
# With a current coremltools, convert from the original TensorFlow/PyTorch
# model instead of from the exported ONNX file.
import coremltools as ct
import onnx

# Load ONNX model
onnx_model = onnx.load("model.onnx")

# Convert to Core ML. Input/output names and shapes (here "input" with shape
# (1, 3, 224, 224) and "output" with shape (1, 1000)) are taken from the ONNX
# graph itself, so they are not repeated in the call.
mlmodel = ct.converters.onnx.convert(
    model=onnx_model,
    # Highest target the legacy frontend knows about; it emits a
    # neural-network model, not an ML Program.
    minimum_ios_deployment_target="13",
)

# Save the model. Neural-network models use the single-file .mlmodel format.
mlmodel.save("model.mlmodel")

Keras model conversion uses the unified ct.convert API:

import coremltools as ct
import tensorflow as tf

# Pretrained Keras MobileNetV2 with ImageNet weights
model = tf.keras.applications.MobileNetV2(weights="imagenet")

# Convert with input shape specification. Inputs are passed as ct.TensorType
# objects (not (name, type) tuples); the name is omitted so the converter
# keeps the Keras model's own input name instead of requiring a match.
mlmodel = ct.convert(
    model,
    inputs=[ct.TensorType(shape=(1, 224, 224, 3))],
    # Stated explicitly: an iOS 15+ target produces an ML Program.
    convert_to="mlprogram",
    minimum_deployment_target=ct.target.iOS15,
)

# ML Programs must be saved as an .mlpackage bundle; saving one with a
# .mlmodel extension raises an error.
mlmodel.save("mobilenetv2.mlpackage")

Swift inference code structure:

import CoreML

/// Loads the Core ML model at `modelURL` and runs one prediction on it.
///
/// The model is loaded on every call; callers that predict repeatedly should
/// consider caching the `MLModel` instead.
///
/// - Parameter imageData: Raw image bytes to copy into the input tensor
///   (the pixel copy itself is elided in this listing).
/// - Returns: The elements of the model's `"output"` multi-array as `Double`s,
///   in linear index order.
/// - Throws: `ModelError.tensorCreationFailed` if the input tensor cannot be
///   allocated, `ModelError.outputExtractionFailed` if the prediction has no
///   `"output"` multi-array, and any Core ML error raised while loading the
///   model, building the feature provider, or predicting.
func loadAndRunModel(imageData: Data) throws -> [Double] {
    let config = MLModelConfiguration()
    // Alternatives: .cpuOnly, .cpuAndGPU, .cpuAndNeuralEngine (iOS 16+).
    config.computeUnits = .all

    // MLModel(contentsOf:) expects a compiled model (.mlmodelc) URL.
    let model = try MLModel(contentsOf: modelURL, configuration: config)

    let inputShape: [NSNumber] = [1, 224, 224, 3]
    guard let inputTensor = try? MLMultiArray(shape: inputShape, dataType: .float32) else {
        throw ModelError.tensorCreationFailed
    }

    // Copy image data into tensor
    // ... pixel copy loop ...

    let inputFeatures = ["input": MLFeatureValue(multiArray: inputTensor)]

    // Propagate Core ML errors instead of force-unwrapping a `try?` result
    // (which would crash) or masking the real failure behind a generic error.
    let inputProvider = try MLDictionaryFeatureProvider(dictionary: inputFeatures)
    let outputFeatures = try model.prediction(from: inputProvider)

    // MLFeatureProvider exposes featureValue(for:); the multi-array is read
    // from the returned MLFeatureValue.
    guard let output = outputFeatures.featureValue(for: "output")?.multiArrayValue else {
        throw ModelError.outputExtractionFailed
    }

    return (0..<output.count).map { output[$0].doubleValue }
}

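Inspecting the saved model shows how it is structured on disk; the actual hardware allocation (CPU, GPU, or ANE) is decided when the model is loaded and is best read from the Core ML performance report in Xcode 14+: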

# A .mlmodel is a single protobuf file (a .mlpackage is a directory bundle);
# neither is a zip archive, so compile it and list the compiled bundle instead.
mkdir -p compiled
xcrun coremlcompiler compile model.mlmodel compiled
ls -R compiled/model.mlmodelc

# The bundle does not record which compute unit runs each layer; open the
# model in Xcode 14+ and use its Performance tab for the CPU/GPU/ANE breakdown.

Performance tuning requires understanding thermal state. ANE execution throttles when device temperature exceeds thresholds—profiling should occur after device thermal equilibrium (15 minutes of moderate use).