07. ML Kit for Android

Chapter 7 of 18 · 20 min

Google's ML Kit provides on-device inference capabilities for Android applications. The library bundles TensorFlow Lite models with a simplified API that handles thread management, model caching, and hardware acceleration. ML Kit supports custom model loading alongside pre-built solutions for face detection, text recognition, and object detection.

Adding ML Kit to Android projects via Gradle:

dependencies {
    // ML Kit object detection artifact
    implementation 'com.google.mlkit:object-detection:17.0.0'
    
    // Firebase linking artifact, added here for custom model support.
    // NOTE(review): presumably this is only needed when the model is hosted on
    // Firebase; confirm the artifact name and version against the ML Kit release
    // notes before copying this line.
    implementation 'com.google.mlkit:linkfirebase:16.0.0-beta1'
}

Custom model inference with ML Kit wraps a TensorFlow Lite model in a classifier that accepts `InputImage` objects and reports its results asynchronously:

import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.custom.CustomImageClassifier
import com.google.mlkit.vision.custom.CustomImageClassifierOptions

/**
 * Classifies bitmaps with the custom model stored in "model.tflite".
 *
 * Results are delivered asynchronously through callbacks. The class is
 * [AutoCloseable], so it can be scoped with `use { }`; otherwise call [close]
 * once the classifier is no longer needed.
 */
class ImageClassifier(private val context: Context) : AutoCloseable {

    // NOTE(review): these options are built but never handed to the classifier
    // below, so they currently have no effect. Confirm whether createFromFile
    // has an overload that accepts them.
    private val options = CustomImageClassifierOptions.Builder()
        .setFloatingModel(false) // true for fp32, false for INT8
        .setMaxWorkers(4)
        .build()

    private val classifier = CustomImageClassifier.createFromFile(
        context,
        "model.tflite"
    )

    /**
     * Classifies [bitmap] and invokes [callback] once per returned category
     * with its label and confidence.
     *
     * Failures are logged only; use the overload taking `onFailure` to react
     * to them.
     */
    fun classify(bitmap: Bitmap, callback: (String, Float) -> Unit) {
        classify(bitmap, onFailure = {}, callback = callback)
    }

    /**
     * Classifies [bitmap] and invokes [callback] once per returned category
     * with its label and confidence.
     *
     * @param onFailure invoked with the error after it has been logged, so the
     *   caller can surface the failure instead of waiting for a result that
     *   never arrives.
     */
    fun classify(
        bitmap: Bitmap,
        onFailure: (Exception) -> Unit,
        callback: (String, Float) -> Unit
    ) {
        // Rotation is passed as 0 degrees: the bitmap is used as-is.
        val inputImage = InputImage.fromBitmap(bitmap, 0)

        classifier.classify(inputImage)
            .addOnSuccessListener { result ->
                for (category in result.classificationCategories) {
                    callback(category.label, category.confidence)
                }
            }
            .addOnFailureListener { exception ->
                Log.e("ImageClassifier", "Classification failed", exception)
                onFailure(exception)
            }
    }

    /** Releases the underlying classifier. */
    override fun close() {
        classifier.close()
    }
}

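Running several models concurrently requires a separate interpreter instance for each model, so that the inference calls do not contend for the same interpreter: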

/**
 * Runs two models side by side, each on its own interpreter.
 *
 * The executor owns a single coroutine scope for all inference work. Call
 * [close] when the executor is no longer needed: it cancels pending work and
 * releases both interpreters.
 */
class MultiModelExecutor : AutoCloseable {
    // NOTE(review): `Interpreter.TfLiteRuntime.PREFER_EDGE_TFLITE` does not appear
    // to match the runtime values documented for InterpreterApi.Options — verify
    // against the TensorFlow Lite API reference.
    private val interpreterOptions = Interpreter.Options()
        .setNumThreads(4)
        .setRuntime(Interpreter.TfLiteRuntime.PREFER_EDGE_TFLITE)

    // Separate interpreters for each model prevent contention.
    // (Despite the "Delegate" suffix, both fields hold Interpreter instances.)
    private val modelADelegate = Interpreter(modelABuffer, interpreterOptions)
    private val modelBDelegate = Interpreter(modelBBuffer, interpreterOptions)

    // One scope for the executor's lifetime, instead of a new, uncancellable
    // scope per call. The supervisor job is the handle used to cancel it.
    private val supervisor = kotlinx.coroutines.SupervisorJob()
    private val scope = CoroutineScope(Dispatchers.Default + supervisor)

    /**
     * Starts both models on [bitmap] in parallel and returns immediately; the
     * combined results are handled inside the launched coroutine.
     *
     * NOTE(review): overlapping calls would use the same interpreter from
     * several coroutines at once — confirm callers do not overlap invocations.
     */
    fun runConcurrent(bitmap: Bitmap) {
        scope.launch {
            val resultA = async { runModelA(bitmap) }
            val resultB = async { runModelB(bitmap) }

            val (a, b) = awaitAll(resultA, resultB)
            // Process combined results
        }
    }

    /**
     * Cancels outstanding work and closes both interpreters.
     *
     * The interpreters are released only after every in-flight coroutine has
     * finished, so a running inference never sees a closed interpreter.
     */
    override fun close() {
        supervisor.cancel()
        // Fires once all children have completed (immediately if none are running).
        supervisor.invokeOnCompletion {
            modelADelegate.close()
            modelBDelegate.close()
        }
    }
}

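Careless memory management during bitmap conversion is a frequent source of "recycled bitmap" errors. Copy the pixel data into a buffer and recycle only the bitmaps you created yourself: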

/**
 * Copies [bitmap] into a direct float32 buffer suitable as model input.
 *
 * The pixel data is copied rather than referenced, so the returned buffer stays
 * valid after the source bitmap is recycled. The bitmap is scaled to
 * [inputSize] x [inputSize]; each pixel is written as three floats (red, green,
 * blue — alpha is dropped), normalized from 0..255 to the [-1, 1] range.
 *
 * @param bitmap source image; it is never recycled by this function.
 * @param inputSize edge length in pixels of the square model input.
 * @return a rewound buffer of `inputSize * inputSize * 3` floats in native byte order.
 * @throws IllegalArgumentException if [inputSize] is not positive or [bitmap]
 *   has already been recycled.
 */
fun preprocessBitmap(bitmap: Bitmap, inputSize: Int = 224): ByteBuffer {
    require(inputSize > 0) { "inputSize must be positive, was $inputSize" }
    require(!bitmap.isRecycled) { "Cannot preprocess a recycled bitmap" }

    val pixelCount = inputSize * inputSize
    // Batch of 1, three colour channels per pixel, 4 bytes per fp32 value.
    val inputBuffer = ByteBuffer
        .allocateDirect(1 * pixelCount * 3 * 4)
        .order(ByteOrder.nativeOrder())

    val scaledBitmap = Bitmap.createScaledBitmap(bitmap, inputSize, inputSize, true)
    try {
        val pixels = IntArray(pixelCount)
        scaledBitmap.getPixels(pixels, 0, inputSize, 0, 0, inputSize, inputSize)

        // Normalize to [-1, 1] range
        for (pixel in pixels) {
            inputBuffer.putFloat(((pixel shr 16 and 0xFF) / 127.5f) - 1.0f)
            inputBuffer.putFloat(((pixel shr 8 and 0xFF) / 127.5f) - 1.0f)
            inputBuffer.putFloat(((pixel and 0xFF) / 127.5f) - 1.0f)
        }
    } finally {
        // Recycle only the copy made here; when scaling handed back the caller's
        // own bitmap it must stay usable. Runs even if reading the pixels fails.
        if (scaledBitmap !== bitmap) scaledBitmap.recycle()
    }

    inputBuffer.rewind()
    return inputBuffer
}
EXERCISE

Build an Android app that runs ML Kit custom model inference, implement proper bitmap preprocessing with normalization, and measure inference latency with the Android Profiler.