07. ML Kit for Android
Google's ML Kit provides on-device inference capabilities for Android applications. The library bundles TensorFlow Lite models with a simplified API that handles thread management, model caching, and hardware acceleration. ML Kit supports custom model loading alongside pre-built solutions for face detection, text recognition, and object detection.
Adding ML Kit to Android projects via Gradle:
dependencies {
    // ML Kit object detection artifact (on-device detector).
    implementation('com.google.mlkit:object-detection:17.0.0')

    // Custom model support.
    // NOTE(review): the artifact name suggests this links ML Kit to Firebase-hosted
    // models; confirm it is the intended dependency for custom model loading.
    implementation('com.google.mlkit:linkfirebase:16.0.0-beta1')
}
Custom model inference with ML Kit involves wrapping TensorFlow Lite:
import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.custom.CustomImageClassifier
import com.google.mlkit.vision.custom.CustomImageClassifierOptions
/**
 * Thin wrapper around [CustomImageClassifier] that classifies bitmaps using the
 * bundled "model.tflite" file.
 *
 * Call [close] when the classifier is no longer needed.
 *
 * @param context context handed to [CustomImageClassifier.createFromFile].
 */
class ImageClassifier(private val context: Context) {

    // NOTE(review): these options are built but never handed to the classifier below;
    // confirm whether createFromFile has an overload that should receive them.
    private val options = CustomImageClassifierOptions.Builder()
        .setFloatingModel(false) // true for fp32, false for INT8
        .setMaxWorkers(4)
        .build()

    private val classifier = CustomImageClassifier.createFromFile(context, "model.tflite")

    /**
     * Classifies [bitmap] and reports every returned category through [callback].
     *
     * @param bitmap image to classify; wrapped as an [InputImage] with rotation 0.
     * @param callback invoked once per category with its label and confidence.
     *   On failure the error is only logged and [callback] is not invoked
     *   (NOTE(review): callers cannot observe failures — confirm this is intended).
     */
    fun classify(bitmap: Bitmap, callback: (String, Float) -> Unit) {
        val image = InputImage.fromBitmap(bitmap, 0)
        classifier.classify(image)
            .addOnSuccessListener { outcome ->
                for (entry in outcome.classificationCategories) {
                    callback(entry.label, entry.confidence)
                }
            }
            .addOnFailureListener { error ->
                Log.e("ImageClassifier", "Classification failed", error)
            }
    }

    /** Closes the underlying classifier. */
    fun close() {
        classifier.close()
    }
}
Multi-model concurrency requires separate interpreters:
/**
 * Runs two models concurrently, each on its own [Interpreter] instance.
 *
 * All coroutines started by [runConcurrent] belong to a single scope owned by this
 * executor, so they can be cancelled together. Call [close] when the executor is no
 * longer needed to stop pending work and close both interpreters.
 */
class MultiModelExecutor {
    private val interpreterOptions = Interpreter.Options()
        .setNumThreads(4)
        .setRuntime(Interpreter.TfLiteRuntime.PREFER_EDGE_TFLITE)

    // Separate interpreters for each model prevent contention.
    // (Despite the "Delegate" suffix these are Interpreter instances; the names are
    // kept unchanged because helpers outside this snippet may refer to them.)
    private val modelADelegate = Interpreter(modelABuffer, interpreterOptions)
    private val modelBDelegate = Interpreter(modelBBuffer, interpreterOptions)

    // A supervisor keeps one failed run from cancelling the scope for later runs,
    // matching the isolation the original per-call scopes provided.
    private val supervisor = kotlinx.coroutines.SupervisorJob()

    // One long-lived scope instead of a new, uncancellable scope per call.
    private val scope = CoroutineScope(Dispatchers.Default + supervisor)

    /**
     * Starts both model runs for [bitmap] in parallel on [Dispatchers.Default] and
     * returns immediately; results are combined inside the launched coroutine.
     *
     * After [close] has been called, newly launched work is cancelled before it runs.
     *
     * @param bitmap image passed to both model runs.
     */
    fun runConcurrent(bitmap: Bitmap) {
        scope.launch {
            val resultA = async { runModelA(bitmap) }
            val resultB = async { runModelB(bitmap) }
            val (a, b) = awaitAll(resultA, resultB)
            // Process combined results
        }
    }

    /**
     * Cancels outstanding work and closes both interpreters.
     *
     * Cancellation is cooperative: an inference call that is already executing is
     * not interrupted, so callers should invoke this only once in-flight runs have
     * finished (NOTE(review): add an explicit join here if that cannot be guaranteed).
     */
    fun close() {
        supervisor.cancel()
        modelADelegate.close()
        modelBDelegate.close()
    }
}
Careless memory management during bitmap conversion is a frequent cause of recycled-bitmap errors:
/**
 * Converts [bitmap] into a direct, native-order float32 [ByteBuffer] laid out as
 * `1 x inputHeight x inputWidth x 3` (R, G, B per pixel; alpha is dropped).
 *
 * Pixel data is copied out of the bitmap instead of passing references, so the
 * returned buffer stays valid even if the caller later recycles [bitmap].
 * The caller's [bitmap] itself is never recycled here.
 *
 * @param bitmap source image; scaled with filtering to the requested size.
 * @param inputWidth target width in pixels (defaults to 224).
 * @param inputHeight target height in pixels (defaults to 224).
 * @return buffer positioned at 0 containing channel values normalized to [-1, 1].
 * @throws IllegalArgumentException if [inputWidth] or [inputHeight] is not positive.
 */
@JvmOverloads
fun preprocessBitmap(bitmap: Bitmap, inputWidth: Int = 224, inputHeight: Int = 224): ByteBuffer {
    require(inputWidth > 0 && inputHeight > 0) {
        "Input size must be positive, was ${inputWidth}x$inputHeight"
    }

    val channelCount = 3
    val bytesPerChannel = 4 // fp32 requires 4 bytes per channel
    val pixelCount = inputWidth * inputHeight

    val inputBuffer = ByteBuffer.allocateDirect(1 * pixelCount * channelCount * bytesPerChannel)
    inputBuffer.order(ByteOrder.nativeOrder())

    val pixels = IntArray(pixelCount)
    val scaledBitmap = Bitmap.createScaledBitmap(bitmap, inputWidth, inputHeight, true)
    try {
        scaledBitmap.getPixels(pixels, 0, inputWidth, 0, 0, inputWidth, inputHeight)
    } finally {
        // The pixels are already copied, so the temporary bitmap can be released even
        // when getPixels throws. Scaling may hand back the source instance itself;
        // in that case it belongs to the caller and must not be recycled.
        if (scaledBitmap !== bitmap) scaledBitmap.recycle()
    }

    // Normalize to [-1, 1] range
    for (pixel in pixels) {
        inputBuffer.putFloat((((pixel shr 16) and 0xFF) / 127.5f) - 1.0f)
        inputBuffer.putFloat((((pixel shr 8) and 0xFF) / 127.5f) - 1.0f)
        inputBuffer.putFloat(((pixel and 0xFF) / 127.5f) - 1.0f)
    }

    // Reset the position so consumers read from the start; no cast needed.
    inputBuffer.rewind()
    return inputBuffer
}
Build an Android app that runs ML Kit custom model inference, implement proper bitmap preprocessing with normalization, and measure inference latency with the Android Profiler.