17. Testing on Device
Chapter 17 of 18 · 25 min
Device testing reveals performance characteristics and failure modes invisible to desktop development. Production ML applications require testing frameworks that measure latency, memory, and accuracy on target hardware under realistic conditions.
On-device unit testing:
import unittest
import numpy as np
import onnxruntime as ort
class TestEdgeInference(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.session = ort.InferenceSession("model.onnx")
cls.test_data = np.load("test_preprocessing/test_inputs.npz")
def test_output_shape(self):
input_name = self.session.get_inputs()[0].name
input_shape = self.session.get_inputs()[0].shape
dummy_output = self.session.run(
None,
{input_name: np.random.randn(*input_shape).astype(np.float32)}
)
self.assertEqual(dummy_output[0].shape[0], input_shape[0])
def test_accuracy_against_reference(self):
reference_outputs = self.session.run(None, self.reference_inputs)
current_outputs = self.session.run(None, self.current_inputs)
np.testing.assert_allclose(
reference_outputs,
current_outputs,
rtol=1e-4,
atol=1e-5,
err_msg="Model outputs deviate from reference"
)
def test_latency_requirements(self):
import time
warmup_rounds = 10
for _ in range(warmup_rounds):
self.session.run(self.warmup_input)
timings = []
for _ in range(100):
start = time.perf_counter()
self.session.run(None, self.reference_inputs)
timings.append((time.perf_counter() - start) * 1000)
p95_latency = np.percentile(timings, 95)
self.assertLess(p95_latency, 50, f"P95 latency {p95_latency:.2f}ms exceeds 50ms")
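# Run on the actual device (requires a Python interpreter with pytest, numpy, and onnxruntime installed on the device; stock Android does not ship one)
# adb shell "cd /data/local/tmp && python -m pytest test_edge_inference.py -v"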
Android UI testing for ML features:
/**
 * Espresso tests that drive the classification feature through [MainActivity].
 */
@RunWith(AndroidJUnit4::class)
class ImageClassificationTest {
    @get:Rule
    val activityRule = ActivityScenarioRule(MainActivity::class.java)

    /** Tapping the classify button must eventually put non-empty text in the result view. */
    @Test
    fun testClassificationDisplaysResult() {
        val classifyButton = onView(withId(R.id.classifyButton))
        classifyButton.perform(click())

        // Poll for up to 3 seconds until the result view is displayed, then check its text.
        val resultLabel = onView(withId(R.id.resultText)).waitFor(3000)
        resultLabel.check(matches(not(withText(""))))
    }

    /** With a disconnected fake network monitor installed, classification must not be counted by it. */
    @Test
    fun testOfflineClassification() {
        activityRule.scenario.onActivity { activity ->
            // Swap in a fake monitor that reports no connectivity.
            val offlineMonitor = FakeNetworkMonitor(connected = false)
                .also { activity.networkMonitor = it }

            activity.classifyImage(testBitmap)

            // The fake must record zero classifications.
            assertThat(offlineMonitor.classificationCount).isEqualTo(0)
        }
    }
}
/**
 * Polls this interaction until its view is displayed, then returns the interaction
 * so further checks can be chained.
 *
 * The view is checked at least once, even for a zero or negative [timeoutMs], and
 * re-checked roughly every 100 ms until the timeout has elapsed.
 *
 * @param timeoutMs maximum time to keep polling, in milliseconds.
 * @return this [ViewInteraction] once `isDisplayed()` matches.
 * @throws TimeoutException if the view is still not displayed after [timeoutMs];
 *   the last check failure is attached as the cause.
 */
fun ViewInteraction.waitFor(timeoutMs: Long): ViewInteraction {
    // nanoTime is monotonic, so wall-clock adjustments on the device cannot
    // shorten or extend the wait.
    val timeoutNanos = timeoutMs * 1_000_000L
    val startNanos = System.nanoTime()
    var lastFailure: Throwable? = null

    do {
        try {
            this.check(matches(isDisplayed()))
            return this
        } catch (e: Exception) {
            // e.g. the view is not in the hierarchy yet.
            lastFailure = e
        } catch (e: AssertionError) {
            // A failed matcher surfaces as an AssertionError subclass, which is
            // not an Exception; without this branch polling stops on the first miss.
            lastFailure = e
        }
        Thread.sleep(100)
    } while (System.nanoTime() - startNanos < timeoutNanos)

    throw TimeoutException("View not displayed within ${timeoutMs}ms").apply {
        initCause(lastFailure)
    }
}
iOS XCTest for Core ML:
import XCTest
import CoreML
/// XCTest cases that run the bundled classifier on a synthetic image and
/// check both the result and the inference latency.
class CoreMLInferenceTests: XCTestCase {
    var classifier: ImageClassifier!

    /// Loads the classifier from the main bundle before each test.
    override func setUp() async throws {
        // XCTUnwrap fails the test with a message instead of crashing the
        // test runner when the resource is missing.
        // NOTE(review): Xcode normally ships compiled models as ".mlmodelc";
        // confirm which extension ImageClassifier expects.
        let modelURL = try XCTUnwrap(
            Bundle.main.url(forResource: "classifier", withExtension: "mlmodel"),
            "classifier.mlmodel not found in the main bundle"
        )
        classifier = try ImageClassifier(modelURL: modelURL)
    }

    /// Builds the 224x224 uniform gray image shared by the tests.
    private func makeTestImage() throws -> CGImage {
        let context = CIContext()
        let grayImage = CIImage(color: .gray)
            .cropped(to: CGRect(x: 0, y: 0, width: 224, height: 224))
        // createCGImage returns an optional and does not throw, so unwrap it
        // explicitly rather than marking the call with `try`.
        return try XCTUnwrap(
            context.createCGImage(grayImage, from: grayImage.extent),
            "Failed to render the synthetic test image"
        )
    }

    /// Classification of the synthetic image returns a confident, non-empty result.
    func testInferenceOnSimulatedInput() async throws {
        let cgImage = try makeTestImage()

        let result = try await classifier.classify(image: cgImage)

        XCTAssertFalse(result.isEmpty, "Classification should return non-empty results")
        XCTAssertGreaterThan(result.first?.confidence ?? 0, 0.5, "Confidence should exceed threshold")
    }

    /// P95 latency over 100 classifications stays under 100 ms.
    func testPerformance() async throws {
        let testImage = try makeTestImage()
        let iterations = 100
        var latencies: [TimeInterval] = []
        latencies.reserveCapacity(iterations)

        // One untimed call so any one-time initialization on the first
        // prediction is not counted in the percentile.
        _ = try await classifier.classify(image: testImage)

        for _ in 0..<iterations {
            let start = CFAbsoluteTimeGetCurrent()
            _ = try await classifier.classify(image: testImage)
            let elapsed = CFAbsoluteTimeGetCurrent() - start
            latencies.append(elapsed)
        }

        // Index 95 of the 100 sorted samples.
        let p95 = latencies.sorted()[Int(Double(iterations) * 0.95)]
        XCTAssertLessThan(p95, 0.1, "P95 inference latency should be under 100ms")
    }
}
Stress testing under memory pressure:
#!/bin/bash
# Stress test: run model inference under memory pressure.
#
# Launches 100 concurrent inference runs on the attached device over adb,
# reports how many of them exited non-zero, and prints OOM/ANR entries that
# logcat recorded during the run. Exits 1 if any run failed.

FILL_FILE=/tmp/fillmem
FILL_SIZE=500M
RUNS=100

# Remove the fill file when the script exits, however it exits.
cleanup() {
  rm -f "$FILL_FILE"
}
trap cleanup EXIT

# Generate memory pressure and hold it for the whole run (the file is only
# removed by the EXIT trap).
# NOTE(review): fallocate reserves space on the machine running this script,
# not RAM on the Android device; use a device-side allocator if pressure on
# the device itself is required.
fallocate -l "$FILL_SIZE" "$FILL_FILE" \
  || echo "warning: could not allocate $FILL_SIZE at $FILL_FILE" >&2

# Clear the device log buffer so the report below only shows this run.
# This discards whatever was in the buffer before the test.
adb logcat -c

# Run concurrent inference, remembering each job so its status can be checked.
pids=()
for i in $(seq 1 "$RUNS"); do
  adb shell "cd /data/local/tmp && ./run_inference model.onnx input.bin" &
  pids+=("$!")
done

# A bare `wait` would discard the exit codes; wait per job and count failures.
failures=0
for pid in "${pids[@]}"; do
  wait "$pid" || failures=$((failures + 1))
done

echo "Stress test completed"
if [ "$failures" -gt 0 ]; then
  echo "$failures of $RUNS inference runs failed" >&2
fi

# Check for OOM errors in logcat
adb logcat -d | grep -E "(OOM|OutOfMemory|ANR)" | head -20

# Report failure to the caller if any inference run exited non-zero.
[ "$failures" -eq 0 ] || exit 1
EXERCISE
Build an on-device test suite that runs 1000 inference iterations, captures latency percentiles, detects memory leaks via tracemalloc, and generates JUnit/HTML test reports.