RUNLOCALAIv38
→ Will it run? · Best GPU · Compare · Troubleshoot · Start · Learn · Pulse · Models · Hardware · Tools · Bench
Run check
RUNLOCALAI

Independently operated catalog for local-AI hardware and software. Hand-written verdicts. Source-cited claims. Reproducible commands when we have them.

OP·Fredoline Eruo
DIR
  • Models
  • Hardware
  • Tools
  • Benchmarks
TOOLS
  • Will it run?
  • Compare hardware
  • Cost vs cloud
  • Choose my GPU
  • Prompting kits
  • Quick answers
REF
  • All buyer guides
  • Learn local AI
  • Methodology
  • Glossary
  • Errors KB
  • Trust
EDITOR
  • About
  • Author
  • How we make money
  • Editorial policy
  • Contact
LEGAL
  • Privacy
  • Terms
  • Sitemap
MAIL · MONTHLY DIGEST
Get monthly local AI changes
Monthly recap. No spam.
DISCLOSURE

Some links on this site are affiliate links (Amazon Associates and other first-class retailers). When you buy through them, we earn a small commission at no extra cost to you. Affiliate links do not influence our verdicts — there are cards we rate highly that we don't have affiliate relationships with, and cards that sell well that we refuse to recommend. Read more →

© 2026 runlocalai.co · Independently operated
RUNLOCALAI · v38
  1. >
  2. Home
  3. /Learn
  4. /Courses
  5. /Voice AI with Local Models
  6. /Ch. 19
Voice AI with Local Models

19. Testing Voice Pipelines

Chapter 19 of 22 · 25 min
KEY INSIGHT

Voice pipeline tests require audio fixtures, latency assertions, and quality metrics—testing that the system produces correct output within acceptable time bounds.

Testing voice AI systems requires audio-specific fixtures, latency assertions, and quality validation beyond standard unit testing.

Audio Test Fixtures

import pytest
import numpy as np
from pathlib import Path

@pytest.fixture
def audio_samples():
    """Load the WAV fixtures used across the suite, keyed by sample name.

    Each value is whatever ``load_audio`` returns for
    ``tests/fixtures/<name>.wav``.

    ``english_speech`` and ``spanish_speech`` are loaded alongside the
    general-purpose samples because the language-detection tests index the
    mapping by those keys; without them those tests fail with ``KeyError``
    before the detector is exercised.
    """
    sample_names = (
        "clean_speech",
        "noisy_speech",
        "silence",
        "multi_speaker",
        "english_speech",
        "spanish_speech",
    )
    return {
        name: load_audio(f"tests/fixtures/{name}.wav") for name in sample_names
    }

@pytest.fixture
def mock_microphone(audio_samples):
    """Return a ``MagicMock`` microphone whose ``read()`` yields clean speech.

    Every call to ``read`` returns the raw bytes of the ``clean_speech``
    sample, as produced by ``ndarray.tobytes()``.
    """
    from unittest.mock import MagicMock

    fake_mic = MagicMock()
    speech_bytes = audio_samples["clean_speech"].tobytes()
    fake_mic.read.return_value = speech_bytes
    return fake_mic

def load_audio(path: str) -> np.ndarray:
    """Read the audio file at *path* and return only its sample data.

    ``soundfile.read`` returns a ``(data, samplerate)`` pair; the sample rate
    is discarded here.
    """
    import soundfile

    samples, _sample_rate = soundfile.read(path)
    return samples

@pytest.fixture
def temp_audio_file():
    """Yield the path of a temporary WAV file filled with random samples.

    The file holds 16000 random ``int16`` samples in ``[-1000, 1000)`` written
    at a sample rate of 16000. It is removed once the requesting test
    finishes, including when the write or the test itself fails.
    """
    # Imported locally so the fixture does not rely on module-level imports
    # of these names being present.
    import os
    import tempfile

    import soundfile as sf

    content = np.random.randint(-1000, 1000, size=16000, dtype=np.int16)

    # Only reserve a unique name here and close the handle straight away:
    # writing by name while the handle is still open fails on platforms that
    # lock open files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        path = f.name

    try:
        sf.write(path, content, 16000)
        yield path
    finally:
        os.unlink(path)

Unit Tests for Components

class TestNoiseReduction:
    """Checks on ``NoiseReductionPipeline.process`` using recorded samples."""

    def test_reduces_noise_floor(self, audio_samples):
        """Mean absolute amplitude of noisy input must fall below 80% of its original value."""
        pipeline = NoiseReductionPipeline()
        cleaned = pipeline.process(audio_samples["noisy_speech"])

        # Mean absolute amplitude is used as the noise-floor measure.
        level_before = np.mean(np.abs(audio_samples["noisy_speech"]))
        level_after = np.mean(np.abs(cleaned))

        assert level_after < level_before * 0.8

    def test_preserves_speech_quality(self, audio_samples):
        """Processing clean speech must keep it correlated (> 0.9) with the input."""
        pipeline = NoiseReductionPipeline()
        processed = pipeline.process(audio_samples["clean_speech"])

        reference = audio_samples["clean_speech"].astype(float)
        candidate = processed.astype(float)

        # corrcoef returns the correlation matrix; [0, 1] is the
        # input-vs-output coefficient.
        correlation_matrix = np.corrcoef(reference, candidate)
        correlation = correlation_matrix[0, 1]

        assert correlation > 0.9

class TestLanguageDetection:
    """Language-code checks for ``LanguageDetector.detect``.

    Both tests read their input from the ``audio_samples`` fixture, under the
    ``english_speech`` and ``spanish_speech`` keys respectively.
    """

    @pytest.mark.asyncio
    async def test_detects_english(self, audio_samples):
        """The ``english_speech`` sample must be reported as ``"en"``."""
        detected = await LanguageDetector().detect(audio_samples["english_speech"])

        assert detected == "en"

    @pytest.mark.asyncio
    async def test_detects_spanish(self, audio_samples):
        """The ``spanish_speech`` sample must be reported as ``"es"``."""
        detected = await LanguageDetector().detect(audio_samples["spanish_speech"])

        assert detected == "es"

class TestTTS:
    """Basic output-format checks for ``CoquiTTS.synthesize``."""

    def test_output_shape(self):
        """Synthesized audio is float32, longer than 16000 samples, within [-1, 1]."""
        samples = CoquiTTS().synthesize("Hello world")

        assert samples.dtype == np.float32
        # At least 1 second — assumes a 16 kHz output rate; TODO confirm
        assert len(samples) > 16000

        lowest = samples.min()
        assert -1.0 <= lowest <= 1.0
        highest = samples.max()
        assert -1.0 <= highest <= 1.0

Latency Testing

import time

class TestLatency:
    """Latency budgets for the voice pipeline, measured in milliseconds."""

    @pytest.mark.asyncio
    async def test_end_to_end_latency(self, audio_samples):
        """A full ``process`` call on clean speech must finish in under 500 ms."""
        pipeline = VoicePipeline()
        start = time.perf_counter()

        # Only the elapsed time matters here, so the result is not kept.
        await pipeline.process(audio_samples["clean_speech"])

        latency_ms = (time.perf_counter() - start) * 1000

        assert latency_ms < 500, f"Latency {latency_ms}ms exceeds 500ms threshold"

    @pytest.mark.asyncio
    async def test_component_latencies(self, audio_samples):
        """ASR, LLM and TTS stages must each stay within their own budget."""
        pipeline = VoicePipeline()

        # NOTE(review): nothing visible here hands `tracker` to the pipeline;
        # this presumably relies on the pipeline calling start()/end() on the
        # active tracker — confirm how the two are wired together.
        with LatencyTracker() as tracker:
            await pipeline.process(audio_samples["clean_speech"])

        report = tracker.get_report()

        # Fail with a readable message, rather than a bare KeyError, when a
        # stage was never measured.
        missing = [name for name in ("asr", "llm", "tts") if name not in report]
        assert not missing, f"No latency recorded for: {missing}"

        assert report["asr"] < 200, f"ASR took {report['asr']}ms"
        assert report["llm"] < 300, f"LLM took {report['llm']}ms"
        assert report["tts"] < 150, f"TTS took {report['tts']}ms"

@pytest.fixture
def latency_tracker():
    """Provide a newly constructed ``LatencyTracker``."""
    tracker = LatencyTracker()
    return tracker

class LatencyTracker:
    """Record elapsed wall-clock time, in milliseconds, per named component.

    Call ``start(name)`` before a stage and ``end(name)`` after it, then read
    the results with ``get_report()``. The tracker can also be used as a
    context manager (``with LatencyTracker() as tracker:``); entering returns
    the tracker itself and exiting does nothing beyond letting exceptions
    propagate.
    """

    def __init__(self) -> None:
        # component name -> elapsed milliseconds of its latest start/end pair
        self.measurements = {}
        # component name -> perf_counter() reading taken by start()
        self.start_times = {}

    def __enter__(self) -> "LatencyTracker":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> bool:
        # Required for the `with` statement to accept this object. There is
        # nothing to release, and returning False keeps exceptions raised
        # inside the block from being swallowed.
        return False

    def start(self, component: str) -> None:
        """Mark the start of *component*, overwriting any earlier start mark."""
        self.start_times[component] = time.perf_counter()

    def end(self, component: str) -> None:
        """Store the milliseconds elapsed since ``start(component)``.

        A component that was never started is ignored.
        """
        if component in self.start_times:
            elapsed = (time.perf_counter() - self.start_times[component]) * 1000
            self.measurements[component] = elapsed

    def get_report(self) -> dict:
        """Return a copy of the measurements, so callers cannot mutate the tracker."""
        return self.measurements.copy()

Integration Tests

@pytest.mark.asyncio
async def test_full_pipeline(audio_samples):
    """Clean speech must yield a transcription, a response and non-empty audio."""
    outcome = await VoicePipeline().process(audio_samples["clean_speech"])

    for field in ("transcription", "response", "audio"):
        assert outcome[field] is not None
    assert len(outcome["audio"]) > 0

@pytest.mark.asyncio
async def test_handles_noisy_input(audio_samples):
    """With noise reduction enabled, noisy speech must still be transcribed."""
    pipeline_config = {"noise_reduction": True}
    pipeline = VoicePipeline(config=pipeline_config)

    outcome = await pipeline.process(audio_samples["noisy_speech"])

    # A transcription must be present even though the input is noisy.
    assert outcome["transcription"] is not None

@pytest.mark.asyncio
async def test_conversation_continuity():
    """A follow-up turn on the same pipeline must refer back to the first turn."""
    pipeline = VoicePipeline()

    # NOTE(review): the other tests in this file pass audio arrays to
    # process(); these calls pass plain text — confirm process() accepts it.
    # The first turn only seeds the conversation; its result is not inspected.
    await pipeline.process("Hello")
    follow_up = await pipeline.process("What was my previous message?")

    # Either keyword in the reply counts as a reference to the prior exchange.
    reply = follow_up["response"].lower()
    assert any(keyword in reply for keyword in ("hello", "previous"))

Property-Based Testing

from hypothesis import given, strategies as st

class TestAudioProperties:
    """Property-based checks of ``CoquiTTS.synthesize`` over generated text.

    Hypothesis has no ``st.audio`` strategy, and the value it was meant to
    generate was never used, so both properties are driven by ``st.text``.
    """

    @given(text=st.text(min_length=1, max_length=500))
    def test_normalizes_output(self, text):
        """Synthesized samples stay within [-1.0, 1.0] for any generated text."""
        tts = CoquiTTS()
        output = tts.synthesize(text)

        assert -1.0 <= output.min() <= 1.0
        assert -1.0 <= output.max() <= 1.0

    @given(text=st.text(min_length=1, max_length=500))
    def test_handles_various_text_lengths(self, text):
        """Any text of 1 to 500 characters yields a non-empty NumPy array."""
        tts = CoquiTTS()
        output = tts.synthesize(text)

        # Only non-emptiness and the return type are checked; proportionality
        # to the input length is not asserted here.
        assert len(output) > 0
        assert isinstance(output, np.ndarray)
EXERCISE

Write a test suite for a TTS model that verifies: (1) output is normalized between -1 and 1, (2) synthesis latency is under 500ms, and (3) output duration scales linearly with input text length. Time: 15 minutes.

← Chapter 18
Error Handling
Chapter 20 →
Docker Deployment