20. Docker Deployment
Chapter 20 of 22 · 25 min
Containerization ensures consistent voice AI deployments across development, testing, and production environments with all dependencies isolated.
Dockerfile for Voice Pipeline
# GPU runtime image for the voice pipeline: CUDA 12.1 runtime on Ubuntu 22.04.
FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04

# Build-time only: silences interactive apt prompts without leaking the
# setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# System dependencies (sorted for diffability).
# - build-essential: listed explicitly because --no-install-recommends no
#   longer pulls a compiler in implicitly; source builds such as pyaudio need it.
# - portaudio19-dev: the PortAudio headers on Ubuntu 22.04 (there is no
#   "libportaudio-dev" package).
# - python3-venv: required by `python -m venv` below; Ubuntu ships the venv
#   module in a separate package.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ffmpeg \
        libportaudio2 \
        libsndfile1 \
        libsndfile1-dev \
        portaudio19-dev \
        python3-dev \
        python3-pip \
        python3-venv \
        python3.10 \
    && rm -rf /var/lib/apt/lists/*

# Make `python` resolve to the system Python 3 interpreter.
RUN ln -sf /usr/bin/python3 /usr/bin/python

# Isolate Python packages in a virtual environment and put it first on PATH so
# every later `python` / `pip` call uses it.
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install Python dependencies before copying the application so this layer is
# reused until requirements.txt changes.
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt

# Pre-download models (reduces cold start). Done before the application code
# is copied so that code changes do not invalidate the download layer.
# NOTE(review): the previous id 'small' is not expected to resolve as a
# Hugging Face Hub repository; the default below assumes Whisper "small" was
# intended — confirm against the model main.py loads, or override with
# --build-arg PRELOAD_MODEL=<repo-id>.
ARG PRELOAD_MODEL=openai/whisper-small
RUN python -c "from transformers import AutoModel; AutoModel.from_pretrained('${PRELOAD_MODEL}')"

# Copy application code
WORKDIR /app
COPY app/ ./

# Runtime environment: unbuffered stdout/stderr so logs appear immediately,
# and restrict CUDA to the first visible device.
ENV PYTHONUNBUFFERED=1 \
    CUDA_VISIBLE_DEVICES=0

# Documentation only: 8000 is the port healthcheck.py and `docker run -p`
# target; 5004 is published as the metrics port in docker-compose.yml.
EXPOSE 8000 5004

# Health check (exec form, so no intermediate shell is spawned). The 60s start
# period gives the application time to come up before failures are counted.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD ["python", "healthcheck.py"]

# NOTE(review): the container still runs as root; consider adding a dedicated
# USER once the model cache and mounted volume paths are writable by it.

# Run the application
CMD ["python", "main.py"]
requirements.txt
# Core ML
torch>=2.0.0
transformers>=4.30.0
accelerate>=0.20.0
# Voice processing
openai-whisper>=20231117
# Coqui TTS is published on PyPI under the distribution name "TTS"
TTS>=0.21.0
silero-vad>=4.0
# Audio I/O
soundfile>=0.12.1
pyaudio>=0.2.14
librosa>=0.10.0
# Web serving
fastapi>=0.100.0
uvicorn[standard]>=0.23.0
websockets>=11.0
# HTTP client imported by healthcheck.py
httpx>=0.24.0
# Infrastructure
redis>=4.5.0
prometheus-client>=0.17.0
docker-compose.yml
version: '3.8'

services:
  # Voice pipeline application, built from the Dockerfile in this directory.
  voice-pipeline:
    build: .
    ports:
      - "8000:8000"  # API (the port healthcheck.py probes)
      - "5004:5004"  # Metrics
    volumes:
      - ./models:/app/models  # Cache models
      - ./audio:/app/audio
    environment:
      - MODEL_CACHE_DIR=/app/models
      - REDIS_URL=redis://cache:6379
      - GPU_ENABLED=true
    # REDIS_URL points at the `cache` service, so start it first.
    depends_on:
      - cache
    # Reserve one NVIDIA GPU for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "python", "healthcheck.py"]
      interval: 30s
      timeout: 10s
      # Same grace period as the image's HEALTHCHECK; without it, failures
      # during startup count against `retries` immediately.
      start_period: 60s
      retries: 3

  # Redis cache, capped at 256 MB with least-recently-used eviction across all keys.
  cache:
    image: redis:7-alpine
    volumes:
      - redis-data:/data
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru

  # Prometheus server; pinned to a release tag instead of `latest` so the
  # deployment is reproducible.
  metrics:
    image: prom/prometheus:v2.53.0
    ports:
      - "9090:9090"
    volumes:
      # Read-only: the container only needs to read its configuration.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
    depends_on:
      - voice-pipeline

volumes:
  redis-data:
  # NOTE(review): `models` and `audio` are declared but no service above
  # references them (voice-pipeline uses the ./models and ./audio bind mounts).
  models:
  audio:
Multi-Stage Build for Smaller Images
# Stage 1: Build stage — carries the compiler toolchain and is discarded, so
# none of it reaches the final image.
FROM python:3.10-slim AS builder

# build-essential supplies gcc plus the libc headers needed to compile wheels;
# portaudio19-dev supplies the headers for packages that build against
# PortAudio (e.g. pyaudio).
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        portaudio19-dev \
    && rm -rf /var/lib/apt/lists/*

# Install into the per-user site (/root/.local) so the whole dependency tree
# can be copied to the next stage as a single directory.
COPY requirements.txt .
RUN pip install --no-cache-dir --user -r requirements.txt

# Stage 2: Production stage
FROM python:3.10-slim

# Runtime shared libraries only (no compilers or headers).
# NOTE(review): this list mirrors the audio stack in requirements.txt
# (whisper -> ffmpeg, pyaudio -> libportaudio2, soundfile -> libsndfile1);
# trim it if the application does not use those packages.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        libportaudio2 \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Bring over the installed Python packages and expose their console scripts.
COPY --from=builder /root/.local /root/.local
ENV PATH=/root/.local/bin:$PATH

# No `apt-get purge gcc` here: gcc only ever existed in the builder stage, so
# purging it in this stage could not shrink the image and may fail outright
# because the package is unknown to apt in this stage.

WORKDIR /app
COPY app/ ./

# NOTE(review): the image runs as root because dependencies live in
# /root/.local; switching to a venv plus a dedicated USER would harden it.
CMD ["python", "main.py"]
Healthcheck Implementation
# healthcheck.py
import sys
import httpx
def main():
    """Probe the local health endpoint and exit with a container-friendly code.

    Exits with status 0 only when GET http://localhost:8000/health answers
    with HTTP 200 and a JSON body whose "status" field equals "healthy".
    Any other response, and any exception raised while making the request or
    reading the body, results in exit status 1.
    """
    exit_code = 1
    try:
        # Check API health
        reply = httpx.get("http://localhost:8000/health", timeout=5)
        if reply.status_code == 200 and reply.json().get("status") == "healthy":
            exit_code = 0
    except Exception:
        # Connection errors, timeouts and malformed bodies all mean "unhealthy".
        exit_code = 1
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
GPU Docker Configuration
# docker-compose.gpu.yml
# GPU settings for the voice-pipeline service.
# NOTE(review): presumably layered on top of docker-compose.yml with an
# additional `-f` flag — confirm how this file is meant to be applied.
services:
  voice-pipeline:
    # Run the container under the NVIDIA container runtime.
    runtime: nvidia
    environment:
      # Expose only GPU 0, with the compute and utility driver capabilities.
      NVIDIA_VISIBLE_DEVICES: "0"
      NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve a single NVIDIA device for this service.
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
                - compute
Deployment Commands
# Build the image from the Dockerfile in the current directory
docker build --tag voice-pipeline:latest .

# Run a throwaway container with access to all GPUs, publishing the API port
docker run --gpus all --rm --publish 8000:8000 voice-pipeline:latest

# Start the whole stack in the background with docker-compose
docker-compose --file docker-compose.yml up --detach

# Follow the voice-pipeline service logs
docker-compose logs --follow voice-pipeline

# Scale horizontally (multiple instances)
# NOTE(review): docker-compose.yml publishes fixed host ports (8000, 5004) for
# this service, so additional replicas are expected to fail to bind them —
# verify, and drop the fixed host ports before scaling.
docker-compose up --detach --scale voice-pipeline=3
EXERCISE
Create a Dockerfile for a voice pipeline that uses a multi-stage build, caches models in a volume, and includes a working healthcheck. Verify the container starts and responds to health checks. Time: 15 minutes.