07. Integration Testing

Chapter 7 of 18 · 20 min

Integration tests verify that components work together correctly. For an AI application, this means testing the full request path from API gateway through backend to model serving. Tests must handle async operations and streaming responses.

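The test setup uses pytest with pytest-asyncio for async support. A session-scoped fixture built on the Docker SDK for Python starts the supporting containers (a Postgres database for test data and a mock model server) once per test run: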

# tests/conftest.py
import pytest
import docker
from docker.types import ServicePort, EndpointSpec

@pytest.fixture(scope="session")
def docker_services(docker_api_client):
    """Start the supporting containers once per test session and stop them afterwards.

    Args:
        docker_api_client: Requested fixture; not used directly in this
            function. NOTE(review): presumably provided by a plugin or another
            conftest -- confirm it exists, otherwise pytest cannot resolve it.

    Yields:
        dict: ``{"postgres": <container>, "model_server": <container>}`` holding
        the Docker SDK container objects for the running services.
    """
    client = docker.from_env()

    # Track what actually started so teardown also runs when a later
    # container fails to launch.
    started = []
    try:
        # Start postgres for test data
        postgres = client.containers.run(
            "postgres:15",
            name="test-postgres",
            environment={"POSTGRES_DB": "test_db", "POSTGRES_PASSWORD": "test"},
            ports={"5432/tcp": None},  # None -> Docker picks a free host port
            remove=True,
            detach=True,
        )
        started.append(postgres)

        # Start mock model server for predictable responses
        # NOTE(review): the stock nginx:alpine image normally listens on port 80,
        # not 8080 -- confirm the intended image/config before relying on it.
        mock_model = client.containers.run(
            "nginx:alpine",
            name="test-model-server",
            ports={"8080/tcp": None},
            remove=True,
            detach=True,
        )
        started.append(mock_model)

        yield {
            "postgres": postgres,
            "model_server": mock_model,
        }
    finally:
        # The containers were started with remove=True, so stopping them also
        # deletes them and frees the fixed names for the next run.
        for container in reversed(started):
            try:
                container.stop()
            except docker.errors.NotFound:
                # Already exited and auto-removed; nothing left to clean up.
                pass
        client.close()

Integration tests for the upload flow verify file handling, progress reporting, and database persistence:

# tests/test_upload.py
import pytest
from httpx import AsyncClient

@pytest.mark.asyncio
async def test_upload_pdf_success(docker_services):
    """Uploading a PDF returns HTTP 200 with a JSON body containing ``document_id``."""
    async with AsyncClient(base_url="http://localhost:8000") as client:
        with open("tests/fixtures/sample.pdf", "rb") as f:
            files = {"file": ("sample.pdf", f, "application/pdf")}
            # Use a regular POST rather than client.stream(): the test parses the
            # body as JSON, and a streamed httpx response has no body loaded
            # until it is explicitly read.
            response = await client.post("/api/v1/upload", files=files)

    assert response.status_code == 200
    assert response.headers["content-type"] == "application/json"

    # httpx's Response.json() is a plain method, not a coroutine -- no await.
    data = response.json()
    assert "document_id" in data

@pytest.mark.asyncio
async def test_upload_size_limit(docker_services):
    """An upload above the size limit is rejected with HTTP 413."""
    # 60MB payload, deliberately over the 50MB limit
    oversized_bytes = 60 * 1024 * 1024
    upload = {"file": ("large.pdf", b"x" * oversized_bytes, "application/pdf")}

    async with AsyncClient(base_url="http://localhost:8000") as http:
        result = await http.post("/api/v1/upload", files=upload)

    assert result.status_code == 413
    assert "File too large" in result.text

The streaming response test requires careful handling of the SSE format:

# tests/test_chat.py
@pytest.mark.asyncio
async def test_ask_streaming_response(docker_services):
    """Asking a question streams back a non-empty answer with no error text.

    Only lines beginning with the ``data: `` prefix are collected; their
    payloads are concatenated to form the full response.
    """
    sse_prefix = "data: "
    request_body = {
        "question": "What is this document about?",
        "document_id": "test-doc-123",
    }

    async with AsyncClient(base_url="http://localhost:8000") as http:
        async with http.stream("POST", "/api/v1/ask", json=request_body) as reply:
            assert reply.status_code == 200

            # Keep just the payload of each data line, dropping the prefix.
            payloads = [
                line[len(sse_prefix):]
                async for line in reply.aiter_lines()
                if line.startswith(sse_prefix)
            ]

    answer = "".join(payloads)
    assert answer
    assert "error" not in answer.lower()

Common integration test failures include timing issues with async services, resource contention between tests, and test data pollution. Use unique identifiers for test documents to avoid cross-test interference.

EXERCISE

Write integration tests covering the complete flow: upload document, wait for processing, ask question, receive streaming response.