KEY INSIGHT
Infrastructure as Code (IaC) treats compute resources, networking, and serving configuration as versioned, declarative specifications. For local AI, IaC ensures consistent deployment across edge devices and reproducible environment reconstruction after failures.
### IaC for ML Serving
When you deploy models locally, "infrastructure" includes your inference servers, model artifacts, runtime configuration, monitoring setup, and network access controls. IaC codifies these into version-controlled definitions.
Without IaC, each deployed instance represents accumulated manual configuration drift. When you need to replicate an environment (new edge node, disaster recovery, scaling), undocumented steps cause failures.
### Terraform for ML Infrastructure
```hcl
# HCL: Terraform configuration for local AI serving infrastructure
# File: main.tf
# Terraform settings: minimum CLI version and where state is kept.
terraform {
  # Any Terraform release from 1.0 onward is accepted.
  required_version = ">= 1.0"

  # State is written to a file on local disk, which suits simple deployments.
  backend "local" {
    path = "terraform.tfstate"
  }
}
# Input variables: environment-specific settings for this deployment.

# Deployment name; used as a prefix for resource names and as the
# Environment tag value.
variable "deployment_name" {
  type        = string
  default     = "local-ai-production"
  description = "Name for this deployment"
}
# Which version of each model is deployed. One serving instance is created
# per key in this map.
variable "model_versions" {
  type        = map(string)
  description = "Map of model names to their current versions"

  default = {
    sentiment   = "v2.1.3"
    nlp_parser  = "v1.5.0"
    recommender = "v3.2.1"
  }
}
# Per-model compute requirements, keyed by the same model names used in
# var.model_versions.
variable "compute_resources" {
  description = "Compute allocation per model"

  type = map(object({
    cpu_cores   = number
    memory_gb   = number
    gpu_enabled = bool
  }))

  default = {
    sentiment = {
      cpu_cores   = 2
      memory_gb   = 4
      gpu_enabled = false
    }
    nlp_parser = {
      cpu_cores   = 4
      memory_gb   = 8
      gpu_enabled = true
    }
    recommender = {
      cpu_cores   = 2
      memory_gb   = 4
      gpu_enabled = false
    }
  }
}
# Fixed locations shared across the configuration: where model artifacts
# are published and where metrics are collected.
locals {
  monitoring_endpoint = "http://monitoring.internal:9090"
  model_registry_path = "s3://internal-models/mlops/"
}
# S3 bucket for model artifacts.
# Versioning keeps every uploaded revision of an artifact; superseded
# (noncurrent) revisions are moved to Glacier after 30 days.
resource "aws_s3_bucket" "model_artifacts" {
  bucket = "${var.deployment_name}-artifacts"

  versioning {
    enabled = true
  }

  # The S3 lifecycle policy is the `lifecycle_rule` argument of the bucket.
  # It must not be written as `lifecycle { rule { ... } }`: `lifecycle` is
  # Terraform's own meta-argument block (create_before_destroy,
  # prevent_destroy, ...) and has no `rule` argument.
  # NOTE(review): AWS provider v4+ marks the inline `versioning` and
  # `lifecycle_rule` arguments as deprecated in favour of the standalone
  # aws_s3_bucket_versioning / aws_s3_bucket_lifecycle_configuration
  # resources - migrate if the provider version in use requires it.
  lifecycle_rule {
    enabled = true

    noncurrent_version_transition {
      days          = 30
      storage_class = "GLACIER"
    }
  }

  tags = {
    Environment = var.deployment_name
    Component   = "model-storage"
    ManagedBy   = "terraform"
  }
}
# Compute instances for model serving: one instance per entry in
# var.model_versions (the same pattern applies to local VM providers).
resource "aws_instance" "ml_serving" {
  for_each = var.model_versions

  # Placeholder for the pre-built serving image; replace with a real AMI ID.
  ami = "ami-model-serving-v2"

  # GPU models get a GPU instance type, everything else a CPU type.
  # Only gpu_enabled is consulted here; cpu_cores and memory_gb are not
  # used to pick the instance type.
  # Every key in var.model_versions must also exist in
  # var.compute_resources, otherwise this index fails at plan time.
  instance_type = var.compute_resources[each.key].gpu_enabled ? "g4dn.xlarge" : "c5.xlarge"

  # Attach the serving security group so its ingress/egress rules actually
  # apply to these instances.
  vpc_security_group_ids = [aws_security_group.ml_serving.id]

  tags = {
    Name         = "${var.deployment_name}-${each.key}"
    Model        = each.key
    ModelVersion = each.value
    Environment  = var.deployment_name
    ManagedBy    = "terraform"
  }

  # Bring the replacement instance up before the old one is destroyed.
  lifecycle {
    create_before_destroy = true
  }
}
# Network rules for the model serving instances.
resource "aws_security_group" "ml_serving" {
  name        = "${var.deployment_name}-ml-serving"
  description = "Security group for ML model serving instances"

  # Inference endpoint (8080/tcp): reachable from the internal network only.
  ingress {
    protocol    = "tcp"
    from_port   = 8080
    to_port     = 8080
    cidr_blocks = ["10.0.0.0/8"]
  }

  # Metrics endpoint (9090/tcp): reachable from the monitoring network only.
  ingress {
    protocol    = "tcp"
    from_port   = 9090
    to_port     = 9090
    cidr_blocks = ["10.0.1.0/24"]
  }

  # Outbound HTTPS (443/tcp) to anywhere, used for model downloads.
  egress {
    protocol    = "tcp"
    from_port   = 443
    to_port     = 443
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    ManagedBy = "terraform"
  }
}
# Outputs for deployment information

# Map of model name => inference URL.
# Uses the private address: port 8080 is only open to 10.0.0.0/8, so a
# public-IP URL would be unreachable (and public_ip is empty for instances
# that have no public address).
output "model_endpoints" {
  description = "Inference endpoints for deployed models"
  value = {
    for model, instance in aws_instance.ml_serving :
    model => "http://${instance.private_ip}:8080"
  }
}
# Summary of what this configuration deployed: the deployment name, the
# model => version map, and the number of serving instances.
output "deployment_status" {
  value = {
    deployment_name = var.deployment_name
    models          = var.model_versions
    instances       = length(aws_instance.ml_serving)
  }
  description = "Current deployment state summary"
}
```
### Ansible for Configuration Management
Terraform manages infrastructure creation. Ansible manages configuration on the machines that infrastructure provides. Use Ansible for installing dependencies, configuring the runtime environment, and deploying model artifacts.
```yaml
# YAML: Ansible playbook for ML model deployment
# File: deploy-models.yml
- name: Deploy ML Models to Serving Infrastructure
  hosts: ml_serving
  become: true

  vars:
    # Registry location, kept for reference. The download task below
    # addresses the same bucket ("internal-models") and key prefix ("mlops/").
    model_registry: "s3://internal-models/mlops/"
    serving_config_dir: /etc/ml-serving
    # Single source of truth for which version of each model is deployed;
    # every per-model task loops over this map.
    model_versions:
      sentiment: "v2.1.3"
      nlp_parser: "v1.5.0"
      recommender: "v3.2.1"

  handlers:
    # Target of "notify: reload systemd" in the unit-file task below.
    - name: reload systemd
      ansible.builtin.systemd:
        daemon_reload: true

  tasks:
    # NOTE(review): assumes the ml-serving user and group already exist on
    # the target hosts - confirm or create them before this play runs.
    - name: Ensure serving directories exist
      ansible.builtin.file:
        path: "{{ serving_config_dir }}/{{ item }}"
        state: directory
        owner: ml-serving
        group: ml-serving
        mode: '0755'
      loop:
        - models
        - logs
        - config

    # There is no python_venv module; create the environment with the
    # standard library venv module. "creates" keeps the task idempotent.
    # NOTE(review): assumes python3 with the venv module is installed.
    - name: Create Python virtual environment for each model
      ansible.builtin.command:
        cmd: "python3 -m venv {{ serving_config_dir }}/venv/{{ item }}"
        creates: "{{ serving_config_dir }}/venv/{{ item }}/bin/activate"
      loop: "{{ model_versions.keys() | list }}"

    # The pip module selects the environment with "virtualenv".
    - name: Install model serving dependencies
      ansible.builtin.pip:
        virtualenv: "{{ serving_config_dir }}/venv/{{ item.service }}"
        name: "{{ item.packages }}"
      loop:
        - { service: sentiment, packages: ['torch', 'fastapi', 'uvicorn'] }
        - { service: nlp_parser, packages: ['transformers', 'fastapi', 'uvicorn'] }
        - { service: recommender, packages: ['implicit', 'fastapi', 'uvicorn'] }

    # dict2items yields item.key (model name) and item.value (version), so
    # versions are not repeated here and cannot drift from model_versions.
    - name: Download model artifacts from registry
      amazon.aws.s3_object:
        bucket: internal-models
        object: "mlops/{{ item.key }}/{{ item.value }}.tar.gz"
        dest: "{{ serving_config_dir }}/models/{{ item.key }}.tar.gz"
        mode: get
      loop: "{{ model_versions | dict2items }}"

    # unarchive requires its destination directory to exist already.
    - name: Ensure per-model extraction directories exist
      ansible.builtin.file:
        path: "{{ serving_config_dir }}/models/{{ item }}"
        state: directory
        owner: ml-serving
        group: ml-serving
        mode: '0755'
      loop: "{{ model_versions.keys() | list }}"

    - name: Extract model artifacts
      ansible.builtin.unarchive:
        src: "{{ serving_config_dir }}/models/{{ item }}.tar.gz"
        dest: "{{ serving_config_dir }}/models/{{ item }}/"
        remote_src: true
      loop: "{{ model_versions.keys() | list }}"

    - name: Configure model serving service
      ansible.builtin.template:
        src: "templates/ml-service-{{ item }}.service.j2"
        dest: "/etc/systemd/system/ml-{{ item }}.service"
        owner: root
        group: root
        mode: '0644'
      loop: "{{ model_versions.keys() | list }}"
      notify: reload systemd

    # Handlers normally run at the end of the play; run them now so systemd
    # has loaded new or changed unit files before the services are started.
    - name: Reload systemd now if any unit file changed
      ansible.builtin.meta: flush_handlers

    - name: Ensure ML serving services are started and enabled
      ansible.builtin.systemd:
        name: "ml-{{ item }}.service"
        state: started
        enabled: true
      loop: "{{ model_versions.keys() | list }}"
```
### State Management
IaC state tracks what you've created so subsequent runs know what exists. For local deployments, keep IaC state files in one central, access-controlled location managed as part of your infrastructure repository workflow, not scattered across individual devices.
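State files contain sensitive information (IP addresses, resource identifiers, and sometimes secrets in plain text). Encrypt them or keep them in protected storage inside your network, and avoid committing unencrypted state to version control.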
---