KEY INSIGHT
Model governance encompasses the policies, procedures, and controls that ensure models operate responsibly, legally, and ethically. Governance addresses fairness, explainability, bias detection, and compliance—this is not optional for production ML systems.
### Governance Framework Components
Effective model governance requires four interconnected components:
**Model inventory**: A detailed registry tracking every deployed model, its version, owner, training data provenance, evaluation metrics, and deployment status. You cannot govern what you cannot see.
**Lifecycle management**: Definitions of model stages (development, validation, staging, production, deprecated, archived), promotion criteria for advancing between stages, and deprecation procedures for retiring models.
**Risk assessment**: Evaluation of model impact on individuals or groups, required safeguards based on risk level, and documented approval chains.
**Monitoring and audit**: Continuous monitoring of fairness metrics, drift detection, and maintenance of an audit trail for compliance.
### Model Registry Implementation
```python
# Python: Model registry with governance metadata
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional
import json
import os
from pathlib import Path
class ModelStage(Enum):
    """Lifecycle stage of a registered model.

    The string values are what the registry persists to JSON, so they must
    stay stable once models have been written with them.
    """

    DEVELOPMENT = "development"
    VALIDATION = "validation"
    STAGING = "staging"
    PRODUCTION = "production"
    ARCHIVED = "archived"
    DEPRECATED = "deprecated"
class RiskLevel(Enum):
    """Governance risk tier assigned to a model.

    The string values are the serialized form stored alongside the model
    metadata.
    """

    # Minimal individual impact
    LOW = "low"
    # Some individual impact, limited scope
    MEDIUM = "medium"
    # Significant individual impact
    HIGH = "high"
    # Legal, financial, or fundamental rights impact
    CRITICAL = "critical"
@dataclass
class ModelMetadata:
    """Complete governance metadata for a model.

    One instance describes a single (model_id, version) pair. Use
    ``to_dict()`` to obtain a plain-Python snapshot of the record, which is
    what the registry stores.
    """

    model_id: str
    name: str
    version: str
    model_type: str  # classifier, regressor, etc.

    # Ownership
    owner_team: str
    owner_contact: str
    data_scientist: str

    # Training provenance
    training_data_id: str
    training_data_version: str
    training_data_hash: str  # SHA256 of training data
    training_start_date: str
    training_end_date: str
    training_duration_hours: float

    # Configuration
    hyperparameters: dict
    features: list[str]
    feature_preprocessing: dict

    # Performance
    evaluation_metrics: dict  # metric_name -> value
    evaluation_date: str
    evaluation_dataset_id: str

    # Governance
    stage: ModelStage
    risk_level: RiskLevel
    regulatory_context: Optional[str]  # HIPAA, GDPR, etc.
    approval_status: str
    approved_by: list[str]
    approval_date: str

    # Fairness
    fairness_metrics: Optional[dict]
    protected_groups: Optional[list[str]]
    fairness_thresholds: Optional[dict]

    # Deployment
    deployment_date: Optional[str]
    serving_endpoint: Optional[str]
    traffic_percentage: float = 0.0

    @staticmethod
    def _to_plain(value):
        """Return *value* with Enums replaced by their ``.value``.

        Dicts, lists and tuples are rebuilt recursively, so the result never
        shares a mutable container with the input. Any other value is
        returned unchanged.
        """
        if isinstance(value, Enum):
            return value.value
        if isinstance(value, dict):
            return {
                key: ModelMetadata._to_plain(item)
                for key, item in value.items()
            }
        if isinstance(value, list):
            return [ModelMetadata._to_plain(item) for item in value]
        if isinstance(value, tuple):
            return tuple(ModelMetadata._to_plain(item) for item in value)
        return value

    def to_dict(self) -> dict:
        """Return the metadata as a dict of plain values.

        Enum members are replaced by their string values, including Enums
        nested inside dict/list/tuple fields such as ``hyperparameters``.
        Container fields are copied, so mutating the returned dict does not
        alter this instance.
        """
        return {
            name: self._to_plain(value)
            for name, value in self.__dict__.items()
        }
class ModelRegistry:
    """
    File-based model registry with governance metadata.
    Production systems should use PostgreSQL or similar.

    All records live in a single ``models.json`` file inside the registry
    directory, shaped as ``{"models": [<metadata dict>, ...]}``. Every
    operation re-reads the file, so there is no in-memory cache to
    invalidate. Concurrent writers are not coordinated.
    """

    def __init__(self, registry_path: str):
        """Open (and create if needed) the registry rooted at *registry_path*."""
        self.registry_path = Path(registry_path)
        self.registry_path.mkdir(parents=True, exist_ok=True)
        self.models_file = self.registry_path / "models.json"
        self._ensure_registry_file()

    def _ensure_registry_file(self):
        """Create an empty registry file if none exists yet."""
        if not self.models_file.exists():
            self.models_file.write_text(
                json.dumps({"models": []}), encoding="utf-8"
            )

    def _load_registry(self) -> dict:
        """Read and parse the registry file."""
        return json.loads(self.models_file.read_text(encoding="utf-8"))

    def _save_registry(self, registry: dict):
        """Persist *registry*, replacing the previous file contents.

        The data is written to a sibling temp file and then renamed over
        ``models.json``, so an interrupted write cannot leave a truncated,
        unparseable registry behind.
        """
        payload = json.dumps(registry, indent=2)
        tmp_file = self.models_file.with_name(self.models_file.name + ".tmp")
        tmp_file.write_text(payload, encoding="utf-8")
        tmp_file.replace(self.models_file)

    @staticmethod
    def _version_key(version) -> tuple:
        """Return a sort key that orders version strings numerically.

        Digit runs compare as integers, so "1.10.0" sorts after "1.9.0";
        everything else compares as text. This is natural ordering, not full
        semantic versioning: pre-release tags such as "-rc1" sort *after*
        the bare release.
        """
        import re  # local import: keeps this block free of new file-level deps

        key = []
        for chunk in re.findall(r"[0-9]+|[^0-9]+", str(version)):
            if chunk[0] in "0123456789":
                key.append((0, int(chunk), ""))
            else:
                key.append((1, 0, chunk))
        return tuple(key)

    def _latest(self, candidates: list[dict]) -> dict:
        """Return the record with the highest version among *candidates*."""
        ordered = sorted(
            candidates, key=lambda m: self._version_key(m["version"])
        )
        return ordered[-1]

    def register_model(self, metadata: ModelMetadata) -> str:
        """Register a new model version.

        Returns the model's ``model_id``.

        Raises:
            ValueError: if the same ``model_id`` and ``version`` pair is
                already present in the registry.
        """
        registry = self._load_registry()
        already_registered = any(
            m["model_id"] == metadata.model_id
            and m["version"] == metadata.version
            for m in registry["models"]
        )
        if already_registered:
            raise ValueError(f"Model {metadata.model_id} v{metadata.version} already registered")
        registry["models"].append(metadata.to_dict())
        self._save_registry(registry)
        return metadata.model_id

    def list_models(self, stage: Optional[ModelStage] = None) -> list[dict]:
        """List all models, optionally filtered by stage."""
        models = self._load_registry()["models"]
        if stage is not None:
            models = [m for m in models if m["stage"] == stage.value]
        return models

    def get_model(self, model_id: str, version: Optional[str] = None) -> Optional[dict]:
        """Retrieve model metadata.

        With *version*, return that exact version. Without it, return the
        highest version of *model_id*. Returns ``None`` when nothing matches.
        """
        registry = self._load_registry()
        candidates = [m for m in registry["models"] if m["model_id"] == model_id]
        if not candidates:
            return None
        if version:
            matches = [m for m in candidates if m["version"] == version]
            return matches[0] if matches else None
        # Numeric-aware ordering; a plain string sort ranks "1.9" above "1.10".
        return self._latest(candidates)

    def promote_model(
        self,
        model_id: str,
        target_stage: ModelStage,
        approved_by: list[str],
        version: Optional[str] = None,
    ) -> dict:
        """Promote a model to a new stage with approval.

        Promotes the given *version*, or the highest registered version of
        *model_id* when *version* is omitted. Other versions are left
        untouched. The record's stage, approval status, approvers and
        approval date are updated and saved.

        Returns the updated record.

        Raises:
            ValueError: if no matching model (or version) is registered.
                Nothing is written in that case.
        """
        registry = self._load_registry()
        candidates = [m for m in registry["models"] if m["model_id"] == model_id]
        if version is not None:
            candidates = [m for m in candidates if m["version"] == version]
        if not candidates:
            wanted = model_id if version is None else f"{model_id} v{version}"
            raise ValueError(f"Model {wanted} is not registered")

        # The chosen dict is the same object held in registry["models"], so
        # updating it in place updates what gets saved.
        record = self._latest(candidates)
        record["stage"] = target_stage.value
        record["approval_status"] = "approved"
        record["approved_by"] = list(approved_by)
        # Attach the local UTC offset so the audit timestamp is unambiguous.
        record["approval_date"] = datetime.now().astimezone().isoformat()
        self._save_registry(registry)
        return record
```
### Fairness Evaluation
```python
# Python: Fairness evaluation for model governance
import numpy as np
from collections import defaultdict
from typing import Optional
class FairnessEvaluator:
    """
    Evaluate model fairness across protected groups.
    Required for responsible governance.

    Each protected group is compared against everyone outside that group.
    Membership is read from an indicator array in which the value 1 marks a
    member.
    """

    # Class-level fallback so the threshold is defined even for instances
    # that did not go through __init__.
    disparity_threshold: float = 0.1

    def __init__(self, protected_groups: list[str], disparity_threshold: float = 0.1):
        """
        Args:
            protected_groups: names of the groups to evaluate; each name is
                looked up as a key in ``protected_attributes``.
            disparity_threshold: a group is flagged when the absolute
                positive-rate difference exceeds this value.
        """
        self.protected_groups = protected_groups
        self.disparity_threshold = disparity_threshold

    def evaluate(
        self,
        predictions: np.ndarray,
        protected_attributes: dict[str, np.ndarray],
        outcome: Optional[np.ndarray] = None  # Ground truth for some analyses
    ) -> dict:
        """
        Compute fairness metrics across protected groups.

        Args:
            predictions: model outputs, one per sample (array or array-like).
            protected_attributes: group name -> membership indicator
                (array or array-like), where 1 marks a member.
            outcome: optional ground-truth labels; when given, an
                equal-opportunity difference is added per group.

        Returns:
            A dict mapping each evaluated group name to its metrics, plus a
            ``"disparity_summary"`` entry. Configured groups that are absent
            from ``protected_attributes`` are skipped.
        """
        # Coerce up front so plain lists work with boolean masking below.
        predictions = np.asarray(predictions)
        if outcome is not None:
            outcome = np.asarray(outcome)

        metrics = {}
        for group in self.protected_groups:
            if group not in protected_attributes:
                continue
            group_mask = np.asarray(protected_attributes[group]) == 1
            metrics[group] = self._compute_group_metrics(
                predictions,
                group_mask,
                ~group_mask,
                outcome,
            )
        # Cross-group fairness summary
        metrics["disparity_summary"] = self._compute_disparity_summary(metrics)
        return metrics

    @staticmethod
    def _mean_or_nan(values: np.ndarray) -> float:
        """Return the mean of *values*, or NaN for an empty selection.

        Checking the size first avoids NumPy's "mean of empty slice"
        RuntimeWarning.
        """
        return float(np.mean(values)) if values.size else float("nan")

    def _compute_group_metrics(
        self,
        predictions: np.ndarray,
        group_mask: np.ndarray,
        non_group_mask: np.ndarray,
        outcome: Optional[np.ndarray]
    ) -> dict:
        """Compute metrics for a single protected group.

        Rates are NaN when the selection they are computed over is empty
        (for example a group with no members).
        """
        group_preds = predictions[group_mask]
        non_group_preds = predictions[non_group_mask]
        group_rate = self._mean_or_nan(group_preds)
        non_group_rate = self._mean_or_nan(non_group_preds)

        metrics = {
            "positive_rate_difference": group_rate - non_group_rate,
            "selection_rate": group_rate,
            "sample_count": int(np.sum(group_mask)),
        }
        if outcome is not None:
            group_outcomes = outcome[group_mask]
            non_group_outcomes = outcome[non_group_mask]
            # Equal Opportunity: equal true positive rates
            group_tpr = self._mean_or_nan(group_preds[group_outcomes == 1])
            non_group_tpr = self._mean_or_nan(
                non_group_preds[non_group_outcomes == 1]
            )
            metrics["equal_opportunity_diff"] = group_tpr - non_group_tpr
        return metrics

    def _compute_disparity_summary(self, metrics: dict) -> dict:
        """Summarize maximum disparities across groups.

        ``max_positive_rate_diff`` is the signed difference with the largest
        magnitude. It is 0.0 when no group was evaluated, and NaN when
        groups were evaluated but none produced a defined difference.
        ``groups_flagged`` lists the groups whose absolute difference
        exceeds ``disparity_threshold``; NaN differences are never flagged.
        """
        diffs = [
            m["positive_rate_difference"]
            for m in metrics.values()
            if "positive_rate_difference" in m
        ]
        # NaN would make max() order-dependent, so rank only defined values.
        defined = [d for d in diffs if not np.isnan(d)]
        if defined:
            largest = float(max(defined, key=abs))
        elif diffs:
            largest = float("nan")
        else:
            largest = 0.0

        return {
            "max_positive_rate_diff": largest,
            "groups_flagged": [
                group for group, m in metrics.items()
                if abs(m.get("positive_rate_difference", 0)) > self.disparity_threshold
            ]
        }
```
### Governance Workflows
Governance is only effective if enforced through workflow. Every model promotion requires documented approval chains. High-risk models require documented review by legal, compliance, and ethics stakeholders. Governance workflows must be integrated into your CI/CD pipeline.