KEY INSIGHT
Scaling Nigerian SaaS requires balancing cost optimization with performance requirements, where strategies differ significantly between low-cost development environments and production systems handling real Naira transactions.
The scaling strategy must also account for the unpredictable traffic patterns common in Nigerian markets, where viral content or a marketing campaign can create sudden traffic spikes that overwhelm unprepared systems.
```python
from dataclasses import dataclass
from typing import Optional
import boto3
@dataclass
class ScalingConfig:
    """Capacity bounds and timing settings for one auto-scaling profile.

    Fields are positional in the order declared below, e.g.
    ``ScalingConfig(2, 20, 50, 60, 300)``.
    """

    # Smallest and largest instance count the profile allows.
    min_instances: int
    max_instances: int
    # CPU utilization target for the scaling policy
    # (presumably a percentage such as 50 or 70 -- confirm against callers).
    target_cpu_utilization: float
    # Cooldown after a scale-up and after a scale-down action
    # (units are not stated here; presumably seconds -- confirm).
    scale_up_cooldown: int
    scale_down_cooldown: int
class ScalingStrategy:
    """Implement multi-tier scaling strategy for SaaS.

    Three named profiles are available: ``'aggressive'``, ``'conservative'``
    and ``'balanced'``. Each maps to a ``ScalingConfig`` holding the capacity
    bounds, CPU target and cooldowns used when a profile is applied.
    """

    def __init__(self, ecs_client, cloudwatch_client, autoscaling_client=None):
        """Store the AWS clients and build the profile table.

        Args:
            ecs_client: boto3 ECS client.
            cloudwatch_client: boto3 CloudWatch client.
            autoscaling_client: boto3 ``application-autoscaling`` client used
                to register the scalable target. Optional for backward
                compatibility; when omitted, ``ecs_client`` is used in its
                place (this only works if the caller actually passed an
                Application Auto Scaling client in that slot).
        """
        self.ecs = ecs_client
        self.cloudwatch = cloudwatch_client
        # register_scalable_target is an Application Auto Scaling operation;
        # the ECS client does not expose it.
        self.autoscaling = (
            autoscaling_client if autoscaling_client is not None else ecs_client
        )
        self.strategies = {
            'aggressive': ScalingConfig(2, 20, 50, 60, 300),
            'conservative': ScalingConfig(2, 8, 70, 120, 600),
            'balanced': ScalingConfig(2, 12, 60, 90, 450),
        }

    def apply_scaling_strategy(
        self,
        service_name: str,
        strategy: str,
        cluster_name: str = 'default',
    ) -> dict:
        """Apply scaling strategy to an ECS service.

        Args:
            service_name: Name of the ECS service to scale.
            strategy: Profile name; unknown names fall back to ``'balanced'``.
            cluster_name: ECS cluster that hosts the service. It is needed to
                build the Application Auto Scaling resource id.

        Returns:
            A dict with the ``register_scalable_target`` response under
            ``'target'`` and the created CPU policy in the ``'policies'`` list.
        """
        config = self.strategies.get(strategy, self.strategies['balanced'])

        # Application Auto Scaling identifies an ECS service by namespace,
        # resource id and dimension. Target-group settings are not part of
        # this request, so they are no longer sent.
        target = self.autoscaling.register_scalable_target(
            ServiceNamespace='ecs',
            ResourceId=f'service/{cluster_name}/{service_name}',
            ScalableDimension='ecs:service:DesiredCount',
            MinCapacity=config.min_instances,
            MaxCapacity=config.max_instances,
        )

        # The policy helper is defined outside this section; it is called
        # with the same arguments as before.
        cpu_scaling = self._create_step_scaling_policy(
            'CpuScaling',
            'AverageCPUUtilization',
            config.target_cpu_utilization,
            config.scale_up_cooldown,
            config.scale_down_cooldown,
        )

        return {
            'target': target,
            'policies': [cpu_scaling],
        }
```
**Horizontal Pod Autoscaling for Kubernetes:**
```python
from kubernetes import client, config
class KubernetesScalingManager:
    """Manage K8s autoscaling for SaaS workloads."""

    def __init__(self):
        """Load the local kubeconfig and create the autoscaling/v2 API client."""
        config.load_kube_config()
        self.autoscaling = client.AutoscalingV2Api()

    def deploy_hpa(self, service_name: str, config: dict) -> dict:
        """Deploy a Horizontal Pod Autoscaler for a Deployment.

        The HPA scales on two metrics: average CPU utilization and a per-pod
        ``requests_per_second`` metric. Scale-up may double the replica count
        every 15 seconds with no stabilization window; scale-down removes at
        most 10% of replicas per 60 seconds after a 300-second window.

        Args:
            service_name: Name of the target Deployment; the HPA is named
                ``<service_name>-hpa``.
            config: Settings dict (this parameter shadows the module-level
                ``kubernetes.config`` import inside this method). Recognized
                keys, all optional:
                ``min_replicas`` (default 2), ``max_replicas`` (default 10),
                ``target_cpu_percent`` (default 60), ``namespace``
                (default ``'default'``) and ``target_requests_per_second``
                (default 100).

        Returns:
            The response from ``create_namespaced_horizontal_pod_autoscaler``.
        """
        namespace = config.get('namespace', 'default')
        # Kubernetes quantities are passed to the client as strings.
        target_rps = str(config.get('target_requests_per_second', 100))

        # The HPA *spec* takes "...MetricSource" objects; the
        # "...MetricStatus" types belong to the status section.
        cpu_metric = client.V2MetricSpec(
            type='Resource',
            resource=client.V2ResourceMetricSource(
                name='cpu',
                target=client.V2MetricTarget(
                    type='Utilization',
                    average_utilization=config.get('target_cpu_percent', 60),
                ),
            ),
        )
        rps_metric = client.V2MetricSpec(
            type='Pods',
            pods=client.V2PodsMetricSource(
                metric=client.V2MetricIdentifier(name='requests_per_second'),
                target=client.V2MetricTarget(
                    type='AverageValue',
                    average_value=target_rps,
                ),
            ),
        )

        # React to spikes immediately, but shed capacity slowly to avoid
        # thrashing.
        behavior = client.V2HorizontalPodAutoscalerBehavior(
            scale_up=client.V2HPAScalingRules(
                stabilization_window_seconds=0,
                policies=[
                    client.V2HPAScalingPolicy(
                        type='Percent',
                        value=100,
                        period_seconds=15,
                    ),
                ],
            ),
            scale_down=client.V2HPAScalingRules(
                stabilization_window_seconds=300,
                policies=[
                    client.V2HPAScalingPolicy(
                        type='Percent',
                        value=10,
                        period_seconds=60,
                    ),
                ],
            ),
        )

        hpa = client.V2HorizontalPodAutoscaler(
            api_version='autoscaling/v2',
            kind='HorizontalPodAutoscaler',
            metadata=client.V1ObjectMeta(
                name=f'{service_name}-hpa',
                namespace=namespace,
                labels={'app': service_name},
            ),
            spec=client.V2HorizontalPodAutoscalerSpec(
                scale_target_ref=client.V2CrossVersionObjectReference(
                    api_version='apps/v1',
                    kind='Deployment',
                    name=service_name,
                ),
                min_replicas=config.get('min_replicas', 2),
                max_replicas=config.get('max_replicas', 10),
                metrics=[cpu_metric, rps_metric],
                behavior=behavior,
            ),
        )

        return self.autoscaling.create_namespaced_horizontal_pod_autoscaler(
            namespace=namespace,
            body=hpa,
        )
```
**Database Scaling Strategy:**
```python
class DatabaseScalingManager:
    """Scale the database tier via read replicas and connection pooling."""

    def __init__(self, rds_client, proxy_client):
        """Keep the RDS client and the pooler/proxy client for later calls."""
        self.rds, self.proxy = rds_client, proxy_client

    def setup_read_replica(self, primary_db_arn: str, region: str) -> dict:
        """Create a read replica of the primary database.

        Args:
            primary_db_arn: Identifier of the source instance, passed as
                ``SourceDBInstanceIdentifier``.
            region: Region name; used in the replica identifier, the
                availability zone (``<region>a``) and the ``Region`` tag.

        Returns:
            The response from ``create_db_instance_read_replica``.
        """
        replica_tags = [
            {'Key': 'Purpose', 'Value': 'read-replica'},
            {'Key': 'Region', 'Value': region},
        ]
        request = {
            'DBInstanceIdentifier': f'saas-read-replica-{region}',
            'SourceDBInstanceIdentifier': primary_db_arn,
            'DBInstanceClass': 'db.t3.medium',
            'AvailabilityZone': f'{region}a',
            'PubliclyAccessible': False,
            'Tags': replica_tags,
        }
        return self.rds.create_db_instance_read_replica(**request)

    def configure_connection_pool(
        self,
        pool_size: int,
        max_connections: int
    ) -> dict:
        """Push connection-pool settings to the proxy client.

        Only the pool size and connection cap come from the caller; the
        timeouts, server lifetime and ``'transaction'`` pool mode are fixed.

        Returns:
            The response from ``update_pool_configuration``.
        """
        settings = dict(
            pool_size=pool_size,
            max_connections=max_connections,
            server_idle_timeout=600,
            server_lifetime=3600,
            query_timeout=30,
            pool_mode='transaction',
        )
        return self.proxy.update_pool_configuration(settings)
```
**Predictive Scaling with ML:**
```python
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
class PredictiveScalingService:
    """Predict traffic and scale proactively.

    A linear regression is fitted on historical request rates and then used
    to estimate load, and a matching instance count, for a future time.
    """

    def __init__(self, metrics_client):
        """Store the metrics client and create an unfitted regression model."""
        self.metrics = metrics_client
        self.model = LinearRegression()

    def train_model(self, historical_data: pd.DataFrame):
        """Train scaling prediction model.

        Args:
            historical_data: Frame containing the feature columns ``hour``,
                ``day_of_week``, ``is_business_day`` and
                ``marketing_campaign_active`` plus the target column
                ``requests_per_second``.
        """
        feature_columns = [
            'hour',
            'day_of_week',
            'is_business_day',
            'marketing_campaign_active',
        ]
        X = historical_data[feature_columns]
        y = historical_data['requests_per_second']
        self.model.fit(X, y)

    def predict_load(self, target_datetime: datetime) -> dict:
        """Predict load for a future time.

        Args:
            target_datetime: Moment to predict for.

        Returns:
            A dict with the predicted requests per second, the confidence
            score, the recommended instance count, and ``predicted_at`` (the
            naive UTC time at which the prediction was made).
        """
        # Feature and confidence helpers are defined outside this section.
        features = self._prepare_features(target_datetime)
        # Convert the model's numeric scalar to a built-in float so the
        # result is plain Python data.
        predicted_load = float(self.model.predict([features])[0])
        confidence = self._calculate_confidence(features)
        recommended_instances = self._calculate_instances(
            predicted_load,
            confidence
        )
        return {
            'predicted_requests_per_second': predicted_load,
            'confidence': confidence,
            'recommended_instances': recommended_instances,
            # NOTE: naive UTC timestamp, kept for compatibility with callers.
            'predicted_at': datetime.utcnow()
        }

    def _calculate_instances(self, predicted_load: float, confidence: float) -> int:
        """Calculate recommended instance count.

        Starts from a base of 2 instances and adds one per 1000 predicted
        requests per second, multiplied by a safety factor of 1.5 when
        confidence is below 0.7 and 1.2 otherwise. The result is truncated
        to an int and is never lower than the base.
        """
        base_instances = 2
        # A linear model can extrapolate to a negative load; treat that as
        # zero so the recommendation never drops below the base count.
        load_factor = max(predicted_load, 0.0) / 1000
        safety_factor = 1.5 if confidence < 0.7 else 1.2
        return int(base_instances + load_factor * safety_factor)
```
**Common Failure Modes:**
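Auto-scaling that reacts to every short-lived traffic spike causes thrashing: instances are added and removed in rapid succession, which wastes money and destabilizes the service. Always implement stabilization windows and minimum scale-down thresholds.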
```python
def configure_safe_autoscaling():
    """Return autoscaling rules with safety measures against thrashing.

    Scale-up has a stabilization window of 0 and a single 100-percent
    policy. Scale-down has a stabilization window of 300 and two policies:
    10 percent per period of 60, and an absolute step of 1 per period of 300.
    Units are not stated in the data; presumably seconds -- confirm against
    the consumer of these rules.
    """
    scale_up = {
        'stabilization_window': 0,
        'policies': [{'type': 'percent', 'value': 100, 'period': 15}],
    }
    scale_down = {
        'stabilization_window': 300,
        'policies': [
            {'type': 'percent', 'value': 10, 'period': 60},
            {'type': 'absolute', 'value': 1, 'period': 300},
        ],
    }
    return {'scale_up': scale_up, 'scale_down': scale_down}
```