KEY INSIGHT
Quota management in multi-tenant AI SaaS requires tracking both hard limits (absolute caps) and soft limits (warning thresholds). For Nigerian customers, the usage-tracking period must also line up with the billing cycle on which Naira payments are collected.
Quota management extends beyond simple counters. It must handle partial usage, rollover policies, and the complex logic of resetting quotas while maintaining historical accuracy for billing disputes.
```python
import calendar
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from enum import Enum
class QuotaType(Enum):
    """Resource categories that are metered per tenant.

    Each member's value is the string form used when a quota type is
    written to usage records, log messages and archive keys.
    """

    API_CALLS = "api_calls"
    AI_TOKENS = "ai_tokens"
    STORAGE_GB = "storage_gb"
    TEAM_MEMBERS = "team_members"
    PROJECTS = "projects"
@dataclass
class QuotaLimit:
    """Limit configuration for a single quota type.

    Attributes:
        quota_type: The resource this limit applies to.
        hard_limit: Absolute cap on usage; must not be negative.
        soft_limit_percentage: Fraction of ``hard_limit`` (0.0-1.0) at
            which usage counts as approaching the cap. Defaults to 0.8.
        warning_enabled: Whether reaching the soft limit should trigger a
            warning notification.

    Raises:
        ValueError: If ``hard_limit`` is negative or
            ``soft_limit_percentage`` is outside ``[0.0, 1.0]``.
    """

    quota_type: "QuotaType"
    hard_limit: int
    soft_limit_percentage: float = 0.8
    warning_enabled: bool = True

    def __post_init__(self) -> None:
        # Fail fast on misconfiguration: a percentage written as 80 instead
        # of 0.8 would put the soft limit far above the hard limit and
        # silently disable every warning.
        if self.hard_limit < 0:
            raise ValueError(
                f"hard_limit must be non-negative, got {self.hard_limit}"
            )
        if not 0.0 <= self.soft_limit_percentage <= 1.0:
            raise ValueError(
                "soft_limit_percentage must be between 0.0 and 1.0, "
                f"got {self.soft_limit_percentage}"
            )
class QuotaManager:
    """Track and enforce per-tenant quotas.

    Live usage counters are kept in Redis; every successful consumption is
    additionally written to the database as a ``UsageRecord``.
    """

    def __init__(self, db_session, redis_client, notification_service):
        """Store the collaborators and load the quota configuration.

        Args:
            db_session: Database session used for tenant lookups and for
                persisting usage records.
            redis_client: Redis client holding the usage counters.
            notification_service: Kept as ``self.notifications``.
        """
        self.db = db_session
        self.redis = redis_client
        self.notifications = notification_service
        self.quota_config = self._load_quota_config()

    def check_quota(
        self,
        tenant_id: str,
        quota_type: "QuotaType",
        requested_amount: int = 1,
    ) -> tuple[bool, dict]:
        """Report whether a tenant may consume ``requested_amount`` units.

        Only reads the current counter; nothing is consumed.

        Args:
            tenant_id: Identifier of the tenant to check.
            quota_type: The resource being requested.
            requested_amount: Number of units the caller wants to consume.

        Returns:
            ``(allowed, status)``. When the tenant's plan defines no limit
            for ``quota_type`` the status is ``{'unlimited': True}``.
            Otherwise it contains ``current_usage``, ``hard_limit``,
            ``remaining`` (units left *after* this request, floored at 0),
            ``approaching`` and ``exceeded``.

        Raises:
            ValueError: If no tenant with ``tenant_id`` exists.
        """
        tenant = self.db.query(Tenant).filter(
            Tenant.id == tenant_id
        ).first()
        if tenant is None:
            # Previously this surfaced as an AttributeError on ``None.plan``.
            raise ValueError(f"Unknown tenant: {tenant_id}")

        limit = self._get_plan_limits(tenant.plan).get(quota_type)
        if limit is None:
            return True, {'unlimited': True}

        key = self._get_quota_key(tenant_id, quota_type)
        current_usage = int(self.redis.get(key) or 0)
        hard_limit = limit.hard_limit

        would_exceed = current_usage + requested_amount > hard_limit
        soft_limit = int(hard_limit * limit.soft_limit_percentage)
        approaching_limit = soft_limit <= current_usage < hard_limit

        if approaching_limit and limit.warning_enabled:
            # NOTE(review): this fires on every check made while usage sits
            # between the soft and hard limit; confirm that
            # _send_warning_notification de-duplicates.
            self._send_warning_notification(
                tenant_id, quota_type, current_usage, hard_limit
            )

        return not would_exceed, {
            'current_usage': current_usage,
            'hard_limit': hard_limit,
            'remaining': max(0, hard_limit - current_usage - requested_amount),
            'approaching': approaching_limit,
            'exceeded': would_exceed,
        }

    def consume_quota(
        self,
        tenant_id: str,
        quota_type: "QuotaType",
        amount: int = 1,
        metadata: "dict | None" = None,
    ) -> bool:
        """Consume quota and persist a usage record.

        The check and the increment are separate Redis operations, so
        concurrent callers can both pass the check and overshoot the hard
        limit.

        Args:
            tenant_id: Identifier of the consuming tenant.
            quota_type: The resource being consumed.
            amount: Number of units to consume.
            metadata: Optional extra data stored on the usage record.

        Returns:
            ``True`` once the counter is incremented and the record is
            committed.

        Raises:
            QuotaExceededError: If consuming ``amount`` would exceed the
                hard limit.
            ValueError: If the tenant does not exist (from ``check_quota``).
        """
        allowed, status = self.check_quota(tenant_id, quota_type, amount)
        if not allowed:
            logger.warning(
                "Quota exceeded for tenant %s, type %s",
                tenant_id,
                quota_type.value,
            )
            raise QuotaExceededError(
                f"Quota limit reached for {quota_type.value}",
                current=status['current_usage'],
                limit=status['hard_limit']
            )

        key = self._get_quota_key(tenant_id, quota_type)
        self.redis.incrby(key, amount)

        usage_record = UsageRecord(
            tenant_id=tenant_id,
            quota_type=quota_type.value,
            amount=amount,
            metadata=metadata,
            # Naive UTC, equivalent to the deprecated datetime.utcnow().
            created_at=datetime.now(timezone.utc).replace(tzinfo=None)
        )
        try:
            self.db.add(usage_record)
            self.db.commit()
        except Exception:
            # Keep the live counter and the usage ledger in agreement:
            # without a committed record the increment must not stand.
            self.db.rollback()
            self.redis.decrby(key, amount)
            raise
        return True

    def reset_quota(
        self,
        tenant_id: str,
        quota_type: "QuotaType",
        billing_cycle_start: datetime
    ) -> dict:
        """Archive the current counter and reset it for a new billing cycle.

        The archived value is stored in Redis under a key derived from the
        tenant, quota type and cycle start, with a 90-day expiry.

        Args:
            tenant_id: Identifier of the tenant being reset.
            quota_type: The counter to reset.
            billing_cycle_start: Cycle start used in the archive key.

        Returns:
            A dict with ``archived_usage``, ``reset_at`` (naive UTC) and
            ``cycle_start``.
        """
        key = self._get_quota_key(tenant_id, quota_type)
        current = int(self.redis.get(key) or 0)

        archive_key = f"quota_archive:{tenant_id}:{quota_type.value}:{billing_cycle_start.isoformat()}"
        # Archive before deleting so a failed write never loses the count.
        # NOTE(review): usage recorded between the GET above and the DELETE
        # below is dropped without being archived.
        self.redis.set(archive_key, current, ex=86400 * 90)
        self.redis.delete(key)

        return {
            'archived_usage': current,
            'reset_at': datetime.now(timezone.utc).replace(tzinfo=None),
            'cycle_start': billing_cycle_start
        }
```
**Nigerian Billing Cycle Alignment:**
Nigerian businesses often operate on monthly cycles aligned with calendar months, but some prefer to align with their fiscal year or contract start date.
```python
def get_billing_cycle_dates(
    tenant: "Tenant",
    now: "datetime | None" = None,
) -> tuple[datetime, datetime]:
    """Return the start and end of the tenant's current billing cycle.

    A cycle runs from the tenant's anchor day up to, but not including, the
    next anchor day. Tenants without ``billing_anchor_day`` use calendar
    months (anchor day 1). In months shorter than the anchor day the anchor
    is clamped to the last day of that month, so an anchor of 31 falls on
    28/29 February.

    Args:
        tenant: Object exposing ``billing_anchor_day`` (day of month, or a
            falsy value for calendar-month billing).
        now: Moment to evaluate the cycle for. Defaults to the current time
            as naive UTC.

    Returns:
        ``(cycle_start, cycle_end)``: midnight on the first day of the
        cycle and the last microsecond of its final day, so that
        ``cycle_start <= t <= cycle_end`` covers the whole cycle.

    Raises:
        ValueError: If ``billing_anchor_day`` is set but not within 1-31.
    """
    anchor = tenant.billing_anchor_day or 1
    if not 1 <= anchor <= 31:
        raise ValueError(
            f"billing_anchor_day must be between 1 and 31, got {anchor}"
        )

    if now is None:
        # Naive UTC, equivalent to the deprecated datetime.utcnow().
        now = datetime.now(timezone.utc).replace(tzinfo=None)

    def anchor_in_month(year: int, month: int) -> datetime:
        """Midnight on the anchor day of a month; ``month`` may be 0 or 13."""
        year += (month - 1) // 12
        month = (month - 1) % 12 + 1
        last_day = calendar.monthrange(year, month)[1]
        return datetime(year, month, min(anchor, last_day), tzinfo=now.tzinfo)

    cycle_start = anchor_in_month(now.year, now.month)
    if now < cycle_start:
        # This month's anchor is still ahead: the running cycle began on
        # last month's anchor and ends just before this month's.
        next_start = cycle_start
        cycle_start = anchor_in_month(now.year, now.month - 1)
    else:
        next_start = anchor_in_month(now.year, now.month + 1)

    cycle_end = next_start - timedelta(microseconds=1)
    return cycle_start, cycle_end
def _get_plan_limits(self, plan: str) -> dict[QuotaType, QuotaLimit]:
    """Return the quota limits configured for ``plan``.

    Args:
        plan: Plan name (``'free'``, ``'starter'``, ``'professional'`` or
            ``'enterprise'``).

    Returns:
        Mapping from quota type to its ``QuotaLimit``. Unknown plan names
        fall back to the ``'free'`` limits. ``'enterprise'`` maps to an
        empty dict, i.e. no limit is configured for any quota type.
    """

    def tier(
        api_calls: int,
        ai_tokens: int,
        storage_gb: int,
        team_members: int,
        projects: int,
    ) -> dict[QuotaType, QuotaLimit]:
        """Build one plan's limits so each key and its QuotaLimit agree."""
        caps = {
            QuotaType.API_CALLS: api_calls,
            QuotaType.AI_TOKENS: ai_tokens,
            QuotaType.STORAGE_GB: storage_gb,
            QuotaType.TEAM_MEMBERS: team_members,
            QuotaType.PROJECTS: projects,
        }
        return {kind: QuotaLimit(kind, cap) for kind, cap in caps.items()}

    limits = {
        'free': tier(
            api_calls=1000,
            ai_tokens=100000,
            storage_gb=1,
            team_members=3,
            projects=2,
        ),
        'starter': tier(
            api_calls=50000,
            ai_tokens=1000000,
            storage_gb=10,
            team_members=10,
            projects=10,
        ),
        'professional': tier(
            api_calls=500000,
            ai_tokens=10000000,
            storage_gb=100,
            team_members=50,
            projects=100,
        ),
        'enterprise': {},
    }
    return limits.get(plan, limits['free'])
```
**Common Failure Modes:**
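When the quota check and the counter increment are separate steps, high-concurrency traffic creates a race condition: multiple requests pass the check simultaneously, before any of them has recorded its consumption, and together they push usage past the limit. Performing the check and the increment as a single atomic Redis operation (a Lua script, as below) closes that window.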
```python
def consume_quota_atomic(
    self,
    tenant_id: str,
    quota_type: "QuotaType",
    amount: int = 1
) -> tuple[bool, dict]:
    """Consume quota with the limit check and increment done atomically.

    The comparison against the hard limit and the ``INCRBY`` run inside a
    single Lua script, so concurrent callers cannot both pass the check and
    then overshoot the limit.

    Args:
        tenant_id: Identifier of the consuming tenant.
        quota_type: The resource being consumed.
        amount: Number of units to consume.

    Returns:
        ``(allowed, status)``. ``status`` holds ``current_usage``,
        ``hard_limit`` and ``remaining``; when the request is denied these
        describe the unchanged counter. For quota types with no configured
        limit the counter is still incremented and ``status`` is
        ``{'unlimited': True, 'current_usage': ...}``.

    Raises:
        ValueError: If no tenant with ``tenant_id`` exists.
    """
    script = """
    local key = KEYS[1]
    local limit = tonumber(ARGV[1])
    local requested = tonumber(ARGV[2])
    local current = tonumber(redis.call('GET', key) or '0')
    local new_total = current + requested
    if new_total > limit then
        return {0, current, limit, limit - current}
    end
    redis.call('INCRBY', key, requested)
    return {1, new_total, limit, limit - new_total}
    """

    # _get_plan_limits takes a plan name and returns a per-type mapping, so
    # the tenant's plan has to be resolved first.
    tenant = self.db.query(Tenant).filter(
        Tenant.id == tenant_id
    ).first()
    if tenant is None:
        raise ValueError(f"Unknown tenant: {tenant_id}")

    key = self._get_quota_key(tenant_id, quota_type)
    limit = self._get_plan_limits(tenant.plan).get(quota_type)
    if limit is None:
        # No cap configured: keep counting usage, but never deny.
        new_usage = self.redis.incrby(key, amount)
        return True, {'unlimited': True, 'current_usage': int(new_usage)}

    result = self.redis.eval(
        script,
        1,
        key,
        limit.hard_limit,  # ARGV[1] must be the integer cap, not the QuotaLimit
        amount
    )
    allowed = bool(result[0])
    return allowed, {
        'current_usage': result[1],
        'hard_limit': result[2],
        'remaining': result[3]
    }
```