15. Quota Management

Chapter 15 of 24 · 25 min

KEY INSIGHT

Quota management in multi-tenant AI SaaS requires tracking both hard limits (absolute caps) and soft limits (warning thresholds), with Nigerian billing cycles requiring alignment between usage tracking and Naira payments. Quota management extends beyond simple counters. It must handle partial usage, rollover policies, and the complex logic of resetting quotas while maintaining historical accuracy for billing disputes.

```python
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from enum import Enum
from typing import Optional

# Tenant, UsageRecord and QuotaExceededError are the application's own
# model/exception types; they are not defined in this snippet.

logger = logging.getLogger(__name__)

# Archived usage counters are kept for 90 days.
ARCHIVE_TTL_SECONDS = 86400 * 90

# Copies the live counter to the archive key and clears it in one atomic step,
# so usage recorded while a reset is running is never dropped between the
# read and the delete.
# KEYS[1] = live counter, KEYS[2] = archive key, ARGV[1] = archive TTL (seconds)
_ARCHIVE_AND_RESET_LUA = """
local usage = redis.call('GET', KEYS[1]) or '0'
redis.call('SET', KEYS[2], usage, 'EX', ARGV[1])
redis.call('DEL', KEYS[1])
return usage
"""


class QuotaType(Enum):
    """Resources that are metered per tenant."""

    API_CALLS = "api_calls"
    AI_TOKENS = "ai_tokens"
    STORAGE_GB = "storage_gb"
    TEAM_MEMBERS = "team_members"
    PROJECTS = "projects"


@dataclass
class QuotaLimit:
    """Represents a quota limit configuration."""

    quota_type: QuotaType
    hard_limit: int  # absolute cap; requests that would go past it are refused
    soft_limit_percentage: float = 0.8  # fraction of hard_limit where warnings start
    warning_enabled: bool = True


class QuotaManager:
    """Manages tenant quotas with tracking and enforcement."""

    def __init__(self, db_session, redis_client, notification_service):
        self.db = db_session
        self.redis = redis_client
        self.notifications = notification_service
        self.quota_config = self._load_quota_config()

    def check_quota(
        self,
        tenant_id: str,
        quota_type: QuotaType,
        requested_amount: int = 1
    ) -> tuple[bool, dict]:
        """Check if tenant can consume quota, return status.

        Returns:
            ``(allowed, status)``. For a quota type the plan does not limit,
            ``status`` is ``{'unlimited': True}``; otherwise it holds
            ``current_usage``, ``hard_limit``, ``remaining``, ``approaching``
            and ``exceeded``.

        Raises:
            ValueError: if no tenant with ``tenant_id`` exists.
        """
        tenant = self.db.query(Tenant).filter(
            Tenant.id == tenant_id
        ).first()
        if tenant is None:
            # Fail with a clear message instead of an AttributeError on
            # ``tenant.plan`` below.
            raise ValueError(f"Unknown tenant: {tenant_id}")

        limit = self._get_plan_limits(tenant.plan).get(quota_type)
        if limit is None:
            return True, {'unlimited': True}

        key = self._get_quota_key(tenant_id, quota_type)
        current_usage = int(self.redis.get(key) or 0)

        would_exceed = current_usage + requested_amount > limit.hard_limit
        soft_limit = int(limit.hard_limit * limit.soft_limit_percentage)
        approaching_limit = soft_limit <= current_usage < limit.hard_limit

        # NOTE(review): this fires on every check made while usage sits between
        # the soft and hard limit; de-duplication, if any, has to happen inside
        # _send_warning_notification (not shown here).
        if approaching_limit and limit.warning_enabled:
            self._send_warning_notification(
                tenant_id, quota_type, current_usage, limit.hard_limit
            )

        return not would_exceed, {
            'current_usage': current_usage,
            'hard_limit': limit.hard_limit,
            'remaining': max(
                0, limit.hard_limit - current_usage - requested_amount
            ),
            'approaching': approaching_limit,
            'exceeded': would_exceed
        }

    def consume_quota(
        self,
        tenant_id: str,
        quota_type: QuotaType,
        amount: int = 1,
        metadata: Optional[dict] = None
    ) -> bool:
        """Consume quota and record usage.

        Increments the Redis counter and stores a ``UsageRecord`` row.

        NOTE: the check and the increment are two separate Redis round trips,
        so concurrent callers can both pass the check and overshoot the hard
        limit. ``consume_quota_atomic`` closes that gap.

        Raises:
            QuotaExceededError: if the request would exceed the hard limit.
            ValueError: if the tenant does not exist (from ``check_quota``).
        """
        allowed, status = self.check_quota(tenant_id, quota_type, amount)

        if not allowed:
            logger.warning(
                "Quota exceeded for tenant %s, type %s",
                tenant_id, quota_type.value
            )
            raise QuotaExceededError(
                f"Quota limit reached for {quota_type.value}",
                current=status['current_usage'],
                limit=status['hard_limit']
            )

        key = self._get_quota_key(tenant_id, quota_type)
        self.redis.incrby(key, amount)

        try:
            self.db.add(UsageRecord(
                tenant_id=tenant_id,
                quota_type=quota_type.value,
                amount=amount,
                metadata=metadata,
                created_at=datetime.utcnow()
            ))
            self.db.commit()
        except Exception:
            # Keep the counter and the usage ledger in step: if the record
            # cannot be stored, give the quota back before re-raising.
            # (Assumes a SQLAlchemy-style session that offers rollback().)
            self.db.rollback()
            self.redis.decrby(key, amount)
            raise

        return True

    def reset_quota(
        self,
        tenant_id: str,
        quota_type: QuotaType,
        billing_cycle_start: datetime
    ) -> dict:
        """Reset quota for new billing cycle with archive.

        The closing counter value is copied to
        ``quota_archive:<tenant>:<type>:<cycle start>`` (kept for
        ``ARCHIVE_TTL_SECONDS``) and the live counter is deleted. Both steps
        run inside one Lua script so no increment can slip in between them.
        On Redis Cluster the two keys would have to share a hash slot.

        Returns:
            dict with ``archived_usage``, ``reset_at`` and ``cycle_start``.
        """
        key = self._get_quota_key(tenant_id, quota_type)
        archive_key = (
            f"quota_archive:{tenant_id}:{quota_type.value}:"
            f"{billing_cycle_start.isoformat()}"
        )

        archived_usage = int(
            self.redis.eval(
                _ARCHIVE_AND_RESET_LUA, 2, key, archive_key,
                ARCHIVE_TTL_SECONDS
            )
        )

        return {
            'archived_usage': archived_usage,
            'reset_at': datetime.utcnow(),
            'cycle_start': billing_cycle_start
        }
```

**Nigerian Billing Cycle Alignment:** Nigerian businesses often operate on monthly cycles aligned with calendar months, but some prefer to align with their fiscal year or contract start date.
```python
import calendar
from typing import Optional


def _month_anchor(
    year: int, month: int, anchor_day: int, tzinfo=None
) -> datetime:
    """Return midnight on ``anchor_day`` of the month, clamped to its last day."""
    last_day = calendar.monthrange(year, month)[1]
    return datetime(year, month, min(anchor_day, last_day), tzinfo=tzinfo)


def _add_months(year: int, month: int, delta: int) -> tuple[int, int]:
    """Shift ``(year, month)`` by ``delta`` months, carrying across years."""
    zero_based = year * 12 + (month - 1) + delta
    return zero_based // 12, zero_based % 12 + 1


def get_billing_cycle_dates(
    tenant: Tenant,
    now: Optional[datetime] = None
) -> tuple[datetime, datetime]:
    """Determine billing cycle start and end dates.

    A tenant with ``billing_anchor_day`` set is billed from that day of the
    month up to the day before the next anchor; without it the cycle is the
    calendar month. An anchor past the end of a short month (for example 31
    in April) falls on that month's last day.

    Args:
        tenant: object exposing ``billing_anchor_day`` (1-31, or falsy).
        now: reference moment; defaults to the current naive UTC time.
            Passing it explicitly makes the result reproducible.

    Returns:
        ``(cycle_start, cycle_end)``: ``cycle_start`` is midnight on the
        first day of the cycle, ``cycle_end`` the last microsecond before
        the next cycle starts.

    Raises:
        ValueError: if ``billing_anchor_day`` is outside 1-31.
    """
    today = datetime.utcnow() if now is None else now
    anchor = tenant.billing_anchor_day or 1  # no anchor -> calendar month
    if not 1 <= anchor <= 31:
        raise ValueError(
            f"billing_anchor_day must be between 1 and 31, got {anchor}"
        )

    cycle_start = _month_anchor(today.year, today.month, anchor, today.tzinfo)
    if today < cycle_start:
        # This month's anchor has not been reached yet, so the running cycle
        # began last month. Stepping back by calendar month (not by 30 days)
        # cannot skip over February or land two months back.
        prev_year, prev_month = _add_months(today.year, today.month, -1)
        cycle_start = _month_anchor(
            prev_year, prev_month, anchor, today.tzinfo
        )

    next_year, next_month = _add_months(
        cycle_start.year, cycle_start.month, 1
    )
    next_start = _month_anchor(next_year, next_month, anchor, today.tzinfo)
    return cycle_start, next_start - timedelta(microseconds=1)


# Hard caps per plan. A quota type missing from a plan is unlimited, so the
# empty 'enterprise' entry means "no limits at all".
_PLAN_HARD_LIMITS = {
    'free': {
        QuotaType.API_CALLS: 1000,
        QuotaType.AI_TOKENS: 100000,
        QuotaType.STORAGE_GB: 1,
        QuotaType.TEAM_MEMBERS: 3,
        QuotaType.PROJECTS: 2,
    },
    'starter': {
        QuotaType.API_CALLS: 50000,
        QuotaType.AI_TOKENS: 1000000,
        QuotaType.STORAGE_GB: 10,
        QuotaType.TEAM_MEMBERS: 10,
        QuotaType.PROJECTS: 10,
    },
    'professional': {
        QuotaType.API_CALLS: 500000,
        QuotaType.AI_TOKENS: 10000000,
        QuotaType.STORAGE_GB: 100,
        QuotaType.TEAM_MEMBERS: 50,
        QuotaType.PROJECTS: 100,
    },
    'enterprise': {},
}


# Method of QuotaManager.
def _get_plan_limits(self, plan: str) -> dict[QuotaType, QuotaLimit]:
    """Get quota limits for a plan.

    Unknown plan names fall back to the 'free' limits. Every call returns
    freshly built ``QuotaLimit`` objects, so callers may modify the result
    without affecting other callers.
    """
    caps = _PLAN_HARD_LIMITS.get(plan, _PLAN_HARD_LIMITS['free'])
    return {
        quota_type: QuotaLimit(quota_type, cap)
        for quota_type, cap in caps.items()
    }
```

**Common Failure Modes:** Quota consumption in high-concurrency scenarios causes race conditions where multiple requests pass the check simultaneously, exceeding the limit before any consumption is recorded.
```python
# Method of QuotaManager.
def consume_quota_atomic(
    self,
    tenant_id: str,
    quota_type: QuotaType,
    amount: int = 1
) -> tuple[bool, dict]:
    """Atomic quota consumption using Lua script.

    The limit check and the increment run inside a single Redis script, so
    concurrent requests cannot both pass the check and overshoot the cap.
    Unlike ``consume_quota`` this does not write a ``UsageRecord`` row.

    Returns:
        ``(allowed, status)``. ``status`` holds ``current_usage``,
        ``hard_limit`` and ``remaining``, or ``{'unlimited': True}`` when the
        tenant's plan sets no limit for ``quota_type`` (usage is still
        counted in that case).

    Raises:
        ValueError: if no tenant with ``tenant_id`` exists.
    """
    script = """
    local key = KEYS[1]
    local limit = tonumber(ARGV[1])
    local requested = tonumber(ARGV[2])
    local current = tonumber(redis.call('GET', key) or '0')
    local new_total = current + requested
    if new_total > limit then
        return {0, current, limit, math.max(0, limit - current)}
    end
    redis.call('INCRBY', key, requested)
    return {1, new_total, limit, limit - new_total}
    """

    # _get_plan_limits expects the plan name, so resolve the tenant first.
    tenant = self.db.query(Tenant).filter(
        Tenant.id == tenant_id
    ).first()
    if tenant is None:
        raise ValueError(f"Unknown tenant: {tenant_id}")

    key = self._get_quota_key(tenant_id, quota_type)
    limit = self._get_plan_limits(tenant.plan).get(quota_type)
    if limit is None:
        # No cap for this quota type on this plan: keep counting usage, as
        # consume_quota does, but there is nothing to enforce.
        self.redis.incrby(key, amount)
        return True, {'unlimited': True}

    # The script needs the integer cap, not the QuotaLimit object.
    allowed, current_usage, hard_limit, remaining = self.redis.eval(
        script,
        1,
        key,
        limit.hard_limit,
        amount
    )

    return bool(allowed), {
        'current_usage': current_usage,
        'hard_limit': hard_limit,
        'remaining': remaining
    }
```

EXERCISE

Implement a quota system that supports rollover (unused quota from the previous month carries forward, up to a cap). Create a rollover calculation that runs at billing cycle end, archives current usage, and updates the Redis quota key to include carried-over amounts. Test with a tenant who used 30% of their 5M token limit in month one, verifying that the rollover adds 3.5M tokens (70% of 5M) to month two's allocation, giving 8.5M in total, which stays under the 10M cap on the total allocation.