RUNLOCALAI · v38
-> Will it run? · Best GPU · Compare · Troubleshoot · Start · Learn · Pulse · Models · Hardware · Tools · Bench
Run check
RUNLOCALAI

Independently operated catalog for local-AI hardware and software. Hand-written verdicts. Source-cited claims. Reproducible commands when we have them.

OP·Fredoline Eruo
DIR
  • Models
  • Hardware
  • Tools
  • Benchmarks
TOOLS
  • Will it run?
  • Compare hardware
  • Cost vs cloud
  • Choose my GPU
  • Prompting kits
  • Quick answers
REF
  • All buyer guides
  • Learn local AI
  • Methodology
  • Glossary
  • Errors KB
  • Trust
EDITOR
  • About
  • Author
  • How we make money
  • Editorial policy
  • Contact
LEGAL
  • Privacy
  • Terms
  • Sitemap
MAIL · MONTHLY DIGEST
Get monthly local AI changes
Monthly recap. No spam.
DISCLOSURE

Some links on this site are affiliate links (Amazon Associates and other first-class retailers). When you buy through them, we earn a small commission at no extra cost to you. Affiliate links do not influence our verdicts — there are cards we rate highly that we don't have affiliate relationships with, and cards that sell well that we refuse to recommend. Read more →

© 2026 runlocalai.co · Independently operated
RUNLOCALAI · v38
  1. >
  2. Home
  3. /Learn
  4. /Courses
  5. /AI-Powered SaaS Products
  6. /Ch. 15
AI-Powered SaaS Products

15. Quota Management

Chapter 15 of 24 · 25 min
KEY INSIGHT

# Quota management in multi-tenant AI SaaS requires tracking both hard limits
# (absolute caps) and soft limits (warning thresholds), with Nigerian billing
# cycles requiring alignment between usage tracking and Naira payments. Quota
# management extends beyond simple counters. It must handle partial usage,
# rollover policies, and the complex logic of resetting quotas while
# maintaining historical accuracy for billing disputes.

import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from enum import Enum

logger = logging.getLogger(__name__)

# How long an archived per-cycle usage figure is kept in Redis (90 days).
ARCHIVE_TTL_SECONDS = 86400 * 90

# NOTE: `Tenant`, `UsageRecord` and `QuotaExceededError` are used below but are
# not defined in this listing; they are expected to come from the
# application's own modules.


class QuotaType(Enum):
    """Kinds of resource a tenant's plan can cap."""

    API_CALLS = "api_calls"
    AI_TOKENS = "ai_tokens"
    STORAGE_GB = "storage_gb"
    TEAM_MEMBERS = "team_members"
    PROJECTS = "projects"


@dataclass
class QuotaLimit:
    """Represents a quota limit configuration."""

    quota_type: QuotaType
    # Absolute cap; a request that would push usage above it is refused.
    hard_limit: int
    # Fraction of `hard_limit` (0.8 == 80%) at which warnings start.
    soft_limit_percentage: float = 0.8
    warning_enabled: bool = True


class QuotaManager:
    """Manages tenant quotas with tracking and enforcement.

    Live counters are kept in Redis; every successful consumption is also
    written to the database as a `UsageRecord`.
    """

    def __init__(self, db_session, redis_client, notification_service):
        self.db = db_session
        self.redis = redis_client
        self.notifications = notification_service
        self.quota_config = self._load_quota_config()

    def check_quota(
        self,
        tenant_id: str,
        quota_type: QuotaType,
        requested_amount: int = 1,
    ) -> tuple[bool, dict]:
        """Check if tenant can consume quota, return status.

        Returns:
            ``(allowed, status)``. ``status`` is ``{'unlimited': True}`` when
            the tenant's plan has no limit for ``quota_type``; otherwise it
            holds ``current_usage``, ``hard_limit``, ``remaining``,
            ``approaching`` and ``exceeded``.

        Raises:
            ValueError: if no tenant with ``tenant_id`` exists.
        """
        tenant = self.db.query(Tenant).filter(Tenant.id == tenant_id).first()
        if tenant is None:
            # Fail with a clear message instead of an AttributeError on
            # `tenant.plan` below.
            raise ValueError(f"Unknown tenant: {tenant_id}")

        limit = self._get_plan_limits(tenant.plan).get(quota_type)
        if limit is None:
            return True, {'unlimited': True}

        key = self._get_quota_key(tenant_id, quota_type)
        current_usage = int(self.redis.get(key) or 0)

        would_exceed = current_usage + requested_amount > limit.hard_limit
        soft_limit = int(limit.hard_limit * limit.soft_limit_percentage)
        approaching_limit = soft_limit <= current_usage < limit.hard_limit

        if approaching_limit and limit.warning_enabled:
            # NOTE(review): this fires on every check made while usage sits in
            # the warning band; whether the helper de-duplicates notifications
            # is not visible here -- confirm.
            self._send_warning_notification(
                tenant_id, quota_type, current_usage, limit.hard_limit
            )

        return not would_exceed, {
            'current_usage': current_usage,
            'hard_limit': limit.hard_limit,
            'remaining': max(
                0, limit.hard_limit - current_usage - requested_amount
            ),
            'approaching': approaching_limit,
            'exceeded': would_exceed,
        }

    def consume_quota(
        self,
        tenant_id: str,
        quota_type: QuotaType,
        amount: int = 1,
        metadata: "dict | None" = None,
    ) -> bool:
        """Consume quota and record usage.

        Returns:
            ``True`` once the counter is incremented and the usage row is
            committed.

        Raises:
            QuotaExceededError: if ``check_quota`` refuses the request.
            Exception: any error raised while persisting the usage row is
                re-raised after the Redis increment has been undone.
        """
        allowed, status = self.check_quota(tenant_id, quota_type, amount)

        if not allowed:
            logger.warning(
                "Quota exceeded for tenant %s, type %s",
                tenant_id,
                quota_type.value,
            )
            raise QuotaExceededError(
                f"Quota limit reached for {quota_type.value}",
                current=status['current_usage'],
                limit=status['hard_limit'],
            )

        # NOTE: the check above and the increment below are separate steps, so
        # concurrent callers can both pass the check before either increments.
        key = self._get_quota_key(tenant_id, quota_type)
        self.redis.incrby(key, amount)

        try:
            usage_record = UsageRecord(
                tenant_id=tenant_id,
                quota_type=quota_type.value,
                amount=amount,
                metadata=metadata,
                created_at=datetime.utcnow(),
            )
            self.db.add(usage_record)
            self.db.commit()
        except Exception:
            # Keep Redis and the database in step: without this the tenant
            # would be charged quota for usage that was never recorded.
            self.db.rollback()
            self.redis.decrby(key, amount)
            raise

        return True

    def reset_quota(
        self,
        tenant_id: str,
        quota_type: QuotaType,
        billing_cycle_start: datetime,
    ) -> dict:
        """Reset quota for new billing cycle with archive.

        The current counter value is copied to a
        ``quota_archive:<tenant>:<type>:<cycle start ISO>`` key that expires
        after ``ARCHIVE_TTL_SECONDS``, then the live counter is deleted.

        Returns:
            Dict with ``archived_usage``, ``reset_at`` and ``cycle_start``.
        """
        key = self._get_quota_key(tenant_id, quota_type)
        current = int(self.redis.get(key) or 0)

        archive_key = (
            f"quota_archive:{tenant_id}:{quota_type.value}:"
            f"{billing_cycle_start.isoformat()}"
        )
        self.redis.set(archive_key, current, ex=ARCHIVE_TTL_SECONDS)

        # NOTE(review): an increment that lands between the GET above and this
        # DELETE is neither archived nor carried forward -- confirm that
        # resets only run while the tenant's traffic is quiesced.
        self.redis.delete(key)

        return {
            'archived_usage': current,
            'reset_at': datetime.utcnow(),
            'cycle_start': billing_cycle_start,
        }


# **Nigerian Billing Cycle Alignment:** Nigerian businesses often operate on
# monthly cycles aligned with calendar months, but some prefer to align with
# their fiscal year or contract start date.
def _cycle_start_in_month(reference, year, month, anchor_day):
    """Return midnight on ``anchor_day`` of ``year``/``month``.

    The day is clamped to the month's last day, so an anchor of 31 maps to
    28/29/30 in shorter months instead of raising. ``reference`` supplies the
    tzinfo (if any) for the result.
    """
    import calendar

    last_day = calendar.monthrange(year, month)[1]
    return reference.replace(
        year=year,
        month=month,
        day=min(anchor_day, last_day),
        hour=0,
        minute=0,
        second=0,
        microsecond=0,
    )


def get_billing_cycle_dates(
    tenant: "Tenant",
    now: "datetime | None" = None,
) -> tuple[datetime, datetime]:
    """Determine billing cycle start and end dates.

    Args:
        tenant: object exposing ``billing_anchor_day``; a falsy value means
            plain calendar-month cycles (anchor day 1).
            Assumes the anchor is an int day-of-month -- TODO confirm.
        now: moment to evaluate; defaults to ``datetime.utcnow()``.

    Returns:
        ``(cycle_start, cycle_end)``: ``cycle_start`` is midnight on the
        anchor day of the cycle containing ``now``; ``cycle_end`` is the last
        microsecond before the next cycle starts.

    Raises:
        ValueError: if the anchor day is outside 1..31.
    """
    today = datetime.utcnow() if now is None else now
    anchor = tenant.billing_anchor_day or 1
    if not 1 <= anchor <= 31:
        raise ValueError(f"billing_anchor_day must be in 1..31, got {anchor}")

    year, month = today.year, today.month
    cycle_start = _cycle_start_in_month(today, year, month, anchor)
    if today < cycle_start:
        # This month's anchor has not been reached yet, so the current cycle
        # began in the previous calendar month. Step by month, not by a fixed
        # 30 days, which can skip February or land two months back.
        year, month = (year - 1, 12) if month == 1 else (year, month - 1)
        cycle_start = _cycle_start_in_month(today, year, month, anchor)

    if cycle_start.month == 12:
        next_year, next_month = cycle_start.year + 1, 1
    else:
        next_year, next_month = cycle_start.year, cycle_start.month + 1
    next_start = _cycle_start_in_month(today, next_year, next_month, anchor)

    # The cycle runs up to, but not including, the next cycle's start.
    cycle_end = next_start - timedelta(microseconds=1)
    return cycle_start, cycle_end


# Shown standalone here; `check_quota` calls it as `self._get_plan_limits(...)`,
# so it is meant to live on QuotaManager.
def _get_plan_limits(self, plan: str) -> "dict[QuotaType, QuotaLimit]":
    """Get quota limits for a plan.

    Unknown plan names fall back to the 'free' limits. 'enterprise' maps to
    an empty dict, i.e. no limit is configured for any quota type.
    """
    quota_order = (
        QuotaType.API_CALLS,
        QuotaType.AI_TOKENS,
        QuotaType.STORAGE_GB,
        QuotaType.TEAM_MEMBERS,
        QuotaType.PROJECTS,
    )
    # Hard limits per plan, in the same order as `quota_order`.
    hard_limits_by_plan = {
        'free': (1_000, 100_000, 1, 3, 2),
        'starter': (50_000, 1_000_000, 10, 10, 10),
        'professional': (500_000, 10_000_000, 100, 50, 100),
        'enterprise': (),
    }
    hard_limits = hard_limits_by_plan.get(plan, hard_limits_by_plan['free'])
    return {
        quota_type: QuotaLimit(quota_type, hard_limit)
        for quota_type, hard_limit in zip(quota_order, hard_limits)
    }


# **Common Failure Modes:** Quota consumption in high-concurrency scenarios
# causes race conditions where multiple requests pass the check
# simultaneously, exceeding the limit before any consumption is recorded.
def consume_quota_atomic(
    self,
    tenant_id: str,
    quota_type: QuotaType,
    amount: int = 1
) -> tuple[bool, dict]:
    """Atomic quota consumption using Lua script.

    The limit check and the increment run inside one Redis script, so
    concurrent callers cannot both pass the check before either increments.

    Returns:
        ``(allowed, status)``. For a limited quota ``status`` holds
        ``current_usage``, ``hard_limit`` and ``remaining``. When the
        tenant's plan has no limit for ``quota_type`` the usage is still
        counted and ``status`` is ``{'unlimited': True, 'current_usage': n}``.

    Raises:
        ValueError: if no tenant with ``tenant_id`` exists.
    """
    script = """
    local key = KEYS[1]
    local limit = tonumber(ARGV[1])
    local requested = tonumber(ARGV[2])

    local current = tonumber(redis.call('GET', key) or '0')
    local new_total = current + requested

    if new_total > limit then
        return {0, current, limit, math.max(0, limit - current)}
    end

    redis.call('INCRBY', key, requested)
    return {1, new_total, limit, limit - new_total}
    """

    # `_get_plan_limits` takes a plan name and returns a per-type mapping, so
    # resolve the tenant's plan first (same lookup as `check_quota`) and pass
    # only the numeric hard limit to the script.
    tenant = self.db.query(Tenant).filter(Tenant.id == tenant_id).first()
    if tenant is None:
        raise ValueError(f"Unknown tenant: {tenant_id}")

    limit = self._get_plan_limits(tenant.plan).get(quota_type)
    key = self._get_quota_key(tenant_id, quota_type)

    if limit is None:
        # No cap configured for this quota type: count the usage, never refuse.
        return True, {
            'unlimited': True,
            'current_usage': self.redis.incrby(key, amount),
        }

    allowed, current_usage, hard_limit, remaining = self.redis.eval(
        script,
        1,
        key,
        limit.hard_limit,
        amount
    )

    return bool(allowed), {
        'current_usage': current_usage,
        'hard_limit': hard_limit,
        'remaining': remaining
    }

EXERCISE

Implement a quota system that supports rollover (unused quota from the previous month carries forward, up to a cap). Create a rollover calculation that runs at billing cycle end, archives current usage, and updates the Redis quota key to include carried-over amounts. Test with a tenant who used 30% of their 5M token limit in month one, verifying that the rollover adds the unused 3.5M tokens (70% of 5M) to month two's allocation, for 8.5M in total, which stays under the 10M total cap.

← Chapter 14
API Rate Limiting
Chapter 16 →
Analytics Dashboard