Without rate limiting, one aggressive client can bring down your entire service. But implementing it is trickier than it sounds: distributed state, per-user fairness, and graceful degradation all matter.
Rate Limiting Algorithms
// 1. Token Bucket - smooth, allows bursts
/**
 * Token-bucket rate limiter.
 *
 * The bucket starts full with `capacity` tokens and is topped up lazily, on
 * each `tryConsume` call, at `refillRate` tokens per second of elapsed time.
 * A request is allowed only if enough tokens are available, which permits
 * short bursts up to `capacity` while bounding the sustained rate.
 *
 * State lives in this object only, so the limit applies per instance.
 */
class TokenBucket {
  private tokens: number
  private lastRefill: number

  /**
   * @param capacity Maximum number of tokens the bucket can hold.
   * @param refillRate Tokens added per second.
   */
  constructor(
    private readonly capacity: number, // Max tokens
    private readonly refillRate: number // Tokens per second
  ) {
    this.tokens = capacity
    this.lastRefill = Date.now()
  }

  /**
   * Attempts to take `tokens` tokens from the bucket.
   *
   * @param tokens Number of tokens the request costs (defaults to 1).
   * @returns `true` if the tokens were deducted, `false` if the bucket did
   *   not hold enough (in which case nothing is deducted).
   */
  tryConsume(tokens: number = 1): boolean {
    this.refill()
    if (this.tokens < tokens) {
      return false
    }
    this.tokens -= tokens
    return true
  }

  /** Adds the tokens earned since the last refill, capped at `capacity`. */
  private refill(): void {
    const now = Date.now()
    // Date.now() is wall-clock time and can jump backwards (e.g. a clock
    // adjustment). Clamp to zero so a backwards jump never drains tokens.
    const elapsedSeconds = Math.max(0, now - this.lastRefill) / 1000
    this.tokens = Math.min(this.capacity, this.tokens + elapsedSeconds * this.refillRate)
    // Always re-anchor to the current reading so refilling resumes from the
    // adjusted clock instead of stalling until it catches up.
    this.lastRefill = now
  }
}
// 2. Sliding Window (timestamp log in a Redis sorted set) - more accurate, but more complex and stores one entry per request
/**
 * Sliding-window limiter backed by a Redis sorted set.
 *
 * Each allowed request is stored as a sorted-set member scored by its
 * timestamp; entries older than the window are trimmed before counting.
 *
 * NOTE(review): the trim, count and add steps are issued as separate awaited
 * commands, so concurrent callers can each observe `count < limit` and
 * together exceed the limit. The Lua-based `DistributedRateLimiter` in this
 * file performs the same steps inside a single script.
 */
class SlidingWindowCounter {
  /**
   * @param windowSize Length of the sliding window, in milliseconds.
   * @param limit Maximum number of requests allowed within one window.
   */
  constructor(
    private windowSize: number, // In milliseconds
    private limit: number
  ) {}

  /**
   * Records a request for `key` if the window still has room.
   *
   * @param key Sorted-set key identifying the client being limited.
   * @param redis Redis client used for the sorted-set commands.
   * @returns `true` if the request was recorded (allowed), `false` if the
   *   window already holds `limit` entries.
   */
  async checkLimit(key: string, redis: Redis): Promise<boolean> {
    const now = Date.now()
    const windowStart = now - this.windowSize

    // Remove old entries and count current window
    await redis.zremrangebyscore(key, 0, windowStart)
    const count = await redis.zcard(key)

    if (count >= this.limit) {
      return false
    }

    // The random suffix keeps members unique when several requests share a millisecond.
    await redis.zadd(key, now, `${now}-${Math.random()}`)
    // Let the whole key expire once no request has been recorded for a full window.
    await redis.pexpire(key, this.windowSize)
    return true
  }
}
Distributed Rate Limiting with Redis
import { Redis } from 'ioredis'
/**
 * Sliding-window rate limiter whose trim/count/add sequence runs inside a
 * single Redis Lua script, so concurrent callers on different servers cannot
 * interleave between the count and the insert.
 */
class DistributedRateLimiter {
  /**
   * Lua script executed by `checkRateLimit`.
   *
   * KEYS[1] = sorted-set key
   * ARGV[1] = current time (ms), ARGV[2] = window length (ms),
   * ARGV[3] = limit, ARGV[4] = unique member for this request.
   *
   * Returns {allowed (1|0), remaining, retryAfterMs}.
   */
  private static readonly SLIDING_WINDOW_SCRIPT = `
    local key = KEYS[1]
    local now = tonumber(ARGV[1])
    local window = tonumber(ARGV[2])
    local limit = tonumber(ARGV[3])
    local member = ARGV[4]

    -- Remove old entries
    redis.call('ZREMRANGEBYSCORE', key, 0, now - window)

    -- Count current entries
    local count = redis.call('ZCARD', key)

    if count < limit then
      -- Add new entry
      redis.call('ZADD', key, now, member)
      redis.call('PEXPIRE', key, window)
      return {1, limit - count - 1, 0} -- allowed, remaining, retry-after
    else
      -- Get oldest entry for retry-after
      local oldest = redis.call('ZRANGE', key, 0, 0, 'WITHSCORES')
      local retryAfter = oldest[2] and (tonumber(oldest[2]) + window - now) or window
      return {0, 0, retryAfter} -- denied, remaining, retry-after
    end
  `

  /** @param redis Redis client used to evaluate the rate-limit script. */
  constructor(private redis: Redis) {}

  /**
   * Checks, and on success records, one request against the limit.
   *
   * @param config Supplies `identifier` (used to build the Redis key),
   *   `windowMs` (window length in milliseconds) and `limit`.
   * @returns Whether the request is allowed, the configured limit, the
   *   requests remaining in the window, and how long to wait before retrying
   *   (0 when allowed). The shape includes every field that
   *   `createRateLimitResponse` reads.
   */
  async checkRateLimit(config: RateLimitConfig): Promise<{
    allowed: boolean
    limit: number
    remaining: number
    retryAfterMs: number
  }> {
    const key = `ratelimit:${config.identifier}`
    const now = Date.now()

    // The member is generated here rather than with Lua's math.random():
    // on Redis versions that seed the script PRNG identically for every run,
    // math.random() yields the same value each time, so two requests in the
    // same millisecond would collide on one member and be counted once.
    const member = `${now}-${Math.random()}`

    const reply = (await this.redis.eval(
      DistributedRateLimiter.SLIDING_WINDOW_SCRIPT,
      1,
      key,
      now,
      config.windowMs,
      config.limit,
      member
    )) as [number, number, number]
    const [allowed, remaining, retryAfter] = reply

    return {
      allowed: allowed === 1,
      limit: config.limit,
      remaining,
      retryAfterMs: retryAfter,
    }
  }
}
Per-User vs Per-IP vs Per-API-Key
/**
 * Picks the identity (and its request limit) that a request is rate limited
 * under, in order of precedence: API key, authenticated user, client IP.
 *
 * @param request Incoming request; reads the `x-api-key`, `x-forwarded-for`
 *   and `x-real-ip` headers and `request.auth?.userId`.
 * @returns The identifier type, its value, and the limit for that tier.
 */
function getRateLimitIdentifier(request: Request): RateLimitIdentifier {
  // API key takes precedence (authenticated users)
  // NOTE(review): the key is not validated here, so any non-empty header value
  // gets the highest tier and its own bucket. Confirm keys are verified before
  // this function runs.
  const apiKey = request.headers.get('x-api-key')
  if (apiKey) {
    return { type: 'api-key', value: apiKey, limit: 1000 } // Higher limit
  }

  // Authenticated user
  const userId = request.auth?.userId
  if (userId) {
    return { type: 'user', value: userId, limit: 100 }
  }

  // Fall back to IP (lowest trust)
  // NOTE(review): x-forwarded-for is client-controlled unless a trusted proxy
  // overwrites it; confirm the deployment sanitizes this header.
  // Entries are trimmed so "1.2.3.4" and "1.2.3.4 " share one bucket.
  const forwardedFor = request.headers.get('x-forwarded-for')?.split(',')[0]?.trim()
  const realIp = request.headers.get('x-real-ip')?.trim()
  // `||` (not `??`) is deliberate: an empty entry must fall through to the next source.
  const ip = forwardedFor || realIp || 'unknown'

  return { type: 'ip', value: ip, limit: 20 }
}
Graceful Degradation: 429 Handling
/**
 * Builds the HTTP 429 response for a denied request.
 *
 * @param result Outcome of the rate-limit check; reads `allowed`, `limit`,
 *   `remaining` and `retryAfterMs`.
 * @returns A 429 `Response` with a JSON body, `X-RateLimit-*` headers and
 *   `Retry-After` (in whole seconds) when the request was denied, or `null`
 *   when the request is allowed and the caller should continue handling it.
 */
function createRateLimitResponse(result: RateLimitResult): Response | null {
  if (result.allowed) {
    return null // Continue with request
  }

  // Retry-After is expressed in whole seconds; round up so clients never retry early.
  const retryAfterSeconds = Math.ceil(result.retryAfterMs / 1000)

  const body = JSON.stringify({
    error: 'Too Many Requests',
    message: 'Rate limit exceeded. Please retry later.',
    retryAfter: retryAfterSeconds,
  })

  return new Response(body, {
    status: 429,
    headers: {
      // Declare the body as JSON; a string body otherwise defaults to text/plain.
      'Content-Type': 'application/json',
      'X-RateLimit-Limit': result.limit.toString(),
      'X-RateLimit-Remaining': result.remaining.toString(),
      'X-RateLimit-Reset': new Date(Date.now() + result.retryAfterMs).toISOString(),
      'Retry-After': retryAfterSeconds.toString(),
    },
  })
}
Rate Limit Headers for Clients
// Client-side rate limit handling
/**
 * `fetch` wrapper that cooperates with server-side rate limiting.
 *
 * On HTTP 429 it waits for the period given by `Retry-After` and retries, up
 * to `maxRetries` times; if the server still answers 429 after that, the 429
 * response is returned to the caller instead of retrying forever. On other
 * responses it pauses briefly when `X-RateLimit-Remaining` reports fewer than
 * 10 requests left.
 *
 * @param url URL to fetch.
 * @param options Options forwarded unchanged to every `fetch` attempt.
 * @param maxRetries Maximum number of retries after a 429 (default 5).
 * @returns The first non-429 response, or the last 429 once retries run out.
 */
async function fetchWithRateLimit(
  url: string,
  options?: RequestInit,
  maxRetries: number = 5
): Promise<Response> {
  const DEFAULT_RETRY_AFTER_MS = 60 * 1000

  const sleep = (ms: number): Promise<void> =>
    new Promise<void>(resolve => setTimeout(resolve, ms))

  // Retry-After is either delay-seconds or an HTTP-date. HTTP-dates start
  // with a weekday name, so a leading digit means delay-seconds.
  const retryAfterToMs = (header: string | null): number => {
    const value = header?.trim()
    if (!value) {
      return DEFAULT_RETRY_AFTER_MS
    }
    if (/^\d/.test(value)) {
      return Number.parseInt(value, 10) * 1000
    }
    const retryAt = Date.parse(value)
    return Number.isNaN(retryAt) ? DEFAULT_RETRY_AFTER_MS : Math.max(0, retryAt - Date.now())
  }

  for (let attempt = 0; ; attempt++) {
    const response = await fetch(url, options)

    if (response.status === 429) {
      if (attempt >= maxRetries) {
        return response // Give up; let the caller see the 429
      }
      await sleep(retryAfterToMs(response.headers.get('Retry-After')))
      continue // Retry
    }

    // Proactive slowdown when approaching limit. Only applies when the server
    // actually sent the header: a missing header yields NaN, and NaN < 10 is false.
    const remainingHeader = response.headers.get('X-RateLimit-Remaining')
    const remaining = remainingHeader === null ? Number.NaN : Number.parseInt(remainingHeader, 10)
    if (remaining < 10) {
      await sleep(100)
    }

    return response
  }
}
Key Takeaways
Use Redis (or another shared store) for distributed systems. In-memory counters are per-process, so each server would enforce its own separate limit instead of one global limit.
Choose the right algorithm. Token bucket allows bursts; sliding window is more accurate.
Communicate limits clearly. Headers help clients back off gracefully.
Tier your limits. Authenticated users get more than anonymous IPs.
