The moment your AI product gains traction is both exhilarating and terrifying—suddenly, the architecture decisions you made in week one are being stress-tested by thousands of concurrent users. This chapter is your survival guide for scaling without the catastrophic rewrites that have killed promising startups.
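The first pressure point is the AI provider itself. Under load, calls start failing with 429 rate-limit responses and transient 5xx errors, and the cheapest fix is to retry with exponential backoff and jitter. Below is a minimal sketch; treating every error as retryable and the jitter range are simplifying assumptions to adapt to your provider's error codes.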
```typescript
// Retry a flaky AI call with exponential backoff plus random jitter.
async function callAIWithBackoff<T>(
  fn: () => Promise<T>,
  maxRetries: number = 5,
  baseDelay: number = 1000
): Promise<T> {
  let lastError: Error | undefined;
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error as Error;
      if (attempt === maxRetries - 1) break; // retries exhausted
      // Double the delay each attempt; jitter spreads out retry stampedes.
      const delay = baseDelay * 2 ** attempt + Math.random() * baseDelay;
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  throw lastError ?? new Error('callAIWithBackoff: no attempts made');
}
```
```typescript
class TokenAwareRateLimiter {
  // Parallel per-key arrays: request timestamps and the tokens each request used.
  private requestWindow: Map<string, number[]> = new Map();
  private tokenWindow: Map<string, number[]> = new Map();

  constructor(
    private maxRequestsPerMinute: number = 500,
    private maxTokensPerMinute: number = 150000,
    private windowMs: number = 60000
  ) {}

  async canProceed(
    key: string,
    estimatedTokens: number
  ): Promise<{ allowed: boolean; retryAfterMs?: number }> {
    const now = Date.now();
    const stamps = this.requestWindow.get(key) ?? [];
    const tokens = this.tokenWindow.get(key) ?? [];
    // Drop entries that have aged out of the sliding window.
    while (stamps.length && now - stamps[0] > this.windowMs) {
      stamps.shift();
      tokens.shift();
    }
    const used = tokens.reduce((sum, t) => sum + t, 0);
    if (stamps.length >= this.maxRequestsPerMinute || used + estimatedTokens > this.maxTokensPerMinute) {
      // The caller may retry once the oldest entry leaves the window.
      return { allowed: false, retryAfterMs: stamps.length ? this.windowMs - (now - stamps[0]) : this.windowMs };
    }
    stamps.push(now);
    tokens.push(estimatedTokens);
    this.requestWindow.set(key, stamps);
    this.tokenWindow.set(key, tokens);
    return { allowed: true };
  }
}
```
```typescript
import Redis from 'ioredis';
import { createHash } from 'crypto';

class AIResponseCache {
  private redis: Redis;
  private defaultTTL = 3600; // 1 hour

  constructor() {
    this.redis = new Redis({
      host: process.env.REDIS_HOST,
      port: 6379,
      maxRetriesPerRequest: 3,
    });
  }

  // Hash the prompt so keys stay short and uniform regardless of prompt length.
  private keyFor(prompt: string): string {
    return 'ai:' + createHash('sha256').update(prompt).digest('hex');
  }

  async get(prompt: string): Promise<string | null> {
    return this.redis.get(this.keyFor(prompt));
  }

  async set(prompt: string, response: string, ttl = this.defaultTTL): Promise<void> {
    await this.redis.set(this.keyFor(prompt), response, 'EX', ttl);
  }
}
```
```typescript
import { Pool, PoolConfig } from 'pg';
import { EventEmitter } from 'events';

class MonitoredPool extends EventEmitter {
  private pool: Pool;
  private metrics = {
    totalConnections: 0,
    idleConnections: 0,
    waitingClients: 0,
    queryCount: 0,
    slowQueries: 0,
    errors: 0,
  };

  constructor(config: PoolConfig, private slowQueryMs = 1000) {
    super();
    this.pool = new Pool(config);
    this.pool.on('error', () => this.metrics.errors++);
  }

  async query(text: string, params?: unknown[]) {
    const start = Date.now();
    this.metrics.queryCount++;
    try {
      return await this.pool.query(text, params);
    } finally {
      // Snapshot pool state after every query and flag slow ones for alerting.
      if (Date.now() - start > this.slowQueryMs) this.metrics.slowQueries++;
      this.metrics.totalConnections = this.pool.totalCount;
      this.metrics.idleConnections = this.pool.idleCount;
      this.metrics.waitingClients = this.pool.waitingCount;
      this.emit('metrics', { ...this.metrics });
    }
  }
}
```
```typescript
import Bull from 'bull';

interface AIRequest {
  userId: string;
  prompt: string;
  priority: 'high' | 'normal' | 'low';
  timestamp: number;
}

const aiQueue = new Bull<AIRequest>('ai-processing', {
  redis: { host: process.env.REDIS_HOST },
  defaultJobOptions: {
    attempts: 3,
    backoff: { type: 'exponential', delay: 2000 },
    removeOnComplete: true,
  },
});

// Bounded concurrency: at most five jobs call the provider at once.
aiQueue.process(5, async (job) => {
  const { prompt } = job.data;
  // Call the AI provider here, e.g. via callAIWithBackoff from earlier.
});
```
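To feed the queue, map the request's priority label onto Bull's numeric scale, where lower numbers run first. The helper below is hypothetical glue assuming the queue defined above, not part of Bull's API:

```typescript
// Hypothetical mapping from our priority labels to Bull's numeric priorities.
const BULL_PRIORITY = { high: 1, normal: 5, low: 10 } as const;

async function enqueueAIRequest(req: AIRequest): Promise<void> {
  await aiQueue.add(req, { priority: BULL_PRIORITY[req.priority] });
}
```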