The difference between teams that ship AI products weekly and teams that ship monthly often comes down to infrastructure choices made in the first few weeks. Great infrastructure disappears: you don't think about it, you just ship.
A chat endpoint deployed at the edge, close to the user, is the foundation. The handler body below the `req.json()` line is a sketch; the model name and TTL are illustrative:

```ts
// api/chat/route.ts - Edge function for AI chat
import { OpenAI } from 'openai';
import { Redis } from '@upstash/redis';

export const runtime = 'edge'; // Deploy to 200+ edge locations

const openai = new OpenAI();
const redis = Redis.fromEnv();

export async function POST(req: Request) {
  const { messages, sessionId } = await req.json();

  // Sketch: ask the model, persist the session, reply.
  const completion = await openai.chat.completions.create({
    model: 'gpt-4o-mini', // illustrative
    messages,
  });
  const reply = completion.choices[0].message;
  await redis.set(`session:${sessionId}`, JSON.stringify([...messages, reply]), { ex: 3600 });
  return Response.json(reply);
}
```
Streaming matters as much as placement. Vercel's `ai` package turns an OpenAI stream into a response the browser can render token by token; the continuation after `req.json()` is a sketch with an illustrative model choice:

```ts
// Vercel Edge Function with OpenAI streaming
import { OpenAIStream, StreamingTextResponse } from 'ai';
import OpenAI from 'openai';

export const config = {
  runtime: 'edge', // Run at the edge for lowest latency
};

const openai = new OpenAI();

export default async function handler(req: Request) {
  const { messages, userId } = await req.json();

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini', // illustrative
    messages,
    stream: true,
    user: userId, // tag requests per user for provider-side abuse monitoring
  });

  // Pipe tokens to the client as they arrive.
  return new StreamingTextResponse(OpenAIStream(response));
}
```
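On the client, that stream can be consumed with plain `fetch`. This is a minimal sketch: the `/api/chat` path is assumed to match the handler above, and `render` stands in for whatever updates your UI:

```ts
// Read the token stream incrementally and append it to the page.
const res = await fetch('/api/chat', {
  method: 'POST',
  body: JSON.stringify({ messages, userId }),
});
const reader = res.body!.getReader();
const decoder = new TextDecoder();
let text = '';
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  text += decoder.decode(value, { stream: true });
  render(text); // hypothetical UI callback, e.g. a React state setter
}
```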
Exact-match caching misses paraphrases. A semantic cache embeds each query and reuses the stored answer when a new question lands close enough in embedding space. The function body is a sketch, with an illustrative embedding model:

```ts
import { OpenAI } from 'openai';
import { Index } from '@upstash/vector';

const openai = new OpenAI();
const vectorIndex = new Index();

const SIMILARITY_THRESHOLD = 0.95;
const CACHE_TTL = 3600; // 1 hour

async function semanticCache(
  query: string,
  generateFn: () => Promise<string>
): Promise<string> {
  // Embed the query, then look for a semantically similar cached answer.
  const embedding = await openai.embeddings.create({ model: 'text-embedding-3-small', input: query });
  const vector = embedding.data[0].embedding;
  const [match] = await vectorIndex.query({ vector, topK: 1, includeMetadata: true });
  if (match && match.score >= SIMILARITY_THRESHOLD) {
    return match.metadata!.response as string; // near-duplicate: serve the cached answer
  }
  const response = await generateFn(); // miss: generate, then cache for next time
  await vectorIndex.upsert({ id: crypto.randomUUID(), vector, metadata: { response } });
  return response;
}
```
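In use, it wraps any expensive generation; `answerWithLLM` here is a hypothetical helper:

```ts
// Repeat and near-repeat questions skip the model entirely.
const answer = await semanticCache(
  'How do I cancel my subscription?',
  () => answerWithLLM('How do I cancel my subscription?')
);
```

The 0.95 threshold is deliberately conservative: lowering it raises the hit rate but risks serving an answer to a subtly different question. And while the sketch above does not enforce `CACHE_TTL`, storing an `expiresAt` timestamp in the metadata and treating stale hits as misses is one way to honor it.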
Every request should clear a rate limit before any tokens are paid for. The limiter below is a sketch with illustrative limits:

```ts
// app/api/chat/route.ts
import { OpenAIStream, StreamingTextResponse } from 'ai'
import OpenAI from 'openai'
import { Redis } from '@upstash/redis'

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
const redis = Redis.fromEnv()

export async function POST(req: Request) {
  const { messages, userId } = await req.json()

  // Rate limiting check: fixed window of 20 requests per user per minute
  const window = Math.floor(Date.now() / 60_000)
  const count = await redis.incr(`ratelimit:${userId}:${window}`)
  if (count === 1) await redis.expire(`ratelimit:${userId}:${window}`, 60)
  if (count > 20) return new Response('Rate limit exceeded', { status: 429 })

  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini', // illustrative
    messages,
    stream: true,
  })
  return new StreamingTextResponse(OpenAIStream(response))
}
```
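A hand-rolled fixed window is fine to start, but it lets a user burst up to double the limit by straddling a window boundary. The sliding-window limiter from `@upstash/ratelimit`, which appears in the stack below, closes that gap for one extra dependency.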
No provider is healthy 100% of the time. Hiding them all behind a common interface turns failover into a routing decision:

```ts
// lib/ai-providers.ts
type Message = { role: string; content: string } // minimal shape, assumed here

interface AIProvider {
  name: string
  isHealthy: boolean
  latencyP99: number
  complete(messages: Message[]): Promise<AsyncIterable<string>>
}

class AIProviderManager {
  private providers: AIProvider[] = []
  private healthCheckInterval: NodeJS.Timeout // set by a periodic health-check loop (omitted)
}
```
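The routing policy is the interesting part. Here is a hedged sketch of one reasonable choice, written as a standalone function against the interface above rather than as the manager's actual method:

```ts
// Prefer the healthy provider with the lowest tail latency, and fail
// over down the list when a call throws.
async function completeWithFailover(
  providers: AIProvider[],
  messages: Message[]
): Promise<AsyncIterable<string>> {
  const ranked = providers
    .filter((p) => p.isHealthy)
    .sort((a, b) => a.latencyP99 - b.latencyP99)
  for (const provider of ranked) {
    try {
      return await provider.complete(messages)
    } catch {
      provider.isHealthy = false // sidelined until the next health check passes
    }
  }
  throw new Error('No healthy AI providers available')
}
```

Sorting by `latencyP99` rather than average latency optimizes for the experience of the slowest requests, which is usually what users notice.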
Put together, the speed stack fits in one module. The rate-limit values are illustrative:

```ts
// lib/speed-stack.ts
import { Redis } from '@upstash/redis'
import { Ratelimit } from '@upstash/ratelimit'
import { OpenAI } from 'openai'
import { Anthropic } from '@anthropic-ai/sdk'

// 1. Multi-layer caching: a process-local map in front of shared Redis
const redis = Redis.fromEnv()
const browserCache = new Map<string, { data: string; timestamp: number }>()

// 2. Rate limiting with sliding window
const ratelimit = new Ratelimit({
  redis,
  limiter: Ratelimit.slidingWindow(20, '60 s'),
})

// 3. Both providers ready up front, so failover never waits on a cold client
const openai = new OpenAI()
const anthropic = new Anthropic()
```
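Here is a sketch of how the two caching layers compose on the request path; `cachedAnswer` and `CACHE_TTL_MS` are illustrative names, not part of the module above:

```ts
const CACHE_TTL_MS = 60_000 // keep the in-memory layer deliberately short-lived

async function cachedAnswer(key: string, generate: () => Promise<string>): Promise<string> {
  // Layer 1: process-local memory, zero network hops
  const local = browserCache.get(key)
  if (local && Date.now() - local.timestamp < CACHE_TTL_MS) return local.data

  // Layer 2: Redis, shared across regions
  const shared = await redis.get<string>(key)
  if (shared) {
    browserCache.set(key, { data: shared, timestamp: Date.now() })
    return shared
  }

  // Miss on both layers: generate once, then populate top-down
  const fresh = await generate()
  await redis.set(key, fresh, { ex: 3600 })
  browserCache.set(key, { data: fresh, timestamp: Date.now() })
  return fresh
}
```

Rate limiting is then a single call at the top of each handler: `const { success } = await ratelimit.limit(userId)`, rejecting the request when `success` is false.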