Machine learning infrastructure has traditionally demanded significant upfront investment in GPU clusters, container orchestration, and always-on compute resources that sit idle 90% of the time. Serverless architecture fundamentally changes this equation by allowing ML engineers to pay only for actual inference time while automatically scaling from zero to thousands of concurrent requests.
```python
import json
import os
import boto3
import numpy as np

# Initialize model OUTSIDE handler for container reuse
# This code runs once per cold start, not per invocation
MODEL = None
S3_CLIENT = boto3.client('s3')

def load_model():
    """Load model from S3 or Lambda Layer - runs once per container"""
```
```python
import json
import os
import boto3
from functools import lru_cache
import numpy as np

# Global model cache - persists across warm invocations
_model = None
_model_version = None

def get_model(version: str):
    """Load model with caching and version checking."""
```
```python
import json
import pickle
import os
from typing import Dict, Any

# Global model cache - persists across warm invocations
MODEL = None

def load_model():
    global MODEL
    if MODEL is None:
        model_path = os.path.join(os.path.dirname(__file__), 'model.pkl')
```
```json
{
  "Comment": "Serverless ML Inference Pipeline with Error Handling",
  "StartAt": "ValidateInput",
  "States": {
    "ValidateInput": {
      "Type": "Task",
      "Resource": "arn:aws:lambda:us-east-1:123456789:function:validate-input",
      "Next": "FetchFeatures",
      "Catch": [{
        "ErrorEquals": ["ValidationError"],
        "Next": "HandleValidationError"
      }],
```
```typescript
import * as cdk from 'aws-cdk-lib';
import * as lambda from 'aws-cdk-lib/aws-lambda';
import * as apigateway from 'aws-cdk-lib/aws-apigateway';
import * as dynamodb from 'aws-cdk-lib/aws-dynamodb';

export class ServerlessMlStack extends cdk.Stack {
  constructor(scope: cdk.App, id: string, props?: cdk.StackProps) {
    super(scope, id, props);

    // Feature cache for low-latency lookups
    const featureCache = new dynamodb.Table(this, 'FeatureCache', {
      partitionKey: { name: 'userId', type: dynamodb.AttributeType.STRING },
```