Core Architectural Patterns for GenAI
When you build AI systems, certain patterns emerge repeatedly. The router pattern decides where requests should go. The chain pattern sequences operations. The fallback pattern handles failures gracefully. Understanding these patterns lets you design robust systems without reinventing solutions.
The Router Pattern
The router pattern directs requests to different handlers based on content. Imagine a customer support system that needs to route questions to different departments—billing, technical support, or sales.
from openai import OpenAI
from enum import Enum
class SupportCategory(Enum):
    """Closed set of support-ticket routing targets.

    GENERAL doubles as the catch-all fallback when classification
    produces an unrecognized label.
    """
    BILLING = "billing"
    TECHNICAL = "technical"
    SALES = "sales"
    GENERAL = "general"
def classify_support_query(query: str) -> SupportCategory:
    """Classify a support query to route to the right team.

    Asks the model for a single category label and maps it onto
    SupportCategory. Any missing, unexpected, or malformed answer falls
    back to SupportCategory.GENERAL so routing never fails outright.

    Args:
        query: The raw customer question to classify.

    Returns:
        The matching SupportCategory member (GENERAL on any mismatch).
    """
    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": """You are a support ticket router. Classify the following query
into ONE category: billing, technical, sales, or general.
Respond with only the category name."""
            },
            {"role": "user", "content": query}
        ],
        # temperature=0 keeps the classification deterministic.
        temperature=0
    )
    # content may be None (e.g. content-filtered responses); guard before
    # normalizing instead of crashing on .strip().
    raw = response.choices[0].message.content or ""
    # Strip surrounding whitespace/punctuation so near-miss answers such
    # as "Billing." still map onto a valid category.
    category_str = raw.strip().strip(".!?\"'").lower()
    try:
        return SupportCategory(category_str)
    except ValueError:
        return SupportCategory.GENERAL
def route_support_query(query: str) -> dict:
    """Classify *query* and dispatch it to the matching department handler.

    Returns a dict with the chosen category name ("category") and the
    handler's answer ("response").
    """
    category = classify_support_query(query)
    # Dispatch table: every enum member has a dedicated handler.
    dispatch = {
        SupportCategory.BILLING: handle_billing_query,
        SupportCategory.TECHNICAL: handle_technical_query,
        SupportCategory.SALES: handle_sales_query,
        SupportCategory.GENERAL: handle_general_query,
    }
    answer = dispatch[category](query)
    return {"category": category.value, "response": answer}
def handle_billing_query(query: str) -> str:
    """Answer a billing question using a billing-specialist persona."""
    conversation = [
        {
            "role": "system",
            "content": "You are a billing specialist. Answer billing questions."
        },
        {"role": "user", "content": query},
    ]
    completion = OpenAI().chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    return completion.choices[0].message.content
def handle_technical_query(query: str) -> str:
    """Answer a technical-support question using an expert persona."""
    system_prompt = "You are a technical support expert. Solve technical issues."
    completion = OpenAI().chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
    )
    return completion.choices[0].message.content
def handle_sales_query(query: str) -> str:
    """Answer a sales inquiry using a sales-representative persona."""
    client = OpenAI()
    messages = [
        {
            "role": "system",
            "content": "You are a sales representative. Help customers with purchases."
        },
        {"role": "user", "content": query},
    ]
    result = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    return result.choices[0].message.content
def handle_general_query(query: str) -> str:
    """Answer any question that did not match a specialized category."""
    reply = OpenAI().chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful support agent."},
            {"role": "user", "content": query},
        ],
    )
    return reply.choices[0].message.content
# Usage
# Demo: a duplicate-charge question should be routed to "billing".
result = route_support_query("Why was I charged twice?")
print(f"Category: {result['category']}")
print(f"Response: {result['response']}")
The router pattern excels when you have:
- Different types of requests needing different handlers
- Performance benefits from specialized processing
- Clear classification criteria
- Domain-specific knowledge needed per category
The Chain Pattern
The chain pattern sequences multiple steps, where output from one step feeds into the next. Think of it like an assembly line for AI tasks.
from openai import OpenAI
def chain_text_processing(text: str) -> dict:
    """Chain multiple text processing steps.

    Pipeline: summarize the text, extract key concepts from the summary,
    then generate study questions from those concepts — each step feeds
    the next, like an assembly line.

    Args:
        text: The raw input text to process.

    Returns:
        A dict with "original", "summary", "concepts", and "questions".
    """
    client = OpenAI()

    def _step(instruction: str, content: str) -> str:
        # One chain link: apply a system instruction to the previous
        # step's output. Factored out so the three steps share one
        # call site instead of three copy-pasted API stanzas.
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": instruction},
                {"role": "user", "content": content},
            ],
        )
        return response.choices[0].message.content

    # Step 1: Summarize the original text.
    summary = _step("Summarize the following text in 2-3 sentences.", text)
    # Step 2: Extract key concepts (using summary as input).
    concepts = _step("Extract 3-5 key concepts from the following text.", summary)
    # Step 3: Generate questions (using concepts).
    questions = _step("Generate 2-3 study questions based on these concepts.", concepts)

    return {
        "original": text,
        "summary": summary,
        "concepts": concepts,
        "questions": questions
    }
# Usage
# Each printed field was produced from the previous one in the chain.
text = "Machine learning is a subset of artificial intelligence that focuses on enabling computers to learn from data without being explicitly programmed."
result = chain_text_processing(text)
print(f"Summary: {result['summary']}")
print(f"Concepts: {result['concepts']}")
print(f"Questions: {result['questions']}")
The chain pattern works well for:
- Sequential processing where later steps depend on earlier ones
- Building complexity from simple building blocks
- Decomposing large problems into steps
The Fallback Pattern
The fallback pattern provides alternatives when the primary approach fails. It’s your safety net.
from openai import OpenAI
from anthropic import Anthropic
def get_response_with_fallback(prompt: str) -> dict:
    """Ask OpenAI first; if that call raises, retry the prompt on Anthropic.

    Returns a dict with "success", "provider", and "response". When both
    providers fail, "success" is False and "provider" is None. Broad
    exception handling is deliberate here: any provider error should
    trigger the next fallback, not crash the caller.
    """
    user_turn = [{"role": "user", "content": prompt}]

    # Primary provider: OpenAI.
    try:
        completion = OpenAI().chat.completions.create(
            model="gpt-4-turbo",
            messages=user_turn,
            timeout=10
        )
        return {
            "success": True,
            "provider": "openai",
            "response": completion.choices[0].message.content
        }
    except Exception as e:
        print(f"OpenAI failed: {e}")

    # Fallback provider: Anthropic.
    try:
        reply = Anthropic().messages.create(
            model="claude-3-opus-20240229",
            max_tokens=1000,
            messages=user_turn
        )
        return {
            "success": True,
            "provider": "anthropic",
            "response": reply.content[0].text
        }
    except Exception as e:
        print(f"Anthropic also failed: {e}")

    # All fallbacks exhausted.
    return {
        "success": False,
        "provider": None,
        "response": "All AI providers failed. Please try again later."
    }
# Usage
# "provider" tells you which backend actually answered.
result = get_response_with_fallback("What is AI?")
print(f"Provider: {result['provider']}")
print(f"Response: {result['response']}")
The fallback pattern handles:
- API failures and timeouts
- Rate limiting from one provider (switch to another)
- Model-specific limitations
- Graceful degradation
The MapReduce Pattern for LLMs
MapReduce divides large problems into smaller ones (map), processes them in parallel, then combines results (reduce). This is powerful for handling large datasets.
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor, as_completed
def analyze_document_with_mapreduce(document_chunks: list[str]) -> dict:
    """Use MapReduce to analyze a large document in chunks.

    Map: each chunk is analyzed independently (in parallel threads).
    Reduce: the per-chunk ideas are combined into one final summary.

    Args:
        document_chunks: Ordered text chunks of the document.

    Returns:
        A dict with "chunk_analyses" (one dict per chunk, in original
        order) and "final_summary".
    """
    # Robustness: an empty document means nothing to map AND no reduce
    # call — otherwise we'd burn an API call summarizing an empty string.
    if not document_chunks:
        return {"chunk_analyses": [], "final_summary": ""}

    client = OpenAI()

    # MAP: Analyze each chunk independently.
    def analyze_chunk(chunk: str, index: int) -> dict:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": "Extract the main ideas from this text passage."
                },
                {"role": "user", "content": chunk}
            ]
        )
        return {
            "chunk_index": index,
            "ideas": response.choices[0].message.content
        }

    # Process chunks in parallel. executor.map yields results in input
    # order, so no futures bookkeeping or post-hoc sort is needed.
    with ThreadPoolExecutor(max_workers=5) as executor:
        results = list(
            executor.map(analyze_chunk, document_chunks, range(len(document_chunks)))
        )

    # REDUCE: Combine all ideas into a coherent summary.
    combined_ideas = "\n\n".join(r["ideas"] for r in results)
    reduce_response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "Combine these chunk analyses into a cohesive summary."
            },
            {"role": "user", "content": combined_ideas}
        ]
    )
    return {
        "chunk_analyses": results,
        "final_summary": reduce_response.choices[0].message.content
    }
# Usage
# Three toy chunks stand in for pages of a long document.
chunks = [
    "Chunk 1: Discussion of machine learning fundamentals...",
    "Chunk 2: Deep learning architectures...",
    "Chunk 3: Practical applications..."
]
result = analyze_document_with_mapreduce(chunks)
print(f"Final summary: {result['final_summary']}")
MapReduce works well for:
- Processing large documents
- Batch analysis of many items
- Parallel processing where order doesn’t matter initially
The Gateway Pattern
The gateway pattern acts as a single entry point, handling authentication, validation, and routing.
from dataclasses import dataclass
from typing import Optional
@dataclass
class APIRequest:
    """A single unit of work submitted to the AIGateway."""
    user_id: str  # caller identity; gateway rejects empty values
    prompt: str  # user prompt; gateway caps length at 10,000 chars
    model: str = "gpt-3.5-turbo"  # must be on the gateway's allow-list
    temperature: float = 0.7
class AIGateway:
    """Gateway for all AI API calls.

    Single entry point that validates requests, enforces a per-user
    request quota, tracks usage, and forwards approved requests to the
    OpenAI API.

    NOTE(review): the usage counter never resets, so the quota is a
    lifetime cap rather than a per-time-window rate limit — confirm
    that matches the intended semantics.
    """

    def __init__(self, rate_limit: int = 100):
        self.rate_limit = rate_limit
        self.usage = {}  # user_id -> number of requests accepted so far
        self.client = OpenAI()

    def validate_request(self, request: APIRequest) -> tuple[bool, Optional[str]]:
        """Return (ok, error_message); error_message is None when valid."""
        error = None
        if not request.user_id:
            error = "Missing user_id"
        elif not request.prompt:
            error = "Empty prompt"
        elif len(request.prompt) > 10000:
            error = "Prompt too long"
        elif request.model not in ("gpt-3.5-turbo", "gpt-4-turbo"):
            error = "Invalid model"
        return error is None, error

    def check_rate_limit(self, user_id: str) -> bool:
        """Return True while the user is still under the request quota."""
        return self.usage.get(user_id, 0) < self.rate_limit

    def process_request(self, request: APIRequest) -> dict:
        """Validate, rate-limit, log, and execute one AI request."""
        is_valid, error = self.validate_request(request)
        if not is_valid:
            return {"success": False, "error": error}

        if not self.check_rate_limit(request.user_id):
            return {
                "success": False,
                "error": f"Rate limit exceeded ({self.rate_limit} requests)"
            }

        # Count the request before calling out, matching the policy of
        # charging quota for every accepted request.
        self.usage[request.user_id] = self.usage.get(request.user_id, 0) + 1

        try:
            response = self.client.chat.completions.create(
                model=request.model,
                messages=[{"role": "user", "content": request.prompt}],
                temperature=request.temperature
            )
            return {
                "success": True,
                "response": response.choices[0].message.content,
                "tokens": response.usage.total_tokens
            }
        except Exception as e:
            return {"success": False, "error": str(e)}
# Usage
# A low limit (10) makes it easy to see the rate limiter trip in a demo.
gateway = AIGateway(rate_limit=10)
request = APIRequest(
    user_id="user123",
    prompt="What is AI?",
    model="gpt-3.5-turbo"
)
result = gateway.process_request(request)
print(result)
The gateway pattern provides:
- Centralized authentication and authorization
- Request validation
- Rate limiting
- Logging and monitoring
- Single point to enforce policies
Key Takeaway
Core architectural patterns solve recurring design problems. The router pattern directs requests to specialized handlers. The chain pattern sequences operations. The fallback pattern provides resilience. MapReduce distributes large problems. The gateway pattern centralizes control. Understanding and combining these patterns lets you build scalable, maintainable AI systems.
Exercises
1. Implement a router: Build a router that classifies customer reviews (positive/negative/neutral) and routes to different handlers for each sentiment.
2. Chain multiple operations: Create a chain that takes raw text, extracts entities, then generates a report from those entities.
3. Add fallback: Implement a function that tries OpenAI first, falls back to Anthropic, then to a local mock if both fail.
4. MapReduce a document: Break a multi-page document into chunks, analyze each in parallel, then combine results.
5. Design a gateway: Create a gateway class that validates requests, enforces rate limits, and logs all API calls.