State Management and Conversation Design
Conversations are sequences of messages where context matters. Early messages influence later responses. This lesson covers managing conversation state, handling context windows, summarizing history, and persisting sessions across application restarts.
Understanding Conversation State
At its core, conversation state is just a list of messages. But managing it requires thoughtful design:
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
@dataclass
class Message:
    """A single message exchanged in a conversation."""

    role: str  # "user", "assistant", or "system"
    content: str
    timestamp: datetime
    tokens: Optional[int] = None  # token count, if the caller measured one


class ConversationState:
    """Hold an ordered message history plus an optional system prompt."""

    def __init__(self, system_prompt: str = ""):
        self.messages: list[Message] = []
        self.system_prompt = system_prompt
        self.created_at = datetime.now()
        self.updated_at = datetime.now()

    def add_message(self, role: str, content: str, tokens: Optional[int] = None):
        """Append one message and bump the updated timestamp."""
        self.messages.append(
            Message(role=role, content=content, timestamp=datetime.now(), tokens=tokens)
        )
        self.updated_at = datetime.now()

    def get_messages_for_api(self) -> list[dict]:
        """Return role/content dicts, with the system prompt (if any) first."""
        prefix = (
            [{"role": "system", "content": self.system_prompt}]
            if self.system_prompt
            else []
        )
        return prefix + [{"role": m.role, "content": m.content} for m in self.messages]

    def get_conversation_length(self) -> int:
        """Total tokens across messages that carry a token count."""
        return sum(m.tokens for m in self.messages if m.tokens)

    def clear(self):
        """Drop all history; the system prompt is kept."""
        self.messages.clear()
        self.updated_at = datetime.now()

    def get_summary(self) -> str:
        """Human-readable status line for this conversation."""
        lines = [
            f"Conversation started {self.created_at.strftime('%Y-%m-%d %H:%M')}",
            f"Total messages: {len(self.messages)}",
            f"Last message: {self.updated_at.strftime('%Y-%m-%d %H:%M')}",
        ]
        return "\n".join(lines) + "\n"
# Usage
# Build a short conversation and inspect what would be sent to the API.
state = ConversationState(system_prompt="You are a helpful assistant.")
state.add_message("user", "What is machine learning?", tokens=10)
state.add_message("assistant", "Machine learning is...", tokens=50)
state.add_message("user", "Can you give an example?", tokens=8)
# 4 entries total: the system prompt plus the three messages above.
messages = state.get_messages_for_api()
print(f"Messages ready for API: {len(messages)}")
print(f"Conversation length: {state.get_conversation_length()} tokens")
Context Window Management
Language models have limited context windows. GPT-4 Turbo offers a 128K-token window, but older models might allow only 4K. You need strategies for when conversations grow too large.
import tiktoken
from openai import OpenAI
class ContextWindowManager:
    """Track token usage against a model's context window limit.

    Counts tokens with tiktoken, answers whether a message list fits,
    and trims the oldest non-system messages when it does not.
    """

    def __init__(self, model: str = "gpt-4-turbo", window_size: int = 128000):
        self.model = model
        self.window_size = window_size
        self.encoding = tiktoken.encoding_for_model(model)
        # Reserve tokens for the model's response (safety buffer).
        self.reserve = 2000

    def count_tokens(self, messages: list[dict]) -> int:
        """Approximate token count of a message list (+4/message overhead)."""
        total = 0
        for msg in messages:
            total += 4  # per-message formatting overhead
            for value in msg.values():
                total += len(self.encoding.encode(str(value)))
        return total

    def will_fit(self, messages: list[dict]) -> bool:
        """True if messages plus the response reserve fit in the window."""
        return self.count_tokens(messages) + self.reserve < self.window_size

    def trim_messages(self, messages: list[dict]) -> list[dict]:
        """Drop the oldest non-system messages until the rest fit.

        System messages are always kept. Fix over the previous version:
        it aliased the system-message list and inserted at a growing
        index, which returned the kept messages in reverse chronological
        order; messages are now returned oldest-first as callers expect.
        """
        system_msgs = [m for m in messages if m["role"] == "system"]
        conversation = [m for m in messages if m["role"] != "system"]
        kept: list[dict] = []
        # Walk newest to oldest, keeping messages while they still fit.
        for msg in reversed(conversation):
            if self.will_fit(system_msgs + [msg] + kept):
                kept.insert(0, msg)  # restore chronological order
            else:
                break
        return system_msgs + kept
# Usage
manager = ContextWindowManager()
messages = [
    {"role": "system", "content": "You are helpful"},
    {"role": "user", "content": "Tell me about AI" * 100},
    {"role": "assistant", "content": "AI is..." * 100},
]
tokens = manager.count_tokens(messages)
print(f"Tokens used: {tokens}")
# Trim the oldest messages if the list would overflow the context window.
if not manager.will_fit(messages):
    print("Messages don't fit, trimming...")
    trimmed = manager.trim_messages(messages)
    print(f"Trimmed to {len(trimmed)} messages")
Summarization Strategies
When history grows large, summarize instead of truncating:
from openai import OpenAI
class ConversationSummarizer:
    """Shrink long histories by replacing old turns with an LLM summary."""

    def __init__(self):
        self.client = OpenAI()

    def summarize_messages(self, messages: list[dict]) -> str:
        """Ask the model for a short summary of the given turns."""
        transcript = "\n".join(
            f"{turn['role'].upper()}: {turn['content']}" for turn in messages
        )
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": "Summarize the following conversation in 2-3 sentences."
                },
                {"role": "user", "content": transcript},
            ],
            temperature=0
        )
        return response.choices[0].message.content

    def compress_conversation(
        self,
        messages: list[dict],
        summary_every_n_messages: int = 10
    ) -> list[dict]:
        """Replace all but the last N non-system turns with one summary."""
        if len(messages) <= summary_every_n_messages:
            return messages

        # System messages are preserved verbatim; only chat turns compress.
        system = [m for m in messages if m["role"] == "system"]
        conversation = [m for m in messages if m["role"] != "system"]
        recent = conversation[-summary_every_n_messages:]
        stale = conversation[:-summary_every_n_messages]

        if not stale:
            return system + recent

        digest = self.summarize_messages(stale)
        marker = {
            "role": "assistant",
            "content": f"[Previous conversation summary: {digest}]"
        }
        return system + [marker] + recent
# Usage
summarizer = ConversationSummarizer()
messages = [
    {"role": "system", "content": "You are helpful"},
    {"role": "user", "content": "What is AI?"},
    {"role": "assistant", "content": "AI is..."},
    # ... many more messages
]
# Keep only the 3 most recent turns; older ones collapse into a summary.
compressed = summarizer.compress_conversation(messages, summary_every_n_messages=3)
print(f"Compressed from {len(messages)} to {len(compressed)} messages")
Session Persistence
Save conversations to disk so you can resume them later:
import json
import os
from datetime import datetime
class PersistentConversation:
    """Conversation history mirrored to a JSON file on every change.

    Creating an instance with an existing session_id reloads its messages
    from disk, so sessions survive application restarts.
    """

    def __init__(self, session_id: str, storage_dir: str = "./conversations"):
        self.session_id = session_id
        self.storage_dir = storage_dir
        self.filepath = os.path.join(storage_dir, f"{session_id}.json")
        self.messages = []
        self.metadata = {
            "created": datetime.now().isoformat(),
            "updated": datetime.now().isoformat()
        }
        os.makedirs(storage_dir, exist_ok=True)
        # Resume a previous session if one was saved under this id.
        if os.path.exists(self.filepath):
            self.load()

    def add_message(self, role: str, content: str):
        """Append a timestamped message and immediately persist to disk."""
        self.messages.append({
            "role": role,
            "content": content,
            "timestamp": datetime.now().isoformat()
        })
        self.metadata["updated"] = datetime.now().isoformat()
        self.save()

    def save(self):
        """Write session id, metadata, and messages as indented JSON.

        Fix: the file is opened with an explicit UTF-8 encoding. Relying
        on the platform default (e.g. cp1252 on Windows) can raise
        UnicodeEncodeError for non-ASCII message content.
        """
        data = {
            "session_id": self.session_id,
            "metadata": self.metadata,
            "messages": self.messages
        }
        with open(self.filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

    def load(self):
        """Restore messages and metadata from the session file."""
        with open(self.filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        self.messages = data.get("messages", [])
        self.metadata = data.get("metadata", {})

    def get_messages_for_api(self) -> list[dict]:
        """Return role/content dicts (timestamps stripped) for API calls."""
        return [
            {"role": msg["role"], "content": msg["content"]}
            for msg in self.messages
        ]

    def clear(self):
        """Forget the in-memory history and delete the session file."""
        self.messages = []
        if os.path.exists(self.filepath):
            os.remove(self.filepath)
# Usage
session = PersistentConversation("user_alice_session_123")
# Add messages (saved to disk automatically)
session.add_message("user", "Hello")
session.add_message("assistant", "Hi there!")
# Later, create new instance with same ID - loads from disk
session2 = PersistentConversation("user_alice_session_123")
print(f"Loaded {len(session2.messages)} messages from disk")
Smart Conversation Design
Design conversations that work well with LLM limitations:
class ConversationDesigner:
    """Helpers that encode conversation-design best practices."""

    @staticmethod
    def get_preamble(task: str) -> str:
        """Return a task-specific system prompt, or a generic fallback."""
        preambles = {
            "customer_support": """You are a helpful customer support representative.
- Be empathetic and professional
- Provide clear, concise answers
- Escalate complex issues appropriately""",
            "code_assistant": """You are an expert programming assistant.
- Provide working code examples
- Explain the reasoning
- Point out potential issues""",
            "research": """You are a research assistant.
- Provide accurate, cited information
- Acknowledge uncertainty
- Suggest further reading"""
        }
        if task in preambles:
            return preambles[task]
        return "You are a helpful assistant."

    @staticmethod
    def start_conversation(task: str) -> list[dict]:
        """Seed a message list with the system prompt for the given task."""
        preamble = ConversationDesigner.get_preamble(task)
        return [{"role": "system", "content": preamble}]

    @staticmethod
    def should_summarize(messages: list[dict], token_limit: int = 4000) -> bool:
        """True once the combined content tokens exceed token_limit."""
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
        total = 0
        for m in messages:
            total += len(encoding.encode(m["content"]))
        return total > token_limit
# Usage
# Start a customer-support conversation with a curated system prompt.
messages = ConversationDesigner.start_conversation("customer_support")
print(f"System prompt: {messages[0]['content']}")
Multi-User Conversations
Handle conversations for multiple concurrent users:
from typing import Dict
class ConversationManager:
    """Route messages to per-user PersistentConversation sessions."""

    def __init__(self):
        self.conversations: Dict[str, PersistentConversation] = {}

    def get_conversation(self, user_id: str) -> PersistentConversation:
        """Return the user's conversation, creating one on first use."""
        if user_id not in self.conversations:
            self.conversations[user_id] = PersistentConversation(user_id)
        return self.conversations[user_id]

    def add_message(self, user_id: str, role: str, content: str):
        """Append a message to the conversation owned by user_id."""
        self.get_conversation(user_id).add_message(role, content)

    def get_messages(self, user_id: str) -> list[dict]:
        """API-formatted messages for the user's conversation."""
        return self.get_conversation(user_id).get_messages_for_api()

    def list_conversations(self) -> list[str]:
        """User ids with a conversation loaded in this manager."""
        return list(self.conversations.keys())
# Usage
manager = ConversationManager()
# Two users interleave messages without sharing history.
manager.add_message("user1", "user", "What is AI?")
manager.add_message("user1", "assistant", "AI is...")
manager.add_message("user2", "user", "Tell me a joke")
manager.add_message("user2", "assistant", "Why did...")
print(f"Active conversations: {manager.list_conversations()}")
Key Takeaway
Conversations are state that grows over time. Manage this state carefully: track messages, monitor context window usage, summarize when necessary, and persist sessions to disk. Design conversations with limitations in mind, and prepare strategies for when history grows too large.
Exercises
-
Conversation state: Build a conversation state manager. Add, retrieve, and clear messages. Format for API calls.
-
Context window management: Implement a context manager that tracks tokens and trims old messages when limits approach.
-
Summarization: Build a summarizer that compresses conversation history while preserving key information.
-
Persistence: Create a system that saves conversations to disk and loads them on restart.
-
Multi-user: Implement a conversation manager that handles multiple concurrent user sessions.