Foundations

State Management and Conversation Design

Lesson 3 of 4 · Estimated time: 45 min

Conversations are sequences of messages where context matters. Early messages influence later responses. This lesson covers managing conversation state, handling context windows, summarizing history, and persisting sessions across application restarts.

Understanding Conversation State

At its core, conversation state is just a list of messages. But managing it requires thoughtful design:

from dataclasses import dataclass
from datetime import datetime
from typing import Optional

@dataclass
class Message:
    """Represents a single message in a conversation."""
    role: str  # "user", "assistant", "system"
    content: str
    timestamp: datetime
    tokens: Optional[int] = None

class ConversationState:
    """Manage conversation state and history."""

    def __init__(self, system_prompt: str = ""):
        self.messages: list[Message] = []
        self.system_prompt = system_prompt
        self.created_at = datetime.now()
        self.updated_at = datetime.now()

    def add_message(self, role: str, content: str, tokens: Optional[int] = None):
        """Add a message to the conversation."""
        message = Message(
            role=role,
            content=content,
            timestamp=datetime.now(),
            tokens=tokens
        )
        self.messages.append(message)
        self.updated_at = datetime.now()

    def get_messages_for_api(self) -> list[dict]:
        """Format messages for API calls."""
        api_messages = []

        # Add system prompt if present
        if self.system_prompt:
            api_messages.append({
                "role": "system",
                "content": self.system_prompt
            })

        # Add all conversation messages
        for msg in self.messages:
            api_messages.append({
                "role": msg.role,
                "content": msg.content
            })

        return api_messages

    def get_conversation_length(self) -> int:
        """Get total tokens used in conversation."""
        return sum(msg.tokens for msg in self.messages if msg.tokens)

    def clear(self):
        """Clear conversation history."""
        self.messages.clear()
        self.updated_at = datetime.now()

    def get_summary(self) -> str:
        """Get a human-readable summary of the conversation."""
        summary = f"Conversation started {self.created_at.strftime('%Y-%m-%d %H:%M')}\n"
        summary += f"Total messages: {len(self.messages)}\n"
        summary += f"Last message: {self.updated_at.strftime('%Y-%m-%d %H:%M')}\n"
        return summary

# Usage
state = ConversationState(system_prompt="You are a helpful assistant.")
state.add_message("user", "What is machine learning?", tokens=10)
state.add_message("assistant", "Machine learning is...", tokens=50)
state.add_message("user", "Can you give an example?", tokens=8)

messages = state.get_messages_for_api()
print(f"Messages ready for API: {len(messages)}")
print(f"Conversation length: {state.get_conversation_length()} tokens")
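If exact token counts aren't available when a message is added, a rough estimate can stand in. The sketch below uses the common heuristic of roughly four characters per token for English text; the helper name `estimate_tokens` is illustrative, and the heuristic is an approximation, not a tokenizer:

```python
def estimate_tokens(text: str) -> int:
    """Rough token estimate: ~4 characters per token for English text."""
    return max(1, len(text) // 4)

# Pass the estimate where an exact count is unavailable
print(estimate_tokens("What is machine learning?"))  # 6
```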

Context Window Management

Language models have limited context windows. GPT-4 Turbo supports 128K tokens, but older models offered only 4K or 8K. You need strategies for when conversations grow too large.

import tiktoken

class ContextWindowManager:
    """Manage context window limits."""

    def __init__(self, model: str = "gpt-4-turbo", window_size: int = 128000):
        self.model = model
        self.window_size = window_size
        try:
            self.encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # Fall back for model names tiktoken doesn't recognize
            self.encoding = tiktoken.get_encoding("cl100k_base")
        # Reserve tokens for the response (safety buffer)
        self.reserve = 2000

    def count_tokens(self, messages: list[dict]) -> int:
        """Count tokens in message list."""
        total = 0
        for msg in messages:
            total += 4  # Message overhead
            for value in msg.values():
                total += len(self.encoding.encode(str(value)))
        return total

    def will_fit(self, messages: list[dict]) -> bool:
        """Check if messages fit in context window."""
        tokens = self.count_tokens(messages)
        return tokens + self.reserve < self.window_size

    def trim_messages(self, messages: list[dict]) -> list[dict]:
        """Remove oldest messages to fit context window."""
        # Keep system message always
        system_msg = [m for m in messages if m["role"] == "system"]
        conversation = [m for m in messages if m["role"] != "system"]

        # Copy so the inserts below don't mutate system_msg, then add the
        # most recent messages that still fit, newest kept in chronological
        # order right after the system prompt
        trimmed = list(system_msg)
        insert_at = len(system_msg)
        for msg in reversed(conversation):
            if self.will_fit(trimmed + [msg]):
                trimmed.insert(insert_at, msg)
            else:
                break

        return trimmed

# Usage
manager = ContextWindowManager()

messages = [
    {"role": "system", "content": "You are helpful"},
    {"role": "user", "content": "Tell me about AI. " * 100},
    {"role": "assistant", "content": "AI is... " * 100},
]

tokens = manager.count_tokens(messages)
print(f"Tokens used: {tokens}")

if not manager.will_fit(messages):
    print("Messages don't fit, trimming...")
    trimmed = manager.trim_messages(messages)
    print(f"Trimmed to {len(trimmed)} messages")
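When tiktoken isn't available, a dependency-free fallback is a fixed-size sliding window over the most recent messages. This is a sketch under the assumption that dropping older turns outright is acceptable; unlike summarization, it loses information:

```python
def sliding_window(messages: list[dict], max_messages: int = 20) -> list[dict]:
    """Keep any system message plus the most recent max_messages turns."""
    system = [m for m in messages if m["role"] == "system"]
    rest = [m for m in messages if m["role"] != "system"]
    return system + rest[-max_messages:]

history = [{"role": "system", "content": "You are helpful"}] + [
    {"role": "user", "content": f"message {i}"} for i in range(50)
]
print(len(sliding_window(history, max_messages=20)))  # 21
```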

Summarization Strategies

When history grows large, summarize instead of truncating:

from openai import OpenAI

class ConversationSummarizer:
    """Summarize conversation history to save context."""

    def __init__(self):
        self.client = OpenAI()

    def summarize_messages(self, messages: list[dict]) -> str:
        """Summarize a conversation into a brief summary."""
        # Format conversation for summarization
        conversation_text = "\n".join([
            f"{msg['role'].upper()}: {msg['content']}"
            for msg in messages
        ])

        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": "Summarize the following conversation in 2-3 sentences."
                },
                {"role": "user", "content": conversation_text}
            ],
            temperature=0
        )

        return response.choices[0].message.content

    def compress_conversation(
        self,
        messages: list[dict],
        summary_every_n_messages: int = 10
    ) -> list[dict]:
        """Summarize messages older than the last summary_every_n_messages."""
        if len(messages) <= summary_every_n_messages:
            return messages

        # Keep system message and last N messages
        system = [m for m in messages if m["role"] == "system"]
        conversation = [m for m in messages if m["role"] != "system"]

        old_messages = conversation[:-summary_every_n_messages]
        recent_messages = conversation[-summary_every_n_messages:]

        # Summarize old messages
        if old_messages:
            summary = self.summarize_messages(old_messages)
            summary_msg = {
                "role": "assistant",
                "content": f"[Previous conversation summary: {summary}]"
            }
            return system + [summary_msg] + recent_messages
        else:
            return system + recent_messages

# Usage
summarizer = ConversationSummarizer()

messages = [
    {"role": "system", "content": "You are helpful"},
    {"role": "user", "content": "What is AI?"},
    {"role": "assistant", "content": "AI is..."},
    # ... many more messages
]

compressed = summarizer.compress_conversation(messages, summary_every_n_messages=3)
print(f"Compressed from {len(messages)} to {len(compressed)} messages")
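Because compress_conversation depends on a live API call, it is hard to exercise offline. One option is to pass the summarizer in as a function, so a stub can replace it in tests. The sketch below (`compress` is a hypothetical standalone variant, not part of the class above) shows the idea:

```python
from typing import Callable

def compress(
    messages: list[dict],
    summarize: Callable[[list[dict]], str],
    keep_recent: int = 10,
) -> list[dict]:
    """Replace messages older than the last keep_recent with a summary."""
    system = [m for m in messages if m["role"] == "system"]
    rest = [m for m in messages if m["role"] != "system"]
    if len(rest) <= keep_recent:
        return messages
    summary_msg = {
        "role": "assistant",
        "content": f"[Previous conversation summary: {summarize(rest[:-keep_recent])}]",
    }
    return system + [summary_msg] + rest[-keep_recent:]

# Stub summarizer for offline testing
stub = lambda msgs: f"{len(msgs)} earlier messages"
history = [{"role": "system", "content": "s"}] + [
    {"role": "user", "content": str(i)} for i in range(12)
]
result = compress(history, stub, keep_recent=5)
print(result[1]["content"])  # [Previous conversation summary: 7 earlier messages]
```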

Session Persistence

Save conversations to disk so you can resume them later:

import json
import os
from datetime import datetime

class PersistentConversation:
    """Persist conversations to disk."""

    def __init__(self, session_id: str, storage_dir: str = "./conversations"):
        self.session_id = session_id
        self.storage_dir = storage_dir
        self.filepath = os.path.join(storage_dir, f"{session_id}.json")
        self.messages = []
        self.metadata = {
            "created": datetime.now().isoformat(),
            "updated": datetime.now().isoformat()
        }

        os.makedirs(storage_dir, exist_ok=True)

        # Load if exists
        if os.path.exists(self.filepath):
            self.load()

    def add_message(self, role: str, content: str):
        """Add message and persist."""
        self.messages.append({
            "role": role,
            "content": content,
            "timestamp": datetime.now().isoformat()
        })
        self.metadata["updated"] = datetime.now().isoformat()
        self.save()

    def save(self):
        """Save conversation to disk."""
        data = {
            "session_id": self.session_id,
            "metadata": self.metadata,
            "messages": self.messages
        }
        with open(self.filepath, 'w') as f:
            json.dump(data, f, indent=2)

    def load(self):
        """Load conversation from disk."""
        with open(self.filepath, 'r') as f:
            data = json.load(f)
            self.messages = data.get("messages", [])
            self.metadata = data.get("metadata", {})

    def get_messages_for_api(self) -> list[dict]:
        """Format messages for API."""
        return [
            {"role": msg["role"], "content": msg["content"]}
            for msg in self.messages
        ]

    def clear(self):
        """Clear and delete session."""
        self.messages = []
        if os.path.exists(self.filepath):
            os.remove(self.filepath)

# Usage
session = PersistentConversation("user_alice_session_123")

# Add messages (saved to disk automatically)
session.add_message("user", "Hello")
session.add_message("assistant", "Hi there!")

# Later, create new instance with same ID - loads from disk
session2 = PersistentConversation("user_alice_session_123")
print(f"Loaded {len(session2.messages)} messages from disk")
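Writing directly to the target file, as save() does, means a crash mid-write can leave a corrupt JSON file behind. A common mitigation is writing to a temporary file and atomically swapping it in; the sketch below (`atomic_save` is a hypothetical helper) uses os.replace, which is atomic on both POSIX and Windows:

```python
import json
import os
import tempfile

def atomic_save(filepath: str, data: dict) -> None:
    """Write JSON to a temp file in the same directory, then swap it in."""
    dir_name = os.path.dirname(filepath) or "."
    fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix=".tmp")
    try:
        with os.fdopen(fd, "w") as f:
            json.dump(data, f, indent=2)
        os.replace(tmp_path, filepath)  # atomic rename
    except BaseException:
        os.remove(tmp_path)
        raise
```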

Smart Conversation Design

Design conversations that work well with LLM limitations:

import tiktoken

class ConversationDesigner:
    """Best practices for conversation design."""

    @staticmethod
    def get_preamble(task: str) -> str:
        """Get a good system prompt for a task."""
        preambles = {
            "customer_support": (
                "You are a helpful customer support representative.\n"
                "- Be empathetic and professional\n"
                "- Provide clear, concise answers\n"
                "- Escalate complex issues appropriately"
            ),
            "code_assistant": (
                "You are an expert programming assistant.\n"
                "- Provide working code examples\n"
                "- Explain the reasoning\n"
                "- Point out potential issues"
            ),
            "research": (
                "You are a research assistant.\n"
                "- Provide accurate, cited information\n"
                "- Acknowledge uncertainty\n"
                "- Suggest further reading"
            ),
        }
        return preambles.get(task, "You are a helpful assistant.")

    @staticmethod
    def start_conversation(task: str) -> list[dict]:
        """Start a conversation with good defaults."""
        return [
            {"role": "system", "content": ConversationDesigner.get_preamble(task)}
        ]

    @staticmethod
    def should_summarize(messages: list[dict], token_limit: int = 4000) -> bool:
        """Determine if conversation should be summarized."""
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
        tokens = sum(len(encoding.encode(m["content"])) for m in messages)
        return tokens > token_limit

# Usage
messages = ConversationDesigner.start_conversation("customer_support")
print(f"System prompt: {messages[0]['content']}")

Multi-User Conversations

Handle conversations for multiple concurrent users:

class ConversationManager:
    """Manage multiple concurrent conversations."""

    def __init__(self):
        self.conversations: dict[str, PersistentConversation] = {}

    def get_conversation(self, user_id: str) -> PersistentConversation:
        """Get or create conversation for user."""
        if user_id not in self.conversations:
            self.conversations[user_id] = PersistentConversation(user_id)
        return self.conversations[user_id]

    def add_message(self, user_id: str, role: str, content: str):
        """Add message to user's conversation."""
        conversation = self.get_conversation(user_id)
        conversation.add_message(role, content)

    def get_messages(self, user_id: str) -> list[dict]:
        """Get formatted messages for API."""
        conversation = self.get_conversation(user_id)
        return conversation.get_messages_for_api()

    def list_conversations(self) -> list[str]:
        """List all active conversations."""
        return list(self.conversations.keys())

# Usage
manager = ConversationManager()

manager.add_message("user1", "user", "What is AI?")
manager.add_message("user1", "assistant", "AI is...")

manager.add_message("user2", "user", "Tell me a joke")
manager.add_message("user2", "assistant", "Why did...")

print(f"Active conversations: {manager.list_conversations()}")
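If requests arrive concurrently, say from a web server's thread pool, a manager like this can race when two threads create or append to the same conversation. A minimal sketch of lock-based protection, using an in-memory dict instead of PersistentConversation for brevity (the class name is illustrative):

```python
import threading

class ThreadSafeConversationManager:
    """Serialize access to per-user histories with a single lock."""

    def __init__(self):
        self._lock = threading.Lock()
        self._store: dict[str, list[dict]] = {}

    def add_message(self, user_id: str, role: str, content: str) -> None:
        with self._lock:
            self._store.setdefault(user_id, []).append(
                {"role": role, "content": content}
            )

    def get_messages(self, user_id: str) -> list[dict]:
        with self._lock:
            # Return a copy so callers can't mutate shared state
            return list(self._store.get(user_id, []))
```

A single lock is the simplest correct choice; per-user locks would reduce contention at the cost of more bookkeeping.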

Key Takeaway

Conversations are state that grows over time. Manage this state carefully: track messages, monitor context window usage, summarize when necessary, and persist sessions to disk. Design conversations with model limitations in mind, and prepare strategies for when history grows too large.

Exercises

  1. Conversation state: Build a conversation state manager. Add, retrieve, and clear messages. Format for API calls.

  2. Context window management: Implement a context manager that tracks tokens and trims old messages when limits approach.

  3. Summarization: Build a summarizer that compresses conversation history while preserving key information.

  4. Persistence: Create a system that saves conversations to disk and loads them on restart.

  5. Multi-user: Implement a conversation manager that handles multiple concurrent user sessions.