Intermediate

Agent Memory, Planning, and Reflection

Lesson 4 of 4 | Estimated Time: 50 min

Sophisticated agents require more than reactive responses to individual queries. They need persistent memory to learn from experience, planning capabilities to organize complex tasks, and reflection mechanisms to improve their reasoning. This lesson explores how to add memory, planning, and self-improvement to your agents.

Memory Systems for Agents

Effective memory systems distinguish between short-term context (current conversation) and long-term knowledge (learned facts and patterns).

Short-Term Memory (Context Window)

The simplest form of memory is maintaining conversation history:

import anthropic
from dataclasses import dataclass, field
from typing import List

@dataclass
class ConversationMemory:
    """Manages conversation history."""
    # default_factory gives each instance its own list, avoiding the
    # shared-mutable-default pitfall
    messages: List[dict] = field(default_factory=list)

    def add_message(self, role: str, content: str):
        """Add message to history."""
        self.messages.append({
            "role": role,
            "content": content
        })

    def get_context(self, max_messages: int = 10) -> List[dict]:
        """Get recent messages for context."""
        return self.messages[-max_messages:]

    def clear(self):
        """Clear all history."""
        self.messages = []

class ConversationalAgent:
    """Agent with short-term memory."""

    def __init__(self):
        self.client = anthropic.Anthropic()
        self.memory = ConversationMemory()

    def chat(self, user_message: str) -> str:
        """Have conversation with context."""
        self.memory.add_message("user", user_message)

        # Get recent context
        context = self.memory.get_context(max_messages=20)

        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1024,
            system="""You are a helpful assistant. Use the conversation history
            to maintain context across multiple exchanges. Reference previous
            messages when relevant.""",
            messages=context
        )

        assistant_response = response.content[0].text
        self.memory.add_message("assistant", assistant_response)

        return assistant_response

# Usage
agent = ConversationalAgent()
print(agent.chat("What's my favorite color?"))
print(agent.chat("Actually, I prefer blue. Remember that."))
print(agent.chat("What's my favorite color now?"))

Long-Term Memory (Knowledge Base)

For persistent learning, store important facts and learned patterns:

import json
from datetime import datetime
from typing import Any, Dict

class LongTermMemory:
    """Stores learned facts and patterns."""

    def __init__(self, storage_file: str = "agent_memory.json"):
        self.storage_file = storage_file
        self.facts: Dict[str, dict] = self._load_facts()

    def _load_facts(self) -> Dict[str, dict]:
        """Load facts from storage."""
        try:
            with open(self.storage_file, 'r') as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            # Missing or corrupted storage starts fresh
            return {}

    def _save_facts(self):
        """Persist facts to storage."""
        with open(self.storage_file, 'w') as f:
            json.dump(self.facts, f, indent=2)

    def store_fact(self, key: str, value: Any, confidence: float = 1.0):
        """Store a learned fact."""
        self.facts[key] = {
            "value": value,
            "confidence": confidence,
            "timestamp": datetime.now().isoformat(),
            "access_count": 0
        }
        self._save_facts()

    def retrieve_fact(self, key: str) -> Any:
        """Retrieve a learned fact and track how often it is used."""
        if key in self.facts:
            fact = self.facts[key]
            fact["access_count"] += 1
            self._save_facts()
            return fact["value"]
        return None

    def forget_fact(self, key: str):
        """Remove a learned fact."""
        if key in self.facts:
            del self.facts[key]
            self._save_facts()

    def update_confidence(self, key: str, confidence: float):
        """Update confidence in a fact."""
        if key in self.facts:
            self.facts[key]["confidence"] = confidence
            self._save_facts()

class AgentWithLongTermMemory:
    """Agent that learns and remembers facts."""

    def __init__(self):
        self.client = anthropic.Anthropic()
        self.short_term = ConversationMemory()
        self.long_term = LongTermMemory()

    def extract_facts(self, text: str) -> List[dict]:
        """Extract learnable facts from text."""
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=500,
            system="""Extract facts from the given text in JSON format.
            Return a list of {key, value, confidence} objects.
            Example: [{"key": "age", "value": "30", "confidence": 0.9}]""",
            messages=[{"role": "user", "content": text}]
        )

        try:
            # The model may wrap the JSON in prose; pull out the first list
            import re
            match = re.search(r'\[.*\]', response.content[0].text, re.DOTALL)
            return json.loads(match.group()) if match else []
        except json.JSONDecodeError:
            return []

    def process_with_memory(self, user_message: str) -> str:
        """Process message and extract/use memory."""
        self.short_term.add_message("user", user_message)

        # Retrieve relevant facts
        relevant_facts = self._get_relevant_facts(user_message)

        # Build context with facts
        context = self.short_term.get_context(max_messages=10)
        fact_context = "\n".join([
            f"Known fact: {k} = {v['value']} (confidence: {v['confidence']})"
            for k, v in relevant_facts.items()
        ])

        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1024,
            system=f"""You are an agent with memory. Use these known facts to inform
            your responses:

            {fact_context}

            Learn new facts from the conversation and remember them.""",
            messages=context
        )

        assistant_response = response.content[0].text
        self.short_term.add_message("assistant", assistant_response)

        # Extract and store new facts
        new_facts = self.extract_facts(user_message)
        for fact in new_facts:
            self.long_term.store_fact(fact["key"], fact["value"], fact.get("confidence", 1.0))

        return assistant_response

    def _get_relevant_facts(self, query: str) -> Dict:
        """Get facts relevant to current query."""
        relevant = {}
        for key, fact in self.long_term.facts.items():
            # Simple relevance: check if key appears in query
            if key.lower() in query.lower():
                relevant[key] = fact
        return relevant

# Usage
agent = AgentWithLongTermMemory()
print(agent.process_with_memory("My name is Alice and I'm 30 years old."))
print(agent.process_with_memory("What's my name and age?"))
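The exact-substring check in `_get_relevant_facts` misses near matches such as a query mentioning "favorite color" against a fact keyed `favorite_color`. A slightly more forgiving token-overlap score is sketched below; it assumes fact keys use snake_case and is illustrative only (production agents typically use embedding similarity instead):

```python
import string

def score_relevance(query: str, fact_key: str) -> float:
    """Fraction of the fact key's tokens that appear in the query."""
    # Strip punctuation so "color?" matches "color"
    q_tokens = {t.strip(string.punctuation) for t in query.lower().split()}
    k_tokens = set(fact_key.lower().replace("_", " ").split())
    if not k_tokens:
        return 0.0
    return len(q_tokens & k_tokens) / len(k_tokens)

# Usage
print(score_relevance("What is my favorite color?", "favorite_color"))  # 1.0
```

A threshold such as 0.5 could then replace the `key.lower() in query.lower()` test when selecting facts to inject into the prompt.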

Planning Mechanisms

Agents benefit from explicitly planning their approach before taking action:

Task Decomposition

class PlanningAgent:
    """Agent that plans before acting."""

    def __init__(self):
        self.client = anthropic.Anthropic()

    def create_plan(self, goal: str) -> List[str]:
        """Create step-by-step plan for a goal."""
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            system="""You are a strategic planner. Break goals into clear,
            actionable steps. Return steps as a JSON list of strings.""",
            messages=[{"role": "user", "content": f"Create a plan for: {goal}"}]
        )

        try:
            plan_text = response.content[0].text
            # Parse JSON list from response
            import re
            match = re.search(r'\[.*\]', plan_text, re.DOTALL)
            if match:
                steps = json.loads(match.group())
                return steps
        except json.JSONDecodeError:
            pass

        return [goal]  # Fallback

    def execute_plan(self, goal: str) -> str:
        """Create and execute a plan."""
        plan = self.create_plan(goal)

        print(f"Plan for '{goal}':")
        for i, step in enumerate(plan, 1):
            print(f"  {i}. {step}")

        # Execute each step
        results = []
        for step in plan:
            response = self.client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=500,
                system="Execute this step concisely.",
                messages=[{"role": "user", "content": step}]
            )
            results.append(response.content[0].text)

        # Synthesize results
        synthesis_response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            system=f"""Synthesize these step results into a coherent answer for goal: {goal}""",
            messages=[{"role": "user", "content": "\n".join(results)}]
        )

        return synthesis_response.content[0].text

# Usage
planner = PlanningAgent()
result = planner.execute_plan("Build a simple web scraper for news articles")
print(result)

Conditional Planning

class ConditionalPlanningAgent:
    """Agent that adapts plans based on conditions."""

    def __init__(self):
        self.client = anthropic.Anthropic()

    def create_conditional_plan(self, goal: str, context: str) -> dict:
        """Create plan with conditional branches."""
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1500,
            system="""Create a conditional plan with branches based on possible outcomes.
            Return JSON with structure:
            {
                "primary_steps": [...],
                "conditions": [{"if": "...", "then": [...]}]
            }""",
            messages=[{
                "role": "user",
                "content": f"Goal: {goal}\nContext: {context}"
            }]
        )

        try:
            return json.loads(response.content[0].text)
        except json.JSONDecodeError:
            return {"primary_steps": [goal]}

Reflection and Self-Improvement

Agents can reflect on their actions and learn from outcomes:

Action Reflection

class ReflectiveAgent:
    """Agent that reflects on and improves its actions."""

    def __init__(self):
        self.client = anthropic.Anthropic()
        self.interaction_log = []

    def take_action(self, action_description: str) -> str:
        """Take action and log it."""
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=500,
            system="Execute this action and describe the result.",
            messages=[{"role": "user", "content": action_description}]
        )

        result = response.content[0].text
        self.interaction_log.append({
            "action": action_description,
            "result": result
        })

        return result

    def reflect(self) -> str:
        """Reflect on recent actions and lessons learned."""
        if not self.interaction_log:
            return "No actions to reflect on."

        # Format recent interactions
        recent = self.interaction_log[-5:]
        interactions_text = "\n".join([
            f"Action: {i['action']}\nResult: {i['result']}"
            for i in recent
        ])

        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=800,
            system="""Reflect on these recent interactions. Identify:
            1. What worked well
            2. What didn't work
            3. Lessons learned
            4. How to improve future actions""",
            messages=[{"role": "user", "content": interactions_text}]
        )

        reflection = response.content[0].text
        self.interaction_log.append({
            "type": "reflection",
            "content": reflection
        })

        return reflection

    def improve_strategy(self) -> str:
        """Use reflection to improve strategy."""
        reflection = self.reflect()

        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            system="""Based on this reflection, propose specific improvements
            to the agent's future behavior and strategy.""",
            messages=[{"role": "user", "content": reflection}]
        )

        return response.content[0].text

# Usage
agent = ReflectiveAgent()
agent.take_action("Write code to parse JSON files")
agent.take_action("Test the JSON parser with edge cases")
print(agent.improve_strategy())

Outcome Evaluation

class OutcomeAwareAgent:
    """Agent that evaluates outcomes and adjusts approach."""

    def __init__(self):
        self.client = anthropic.Anthropic()
        self.success_patterns = {}

    def execute_and_evaluate(self, task: str, success_criteria: str) -> dict:
        """Execute task and evaluate against criteria."""
        # Execute task
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            system="Execute this task carefully.",
            messages=[{"role": "user", "content": task}]
        )

        result = response.content[0].text

        # Evaluate outcome
        eval_response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=500,
            system="""Evaluate if this outcome meets the success criteria.
            Return JSON: {"success": bool, "score": 0-1, "feedback": "..."}""",
            messages=[{
                "role": "user",
                "content": f"Task: {task}\nResult: {result}\nCriteria: {success_criteria}"
            }]
        )

        try:
            evaluation = json.loads(eval_response.content[0].text)
        except json.JSONDecodeError:
            evaluation = {"success": False, "score": 0, "feedback": "Evaluation failed"}

        return {
            "task": task,
            "result": result,
            "evaluation": evaluation
        }

Best Practices for Agent Intelligence

1. Memory Hierarchy

Organize memory by relevance and update frequency (short-term > working > long-term).
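One way to sketch this hierarchy is a tiered store where frequently accessed facts migrate toward long-term storage. The class below is illustrative only; the tier names and promotion threshold are assumptions, not part of the classes defined earlier in this lesson:

```python
from collections import Counter

class TieredMemory:
    """Three-tier store: items promoted to long-term on repeated access."""

    def __init__(self, promote_after: int = 3):
        self.short_term: dict = {}   # current conversation, cleared often
        self.working: dict = {}      # task-scoped facts
        self.long_term: dict = {}    # durable knowledge
        self.hits = Counter()
        self.promote_after = promote_after

    def remember(self, key: str, value):
        """New facts enter at the most volatile tier."""
        self.short_term[key] = value

    def recall(self, key: str):
        """Search tiers in order; promote hot facts to long-term."""
        for tier in (self.short_term, self.working, self.long_term):
            if key in tier:
                self.hits[key] += 1
                value = tier[key]
                if self.hits[key] >= self.promote_after and tier is not self.long_term:
                    self.long_term[key] = tier.pop(key)
                return value
        return None

# Usage
mem = TieredMemory(promote_after=2)
mem.remember("name", "Alice")
mem.recall("name"); mem.recall("name")
print("name" in mem.long_term)  # True
```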

2. Plan Validation

Always validate generated plans before execution to catch logical errors.
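Cheap structural checks catch many bad plans before any tokens are spent on execution. The checks below are illustrative; a real validator might also ask the model to critique its own plan:

```python
def validate_plan(steps, max_steps: int = 20) -> list:
    """Return a list of problems; an empty list means the plan passes."""
    problems = []
    if not steps:
        problems.append("plan is empty")
    if len(steps) > max_steps:
        problems.append(f"plan too long ({len(steps)} > {max_steps} steps)")
    if any(not isinstance(s, str) or not s.strip() for s in steps):
        problems.append("plan contains empty or non-string steps")
    if len(set(steps)) != len(steps):
        problems.append("plan contains duplicate steps")
    return problems

# Usage: run before the execute loop and fall back to replanning on failure
issues = validate_plan(["fetch data", "fetch data", ""])
print(issues)
# ['plan contains empty or non-string steps', 'plan contains duplicate steps']
```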

3. Reflection Frequency

Balance reflection with action. Too frequent reflection wastes tokens; too infrequent misses learning.
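A simple way to enforce this balance is a counter that triggers reflection only every N actions. In this sketch, `reflect_fn` stands in for `ReflectiveAgent.reflect`, and the interval value is an assumption to tune against your token budget:

```python
class ReflectionScheduler:
    """Trigger reflection only every `interval` recorded actions."""

    def __init__(self, reflect_fn, interval: int = 5):
        self.reflect_fn = reflect_fn
        self.interval = interval
        self.actions_since_reflection = 0

    def record_action(self):
        """Call after each agent action; reflects when the interval is hit."""
        self.actions_since_reflection += 1
        if self.actions_since_reflection >= self.interval:
            self.actions_since_reflection = 0
            return self.reflect_fn()
        return None

# Usage
scheduler = ReflectionScheduler(lambda: "reflection!", interval=3)
outputs = [scheduler.record_action() for _ in range(6)]
print(outputs)  # [None, None, 'reflection!', None, None, 'reflection!']
```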

4. Confidence Tracking

Track confidence in memories and decisions. Low confidence should trigger verification.

def store_fact_with_confidence(agent: AgentWithLongTermMemory, key: str, value: str):
    """Store fact with confidence based on whether it was mentioned recently."""
    # get_context() returns message dicts, so search their content strings
    recent = agent.short_term.get_context()
    mentioned = any(key.lower() in m["content"].lower() for m in recent)
    confidence = 0.9 if mentioned else 0.5
    agent.long_term.store_fact(key, value, confidence)

Key Takeaway

Advanced agents leverage structured memory systems, explicit planning, and reflection mechanisms to become more capable and autonomous. Memory distinguishes learning agents from stateless ones, while planning and reflection enable continuous self-improvement.

Exercises

  1. Build Memory System: Implement both short-term and long-term memory. Test that the agent remembers facts across multiple conversations.

  2. Planning Agent: Create an agent that decomposes complex goals into steps and executes them sequentially.

  3. Conditional Planning: Add branching logic to plans based on intermediate outcomes.

  4. Reflection Loop: Implement an agent that reflects on its actions every N interactions and adjusts behavior.

  5. Confidence Tracking: Add confidence scores to stored facts and trigger re-verification for low-confidence facts.

  6. Memory Analysis: Track and visualize which facts the agent accesses most frequently. Analyze patterns in learned information.