Agent Memory, Planning, and Reflection
Sophisticated agents require more than reactive responses to individual queries. They need persistent memory to learn from experience, planning capabilities to organize complex tasks, and reflection mechanisms to improve their reasoning. This lesson explores how to add memory, planning, and self-improvement to your agents.
Memory Systems for Agents
Effective memory systems distinguish between short-term context (current conversation) and long-term knowledge (learned facts and patterns).
Short-Term Memory (Context Window)
The simplest form of memory is maintaining conversation history:
import re
from dataclasses import dataclass, field
from typing import Any, List

import anthropic
@dataclass
class ConversationMemory:
    """Manages short-term conversation history (the context window).

    Messages are stored oldest-first as ``{"role", "content"}`` dicts,
    matching the Anthropic Messages API format.
    """

    # Use default_factory instead of the None-sentinel + __post_init__
    # workaround: mutable defaults must never be shared across instances,
    # and dataclasses provide field(default_factory=...) for exactly this.
    messages: List[dict] = field(default_factory=list)

    def add_message(self, role: str, content: str) -> None:
        """Append a message to the history."""
        self.messages.append({
            "role": role,
            "content": content
        })

    def get_context(self, max_messages: int = 10) -> List[dict]:
        """Return up to the ``max_messages`` most recent messages."""
        return self.messages[-max_messages:]

    def clear(self) -> None:
        """Discard all stored messages."""
        self.messages = []
class ConversationalAgent:
    """Chat agent backed by a sliding window of recent conversation turns."""

    def __init__(self):
        self.client = anthropic.Anthropic()
        self.memory = ConversationMemory()

    def chat(self, user_message: str) -> str:
        """Send ``user_message`` plus recent history to the model; return its reply.

        Both the user message and the model's reply are recorded so that
        subsequent calls see them in context.
        """
        self.memory.add_message("user", user_message)
        # Replay up to the last 20 turns so the model keeps context.
        recent_turns = self.memory.get_context(max_messages=20)
        reply = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1024,
            system="""You are a helpful assistant. Use the conversation history
to maintain context across multiple exchanges. Reference previous
messages when relevant.""",
            messages=recent_turns,
        )
        answer = reply.content[0].text
        self.memory.add_message("assistant", answer)
        return answer
# Usage
# Demo: the later calls only "remember" the stated preference because prior
# turns are replayed through the context window on every request.
agent = ConversationalAgent()
print(agent.chat("What's my favorite color?"))
print(agent.chat("Actually, I prefer blue. Remember that."))
print(agent.chat("What's my favorite color now?"))
Long-Term Memory (Knowledge Base)
For persistent learning, store important facts and learned patterns:
import json
from datetime import datetime
from typing import Dict
class LongTermMemory:
    """Stores learned facts and patterns, persisted to a JSON file.

    Each fact is kept as ``{"value", "confidence", "timestamp", "access_count"}``
    and every mutation is written straight through to ``storage_file``.
    """

    def __init__(self, storage_file: str = "agent_memory.json"):
        self.storage_file = storage_file
        self.facts: Dict[str, dict] = self._load_facts()

    def _load_facts(self) -> Dict[str, dict]:
        """Load facts from storage; start empty if the file is missing or corrupt."""
        try:
            with open(self.storage_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            # A corrupt store should not crash the agent; begin with no facts.
            return {}

    def _save_facts(self) -> None:
        """Persist all facts to storage."""
        with open(self.storage_file, 'w', encoding='utf-8') as f:
            json.dump(self.facts, f, indent=2)

    def store_fact(self, key: str, value: Any, confidence: float = 1.0) -> None:
        """Store (or overwrite) a learned fact with metadata."""
        # NOTE: ``Any`` replaces the previous annotation ``any``, which is the
        # builtin function, not a type.
        self.facts[key] = {
            "value": value,
            "confidence": confidence,
            "timestamp": datetime.now().isoformat(),
            "access_count": 0
        }
        self._save_facts()

    def retrieve_fact(self, key: str) -> Any:
        """Return a fact's value (bumping its access count), or None if unknown."""
        if key in self.facts:
            fact = self.facts[key]
            fact["access_count"] += 1
            self._save_facts()
            return fact["value"]
        return None

    def forget_fact(self, key: str) -> None:
        """Remove a learned fact, if present."""
        if key in self.facts:
            del self.facts[key]
            self._save_facts()

    def update_confidence(self, key: str, confidence: float) -> None:
        """Update confidence in an existing fact; no-op for unknown keys."""
        if key in self.facts:
            self.facts[key]["confidence"] = confidence
            self._save_facts()
class AgentWithLongTermMemory:
    """Agent that learns facts from conversation and recalls them later."""

    def __init__(self):
        self.client = anthropic.Anthropic()
        self.short_term = ConversationMemory()
        self.long_term = LongTermMemory()

    def extract_facts(self, text: str) -> List[dict]:
        """Extract learnable facts from ``text``.

        Returns a list of ``{"key", "value", "confidence"}`` dicts (the
        previous ``List[str]`` annotation was wrong). Returns ``[]`` when
        the model's output is not valid JSON or not a list.
        """
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=500,
            system="""Extract facts from the given text in JSON format.
Return a list of {key, value, confidence} objects.
Example: [{"key": "age", "value": "30", "confidence": 0.9}]""",
            messages=[{"role": "user", "content": text}]
        )
        try:
            facts = json.loads(response.content[0].text)
        except json.JSONDecodeError:
            return []
        # Guard against the model returning a bare object instead of a list.
        return facts if isinstance(facts, list) else []

    def process_with_memory(self, user_message: str) -> str:
        """Answer ``user_message`` using stored facts, then learn new ones."""
        self.short_term.add_message("user", user_message)
        # Retrieve facts whose keys appear in the query.
        relevant_facts = self._get_relevant_facts(user_message)
        context = self.short_term.get_context(max_messages=10)
        fact_context = "\n".join(
            f"Known fact: {k} = {v['value']} (confidence: {v['confidence']})"
            for k, v in relevant_facts.items()
        )
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1024,
            system=f"""You are an agent with memory. Use these known facts to inform
your responses:
{fact_context}
Learn new facts from the conversation and remember them.""",
            messages=context
        )
        assistant_response = response.content[0].text
        self.short_term.add_message("assistant", assistant_response)
        # Persist anything learnable from the user's message.
        for fact in self.extract_facts(user_message):
            self.long_term.store_fact(
                fact["key"], fact["value"], fact.get("confidence", 1.0)
            )
        return assistant_response

    def _get_relevant_facts(self, query: str) -> Dict:
        """Return stored facts whose key appears verbatim in the query.

        Naive substring relevance; a real system would use embeddings.
        """
        lowered = query.lower()
        return {
            key: fact
            for key, fact in self.long_term.facts.items()
            if key.lower() in lowered
        }
# Usage
# Demo: the first call extracts and stores facts from the user message; the
# second surfaces them again via the key-in-query relevance match.
agent = AgentWithLongTermMemory()
print(agent.process_with_memory("My name is Alice and I'm 30 years old."))
print(agent.process_with_memory("What's my name and age?"))
Planning Mechanisms
Agents benefit from explicitly planning their approach before taking action:
Task Decomposition
class PlanningAgent:
    """Agent that decomposes a goal into explicit steps before acting."""

    def __init__(self):
        self.client = anthropic.Anthropic()

    def create_plan(self, goal: str) -> List[str]:
        """Ask the model for a step-by-step plan for ``goal``.

        Falls back to ``[goal]`` (treat the whole goal as one step) when the
        model's output cannot be parsed as a JSON list. The previous bare
        ``except:`` is narrowed to the parse failure it was meant to catch.
        """
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            system="""You are a strategic planner. Break goals into clear,
actionable steps. Return steps as a JSON list of strings.""",
            messages=[{"role": "user", "content": f"Create a plan for: {goal}"}]
        )
        plan_text = response.content[0].text
        # The model may wrap the JSON list in prose; extract the first [...] span.
        match = re.search(r'\[.*\]', plan_text, re.DOTALL)
        if match:
            try:
                steps = json.loads(match.group())
            except json.JSONDecodeError:
                steps = None
            if isinstance(steps, list):
                return steps
        return [goal]  # Fallback

    def execute_plan(self, goal: str) -> str:
        """Create a plan, execute each step, and synthesize a final answer."""
        plan = self.create_plan(goal)
        print(f"Plan for '{goal}':")
        for i, step in enumerate(plan, 1):
            print(f"  {i}. {step}")
        # Execute each step independently; results are joined for synthesis.
        results = []
        for step in plan:
            response = self.client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=500,
                system="Execute this step concisely.",
                messages=[{"role": "user", "content": step}]
            )
            results.append(response.content[0].text)
        # Synthesize the per-step results into one coherent answer.
        synthesis_response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            system=f"""Synthesize these step results into a coherent answer for goal: {goal}""",
            messages=[{"role": "user", "content": "\n".join(results)}]
        )
        return synthesis_response.content[0].text
# Usage
# Demo: prints the generated plan, executes each step, then prints the
# synthesized final answer.
planner = PlanningAgent()
result = planner.execute_plan("Build a simple web scraper for news articles")
print(result)
Conditional Planning
class ConditionalPlanningAgent:
    """Agent that adapts plans based on conditions."""

    def __init__(self):
        self.client = anthropic.Anthropic()

    def create_conditional_plan(self, goal: str, context: str) -> dict:
        """Create a plan with conditional branches.

        Returns the parsed plan dict, or ``{"primary_steps": [goal]}`` when
        the model's output is not a parseable JSON object. The previous bare
        ``except:`` is narrowed to the JSON parse failure it was meant to
        catch, so real errors are no longer silenced.
        """
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1500,
            system="""Create a conditional plan with branches based on possible outcomes.
Return JSON with structure:
{
  "primary_steps": [...],
  "conditions": [{"if": "...", "then": [...]}]
}""",
            messages=[{
                "role": "user",
                "content": f"Goal: {goal}\nContext: {context}"
            }]
        )
        try:
            plan = json.loads(response.content[0].text)
        except json.JSONDecodeError:
            return {"primary_steps": [goal]}
        # Guard against the model returning a non-object (e.g. a bare list).
        return plan if isinstance(plan, dict) else {"primary_steps": [goal]}
Reflection and Self-Improvement
Agents can reflect on their actions and learn from outcomes:
Action Reflection
class ReflectiveAgent:
    """Agent that reflects on and improves its actions.

    The interaction log holds two entry shapes: action entries
    ``{"action", "result"}`` and reflection entries ``{"type", "content"}``.
    """

    def __init__(self):
        self.client = anthropic.Anthropic()
        self.interaction_log = []

    def take_action(self, action_description: str) -> str:
        """Execute an action via the model and record it in the log."""
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=500,
            system="Execute this action and describe the result.",
            messages=[{"role": "user", "content": action_description}]
        )
        result = response.content[0].text
        self.interaction_log.append({
            "action": action_description,
            "result": result
        })
        return result

    def reflect(self) -> str:
        """Reflect on the most recent actions and log the lessons learned.

        Bug fix: the log also contains reflection entries ({"type","content"})
        appended by this very method; the old code indexed i['action'] on the
        raw last-5 slice and raised KeyError once a reflection was among them.
        We now consider only action entries.
        """
        recent = [e for e in self.interaction_log if "action" in e][-5:]
        if not recent:
            return "No actions to reflect on."
        interactions_text = "\n".join([
            f"Action: {i['action']}\nResult: {i['result']}"
            for i in recent
        ])
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=800,
            system="""Reflect on these recent interactions. Identify:
1. What worked well
2. What didn't work
3. Lessons learned
4. How to improve future actions""",
            messages=[{"role": "user", "content": interactions_text}]
        )
        reflection = response.content[0].text
        self.interaction_log.append({
            "type": "reflection",
            "content": reflection
        })
        return reflection

    def improve_strategy(self) -> str:
        """Reflect, then ask the model to turn the reflection into improvements."""
        reflection = self.reflect()
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            system="""Based on this reflection, propose specific improvements
to the agent's future behavior and strategy.""",
            messages=[{"role": "user", "content": reflection}]
        )
        return response.content[0].text
# Usage
# Demo: two actions are logged, then improve_strategy() reflects on them and
# proposes behavioral improvements.
agent = ReflectiveAgent()
agent.take_action("Write code to parse JSON files")
agent.take_action("Test the JSON parser with edge cases")
print(agent.improve_strategy())
Outcome Evaluation
class OutcomeAwareAgent:
    """Agent that evaluates outcomes and adjusts approach."""

    def __init__(self):
        self.client = anthropic.Anthropic()
        # Reserved for recording which approaches succeed; not yet populated.
        self.success_patterns = {}

    def execute_and_evaluate(self, task: str, success_criteria: str) -> dict:
        """Execute ``task`` and grade the result against ``success_criteria``.

        Returns ``{"task", "result", "evaluation"}`` where evaluation is
        ``{"success": bool, "score": 0-1, "feedback": str}``.
        """
        # Execute the task.
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1000,
            system="Execute this task carefully.",
            messages=[{"role": "user", "content": task}]
        )
        result = response.content[0].text
        # Ask the model to judge the outcome against the criteria.
        eval_response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=500,
            system="""Evaluate if this outcome meets the success criteria.
Return JSON: {"success": bool, "score": 0-1, "feedback": "..."}""",
            messages=[{
                "role": "user",
                "content": f"Task: {task}\nResult: {result}\nCriteria: {success_criteria}"
            }]
        )
        # Narrowed from a bare ``except:``: only parse failures (or a
        # non-object result) fall back to the failure evaluation.
        try:
            evaluation = json.loads(eval_response.content[0].text)
        except json.JSONDecodeError:
            evaluation = {"success": False, "score": 0, "feedback": "Evaluation failed"}
        else:
            if not isinstance(evaluation, dict):
                evaluation = {"success": False, "score": 0, "feedback": "Evaluation failed"}
        return {
            "task": task,
            "result": result,
            "evaluation": evaluation
        }
Best Practices for Agent Intelligence
1. Memory Hierarchy
Organize memory by relevance and update frequency (short-term > working > long-term).
2. Plan Validation
Always validate generated plans before execution to catch logical errors.
3. Reflection Frequency
Balance reflection with action. Too frequent reflection wastes tokens; too infrequent misses learning.
4. Confidence Tracking
Track confidence in memories and decisions. Low confidence should trigger verification.
def store_fact_with_confidence(agent: AgentWithLongTermMemory, key: str, value: str):
    """Store a fact, with higher confidence when it was mentioned recently.

    Bug fix: the original tested ``key in agent.short_term.get_context()``,
    but get_context() returns a list of message dicts, so a string key could
    never match and confidence was always 0.5. We now search the message
    contents for the key.
    """
    recent_messages = agent.short_term.get_context()
    mentioned_recently = any(
        key.lower() in str(msg.get("content", "")).lower()
        for msg in recent_messages
    )
    # Facts grounded in the current conversation are more trustworthy.
    confidence = 0.9 if mentioned_recently else 0.5
    agent.long_term.store_fact(key, value, confidence)
Key Takeaway
Advanced agents leverage structured memory systems, explicit planning, and reflection mechanisms to become more capable and autonomous. Memory distinguishes learning agents from stateless ones, while planning and reflection enable continuous self-improvement.
Exercises
1. Build Memory System: Implement both short-term and long-term memory. Test that the agent remembers facts across multiple conversations.
2. Planning Agent: Create an agent that decomposes complex goals into steps and executes them sequentially.
3. Conditional Planning: Add branching logic to plans based on intermediate outcomes.
4. Reflection Loop: Implement an agent that reflects on its actions every N interactions and adjusts behavior.
5. Confidence Tracking: Add confidence scores to stored facts and trigger re-verification for low-confidence facts.
6. Memory Analysis: Track and visualize which facts the agent accesses most frequently. Analyze patterns in learned information.