Building Reliable LLM Pipelines
A pipeline is only as reliable as its weakest link. In this lesson, you’ll learn to validate inputs, parse outputs confidently, enforce guardrails, and handle failures gracefully. By the end, you’ll build systems that work reliably, not just when everything goes right.
Input Validation
Never trust user input. Always validate before sending to an LLM.
from typing import Optional
from urllib.parse import urlparse
import re
class InputValidator:
    """Validate and sanitize inputs before sending them to an LLM."""

    @staticmethod
    def validate_prompt(prompt: str, min_length: int = 1, max_length: int = 10000) -> tuple[bool, Optional[str]]:
        """Validate a prompt.

        Returns:
            (True, None) when valid, otherwise (False, reason).
        """
        # Check type/emptiness first so the len() calls below are always safe.
        if not prompt or not isinstance(prompt, str):
            return False, "Prompt must be a non-empty string"
        if len(prompt) < min_length:
            return False, f"Prompt too short (minimum {min_length} characters)"
        if len(prompt) > max_length:
            return False, f"Prompt too long (maximum {max_length} characters)"
        return True, None

    @staticmethod
    def validate_temperature(temperature: float) -> tuple[bool, Optional[str]]:
        """Validate a sampling temperature (must be a number in [0, 2])."""
        # bool is a subclass of int, so exclude it explicitly -- otherwise
        # True/False would silently validate as temperatures 1 and 0.
        if isinstance(temperature, bool) or not isinstance(temperature, (int, float)):
            return False, "Temperature must be a number"
        if temperature < 0 or temperature > 2:
            return False, "Temperature must be between 0 and 2"
        return True, None

    @staticmethod
    def validate_url(url: str) -> tuple[bool, Optional[str]]:
        """Validate a URL: requires both a scheme and a network location."""
        try:
            result = urlparse(url)
            if all([result.scheme, result.netloc]):
                return True, None
            else:
                return False, "Invalid URL format"
        except Exception as e:
            # urlparse can raise ValueError on malformed input (e.g. bad port).
            return False, str(e)

    @staticmethod
    def sanitize_prompt(prompt: str) -> str:
        """Remove potentially harmful patterns from prompt.

        NOTE: the whitespace pass also collapses newlines/tabs into single
        spaces, so multi-line structure is intentionally flattened.
        """
        # Remove control characters (below ASCII 32), keeping newline/tab.
        sanitized = "".join(char for char in prompt if ord(char) >= 32 or char in '\n\t')
        # Collapse any whitespace run to a single space.
        sanitized = re.sub(r'\s+', ' ', sanitized).strip()
        return sanitized
# Usage
validator = InputValidator()

sample_prompt = "What is machine learning?"
ok, err = validator.validate_prompt(sample_prompt)
print(f"Valid: {ok}, Error: {err}")

sample_temperature = 0.7
ok, err = validator.validate_temperature(sample_temperature)
print(f"Valid: {ok}, Error: {err}")

dirty_prompt = "What is AI? "
print(f"Cleaned: '{validator.sanitize_prompt(dirty_prompt)}'")
Output Parsing with Confidence
LLMs sometimes make mistakes. Parse outputs defensively:
import json
from typing import Optional
from openai import OpenAI
class OutputParser:
    """Parse and validate LLM outputs defensively."""

    @staticmethod
    def parse_json(response_text: str) -> tuple[dict, bool, Optional[str]]:
        """Safely parse a JSON object from an LLM response.

        Tries, in order: direct parsing, stripping markdown code fences
        (```json or bare ```), and extracting the first {...} span from
        surrounding prose.

        Returns:
            (data, True, None) on success, ({}, False, reason) on failure.
            Only JSON objects are accepted -- arrays and scalars are
            rejected so the declared dict return type actually holds.
        """
        def _try_object(candidate: str) -> Optional[dict]:
            """Parse candidate; return the dict, or None if invalid/non-object."""
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                return None
            return parsed if isinstance(parsed, dict) else None

        # 1) Direct parsing.
        data = _try_object(response_text)
        if data is not None:
            return data, True, None

        # 2) Strip markdown code fences (```json ... ``` or ``` ... ```).
        stripped = response_text.strip()
        if stripped.startswith("```"):
            if stripped.startswith("```json"):
                inner = stripped.removeprefix("```json")
            else:
                inner = stripped.removeprefix("```")
            inner = inner.rsplit("```", 1)[0]  # Drop the trailing fence.
            data = _try_object(inner)
            if data is not None:
                return data, True, None

        # 3) Find an embedded JSON object inside free text.
        import re
        matches = re.findall(r'\{.*\}', response_text, re.DOTALL)
        if matches:
            data = _try_object(matches[0])
            if data is not None:
                return data, True, None

        return {}, False, "Could not parse JSON from response"

    @staticmethod
    def validate_schema(data: dict, schema: dict) -> tuple[bool, Optional[str]]:
        """Validate that data contains every schema key with the right type.

        JSON has no int/float distinction, so an int value is accepted where
        the schema expects float (bool, a subclass of int, is still rejected).
        """
        for key, expected_type in schema.items():
            if key not in data:
                return False, f"Missing required key: {key}"
            value = data[key]
            if expected_type is float and isinstance(value, int) and not isinstance(value, bool):
                continue  # A JSON integer is a valid float value.
            if not isinstance(value, expected_type):
                return False, f"Key '{key}' has wrong type: expected {expected_type.__name__}, got {type(value).__name__}"
        return True, None

    @staticmethod
    def extract_list(response_text: str, separator: str = "\n") -> list[str]:
        """Extract a list of items, dropping blanks and leading numbering."""
        lines = response_text.split(separator)
        cleaned = [line.strip() for line in lines if line.strip()]
        # Remove numbering like "1. Item" -> "Item"
        cleaned = [re.sub(r'^\d+\.\s+', '', item) for item in cleaned]
        return cleaned
# Usage
parser = OutputParser()

# Parse JSON wrapped in a markdown fence
data, success, error = parser.parse_json('```json\n{"name": "Alice", "age": 30}\n```')
print(f"Parsed: {data}, Success: {success}")

# Validate against an expected schema
valid, error = parser.validate_schema(data, {"name": str, "age": int})
print(f"Schema valid: {valid}, Error: {error}")

# Extract a numbered list
items = parser.extract_list("1. Item A\n2. Item B\n3. Item C")
print(f"Items: {items}")
Structured Output Enforcement
Use response format to force the structure you need:
from openai import OpenAI
import json
def extract_with_guaranteed_structure(text: str, model: str = "gpt-4-turbo") -> dict:
    """Extract sentiment data from text with a guaranteed JSON structure.

    Uses the API's JSON response format so the model must emit valid JSON.

    Args:
        text: The text to analyze.
        model: Chat model to use (default preserves the original behavior).

    Returns:
        A dict with sentiment/confidence/summary keys on success, or
        {"error": ...} if the response could not be parsed.
    """
    client = OpenAI()
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": """Extract information from the text. Return ONLY valid JSON
in this exact format:
{"sentiment": "positive|negative|neutral", "confidence": 0.0-1.0, "summary": "text"}"""
            },
            {"role": "user", "content": text}
        ],
        response_format={"type": "json_object"}  # Enforce JSON output
    )
    try:
        return json.loads(response.choices[0].message.content)
    except json.JSONDecodeError as e:
        print(f"Failed to parse: {e}")
        return {"error": "Could not parse response"}
# Usage
sample_review = "I love this product! It works great."
print(json.dumps(extract_with_guaranteed_structure(sample_review), indent=2))
Guardrails and Output Filtering
Don’t trust everything the model outputs. Add guardrails:
class OutputGuardrails:
    """Enforce safety constraints on LLM outputs."""

    # Keyword patterns that must never appear in an output (case-insensitive).
    FORBIDDEN_PATTERNS = [
        r'credit card',
        r'password',
        r'api.?key',
        r'ssn',
        r'social.?security'
    ]

    # (regex, label) pairs used for PII detection and redaction.
    PII_PATTERNS = [
        (r'\b\d{3}-\d{2}-\d{4}\b', 'SSN'),  # SSN
        # 16-digit card numbers, optionally grouped by dashes or spaces.
        # The original \b\d{16}\b missed formatted numbers such as
        # "1234-5678-9012-3456".
        (r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b', 'Credit card'),
        (r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', 'Email'),  # Email
    ]

    @staticmethod
    def check_forbidden_content(text: str) -> tuple[bool, Optional[str]]:
        """Check if text contains forbidden patterns.

        Returns:
            (True, None) when clean, otherwise (False, reason).
        """
        for pattern in OutputGuardrails.FORBIDDEN_PATTERNS:
            if re.search(pattern, text, re.IGNORECASE):
                return False, f"Output contains forbidden content: {pattern}"
        return True, None

    @staticmethod
    def detect_pii(text: str) -> list[str]:
        """Return the labels of every PII type found in text."""
        found_pii = []
        for pattern, pii_type in OutputGuardrails.PII_PATTERNS:
            if re.search(pattern, text):
                found_pii.append(pii_type)
        return found_pii

    @staticmethod
    def redact_pii(text: str) -> str:
        """Replace every PII match in text with '[REDACTED]'."""
        for pattern, _ in OutputGuardrails.PII_PATTERNS:
            text = re.sub(pattern, '[REDACTED]', text)
        return text

    @staticmethod
    def enforce_length_limit(text: str, max_length: int = 1000) -> str:
        """Truncate text to max_length characters, appending '...' if cut."""
        if len(text) > max_length:
            return text[:max_length] + "..."
        return text
# Usage
guardrails = OutputGuardrails()
response = "Here's a credit card number: 1234-5678-9012-3456"

safe, error = guardrails.check_forbidden_content(response)
print(f"Safe: {safe}, Error: {error}")

detected = guardrails.detect_pii(response)
print(f"PII detected: {detected}")

print(f"Redacted: {guardrails.redact_pii(response)}")
Retry with Different Prompts
If the first attempt fails, try again with a different approach:
from openai import OpenAI
class SmartRetry:
    """Retry with different prompts on failure."""

    # Prompt formulations, attempted in insertion order.
    PROMPTS = {
        "direct": "Extract {field} from this text: {text}",
        "detailed": "Carefully analyze this text and extract the {field}. Explain your reasoning.",
        "structured": "Extract {field}. Return ONLY the value, nothing else.",
        "json": 'Extract {field}. Return as JSON: {{"value": ...}}'
    }

    @staticmethod
    def extract_field_with_retry(text: str, field: str) -> Optional[str]:
        """Try each prompt strategy in turn; return the first non-empty answer."""
        client = OpenAI()
        for strategy, template in SmartRetry.PROMPTS.items():
            try:
                completion = client.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": template.format(field=field, text=text)}],
                    temperature=0
                )
                answer = completion.choices[0].message.content.strip()
                # Accept only a non-empty response.
                if answer:
                    print(f"Success with '{strategy}' strategy")
                    return answer
            except Exception as e:
                print(f"Strategy '{strategy}' failed: {e}")
                continue
        return None
# Usage
product_text = "The product costs $49.99 and comes in blue or red."
extracted = SmartRetry.extract_field_with_retry(product_text, "price")
print(f"Extracted price: {extracted}")
Pipelining It All Together
Combine all these techniques into a reliable pipeline:
class ReliableLLMPipeline:
    """End-to-end reliable LLM pipeline: validate -> call -> guard -> parse."""

    def __init__(self):
        self.validator = InputValidator()
        self.parser = OutputParser()
        self.guardrails = OutputGuardrails()
        self.client = OpenAI()

    def process(self, prompt: str, expected_schema: Optional[dict] = None) -> dict:
        """Process a prompt through the full pipeline.

        Args:
            prompt: The user prompt to send.
            expected_schema: Optional {key: type} mapping; when given, the
                model is asked for JSON output and the result is parsed and
                schema-validated.

        Returns:
            {"success": True, "data": ...} on success, or
            {"success": False, "error": ...} describing the failing stage.
        """
        # Step 1: Validate input
        valid, error = self.validator.validate_prompt(prompt)
        if not valid:
            return {"success": False, "error": f"Invalid input: {error}"}
        # Sanitize input
        prompt = self.validator.sanitize_prompt(prompt)

        # Step 2: Call API. Only include response_format when JSON is
        # actually required: the SDK's default for this parameter is an
        # "omitted" sentinel, and passing response_format=None explicitly
        # is not equivalent to leaving it out.
        request_kwargs = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": prompt}],
        }
        if expected_schema:
            request_kwargs["response_format"] = {"type": "json_object"}
        try:
            response = self.client.chat.completions.create(**request_kwargs)
            raw_output = response.choices[0].message.content
        except Exception as e:
            return {"success": False, "error": f"API call failed: {e}"}

        # Step 3: Check guardrails
        safe, error = self.guardrails.check_forbidden_content(raw_output)
        if not safe:
            return {"success": False, "error": error}

        # Step 4: Parse output (only when a schema was requested)
        if not expected_schema:
            return {"success": True, "data": raw_output}

        data, success, error = self.parser.parse_json(raw_output)
        if not success:
            return {"success": False, "error": error}
        # Validate schema
        valid, error = self.parser.validate_schema(data, expected_schema)
        if not valid:
            return {"success": False, "error": error}
        return {"success": True, "data": data}
# Usage
pipeline = ReliableLLMPipeline()
result = pipeline.process(
    'What is AI? Return JSON.',
    expected_schema={"answer": str, "confidence": float}
)
print(result)
Key Takeaway
Reliable pipelines validate inputs before processing, parse outputs defensively, enforce guardrails to catch harmful content, and retry intelligently when things go wrong. Build defensive systems that work even when components fail or behave unexpectedly.
Exercises
- Input validation: Create validators for various input types (URLs, dates, numbers). Test edge cases.
- Output parsing: Build a parser that handles JSON, CSV, and plain text outputs from LLMs.
- Guardrails: Implement guardrails that detect and redact PII, forbidden patterns, and length violations.
- Retry strategies: Build a retry function that uses different prompt formulations on failure.
- Full pipeline: Integrate validation, parsing, guardrails, and retry into a complete pipeline. Test failure scenarios.