Building Reliable LLM Pipelines
A pipeline is only as reliable as its weakest link. In this lesson, you’ll learn to validate inputs, parse outputs confidently, enforce guardrails, and handle failures gracefully. By the end, you’ll build systems that work reliably, not just when everything goes right.
Input Validation
Never trust user input. Always validate before sending to an LLM.
from typing import Optional
from urllib.parse import urlparse
import re
class InputValidator:
    """Validate and sanitize inputs before sending them to an LLM."""

    @staticmethod
    def validate_prompt(prompt: str, min_length: int = 1, max_length: int = 10000) -> tuple[bool, Optional[str]]:
        """Validate a prompt.

        Returns:
            (True, None) when valid, otherwise (False, reason).
        """
        # Check type/emptiness first so the len() calls below are always safe.
        if not prompt or not isinstance(prompt, str):
            return False, "Prompt must be a non-empty string"
        if len(prompt) < min_length:
            return False, f"Prompt too short (minimum {min_length} characters)"
        if len(prompt) > max_length:
            return False, f"Prompt too long (maximum {max_length} characters)"
        return True, None

    @staticmethod
    def validate_temperature(temperature: float) -> tuple[bool, Optional[str]]:
        """Validate a sampling temperature (must be a number in [0, 2])."""
        # bool is a subclass of int, so exclude it explicitly -- otherwise
        # True/False would silently validate as temperatures 1 and 0.
        if isinstance(temperature, bool) or not isinstance(temperature, (int, float)):
            return False, "Temperature must be a number"
        if temperature < 0 or temperature > 2:
            return False, "Temperature must be between 0 and 2"
        return True, None

    @staticmethod
    def validate_url(url: str) -> tuple[bool, Optional[str]]:
        """Validate a URL: requires both a scheme and a network location."""
        try:
            result = urlparse(url)
            if all([result.scheme, result.netloc]):
                return True, None
            else:
                return False, "Invalid URL format"
        except Exception as e:
            # urlparse can raise ValueError on malformed input (e.g. bad port).
            return False, str(e)

    @staticmethod
    def sanitize_prompt(prompt: str) -> str:
        """Remove potentially harmful patterns from prompt.

        NOTE: the whitespace pass also collapses newlines/tabs into single
        spaces, so multi-line structure is intentionally flattened.
        """
        # Remove control characters (below ASCII 32), keeping newline/tab.
        sanitized = "".join(char for char in prompt if ord(char) >= 32 or char in '\n\t')
        # Collapse any whitespace run to a single space.
        sanitized = re.sub(r'\s+', ' ', sanitized).strip()
        return sanitized
# Usage
validator = InputValidator()

sample_prompt = "What is machine learning?"
ok, err = validator.validate_prompt(sample_prompt)
print(f"Valid: {ok}, Error: {err}")

sample_temperature = 0.7
ok, err = validator.validate_temperature(sample_temperature)
print(f"Valid: {ok}, Error: {err}")

dirty_prompt = "What is AI? "
print(f"Cleaned: '{validator.sanitize_prompt(dirty_prompt)}'")
Output Parsing with Confidence
LLMs sometimes make mistakes. Parse outputs defensively:
import json
from typing import Optional
from openai import OpenAI
class OutputParser:
    """Parse and validate LLM outputs defensively."""

    @staticmethod
    def parse_json(response_text: str) -> tuple[dict, bool, Optional[str]]:
        """Safely parse a JSON object from an LLM response.

        Tries, in order: direct parsing, stripping markdown code fences
        (```json or bare ```), and extracting the first {...} span from
        surrounding prose.

        Returns:
            (data, True, None) on success, ({}, False, reason) on failure.
            Only JSON objects are accepted -- arrays and scalars are
            rejected so the declared dict return type actually holds.
        """
        def _try_object(candidate: str) -> Optional[dict]:
            """Parse candidate; return the dict, or None if invalid/non-object."""
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                return None
            return parsed if isinstance(parsed, dict) else None

        # 1) Direct parsing.
        data = _try_object(response_text)
        if data is not None:
            return data, True, None

        # 2) Strip markdown code fences (```json ... ``` or ``` ... ```).
        stripped = response_text.strip()
        if stripped.startswith("```"):
            if stripped.startswith("```json"):
                inner = stripped.removeprefix("```json")
            else:
                inner = stripped.removeprefix("```")
            inner = inner.rsplit("```", 1)[0]  # Drop the trailing fence.
            data = _try_object(inner)
            if data is not None:
                return data, True, None

        # 3) Find an embedded JSON object inside free text.
        import re
        matches = re.findall(r'\{.*\}', response_text, re.DOTALL)
        if matches:
            data = _try_object(matches[0])
            if data is not None:
                return data, True, None

        return {}, False, "Could not parse JSON from response"

    @staticmethod
    def validate_schema(data: dict, schema: dict) -> tuple[bool, Optional[str]]:
        """Validate that data contains every schema key with the right type.

        JSON has no int/float distinction, so an int value is accepted where
        the schema expects float (bool, a subclass of int, is still rejected).
        """
        for key, expected_type in schema.items():
            if key not in data:
                return False, f"Missing required key: {key}"
            value = data[key]
            if expected_type is float and isinstance(value, int) and not isinstance(value, bool):
                continue  # A JSON integer is a valid float value.
            if not isinstance(value, expected_type):
                return False, f"Key '{key}' has wrong type: expected {expected_type.__name__}, got {type(value).__name__}"
        return True, None

    @staticmethod
    def extract_list(response_text: str, separator: str = "\n") -> list[str]:
        """Extract a list of items, dropping blanks and leading numbering."""
        lines = response_text.split(separator)
        cleaned = [line.strip() for line in lines if line.strip()]
        # Remove numbering like "1. Item" -> "Item"
        cleaned = [re.sub(r'^\d+\.\s+', '', item) for item in cleaned]
        return cleaned
# Usage
parser = OutputParser()

# Parse JSON wrapped in a markdown fence
data, success, error = parser.parse_json('```json\n{"name": "Alice", "age": 30}\n```')
print(f"Parsed: {data}, Success: {success}")

# Validate against an expected schema
valid, error = parser.validate_schema(data, {"name": str, "age": int})
print(f"Schema valid: {valid}, Error: {error}")

# Extract a numbered list
items = parser.extract_list("1. Item A\n2. Item B\n3. Item C")
print(f"Items: {items}")
Structured Output Enforcement
Use response format to force the structure you need:
from openai import OpenAI
import json
def extract_with_guaranteed_structure(text: str, model: str = "gpt-4-turbo") -> dict:
    """Extract sentiment data from text with a guaranteed JSON structure.

    Uses the API's JSON response format so the model must emit valid JSON.

    Args:
        text: The text to analyze.
        model: Chat model to use (default preserves the original behavior).

    Returns:
        A dict with sentiment/confidence/summary keys on success, or
        {"error": ...} if the response could not be parsed.
    """
    client = OpenAI()
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": """Extract information from the text. Return ONLY valid JSON
in this exact format:
{"sentiment": "positive|negative|neutral", "confidence": 0.0-1.0, "summary": "text"}"""
            },
            {"role": "user", "content": text}
        ],
        response_format={"type": "json_object"}  # Enforce JSON output
    )
    try:
        return json.loads(response.choices[0].message.content)
    except json.JSONDecodeError as e:
        print(f"Failed to parse: {e}")
        return {"error": "Could not parse response"}
# Usage
sample_review = "I love this product! It works great."
print(json.dumps(extract_with_guaranteed_structure(sample_review), indent=2))
Guardrails and Output Filtering
Don’t trust everything the model outputs. Add guardrails:
class OutputGuardrails:
    """Enforce safety constraints on LLM outputs."""

    # Keyword patterns that must never appear in an output (case-insensitive).
    FORBIDDEN_PATTERNS = [
        r'credit card',
        r'password',
        r'api.?key',
        r'ssn',
        r'social.?security'
    ]

    # (regex, label) pairs used for PII detection and redaction.
    PII_PATTERNS = [
        (r'\b\d{3}-\d{2}-\d{4}\b', 'SSN'),  # SSN
        # 16-digit card numbers, optionally grouped by dashes or spaces.
        # The original \b\d{16}\b missed formatted numbers such as
        # "1234-5678-9012-3456".
        (r'\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b', 'Credit card'),
        (r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', 'Email'),  # Email
    ]

    @staticmethod
    def check_forbidden_content(text: str) -> tuple[bool, Optional[str]]:
        """Check if text contains forbidden patterns.

        Returns:
            (True, None) when clean, otherwise (False, reason).
        """
        for pattern in OutputGuardrails.FORBIDDEN_PATTERNS:
            if re.search(pattern, text, re.IGNORECASE):
                return False, f"Output contains forbidden content: {pattern}"
        return True, None

    @staticmethod
    def detect_pii(text: str) -> list[str]:
        """Return the labels of every PII type found in text."""
        found_pii = []
        for pattern, pii_type in OutputGuardrails.PII_PATTERNS:
            if re.search(pattern, text):
                found_pii.append(pii_type)
        return found_pii

    @staticmethod
    def redact_pii(text: str) -> str:
        """Replace every PII match in text with '[REDACTED]'."""
        for pattern, _ in OutputGuardrails.PII_PATTERNS:
            text = re.sub(pattern, '[REDACTED]', text)
        return text

    @staticmethod
    def enforce_length_limit(text: str, max_length: int = 1000) -> str:
        """Truncate text to max_length characters, appending '...' if cut."""
        if len(text) > max_length:
            return text[:max_length] + "..."
        return text
# Usage
guardrails = OutputGuardrails()
response = "Here's a credit card number: 1234-5678-9012-3456"

safe, error = guardrails.check_forbidden_content(response)
print(f"Safe: {safe}, Error: {error}")

detected = guardrails.detect_pii(response)
print(f"PII detected: {detected}")

print(f"Redacted: {guardrails.redact_pii(response)}")
Retry with Different Prompts
If the first attempt fails, try again with a different approach:
from openai import OpenAI
class SmartRetry:
    """Retry with different prompts on failure."""

    # Prompt formulations, attempted in insertion order.
    PROMPTS = {
        "direct": "Extract {field} from this text: {text}",
        "detailed": "Carefully analyze this text and extract the {field}. Explain your reasoning.",
        "structured": "Extract {field}. Return ONLY the value, nothing else.",
        "json": 'Extract {field}. Return as JSON: {{"value": ...}}'
    }

    @staticmethod
    def extract_field_with_retry(text: str, field: str) -> Optional[str]:
        """Try each prompt strategy in turn; return the first non-empty answer."""
        client = OpenAI()
        for strategy, template in SmartRetry.PROMPTS.items():
            try:
                completion = client.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": template.format(field=field, text=text)}],
                    temperature=0
                )
                answer = completion.choices[0].message.content.strip()
                # Accept only a non-empty response.
                if answer:
                    print(f"Success with '{strategy}' strategy")
                    return answer
            except Exception as e:
                print(f"Strategy '{strategy}' failed: {e}")
                continue
        return None
# Usage
product_text = "The product costs $49.99 and comes in blue or red."
extracted = SmartRetry.extract_field_with_retry(product_text, "price")
print(f"Extracted price: {extracted}")
Pipelining It All Together
Combine all these techniques into a reliable pipeline:
class ReliableLLMPipeline:
    """End-to-end reliable LLM pipeline: validate -> call -> guard -> parse."""

    def __init__(self):
        self.validator = InputValidator()
        self.parser = OutputParser()
        self.guardrails = OutputGuardrails()
        self.client = OpenAI()

    def process(self, prompt: str, expected_schema: Optional[dict] = None) -> dict:
        """Process a prompt through the full pipeline.

        Args:
            prompt: The user prompt to send.
            expected_schema: Optional {key: type} mapping; when given, the
                model is asked for JSON output and the result is parsed and
                schema-validated.

        Returns:
            {"success": True, "data": ...} on success, or
            {"success": False, "error": ...} describing the failing stage.
        """
        # Step 1: Validate input
        valid, error = self.validator.validate_prompt(prompt)
        if not valid:
            return {"success": False, "error": f"Invalid input: {error}"}
        # Sanitize input
        prompt = self.validator.sanitize_prompt(prompt)

        # Step 2: Call API. Only include response_format when JSON is
        # actually required: the SDK's default for this parameter is an
        # "omitted" sentinel, and passing response_format=None explicitly
        # is not equivalent to leaving it out.
        request_kwargs = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": prompt}],
        }
        if expected_schema:
            request_kwargs["response_format"] = {"type": "json_object"}
        try:
            response = self.client.chat.completions.create(**request_kwargs)
            raw_output = response.choices[0].message.content
        except Exception as e:
            return {"success": False, "error": f"API call failed: {e}"}

        # Step 3: Check guardrails
        safe, error = self.guardrails.check_forbidden_content(raw_output)
        if not safe:
            return {"success": False, "error": error}

        # Step 4: Parse output (only when a schema was requested)
        if not expected_schema:
            return {"success": True, "data": raw_output}

        data, success, error = self.parser.parse_json(raw_output)
        if not success:
            return {"success": False, "error": error}
        # Validate schema
        valid, error = self.parser.validate_schema(data, expected_schema)
        if not valid:
            return {"success": False, "error": error}
        return {"success": True, "data": data}
# Usage
pipeline = ReliableLLMPipeline()
result = pipeline.process(
    'What is AI? Return JSON.',
    expected_schema={"answer": str, "confidence": float}
)
print(result)
Key Takeaway
Reliable pipelines validate inputs before processing, parse outputs defensively, enforce guardrails to catch harmful content, and retry intelligently when things go wrong. Build defensive systems that work even when components fail or behave unexpectedly.
Exercises
- Input validation: Create validators for various input types (URLs, dates, numbers). Test edge cases.
- Output parsing: Build a parser that handles JSON, CSV, and plain text outputs from LLMs.
- Guardrails: Implement guardrails that detect and redact PII, forbidden patterns, and length violations.
- Retry strategies: Build a retry function that uses different prompt formulations on failure.
- Full pipeline: Integrate validation, parsing, guardrails, and retry into a complete pipeline. Test failure scenarios.