Structured Output
Structured output allows agents to return responses in a predefined JSON format, ensuring consistent and reliable data extraction. ActiveAgent provides comprehensive support for structured output through JSON schemas and automatic model schema generation.
Overview
Structured output ensures AI responses conform to a specific JSON schema, making it ideal for:
- Data extraction from unstructured text, images, and documents
- API integrations requiring consistent response formats
- Form processing and validation
- Database record creation from natural language
Key Features
Automatic JSON Parsing
When using structured output, responses are automatically:
- Tagged with `content_type: "application/json"`
- Parsed from JSON strings to Ruby hashes
- Validated against the provided schema
Schema Generator
ActiveAgent includes a `SchemaGenerator` module that creates JSON schemas from:
- ActiveRecord models with database columns and validations
- ActiveModel classes with attributes and validations
- Custom Ruby classes with the module included
Quick Start
Using Model Schema Generation
ActiveAgent can automatically generate schemas from your Rails models:
# Generate a strict schema named "user_extraction" from the model - returns a Ruby hash
user_schema = TestUser.to_json_schema(strict: true, name: "user_extraction")
# Inside an agent action, pass that hash straight to prompt:
# prompt(output_schema: user_schema)
Basic Structured Output Example
Define a schema and use it with the `output_schema` parameter:
# frozen_string_literal: true
require "test_helper"
require "active_agent/schema_generator"
# Integration tests for structured output: content_type tagging, automatic
# JSON parsing of message content, and schemas generated from model classes.
# Provider calls are wrapped in VCR cassettes.
class StructuredOutputJsonParsingTest < ActiveSupport::TestCase
  # Agent under test. Every action forwards params[:output_schema] to prompt.
  class DataExtractionAgent < ApplicationAgent
    generate_with :openai

    # Uses params[:message] when given, otherwise a built-in sample sentence.
    def extract_user_data
      prompt(
        message: params[:message] || "Extract the following user data from this text: John Doe is 30 years old and his email is john@example.com",
        output_schema: params[:output_schema]
      )
    end

    # Fixed-message action used by the ActiveModel schema test.
    def extract_with_model_schema
      prompt(
        message: "Extract user information from: Jane Smith, age 25, contact: jane.smith@email.com",
        output_schema: params[:output_schema]
      )
    end

    # Fixed-message action used by the ActiveRecord schema test.
    def extract_with_active_record_schema
      prompt(
        message: "Extract user data from: Alice Johnson, 28 years old, email: alice@example.com, bio: Software engineer",
        output_schema: params[:output_schema]
      )
    end

    # No after_generation callback is registered here; the tests focus on the core functionality.
  end

  test "structured output sets content_type to application/json and auto-parses JSON" do
    VCR.use_cassette("structured_output_json_parsing") do
      # Anonymous ActiveModel class that mixes in the schema generator.
      test_user_model = Class.new do
        include ActiveModel::Model
        include ActiveModel::Attributes
        include ActiveModel::Validations
        include ActiveAgent::SchemaGenerator

        attribute :name, :string
        attribute :age, :integer
        attribute :email, :string

        validates :name, presence: true
        validates :age, presence: true, numericality: { greater_than: 0 }
        validates :email, presence: true, format: { with: URI::MailTo::EMAIL_REGEXP }
      end

      # Generate schema from the model using the schema generator.
      schema = test_user_model.to_json_schema(strict: true, name: "user_data")

      # Generate with structured output using the .with pattern.
      response = DataExtractionAgent.with(output_schema: schema).extract_user_data.generate_now

      # content_type is set to application/json.
      assert_equal "application/json", response.message.content_type

      # Content is automatically parsed from JSON into a Hash.
      assert_kind_of Hash, response.message.content
      assert_includes response.message.content, "name"
      assert_includes response.message.content, "age"

      # Raw content is still available as a String.
      assert_kind_of String, response.message.raw_content

      doc_example_output(response)
    end
  end

  test "integration with ActiveModel schema generator for structured output" do
    VCR.use_cassette("structured_output_with_model_schema") do
      # Anonymous ActiveModel class without validations.
      test_model = Class.new do
        include ActiveModel::Model
        include ActiveModel::Attributes
        include ActiveAgent::SchemaGenerator

        attribute :name, :string
        attribute :age, :integer
        attribute :email, :string
      end

      # Generate schema from ActiveModel.
      schema = test_model.to_json_schema(strict: true, name: "user_data")

      # Generate response using the model-generated schema.
      response = DataExtractionAgent.with(output_schema: schema).extract_with_model_schema.generate_now

      assert_equal "application/json", response.message.content_type

      # JSON was automatically parsed.
      assert_kind_of Hash, response.message.content
      assert_includes response.message.content, "name"
      assert_includes response.message.content, "age"
      assert_includes response.message.content, "email"

      # Values match the fixed prompt text.
      assert_equal "Jane Smith", response.message.content["name"]
      assert_equal 25, response.message.content["age"]
      assert_includes response.message.content["email"], "@"

      doc_example_output(response)
    end
  end

  test "integration with ActiveRecord schema generator for structured output" do
    VCR.use_cassette("structured_output_with_active_record_schema") do
      # Use the existing User model from test/dummy.
      require_relative "../dummy/app/models/user"

      # Generate schema from the ActiveRecord model.
      schema = User.to_json_schema(strict: true, name: "user_data")

      # Generate response using the ActiveRecord-generated schema.
      response = DataExtractionAgent.with(output_schema: schema).extract_with_active_record_schema.generate_now

      assert_equal "application/json", response.message.content_type

      # JSON was automatically parsed.
      assert_kind_of Hash, response.message.content
      assert_includes response.message.content, "name"
      assert_includes response.message.content, "email"
      assert_includes response.message.content, "age"

      # Only types and shape are checked here, not exact values.
      assert_kind_of String, response.message.content["name"]
      assert_kind_of Integer, response.message.content["age"]
      assert_includes response.message.content["email"], "@"

      doc_example_output(response)
    end
  end

  test "without structured output uses text/plain content_type" do
    VCR.use_cassette("plain_text_response") do
      # Generate without structured output (no output_schema).
      response = DataExtractionAgent.with(message: "What is the capital of France?").prompt_context.generate_now

      assert_equal "text/plain", response.message.content_type

      # Content is left as a String rather than parsed as JSON.
      assert_kind_of String, response.message.content
      assert_includes response.message.content.downcase, "paris"

      doc_example_output(response)
    end
  end

  test "handles invalid JSON gracefully" do
    # Covers a provider returning invalid JSON while the message is still
    # tagged with the application/json content_type.
    message = ActiveAgent::ActionPrompt::Message.new(
      content: "{invalid json}",
      content_type: "application/json",
      role: :assistant
    )

    # Parsing failed, so content is the raw string.
    assert_equal "{invalid json}", message.content
    assert_equal "{invalid json}", message.raw_content
  end
end
The response will automatically have:
- `content_type` set to `"application/json"`
- `content` parsed as a Ruby Hash
- `raw_content` available as the original JSON string
Schema Generation
From ActiveModel
Create schemas from ActiveModel classes with validations:
# Example ActiveModel class that mixes in ActiveAgent::SchemaGenerator so a
# JSON schema can be generated from its attributes and validations.
class TestUser
  include ActiveModel::Model
  include ActiveModel::Attributes
  include ActiveModel::Validations
  include ActiveAgent::SchemaGenerator

  # Typed attributes.
  attribute :name, :string
  attribute :email, :string
  attribute :age, :integer
  attribute :active, :boolean

  # Validation rules; :active has none.
  validates :name, presence: true, length: { minimum: 2, maximum: 100 }
  validates :email, presence: true, format: { with: URI::MailTo::EMAIL_REGEXP }
  validates :age, numericality: { greater_than_or_equal_to: 18 }
end
Generate the schema:
# Default call: no strict, name, or exclude options are passed.
schema = TestUser.to_json_schema
From ActiveRecord
Generate schemas from database-backed models:
# Same call, made on the database-backed User model.
schema = User.to_json_schema
Strict Schemas
For providers requiring strict schemas (like OpenAI):
# Request strict mode and give the schema a name.
schema = TestBlogPost.to_json_schema(strict: true, name: "blog_post_schema")
In strict mode:
- All properties are marked as required
- `additionalProperties` is set to `false`
- The schema is wrapped with name and strict flags
Excluding Fields
Exclude sensitive or unnecessary fields from schemas:
# Leave the :tags and :published_at fields out of the generated schema.
schema = TestBlogPost.to_json_schema(exclude: [ :tags, :published_at ])
JSON Response Handling
Automatic Parsing
With structured output, responses are automatically parsed:
# Without structured output: content stays a plain String
response = agent.prompt(message: "Hello").generate_now
response.message.content # => "Hello! How can I help?"
response.message.content_type # => "text/plain"
# With structured output: content is a parsed Hash and the original JSON
# string remains available through raw_content
response = agent.prompt(
  message: "Extract user data",
  output_schema: schema
).generate_now
response.message.content # => { "name" => "John", "age" => 30 }
response.message.content_type # => "application/json"
response.message.raw_content # => '{"name":"John","age":30}'
Error Handling
Handle JSON parsing errors gracefully:
# frozen_string_literal: true
require "test_helper"
require "active_agent/schema_generator"
# Integration tests for structured output: content_type tagging, automatic
# JSON parsing of message content, and schemas generated from model classes.
# Provider calls are wrapped in VCR cassettes.
class StructuredOutputJsonParsingTest < ActiveSupport::TestCase
  # Agent under test. Every action forwards params[:output_schema] to prompt.
  class DataExtractionAgent < ApplicationAgent
    generate_with :openai

    # Uses params[:message] when given, otherwise a built-in sample sentence.
    def extract_user_data
      prompt(
        message: params[:message] || "Extract the following user data from this text: John Doe is 30 years old and his email is john@example.com",
        output_schema: params[:output_schema]
      )
    end

    # Fixed-message action used by the ActiveModel schema test.
    def extract_with_model_schema
      prompt(
        message: "Extract user information from: Jane Smith, age 25, contact: jane.smith@email.com",
        output_schema: params[:output_schema]
      )
    end

    # Fixed-message action used by the ActiveRecord schema test.
    def extract_with_active_record_schema
      prompt(
        message: "Extract user data from: Alice Johnson, 28 years old, email: alice@example.com, bio: Software engineer",
        output_schema: params[:output_schema]
      )
    end

    # No after_generation callback is registered here; the tests focus on the core functionality.
  end

  test "structured output sets content_type to application/json and auto-parses JSON" do
    VCR.use_cassette("structured_output_json_parsing") do
      # Anonymous ActiveModel class that mixes in the schema generator.
      test_user_model = Class.new do
        include ActiveModel::Model
        include ActiveModel::Attributes
        include ActiveModel::Validations
        include ActiveAgent::SchemaGenerator

        attribute :name, :string
        attribute :age, :integer
        attribute :email, :string

        validates :name, presence: true
        validates :age, presence: true, numericality: { greater_than: 0 }
        validates :email, presence: true, format: { with: URI::MailTo::EMAIL_REGEXP }
      end

      # Generate schema from the model using the schema generator.
      schema = test_user_model.to_json_schema(strict: true, name: "user_data")

      # Generate with structured output using the .with pattern.
      response = DataExtractionAgent.with(output_schema: schema).extract_user_data.generate_now

      # content_type is set to application/json.
      assert_equal "application/json", response.message.content_type

      # Content is automatically parsed from JSON into a Hash.
      assert_kind_of Hash, response.message.content
      assert_includes response.message.content, "name"
      assert_includes response.message.content, "age"

      # Raw content is still available as a String.
      assert_kind_of String, response.message.raw_content

      doc_example_output(response)
    end
  end

  test "integration with ActiveModel schema generator for structured output" do
    VCR.use_cassette("structured_output_with_model_schema") do
      # Anonymous ActiveModel class without validations.
      test_model = Class.new do
        include ActiveModel::Model
        include ActiveModel::Attributes
        include ActiveAgent::SchemaGenerator

        attribute :name, :string
        attribute :age, :integer
        attribute :email, :string
      end

      # Generate schema from ActiveModel.
      schema = test_model.to_json_schema(strict: true, name: "user_data")

      # Generate response using the model-generated schema.
      response = DataExtractionAgent.with(output_schema: schema).extract_with_model_schema.generate_now

      assert_equal "application/json", response.message.content_type

      # JSON was automatically parsed.
      assert_kind_of Hash, response.message.content
      assert_includes response.message.content, "name"
      assert_includes response.message.content, "age"
      assert_includes response.message.content, "email"

      # Values match the fixed prompt text.
      assert_equal "Jane Smith", response.message.content["name"]
      assert_equal 25, response.message.content["age"]
      assert_includes response.message.content["email"], "@"

      doc_example_output(response)
    end
  end

  test "integration with ActiveRecord schema generator for structured output" do
    VCR.use_cassette("structured_output_with_active_record_schema") do
      # Use the existing User model from test/dummy.
      require_relative "../dummy/app/models/user"

      # Generate schema from the ActiveRecord model.
      schema = User.to_json_schema(strict: true, name: "user_data")

      # Generate response using the ActiveRecord-generated schema.
      response = DataExtractionAgent.with(output_schema: schema).extract_with_active_record_schema.generate_now

      assert_equal "application/json", response.message.content_type

      # JSON was automatically parsed.
      assert_kind_of Hash, response.message.content
      assert_includes response.message.content, "name"
      assert_includes response.message.content, "email"
      assert_includes response.message.content, "age"

      # Only types and shape are checked here, not exact values.
      assert_kind_of String, response.message.content["name"]
      assert_kind_of Integer, response.message.content["age"]
      assert_includes response.message.content["email"], "@"

      doc_example_output(response)
    end
  end

  test "without structured output uses text/plain content_type" do
    VCR.use_cassette("plain_text_response") do
      # Generate without structured output (no output_schema).
      response = DataExtractionAgent.with(message: "What is the capital of France?").prompt_context.generate_now

      assert_equal "text/plain", response.message.content_type

      # Content is left as a String rather than parsed as JSON.
      assert_kind_of String, response.message.content
      assert_includes response.message.content.downcase, "paris"

      doc_example_output(response)
    end
  end

  test "handles invalid JSON gracefully" do
    # Covers a provider returning invalid JSON while the message is still
    # tagged with the application/json content_type.
    message = ActiveAgent::ActionPrompt::Message.new(
      content: "{invalid json}",
      content_type: "application/json",
      role: :assistant
    )

    # Parsing failed, so content is the raw string.
    assert_equal "{invalid json}", message.content
    assert_equal "{invalid json}", message.raw_content
  end
end
Provider Support
Different AI providers have varying levels of structured output support:
- OpenAI - Native JSON mode with strict schema validation
- OpenRouter - Support through compatible models, ideal for multimodal tasks
- Anthropic - Instruction-based JSON generation
- Ollama - Local model support with JSON mode
Real-World Examples
Data Extraction Agent
The Data Extraction Agent demonstrates comprehensive structured output usage:
# Pass a schema identifier (:chart_schema) and an image path as params, then
# call the parse_content action.
# NOTE(review): sales_chart_path is not defined in this snippet — supply the chart file's path.
prompt = DataExtractionAgent.with(
  output_schema: :chart_schema,
  image_path: sales_chart_path
).parse_content
Integration with Rails Models
Use your existing Rails models for schema generation:
# frozen_string_literal: true
require "test_helper"
require "active_agent/schema_generator"
# Integration tests for structured output: content_type tagging, automatic
# JSON parsing of message content, and schemas generated from model classes.
# Provider calls are wrapped in VCR cassettes.
class StructuredOutputJsonParsingTest < ActiveSupport::TestCase
  # Agent under test. Every action forwards params[:output_schema] to prompt.
  class DataExtractionAgent < ApplicationAgent
    generate_with :openai

    # Uses params[:message] when given, otherwise a built-in sample sentence.
    def extract_user_data
      prompt(
        message: params[:message] || "Extract the following user data from this text: John Doe is 30 years old and his email is john@example.com",
        output_schema: params[:output_schema]
      )
    end

    # Fixed-message action used by the ActiveModel schema test.
    def extract_with_model_schema
      prompt(
        message: "Extract user information from: Jane Smith, age 25, contact: jane.smith@email.com",
        output_schema: params[:output_schema]
      )
    end

    # Fixed-message action used by the ActiveRecord schema test.
    def extract_with_active_record_schema
      prompt(
        message: "Extract user data from: Alice Johnson, 28 years old, email: alice@example.com, bio: Software engineer",
        output_schema: params[:output_schema]
      )
    end

    # No after_generation callback is registered here; the tests focus on the core functionality.
  end

  test "structured output sets content_type to application/json and auto-parses JSON" do
    VCR.use_cassette("structured_output_json_parsing") do
      # Anonymous ActiveModel class that mixes in the schema generator.
      test_user_model = Class.new do
        include ActiveModel::Model
        include ActiveModel::Attributes
        include ActiveModel::Validations
        include ActiveAgent::SchemaGenerator

        attribute :name, :string
        attribute :age, :integer
        attribute :email, :string

        validates :name, presence: true
        validates :age, presence: true, numericality: { greater_than: 0 }
        validates :email, presence: true, format: { with: URI::MailTo::EMAIL_REGEXP }
      end

      # Generate schema from the model using the schema generator.
      schema = test_user_model.to_json_schema(strict: true, name: "user_data")

      # Generate with structured output using the .with pattern.
      response = DataExtractionAgent.with(output_schema: schema).extract_user_data.generate_now

      # content_type is set to application/json.
      assert_equal "application/json", response.message.content_type

      # Content is automatically parsed from JSON into a Hash.
      assert_kind_of Hash, response.message.content
      assert_includes response.message.content, "name"
      assert_includes response.message.content, "age"

      # Raw content is still available as a String.
      assert_kind_of String, response.message.raw_content

      doc_example_output(response)
    end
  end

  test "integration with ActiveModel schema generator for structured output" do
    VCR.use_cassette("structured_output_with_model_schema") do
      # Anonymous ActiveModel class without validations.
      test_model = Class.new do
        include ActiveModel::Model
        include ActiveModel::Attributes
        include ActiveAgent::SchemaGenerator

        attribute :name, :string
        attribute :age, :integer
        attribute :email, :string
      end

      # Generate schema from ActiveModel.
      schema = test_model.to_json_schema(strict: true, name: "user_data")

      # Generate response using the model-generated schema.
      response = DataExtractionAgent.with(output_schema: schema).extract_with_model_schema.generate_now

      assert_equal "application/json", response.message.content_type

      # JSON was automatically parsed.
      assert_kind_of Hash, response.message.content
      assert_includes response.message.content, "name"
      assert_includes response.message.content, "age"
      assert_includes response.message.content, "email"

      # Values match the fixed prompt text.
      assert_equal "Jane Smith", response.message.content["name"]
      assert_equal 25, response.message.content["age"]
      assert_includes response.message.content["email"], "@"

      doc_example_output(response)
    end
  end

  test "integration with ActiveRecord schema generator for structured output" do
    VCR.use_cassette("structured_output_with_active_record_schema") do
      # Use the existing User model from test/dummy.
      require_relative "../dummy/app/models/user"

      # Generate schema from the ActiveRecord model.
      schema = User.to_json_schema(strict: true, name: "user_data")

      # Generate response using the ActiveRecord-generated schema.
      response = DataExtractionAgent.with(output_schema: schema).extract_with_active_record_schema.generate_now

      assert_equal "application/json", response.message.content_type

      # JSON was automatically parsed.
      assert_kind_of Hash, response.message.content
      assert_includes response.message.content, "name"
      assert_includes response.message.content, "email"
      assert_includes response.message.content, "age"

      # Only types and shape are checked here, not exact values.
      assert_kind_of String, response.message.content["name"]
      assert_kind_of Integer, response.message.content["age"]
      assert_includes response.message.content["email"], "@"

      doc_example_output(response)
    end
  end

  test "without structured output uses text/plain content_type" do
    VCR.use_cassette("plain_text_response") do
      # Generate without structured output (no output_schema).
      response = DataExtractionAgent.with(message: "What is the capital of France?").prompt_context.generate_now

      assert_equal "text/plain", response.message.content_type

      # Content is left as a String rather than parsed as JSON.
      assert_kind_of String, response.message.content
      assert_includes response.message.content.downcase, "paris"

      doc_example_output(response)
    end
  end

  test "handles invalid JSON gracefully" do
    # Covers a provider returning invalid JSON while the message is still
    # tagged with the application/json content_type.
    message = ActiveAgent::ActionPrompt::Message.new(
      content: "{invalid json}",
      content_type: "application/json",
      role: :assistant
    )

    # Parsing failed, so content is the raw string.
    assert_equal "{invalid json}", message.content
    assert_equal "{invalid json}", message.raw_content
  end
end
Best Practices
1. Use Model Schemas
Leverage ActiveRecord/ActiveModel for single source of truth:
# ActiveRecord model that mixes in the schema generator, so its validations
# live in one place alongside the schema source.
class User < ApplicationRecord
  include ActiveAgent::SchemaGenerator

  validates :email, presence: true, format: { with: URI::MailTo::EMAIL_REGEXP }
  # NOTE(review): greater_than: 18 rejects age 18 itself; the TestUser example
  # uses greater_than_or_equal_to: 18 — confirm which bound is intended.
  validates :age, numericality: { greater_than: 18 }
end
# In your agent: generate a strict schema named "user_data" and hand it to prompt
schema = User.to_json_schema(strict: true, name: "user_data")
prompt(output_schema: schema)
2. Schema Design
- Keep schemas focused and minimal
- Use strict mode for critical data
- Include validation constraints
- Provide clear descriptions for complex fields
3. Testing
Always test structured output with real providers:
# Runs the extraction inside a VCR cassette and checks the parsed payload.
test "extracts data with correct schema" do
  VCR.use_cassette("structured_extraction") do
    response = agent.extract_data.generate_now

    assert_equal "application/json", response.message.content_type
    # assert_kind_of reports the actual class on failure, unlike assert(x.is_a?(Hash)).
    assert_kind_of Hash, response.message.content
    # NOTE(review): assert_valid_schema and expected_schema are not defined in
    # this snippet — supply your own schema assertion helper.
    assert_valid_schema response.message.content, expected_schema
  end
end
Migration Guide
From Manual JSON Parsing
Before:
# Manual approach: ask for JSON in the message text and parse the reply by hand.
response = agent.prompt(message: "Extract data as JSON").generate_now
# The inline rescue replaces any parse failure with an empty hash.
data = JSON.parse(response.message.content) rescue {}
After:
# Structured output: pass a strict schema generated from the model.
response = agent.prompt(
  message: "Extract data",
  output_schema: MyModel.to_json_schema(strict: true)
).generate_now
data = response.message.content # Already parsed!
From Custom Schemas
Before:
# Hand-written schema hash describing an object with two typed properties.
schema = {
  type: "object",
  properties: {
    name: { type: "string" },
    age: { type: "integer" }
  }
}
After:
# Model-backed replacement for the hand-written schema hash: the same two
# fields, declared once as typed attributes.
class ExtractedUser
  include ActiveModel::Model
  # Provides the `attribute` class macro used below; ActiveModel::Model alone
  # does not define it.
  include ActiveModel::Attributes
  include ActiveAgent::SchemaGenerator

  attribute :name, :string
  attribute :age, :integer
end

# Generate the strict schema from the class.
schema = ExtractedUser.to_json_schema(strict: true)
Troubleshooting
Common Issues
Invalid JSON Response
- Ensure provider supports structured output
- Check model compatibility
- Verify schema is valid JSON Schema
Missing Fields
- Use strict mode to require all fields
- Add validation constraints to model
- Check provider documentation for limitations
Type Mismatches
- Ensure schema types match provider capabilities
- Use appropriate type coercion in models
- Test with actual provider responses
See Also
- Data Extraction Agent - Complete extraction examples
- OpenAI Structured Output - OpenAI implementation details
- OpenRouter Structured Output - Multimodal extraction