Skip to content

Error Handling

ActiveAgent provides two complementary layers of error handling for building resilient agents:

  1. Retries - Automatically retry transient network failures
  2. Rescue Handlers - Application-level error recovery with agent context

Retries

ActiveAgent automatically retries network failures 3 times with exponential backoff. Configure globally or per-agent:

ruby
# Example agent that sets its own retry limit in the provider configuration.
class RobustAgent < ApplicationAgent
  # Per-agent provider settings: OpenAI "gpt-4o" with max_retries set to 5.
  generate_with(:openai, model: "gpt-4o", max_retries: 5)

  # Builds the analysis prompt for the given content.
  def analyze(content)
    prompt("Analyze this content: #{content}")
  end
end

See Retries for custom retry strategies, conditional logic, and monitoring.

Rescue Handlers

Use rescue_from for application-level error recovery with full agent context. Handle different error types with specific strategies:

ruby
# Agent that logs and reports failures, returning an error hash from its handlers.
class MonitoredAgent < ApplicationAgent
  # rescue_from (ActiveSupport::Rescuable) checks handlers from the last
  # declared to the first, and Timeout::Error descends from StandardError.
  # Declare the catch-all first so the timeout-specific handler below takes
  # precedence; in the opposite order handle_timeout would never run.
  rescue_from StandardError, with: :handle_error
  rescue_from Timeout::Error, with: :handle_timeout

  # Builds the analysis prompt for the given data.
  def analyze(data)
    prompt "Analyze: #{data}"
  end

  private

  # Logs the timeout, notifies ErrorNotifier with the agent class name and
  # params, and returns an error hash that includes a retry_after value.
  def handle_timeout(exception)
    Rails.logger.error("Timeout: #{exception.message}")
    ErrorNotifier.notify(exception, agent: self.class.name, params:)
    { error: "Processing timed out", retry_after: 60 }
  end

  # Catch-all handler: logs the exception class and message, reports it to
  # Sentry, and returns a generic error hash.
  def handle_error(exception)
    Rails.logger.error("Error: #{exception.class} - #{exception.message}")
    Sentry.capture_exception(exception)
    { error: "Request failed" }
  end
end

Combining Strategies

Combine retries with rescue handlers for comprehensive error handling:

ruby
# Example agent that pairs a provider retry limit with a timeout rescue handler.
class ProductionAgent < ApplicationAgent
  # Provider settings: OpenAI "gpt-4o" with max_retries set to 3.
  generate_with(:openai, model: "gpt-4o", max_retries: 3)

  # Route Timeout::Error to the private handler below.
  rescue_from(Timeout::Error, with: :handle_timeout)

  # Builds the analysis prompt for the given content.
  def analyze(content)
    prompt("Analyze content: #{content}")
  end

  private

  # Returns a fixed error hash; the exception itself is not inspected.
  def handle_timeout(_exception)
    { error: "Timeout" }
  end
end

Execution flow:

  1. Retries run first for transient network failures
  2. Rescue handlers catch exceptions after retries are exhausted

Monitoring

Monitor errors using ActiveSupport::Notifications:

ruby
# config/initializers/active_agent.rb
# Increment an error counter for every "generate.active_agent" event whose
# payload has an :error entry, tagged with the agent and the error class name.
ActiveSupport::Notifications.subscribe("generate.active_agent") do |_name, _start, _finish, _id, payload|
  # Events without an error need no metric.
  next unless payload[:error]

  ErrorMetrics.increment(
    "active_agent.errors",
    tags: [ "agent:#{payload[:agent]}", "error:#{payload[:error].class.name}" ]
  )
end

See Instrumentation for complete monitoring documentation.

Patterns

Fast Failure for Real-Time

Disable retries and provide immediate fallback for user-facing features:

ruby
# Example agent for user-facing chat: max_retries is 0 and any StandardError
# is turned into a fallback error hash.
class RealtimeChatAgent < ApplicationAgent
  # Provider settings: Anthropic "claude-3-5-sonnet-20241022" with max_retries set to 0.
  generate_with(:anthropic, model: "claude-3-5-sonnet-20241022", max_retries: 0)

  # Route every StandardError to the private handler below.
  rescue_from(StandardError, with: :handle_error)

  # Uses the incoming message as the prompt.
  def chat(message)
    prompt(message)
  end

  private

  # Returns a fixed error hash; the exception itself is not inspected.
  def handle_error(_exception)
    { error: "Service unavailable" }
  end
end

Background Job Integration

Let the job framework handle retries:

ruby
# Job that leaves retrying to the job-level policy declared below instead of
# the agent's own network retries.
class ProcessingJob < ApplicationJob
  retry_on Timeout::Error, attempts: 5, wait: 30.seconds
  discard_on SomeUnrecoverableError

  # Calls AsyncAgent#process for the given data with max_retries set to 0.
  def perform(data)
    # Agent-level retries are switched off so retry_on above is the only
    # retry mechanism in play.
    agent = AsyncAgent.with(data: data, max_retries: 0)
    agent.process
  end
end

Graceful Degradation

Provide cached or simplified responses when the primary service fails:

ruby
# Agent that falls back to a cached response, or to a plain error hash, when
# the primary generation raises.
class ResilientAgent < ApplicationAgent
  # Route every StandardError to the private fallback handler below.
  rescue_from StandardError, with: :handle_error

  # Builds the analysis prompt for the given data.
  def analyze(data)
    prompt "Complex analysis of: #{data}"
  end

  private

  # Logs a warning and returns the value cached under
  # "last_successful_response", or an error hash when nothing is cached.
  def handle_error(exception)
    Rails.logger.warn("Primary failed, using fallback")
    # Use read rather than fetch-with-block: fetch stores the block's result on
    # a miss, which would save the error hash under "last_successful_response"
    # and serve it later as if it were a real response.
    Rails.cache.read("last_successful_response") || { error: "Service unavailable" }
  end
end

Testing

Verify error handling in your agent tests:

ruby
require "test_helper"

# Checks that MonitoredAgent turns a timeout into its error payload.
class MonitoredAgentTest < ActiveSupport::TestCase
  test "handles timeout gracefully" do
    agent = MonitoredAgent.new

    # Minitest calls the stub with the arguments passed to the stubbed method,
    # and analyze calls prompt with one argument. The lambda must accept them:
    # a zero-arity lambda would raise ArgumentError instead of Timeout::Error.
    agent.stub :prompt, ->(*) { raise Timeout::Error } do
      result = agent.analyze("test data")

      assert_equal "Processing timed out", result[:error]
      assert_equal 60, result[:retry_after]
    end
  end
end