Embeddings

Embeddings are numerical representations of text that capture semantic meaning, enabling similarity searches, clustering, and other vector-based operations. ActiveAgent provides a unified interface for generating embeddings across all supported providers.

Overview

Embeddings transform text into high-dimensional vectors that represent semantic meaning. Similar texts produce similar vectors, enabling powerful features like:

  • Semantic Search - Find related content by meaning, not just keywords
  • Clustering - Group similar documents automatically
  • Classification - Categorize text based on similarity to examples
  • Recommendation - Suggest related content based on embeddings
  • Anomaly Detection - Identify outliers in text data

Basic Usage

Generating Embeddings

Use the embed_now method to generate embeddings synchronously:

ruby
test "generates embeddings synchronously with embed_now" do
  VCR.use_cassette("embedding_agent_sync") do
    # Create a generation for embedding
    generation = ApplicationAgent.with(
      message: "The quick brown fox jumps over the lazy dog"
    ).prompt_context

    # Generate embedding synchronously
    response = generation.embed_now

    # Extract embedding vector
    embedding_vector = response.message.content

    assert_kind_of Array, embedding_vector
    assert embedding_vector.all? { |v| v.is_a?(Float) }
    assert_includes [ 1536, 3072 ], embedding_vector.size  # OpenAI dimensions vary by model

    # Document the example
    doc_example_output(response)

    embedding_vector
  end
end
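
Outside the test harness the same flow reads (a minimal sketch):

ruby
generation = ApplicationAgent.with(message: "The quick brown fox").prompt_context
vector = generation.embed_now.message.content  # => Array of Floats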

Async Embeddings

Generate embeddings in background jobs:

ruby
test "generates embeddings asynchronously with embed_later" do
  # Create a generation for async embedding
  generation = ApplicationAgent.with(
    message: "Artificial intelligence is transforming technology"
  ).prompt_context

  # Mock the enqueue_generation private method
  generation.instance_eval do
    def enqueue_generation(method, options = {})
      @enqueue_called = true
      @enqueue_method = method
      @enqueue_options = options
      true
    end

    def enqueue_called?
      @enqueue_called
    end

    def enqueue_method
      @enqueue_method
    end

    def enqueue_options
      @enqueue_options
    end
  end

  # Queue embedding for background processing
  result = generation.embed_later(
    priority: :low,
    queue: :embeddings
  )

  assert result
  assert generation.enqueue_called?
  assert_equal :embed_now, generation.enqueue_method
  assert_equal({ priority: :low, queue: :embeddings }, generation.enqueue_options)
end
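
In application code no mocking is needed; queueing is a single call (a sketch, assuming an ActiveJob backend is configured):

ruby
ApplicationAgent.with(message: text).prompt_context.embed_later(queue: :embeddings)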

Embedding Callbacks

Use callbacks to process embeddings before and after generation:

ruby
test "processes embeddings with callbacks" do
  VCR.use_cassette("embedding_agent_callbacks") do
    # Create a custom agent with embedding callbacks
    custom_agent_class = Class.new(ApplicationAgent) do
      attr_accessor :before_embedding_called, :after_embedding_called

      before_embedding :track_before
      after_embedding :track_after

      def track_before
        self.before_embedding_called = true
      end

      def track_after
        self.after_embedding_called = true
      end
    end

    # Generate embedding with callbacks
    generation = custom_agent_class.with(
      message: "Testing embedding callbacks"
    ).prompt_context

    agent = generation.send(:processed_agent)
    response = generation.embed_now

    assert agent.before_embedding_called
    assert agent.after_embedding_called
    assert_not_nil response.message.content

    doc_example_output(response)
  end
end

Provider Configuration

Each provider supports different embedding models and configurations:

OpenAI

Configure OpenAI-specific embedding models:

ruby
test "uses configured OpenAI embedding model" do
  VCR.use_cassette("embedding_openai_model") do
    # Create agent with specific OpenAI model configuration
    custom_agent_class = Class.new(ApplicationAgent) do
      generate_with :openai,
        model: "gpt-4o",
        embedding_model: "text-embedding-3-small"
    end

    generation = custom_agent_class.with(
      message: "Testing OpenAI embedding model configuration"
    ).prompt_context

    response = generation.embed_now
    embedding = response.message.content

    # text-embedding-3-small can have different dimensions depending on truncation
    assert_includes [ 1536, 3072 ], embedding.size
    assert embedding.all? { |v| v.is_a?(Float) }

    doc_example_output({
      model: "text-embedding-3-small",
      dimensions: embedding.size,
      sample: embedding[0..2]
    })
  end
end

Ollama

Configure Ollama for local embedding generation:

ruby
test "generates embeddings with Ollama provider" do
  VCR.use_cassette("embedding_ollama_provider") do
    # Create agent configured for Ollama
    ollama_agent_class = Class.new(ApplicationAgent) do
      generate_with :ollama,
        model: "llama3",
        embedding_model: "nomic-embed-text",
        host: "http://localhost:11434"
    end

    generation = ollama_agent_class.with(
      message: "Testing Ollama embedding generation"
    ).prompt_context

    begin
      response = generation.embed_now
      embedding = response.message.content

      assert_kind_of Array, embedding
      assert embedding.all? { |v| v.is_a?(Numeric) }
      assert embedding.size > 0

      doc_example_output({
        provider: "ollama",
        model: "nomic-embed-text",
        dimensions: embedding.size,
        sample: embedding[0..2]
      })
    rescue Errno::ECONNREFUSED, Net::OpenTimeout => e
      # Document the expected error when Ollama is not running
      doc_example_output({
        error: "Connection refused",
        message: "Ollama is not running locally",
        solution: "Start Ollama with: ollama serve"
      })
      skip "Ollama is not running locally: #{e.message}"
    end
  end
end

Error Handling

Connection failures surface as standard Ruby errors (for example, Errno::ECONNREFUSED when Ollama is not running), which you can rescue around provider calls:

ruby
# @config is the provider configuration hash (host, model, and so on)
provider = ActiveAgent::GenerationProvider::OllamaProvider.new(@config)
prompt = ActiveAgent::ActionPrompt::Prompt.new(
  message: ActiveAgent::ActionPrompt::Message.new(content: "Generate an embedding for this text"),
  instructions: "You are an embedding test agent"
)

response = provider.embed(prompt)

Working with Embeddings

Similarity Search

Find similar documents using cosine similarity:

ruby
test "performs similarity search with embeddings" do
  VCR.use_cassette("embedding_similarity_search") do
    documents = [
      "The cat sat on the mat",
      "Dogs are loyal companions",
      "Machine learning is a subset of AI",
      "The feline rested on the rug"
    ]

    # Generate embeddings for all documents
    embeddings = documents.map do |doc|
      generation = ApplicationAgent.with(message: doc).prompt_context
      generation.embed_now.message.content
    end

    # Query embedding
    query = "cat on mat"
    query_generation = ApplicationAgent.with(message: query).prompt_context
    query_embedding = query_generation.embed_now.message.content

    # Calculate cosine similarities
    similarities = embeddings.map.with_index do |embedding, index|
      similarity = cosine_similarity(query_embedding, embedding)
      { document: documents[index], similarity: similarity }
    end

    # Sort by similarity
    results = similarities.sort_by { |s| -s[:similarity] }

    # Most similar should be the cat/mat documents
    assert_equal "The cat sat on the mat", results.first[:document]
    assert results.first[:similarity] > 0.5, "Similarity should be > 0.5, got #{results.first[:similarity]}"

    # Document the results
    doc_example_output(results.first(2))
  end
end
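
The cosine_similarity helper is assumed by the test above; if your suite does not define one, a minimal pure-Ruby version might look like this:

ruby
# Cosine similarity: dot(a, b) / (|a| * |b|); 1.0 means identical direction
def cosine_similarity(a, b)
  dot = a.zip(b).sum { |x, y| x * y }
  dot / (Math.sqrt(a.sum { |x| x * x }) * Math.sqrt(b.sum { |x| x * x }))
end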

Batch Processing

Process multiple embeddings efficiently:

ruby
test "processes multiple embeddings in batch" do
  VCR.use_cassette("embedding_batch_processing") do
    texts = [
      "First document for embedding",
      "Second document with different content",
      "Third document about technology"
    ]

    embeddings = []
    texts.each do |text|
      generation = ApplicationAgent.with(message: text).prompt_context
      embedding = generation.embed_now.message.content
      embeddings << {
        text: text[0..20] + "...",
        dimensions: embedding.size,
        sample: embedding[0..2]
      }
    end

    assert_equal 3, embeddings.size
    embeddings.each do |result|
      assert result[:dimensions] > 0
      assert result[:sample].all? { |v| v.is_a?(Float) }
    end

    doc_example_output(embeddings)
  end
end

Embedding Dimensions

Different models produce different embedding dimensions; for example, OpenAI's text-embedding-3-small returns 1536-dimensional vectors while text-embedding-3-large returns 3072 by default:

ruby
test "verifies embedding dimensions for different models" do
  VCR.use_cassette("embedding_dimensions") do
    # Test with default model (usually text-embedding-3-small or ada-002)
    generation = ApplicationAgent.with(
      message: "Testing embedding dimensions"
    ).prompt_context

    response = generation.embed_now
    embedding = response.message.content

    # Most OpenAI models return 1536 dimensions by default
    assert_includes [ 1536, 3072 ], embedding.size

    doc_example_output({
      model: "default",
      dimensions: embedding.size,
      sample: embedding[0..4]
    })
  end
end

Advanced Patterns

Caching Embeddings

Cache embeddings to avoid regenerating them:

ruby
class CachedEmbeddingAgent < ApplicationAgent
  def get_embedding(text)
    cache_key = "embedding:#{Digest::SHA256.hexdigest(text)}"
    
    Rails.cache.fetch(cache_key, expires_in: 30.days) do
      generation = self.class.with(message: text).prompt_context
      generation.embed_now.message.content
    end
  end
end

Multi-Model Embeddings

Use different models for different purposes:

ruby
class MultiModelEmbeddingAgent < ApplicationAgent
  def generate_semantic_embedding(text)
    # High-quality semantic embedding
    self.class.generate_with :openai, 
      embedding_model: "text-embedding-3-large"
    
    generation = self.class.with(message: text).prompt_context
    generation.embed_now
  end
  
  def generate_fast_embedding(text)
    # Faster, smaller embedding for real-time use
    self.class.generate_with :openai,
      embedding_model: "text-embedding-3-small"
    
    generation = self.class.with(message: text).prompt_context
    generation.embed_now
  end
end

Vector Databases

Store and query embeddings using vector databases:

PostgreSQL with pgvector

ruby
class PgVectorAgent < ApplicationAgent
  def store_document(text)
    # Generate embedding
    generation = self.class.with(message: text).prompt_context
    embedding = generation.embed_now.message.content

    # Store in PostgreSQL with pgvector
    Document.create!(
      content: text,
      embedding: embedding  # pgvector column
    )
  end

  def search_similar(query, limit: 10)
    query_embedding = get_embedding(query)

    # <=> is pgvector's cosine-distance operator (<-> is Euclidean distance)
    Document
      .order(Arel.sql("embedding <=> '[#{query_embedding.join(',')}]'"))
      .limit(limit)
  end

  private

  # Same generation flow as store_document, returning just the vector
  def get_embedding(text)
    self.class.with(message: text).prompt_context.embed_now.message.content
  end
end
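
The Document model above needs a pgvector column. A minimal migration sketch, assuming the neighbor gem (or another adapter that registers a :vector column type) is installed:

ruby
class AddEmbeddingToDocuments < ActiveRecord::Migration[7.1]
  def change
    enable_extension "vector"  # enables the pgvector extension
    add_column :documents, :embedding, :vector, limit: 1536  # match your model's dimensions
  end
end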

Pinecone Integration

ruby
class PineconeAgent < ApplicationAgent
  def initialize
    super
    @pinecone = Pinecone::Client.new(api_key: ENV['PINECONE_API_KEY'])
    @index = @pinecone.index('documents')
  end
  
  # Assumes a get_embedding helper like the one defined in PgVectorAgent above
  def upsert_document(id, text, metadata = {})
    embedding = get_embedding(text)
    
    @index.upsert(
      vectors: [{
        id: id,
        values: embedding,
        metadata: metadata.merge(text: text)
      }]
    )
  end
  
  def query_similar(text, top_k: 10)
    embedding = get_embedding(text)
    
    @index.query(
      vector: embedding,
      top_k: top_k,
      include_metadata: true
    )
  end
end

Testing Embeddings

The examples above double as the test suite for embedding functionality: callbacks, similarity search, and batch processing are each exercised through ordinary test cases recorded with VCR cassettes.

Performance Optimization

Batch Processing

Process embeddings in batches for better performance:

ruby
class BatchOptimizedAgent < ApplicationAgent
  def process_documents(documents)
    documents.each_slice(100) do |batch|
      # Parallel comes from the parallel gem; threads suit these IO-bound API calls
      Parallel.each(batch, in_threads: 5) do |doc|
        generation = self.class.with(message: doc.content).prompt_context
        doc.embedding = generation.embed_now.message.content
        doc.save!
      end
    end
  end
end

Caching Strategy

Implement intelligent caching:

ruby
class SmartCacheAgent < ApplicationAgent
  # fetch_from_cache, generate_embedding, and cache_embedding are placeholder
  # helpers; wire them to your cache store and provider of choice
  def get_or_generate_embedding(text)
    # Check cache first
    cached = fetch_from_cache(text)
    return cached if cached
    
    # Generate if not cached
    embedding = generate_embedding(text)
    
    # Cache based on text length and importance
    if should_cache?(text)
      cache_embedding(text, embedding)
    end
    
    embedding
  end
  
  private
  
  def should_cache?(text)
    text.length > 100 || text.include?("important")
  end
end

Best Practices

  1. Choose the Right Model - Balance quality, speed, and cost
  2. Normalize Text - Preprocess consistently before embedding
  3. Cache Aggressively - Embeddings are expensive to generate
  4. Batch When Possible - Process multiple texts together
  5. Monitor Dimensions - Different models produce different sizes
  6. Use Callbacks - Process embeddings consistently
  7. Handle Failures - Implement retry logic and fallbacks
  8. Version Embeddings - Track which model generated each embedding (see the sketch below)
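
For item 8, versioning can be as simple as persisting the model name beside each vector; a hypothetical sketch (embedding_model is an assumed column, not part of ActiveAgent):

ruby
Document.create!(
  content: text,
  embedding: embedding,
  embedding_model: "text-embedding-3-small"  # assumed provenance column
)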

Common Use Cases

Semantic Search

ruby
class SemanticSearchAgent < ApplicationAgent
  def build_search_index(documents)
    documents.each do |doc|
      generation = self.class.with(message: doc.content).prompt_context
      doc.update!(embedding: generation.embed_now.message.content)
    end
  end
  
  def search(query)
    # get_embedding: assumed helper, as defined in PgVectorAgent above
    query_embedding = get_embedding(query)

    # <-> is Euclidean distance; ascending order returns nearest documents first
    Document
      .select("*, embedding <-> '[#{query_embedding.join(',')}]' AS distance")
      .order("distance")
      .limit(10)
  end
end

Content Recommendations

ruby
class RecommendationAgent < ApplicationAgent
  def recommend_similar(article)
    # Reuse the stored vector when present; otherwise embed on the fly
    # (get_embedding: assumed helper, as in PgVectorAgent above)
    article_embedding = article.embedding || get_embedding(article.content)

    # <-> yields a distance, so ascending order puts the most similar first
    Article
      .where.not(id: article.id)
      .select("*, embedding <-> '[#{article_embedding.join(',')}]' AS distance")
      .order("distance")
      .limit(5)
  end
end

Clustering

ruby
class ClusteringAgent < ApplicationAgent
  def cluster_documents(documents, num_clusters: 5)
    # Generate embeddings (get_embedding: assumed helper, as in PgVectorAgent above)
    embeddings = documents.map do |doc|
      get_embedding(doc.content)
    end
    
    # Use k-means or another clustering algorithm (see the sketch below)
    clusters = perform_clustering(embeddings, num_clusters)
    
    # Assign documents to clusters
    documents.zip(clusters).each do |doc, cluster_id|
      doc.update!(cluster_id: cluster_id)
    end
  end
end
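
The perform_clustering helper is not defined above; a minimal k-means sketch in plain Ruby (for real workloads, consider a dedicated library such as rumale):

ruby
# Returns a cluster index (0...k) for each embedding
def perform_clustering(embeddings, k, iterations: 10)
  centroids = embeddings.sample(k)
  assignments = []
  iterations.times do
    # Assign each vector to its nearest centroid (squared Euclidean distance)
    assignments = embeddings.map do |e|
      centroids.each_index.min_by { |i| e.zip(centroids[i]).sum { |a, b| (a - b)**2 } }
    end
    # Recompute each centroid as the mean of its assigned vectors
    centroids = (0...k).map do |i|
      members = embeddings.select.with_index { |_, j| assignments[j] == i }
      members.empty? ? centroids[i] : members.transpose.map { |dim| dim.sum / members.size }
    end
  end
  assignments
end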

Troubleshooting

Common Issues

  1. Dimension Mismatch - Ensure all embeddings use the same model
  2. Memory Issues - Large embedding vectors can consume significant RAM
  3. Rate Limits - Implement exponential backoff for API limits (see the backoff sketch after this list)
  4. Cost Management - Monitor embedding API usage and costs
  5. Connection Errors - Handle network issues with Ollama and other providers
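
For rate limits (item 3), a minimal retry-with-backoff sketch in plain Ruby; narrow the rescued class to your provider gem's rate-limit error:

ruby
def embed_with_backoff(generation, max_retries: 5)
  attempts = 0
  begin
    generation.embed_now
  rescue StandardError => e  # narrow to your provider's rate-limit error class
    attempts += 1
    raise if attempts > max_retries
    sleep(2**attempts)  # exponential backoff: 2s, 4s, 8s, ...
    retry
  end
end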

Debugging

ruby
class DebuggingAgent < ApplicationAgent
  def debug_embedding(text)
    generation = self.class.with(message: text).prompt_context
    
    Rails.logger.info "Generating embedding for: #{text[0..100]}..."
    Rails.logger.info "Provider: #{generation_provider.class.name}"
    Rails.logger.info "Model: #{generation_provider.embedding_model}"
    
    response = generation.embed_now
    embedding = response.message.content
    
    Rails.logger.info "Dimensions: #{embedding.size}"
    Rails.logger.info "Range: [#{embedding.min}, #{embedding.max}]"
    Rails.logger.info "Mean: #{embedding.sum / embedding.size}"
    
    embedding
  end
end