Embeddings
Embeddings are numerical representations of text that capture semantic meaning, enabling similarity searches, clustering, and other vector-based operations. ActiveAgent provides a unified interface for generating embeddings across all supported providers.
Overview
Embeddings transform text into high-dimensional vectors that represent semantic meaning. Similar texts produce similar vectors, enabling powerful features like:
- Semantic Search - Find related content by meaning, not just keywords
- Clustering - Group similar documents automatically
- Classification - Categorize text based on similarity to examples
- Recommendation - Suggest related content based on embeddings
- Anomaly Detection - Identify outliers in text data
Basic Usage
Generating Embeddings
Use the embed_now method to generate embeddings synchronously:
test "generates embeddings synchronously with embed_now" do
VCR.use_cassette("embedding_agent_sync") do
# Create a generation for embedding
generation = ApplicationAgent.with(
message: "The quick brown fox jumps over the lazy dog"
).prompt_context
# Generate embedding synchronously
response = generation.embed_now
# Extract embedding vector
embedding_vector = response.message.content
assert_kind_of Array, embedding_vector
assert embedding_vector.all? { |v| v.is_a?(Float) }
assert_includes [ 1536, 3072 ], embedding_vector.size # OpenAI dimensions vary by model
# Document the example
doc_example_output(response)
embedding_vector
end
end2
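Outside the test harness the flow is identical; a minimal sketch of generating a vector in application code:

```ruby
# Build a generation and extract the vector (an Array of Floats).
generation = ApplicationAgent.with(message: "Hello, embeddings").prompt_context
vector = generation.embed_now.message.content
```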
Async Embeddings
Use embed_later to generate embeddings in background jobs:
test "generates embeddings asynchronously with embed_later" do
# Create a generation for async embedding
generation = ApplicationAgent.with(
message: "Artificial intelligence is transforming technology"
).prompt_context
# Mock the enqueue_generation private method
generation.instance_eval do
def enqueue_generation(method, options = {})
@enqueue_called = true
@enqueue_method = method
@enqueue_options = options
true
end
def enqueue_called?
@enqueue_called
end
def enqueue_method
@enqueue_method
end
def enqueue_options
@enqueue_options
end
end
# Queue embedding for background processing
result = generation.embed_later(
priority: :low,
queue: :embeddings
)
assert result
assert generation.enqueue_called?
assert_equal :embed_now, generation.enqueue_method
assert_equal({ priority: :low, queue: :embeddings }, generation.enqueue_options)
end2
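In application code there is no mocking involved: as the assertions above show, embed_later enqueues a job that later runs embed_now with the same options. A minimal sketch (the record and queue name are illustrative):

```ruby
# Queue the embedding; ActiveAgent's job will invoke embed_now later.
generation = ApplicationAgent.with(message: document.content).prompt_context
generation.embed_later(queue: :embeddings, priority: :low)
```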
Embedding Callbacks
Use callbacks to process embeddings before and after generation:
test "processes embeddings with callbacks" do
VCR.use_cassette("embedding_agent_callbacks") do
# Create a custom agent with embedding callbacks
custom_agent_class = Class.new(ApplicationAgent) do
attr_accessor :before_embedding_called, :after_embedding_called
before_embedding :track_before
after_embedding :track_after
def track_before
self.before_embedding_called = true
end
def track_after
self.after_embedding_called = true
end
end
# Generate embedding with callbacks
generation = custom_agent_class.with(
message: "Testing embedding callbacks"
).prompt_context
agent = generation.send(:processed_agent)
response = generation.embed_now
assert agent.before_embedding_called
assert agent.after_embedding_called
assert_not_nil response.message.content
doc_example_output(response)
end
end2
Provider Configuration
Each provider supports different embedding models and configurations:
OpenAI
Configure OpenAI-specific embedding models:
test "uses configured OpenAI embedding model" do
VCR.use_cassette("embedding_openai_model") do
# Create agent with specific OpenAI model configuration
custom_agent_class = Class.new(ApplicationAgent) do
generate_with :openai,
model: "gpt-4o",
embedding_model: "text-embedding-3-small"
end
generation = custom_agent_class.with(
message: "Testing OpenAI embedding model configuration"
).prompt_context
response = generation.embed_now
embedding = response.message.content
# text-embedding-3-small can have different dimensions depending on truncation
assert_includes [ 1536, 3072 ], embedding.size
assert embedding.all? { |v| v.is_a?(Float) }
doc_example_output({
model: "text-embedding-3-small",
dimensions: embedding.size,
sample: embedding[0..2]
})
end
end2
Ollama
Configure Ollama for local embedding generation:
test "generates embeddings with Ollama provider" do
VCR.use_cassette("embedding_ollama_provider") do
# Create agent configured for Ollama
ollama_agent_class = Class.new(ApplicationAgent) do
generate_with :ollama,
model: "llama3",
embedding_model: "nomic-embed-text",
host: "http://localhost:11434"
end
generation = ollama_agent_class.with(
message: "Testing Ollama embedding generation"
).prompt_context
begin
response = generation.embed_now
embedding = response.message.content
assert_kind_of Array, embedding
assert embedding.all? { |v| v.is_a?(Numeric) }
assert embedding.size > 0
doc_example_output({
provider: "ollama",
model: "nomic-embed-text",
dimensions: embedding.size,
sample: embedding[0..2]
})
rescue Errno::ECONNREFUSED, Net::OpenTimeout => e
# Document the expected error when Ollama is not running
doc_example_output({
error: "Connection refused",
message: "Ollama is not running locally",
solution: "Start Ollama with: ollama serve"
})
skip "Ollama is not running locally: #{e.message}"
end
end
end2
Error Handling
Provider calls raise standard Ruby connection errors, such as Errno::ECONNREFUSED when the provider is unreachable, which you can rescue directly:
```ruby
provider = ActiveAgent::GenerationProvider::OllamaProvider.new(@config)

prompt = ActiveAgent::ActionPrompt::Prompt.new(
  message: ActiveAgent::ActionPrompt::Message.new(content: "Generate an embedding for this text"),
  instructions: "You are an embedding test agent"
)

response = provider.embed(prompt)
```
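A minimal guard around that call, assuming you want to log and re-raise on connection failures:

```ruby
begin
  response = provider.embed(prompt)
rescue Errno::ECONNREFUSED, Net::OpenTimeout => e
  # The provider is unreachable; log and re-raise (or fall back as appropriate)
  Rails.logger.error("Embedding provider unavailable: #{e.message}")
  raise
end
```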
Working with Embeddings
Similarity Search
Find similar documents using cosine similarity:
test "performs similarity search with embeddings" do
VCR.use_cassette("embedding_similarity_search") do
documents = [
"The cat sat on the mat",
"Dogs are loyal companions",
"Machine learning is a subset of AI",
"The feline rested on the rug"
]
# Generate embeddings for all documents
embeddings = documents.map do |doc|
generation = ApplicationAgent.with(message: doc).prompt_context
generation.embed_now.message.content
end
# Query embedding
query = "cat on mat"
query_generation = ApplicationAgent.with(message: query).prompt_context
query_embedding = query_generation.embed_now.message.content
# Calculate cosine similarities
similarities = embeddings.map.with_index do |embedding, index|
similarity = cosine_similarity(query_embedding, embedding)
{ document: documents[index], similarity: similarity }
end
# Sort by similarity
results = similarities.sort_by { |s| -s[:similarity] }
# Most similar should be the cat/mat documents
assert_equal "The cat sat on the mat", results.first[:document]
assert results.first[:similarity] > 0.5, "Similarity should be > 0.5, got #{results.first[:similarity]}"
# Document the results
doc_example_output(results.first(2))
end
end2
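The test relies on a cosine_similarity helper that is not part of ActiveAgent; a minimal implementation, assuming two equal-length arrays of Floats, could be:

```ruby
# Hypothetical test helper: cosine similarity between two equal-length vectors.
def cosine_similarity(a, b)
  dot = a.zip(b).sum { |x, y| x * y }
  magnitude = Math.sqrt(a.sum { |x| x * x }) * Math.sqrt(b.sum { |x| x * x })
  dot / magnitude
end
```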
Batch Processing
Process multiple embeddings efficiently:
test "processes multiple embeddings in batch" do
VCR.use_cassette("embedding_batch_processing") do
texts = [
"First document for embedding",
"Second document with different content",
"Third document about technology"
]
embeddings = []
texts.each do |text|
generation = ApplicationAgent.with(message: text).prompt_context
embedding = generation.embed_now.message.content
embeddings << {
text: text[0..20] + "...",
dimensions: embedding.size,
sample: embedding[0..2]
}
end
assert_equal 3, embeddings.size
embeddings.each do |result|
assert result[:dimensions] > 0
assert result[:sample].all? { |v| v.is_a?(Float) }
end
doc_example_output(embeddings)
end
end2
Embedding Dimensions
Different models produce different embedding dimensions:
test "verifies embedding dimensions for different models" do
VCR.use_cassette("embedding_dimensions") do
# Test with default model (usually text-embedding-3-small or ada-002)
generation = ApplicationAgent.with(
message: "Testing embedding dimensions"
).prompt_context
response = generation.embed_now
embedding = response.message.content
# Most OpenAI models return 1536 dimensions by default
assert_includes [ 1536, 3072 ], embedding.size
doc_example_output({
model: "default",
dimensions: embedding.size,
sample: embedding[0..4]
})
end
end2
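For reference, common defaults are 1536 dimensions for text-embedding-3-small and text-embedding-ada-002, 3072 for text-embedding-3-large, and 768 for Ollama's nomic-embed-text; confirm the exact size against your provider's documentation before sizing vector columns.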
Advanced Patterns
Caching Embeddings
Cache embeddings to avoid regenerating them:
```ruby
class CachedEmbeddingAgent < ApplicationAgent
  def get_embedding(text)
    cache_key = "embedding:#{Digest::SHA256.hexdigest(text)}"

    Rails.cache.fetch(cache_key, expires_in: 30.days) do
      generation = self.class.with(message: text).prompt_context
      generation.embed_now.message.content
    end
  end
end
```
Multi-Model Embeddings
Use different models for different purposes:
```ruby
class MultiModelEmbeddingAgent < ApplicationAgent
  def generate_semantic_embedding(text)
    # High-quality semantic embedding
    self.class.generate_with :openai,
      embedding_model: "text-embedding-3-large"

    generation = self.class.with(message: text).prompt_context
    generation.embed_now
  end

  def generate_fast_embedding(text)
    # Faster, smaller embedding for real-time use
    self.class.generate_with :openai,
      embedding_model: "text-embedding-3-small"

    generation = self.class.with(message: text).prompt_context
    generation.embed_now
  end
end
```
Vector Databases
Store and query embeddings using vector databases:
PostgreSQL with pgvector
```ruby
class PgVectorAgent < ApplicationAgent
  def store_document(text)
    # Generate embedding
    generation = self.class.with(message: text).prompt_context
    embedding = generation.embed_now.message.content

    # Store in PostgreSQL with pgvector
    Document.create!(
      content: text,
      embedding: embedding # pgvector column
    )
  end

  def search_similar(query, limit: 10)
    query_embedding = get_embedding(query)

    # pgvector's <-> operator computes L2 distance; use <=> for cosine distance
    Document
      .order(Arel.sql("embedding <-> '#{query_embedding}'"))
      .limit(limit)
  end
end
```
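The Document model above assumes a pgvector column; a hypothetical migration to set that up, assuming a gem such as neighbor registers the :vector column type (1536 dimensions matches text-embedding-3-small):

```ruby
# Hypothetical migration: enable pgvector and add an embedding column.
class AddEmbeddingToDocuments < ActiveRecord::Migration[7.1]
  def change
    enable_extension "vector"
    add_column :documents, :embedding, :vector, limit: 1536
  end
end
```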
Pinecone Integration
```ruby
class PineconeAgent < ApplicationAgent
  def initialize
    super
    @pinecone = Pinecone::Client.new(api_key: ENV['PINECONE_API_KEY'])
    @index = @pinecone.index('documents')
  end

  def upsert_document(id, text, metadata = {})
    embedding = get_embedding(text)

    @index.upsert(
      vectors: [{
        id: id,
        values: embedding,
        metadata: metadata.merge(text: text)
      }]
    )
  end

  def query_similar(text, top_k: 10)
    embedding = get_embedding(text)

    @index.query(
      vector: embedding,
      top_k: top_k,
      include_metadata: true
    )
  end
end
```
Testing Embeddings
Cover embedding functionality comprehensively in your test suite, including callbacks, similarity search, and batch processing, as shown in the examples above.
Performance Optimization
Batch Processing
Process embeddings in batches for better performance:
```ruby
class BatchOptimizedAgent < ApplicationAgent
  def process_documents(documents)
    # Requires the `parallel` gem for threaded processing
    documents.each_slice(100) do |batch|
      Parallel.each(batch, in_threads: 5) do |doc|
        generation = self.class.with(message: doc.content).prompt_context
        doc.embedding = generation.embed_now.message.content
        doc.save!
      end
    end
  end
end
```
Caching Strategy
Implement intelligent caching:
```ruby
class SmartCacheAgent < ApplicationAgent
  def get_or_generate_embedding(text)
    # Check cache first
    cached = fetch_from_cache(text)
    return cached if cached

    # Generate if not cached
    embedding = generate_embedding(text)

    # Cache based on text length and importance
    if should_cache?(text)
      cache_embedding(text, embedding)
    end

    embedding
  end

  private

  def should_cache?(text)
    text.length > 100 || text.include?("important")
  end
end
```
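fetch_from_cache, cache_embedding, and generate_embedding are left undefined above; hypothetical private helpers backed by Rails.cache might look like:

```ruby
# Hypothetical private helpers for SmartCacheAgent, assuming Rails.cache as the store.
def fetch_from_cache(text)
  Rails.cache.read(cache_key_for(text))
end

def cache_embedding(text, embedding)
  Rails.cache.write(cache_key_for(text), embedding, expires_in: 30.days)
end

def generate_embedding(text)
  self.class.with(message: text).prompt_context.embed_now.message.content
end

def cache_key_for(text)
  "embedding:#{Digest::SHA256.hexdigest(text)}"
end
```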
Best Practices
- Choose the Right Model - Balance quality, speed, and cost
- Normalize Text - Preprocess consistently before embedding
- Cache Aggressively - Embeddings are expensive to generate
- Batch When Possible - Process multiple texts together
- Monitor Dimensions - Different models produce different sizes
- Use Callbacks - Process embeddings consistently
- Handle Failures - Implement retry logic and fallbacks
- Version Embeddings - Track which model generated each embedding (see the sketch after this list)
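One way to version embeddings: a hypothetical migration storing the model name and timestamp alongside each vector, so rows can be re-embedded selectively when you upgrade models.

```ruby
# Hypothetical migration: record which model produced each stored embedding.
class AddEmbeddingMetadataToDocuments < ActiveRecord::Migration[7.1]
  def change
    add_column :documents, :embedding_model, :string
    add_column :documents, :embedded_at, :datetime
  end
end
```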
Common Use Cases
Semantic Search
```ruby
class SemanticSearchAgent < ApplicationAgent
  def build_search_index(documents)
    documents.each do |doc|
      generation = self.class.with(message: doc.content).prompt_context
      doc.update!(embedding: generation.embed_now.message.content)
    end
  end

  def search(query)
    query_embedding = get_embedding(query)

    Document
      .select("*, embedding <-> '#{query_embedding}' as distance")
      .order("distance")
      .limit(10)
  end
end
```
Content Recommendations
```ruby
class RecommendationAgent < ApplicationAgent
  def recommend_similar(article)
    article_embedding = article.embedding || generate_embedding(article.content)

    # <-> returns a distance, so ascending order puts the closest articles first
    Article
      .where.not(id: article.id)
      .select("*, embedding <-> '#{article_embedding}' as distance")
      .order("distance")
      .limit(5)
  end
end
```
Clustering
```ruby
class ClusteringAgent < ApplicationAgent
  def cluster_documents(documents, num_clusters: 5)
    # Generate embeddings
    embeddings = documents.map do |doc|
      get_embedding(doc.content)
    end

    # Use k-means or another clustering algorithm
    clusters = perform_clustering(embeddings, num_clusters)

    # Assign documents to clusters
    documents.zip(clusters).each do |doc, cluster_id|
      doc.update!(cluster_id: cluster_id)
    end
  end
end
```
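perform_clustering is left abstract above; a minimal, naive k-means sketch in plain Ruby, assuming the embeddings are equal-length Float arrays, could be:

```ruby
# Hypothetical naive k-means: returns a cluster index for each vector.
def perform_clustering(vectors, k, iterations: 10)
  centroids = vectors.sample(k)
  assignments = []

  iterations.times do
    # Assign each vector to its nearest centroid (squared Euclidean distance)
    assignments = vectors.map do |v|
      centroids.each_index.min_by { |i| squared_distance(v, centroids[i]) }
    end

    # Recompute each centroid as the mean of its assigned vectors
    centroids = (0...k).map do |i|
      members = vectors.each_index.select { |j| assignments[j] == i }.map { |j| vectors[j] }
      next centroids[i] if members.empty?
      members.first.each_index.map { |d| members.sum { |m| m[d] } / members.size.to_f }
    end
  end

  assignments
end

def squared_distance(a, b)
  a.zip(b).sum { |x, y| (x - y)**2 }
end
```

A production system would more likely delegate this to a dedicated library or database, but the sketch shows the shape of the data flowing through cluster_documents.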
Troubleshooting
Common Issues
- Dimension Mismatch - Ensure all embeddings use the same model
- Memory Issues - Large embedding vectors can consume significant RAM
- Rate Limits - Implement exponential backoff for API limits
- Cost Management - Monitor embedding API usage and costs
- Connection Errors - Handle network issues with Ollama and other providers
Debugging
```ruby
class DebuggingAgent < ApplicationAgent
  def debug_embedding(text)
    generation = self.class.with(message: text).prompt_context

    Rails.logger.info "Generating embedding for: #{text[0..100]}..."
    Rails.logger.info "Provider: #{generation_provider.class.name}"
    Rails.logger.info "Model: #{generation_provider.embedding_model}"

    response = generation.embed_now
    embedding = response.message.content

    Rails.logger.info "Dimensions: #{embedding.size}"
    Rails.logger.info "Range: [#{embedding.min}, #{embedding.max}]"
    Rails.logger.info "Mean: #{embedding.sum / embedding.size}"

    embedding
  end
end
```