From 939a96790dde4986004eb7677ac48617802b8ebf Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 15:43:56 -0500 Subject: [PATCH 01/10] update --- lib/ruby_llm/active_record/acts_as_legacy.rb | 56 ++++++- lib/ruby_llm/active_record/chat_methods.rb | 87 ++++++++++ lib/ruby_llm/active_record/message_methods.rb | 7 +- lib/ruby_llm/chat.rb | 8 + ...s_not_leave_orphaned_messages_on_error.yml | 68 ++++++++ .../active_record/acts_as_attachment_spec.rb | 152 ++++++++++++++++++ spec/ruby_llm/active_record/acts_as_spec.rb | 111 +++++++++++++ 7 files changed, 483 insertions(+), 6 deletions(-) create mode 100644 spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_error_handling_does_not_leave_orphaned_messages_on_error.yml diff --git a/lib/ruby_llm/active_record/acts_as_legacy.rb b/lib/ruby_llm/active_record/acts_as_legacy.rb index 97679c126..794b3adc9 100644 --- a/lib/ruby_llm/active_record/acts_as_legacy.rb +++ b/lib/ruby_llm/active_record/acts_as_legacy.rb @@ -198,6 +198,55 @@ def ask(message, with: nil, &) alias say ask + ## + # Prompt the chat without persisting anything to the database. + # See ChatMethods#prompt for full documentation and examples. + # + def prompt(message, with: nil, &) + # Get configured chat instance (preserves tools, temperature, schema, etc.) + llm_chat = to_llm + + # Capture starting point to isolate messages from this prompt + messages_before_count = llm_chat.messages.count + + # Temporarily disable persistence callbacks + on_hash = llm_chat.instance_variable_get(:@on) + original_new_message = on_hash[:new_message] + original_end_message = on_hash[:end_message] + + on_hash[:new_message] = nil + on_hash[:end_message] = nil + + # Add prompt message (not persisted) + if message.is_a?(::ActiveRecord::Base) && message.respond_to?(:to_llm) + llm_chat.add_message(message.to_llm) + else + content = if message.is_a?(RubyLLM::Content) || message.is_a?(RubyLLM::Content::Raw) + message + elsif with + RubyLLM::Content.new(message, with) + else + message + end + + llm_chat.add_message role: :user, content: content + end + + # Complete without persistence - returns final response + response = llm_chat.complete(&) + + # Attach all messages generated by this prompt (user + tools + assistant) + # Make a frozen copy for memory safety + prompt_messages = llm_chat.messages[messages_before_count..-1].dup.freeze + response.define_singleton_method(:prompt_messages) { prompt_messages } + + response + ensure + # Restore persistence callbacks for subsequent ask()/complete() calls + on_hash[:new_message] = original_new_message if on_hash + on_hash[:end_message] = original_end_message if on_hash + end + def complete(...) to_llm.complete(...) rescue RubyLLM::Error => e @@ -336,10 +385,10 @@ module MessageLegacyMethods attr_reader :chat_class, :tool_call_class, :chat_foreign_key, :tool_call_foreign_key end - def to_llm + def to_llm(include_attachments: true) RubyLLM::Message.new( role: role.to_sym, - content: extract_content, + content: extract_content(include_attachments: include_attachments), tool_calls: extract_tool_calls, tool_call_id: extract_tool_call_id, input_tokens: input_tokens, @@ -367,7 +416,8 @@ def extract_tool_call_id parent_tool_call&.tool_call_id end - def extract_content + def extract_content(include_attachments: true) + return content unless include_attachments return content unless respond_to?(:attachments) && attachments.attached? 
RubyLLM::Content.new(content).tap do |content_obj| diff --git a/lib/ruby_llm/active_record/chat_methods.rb b/lib/ruby_llm/active_record/chat_methods.rb index 41930548c..adaf00081 100644 --- a/lib/ruby_llm/active_record/chat_methods.rb +++ b/lib/ruby_llm/active_record/chat_methods.rb @@ -194,6 +194,93 @@ def ask(message, with: nil, &) alias say ask + ## + # Prompt the chat without persisting anything to the database. + # + # Like ask(), but nothing is saved to DB - no user message, no assistant response, + # no tool calls/results. Perfect for A/B testing, RAG, or speculative generation. + # + # @param message [String, Message, RubyLLM::Content] The prompt content or existing message record + # @param with [Array, nil] Optional file attachments + # @param block [Proc] Optional streaming block for real-time response chunks + # + # @return [RubyLLM::Message] Rich response object with: + # - content: The assistant's response text + # - tool_calls: Hash of tool calls (if LLM used tools on first response) + # - input_tokens, output_tokens: Token usage for final response + # - prompt_messages: Array of all messages generated by this prompt (user + tools + assistant) + # - role, model_id, etc. + # + # @note Tool side effects (API calls, DB writes) WILL happen, only messages don't persist + # + # @example A/B testing + # msg = chat.messages.create!(content: "Explain quantum computing") + # response_a = chat.with_temperature(0.3).prompt(msg) + # response_b = chat.with_temperature(0.9).prompt(msg) + # # User picks best, then: chat.messages.create!(role: :assistant, content: response_a.content) + # + # @example RAG without persistence + # docs = vector_search(query) + # response = chat.prompt("Context: #{docs}\n\nUser: #{query}") + # + # @example Pre-created message with attachments + # msg = chat.messages.create!(content: text, attachments: files) + # response = chat.prompt(msg) { |chunk| broadcast(chunk) } + # + # @example Access full prompt conversation (including tool calls) + # response = chat.with_tool(WeatherTool).prompt("What's the weather?") + # response.content # => "It's 72°F and sunny" + # response.prompt_messages.each { |m| puts "#{m.role}: #{m.content}" } + # # => user: What's the weather? + # # => assistant: [tool_call] + # # => tool: 72°F sunny + # # => assistant: It's 72°F and sunny + # + def prompt(message, with: nil, &) + # Get configured chat instance (preserves tools, temperature, schema, etc.) 
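+        # (to_llm also wires the :new_message/:end_message persistence
+        # callbacks into @on; they are what gets suspended below.)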
+ llm_chat = to_llm + + # Capture starting point to isolate messages from this prompt + messages_before_count = llm_chat.messages.count + + # Temporarily disable persistence callbacks + on_hash = llm_chat.instance_variable_get(:@on) + original_new_message = on_hash[:new_message] + original_end_message = on_hash[:end_message] + + on_hash[:new_message] = nil + on_hash[:end_message] = nil + + # Add prompt message (not persisted) + if message.is_a?(::ActiveRecord::Base) && message.respond_to?(:to_llm) + llm_chat.add_message(message.to_llm) + else + content = if message.is_a?(RubyLLM::Content) || message.is_a?(RubyLLM::Content::Raw) + message + elsif with + RubyLLM::Content.new(message, with) + else + message + end + + llm_chat.add_message role: :user, content: content + end + + # Complete without persistence - returns final response + response = llm_chat.complete(&) + + # Attach all messages generated by this prompt (user + tools + assistant) + # Make a frozen copy for memory safety + prompt_messages = llm_chat.messages[messages_before_count..-1].dup.freeze + response.define_singleton_method(:prompt_messages) { prompt_messages } + + response + ensure + # Restore persistence callbacks for subsequent ask()/complete() calls + on_hash[:new_message] = original_new_message if on_hash + on_hash[:end_message] = original_end_message if on_hash + end + def complete(...) to_llm.complete(...) rescue RubyLLM::Error => e diff --git a/lib/ruby_llm/active_record/message_methods.rb b/lib/ruby_llm/active_record/message_methods.rb index 334352409..3d4d42154 100644 --- a/lib/ruby_llm/active_record/message_methods.rb +++ b/lib/ruby_llm/active_record/message_methods.rb @@ -10,13 +10,13 @@ module MessageMethods attr_reader :chat_class, :tool_call_class, :chat_foreign_key, :tool_call_foreign_key end - def to_llm + def to_llm(include_attachments: true) cached = has_attribute?(:cached_tokens) ? self[:cached_tokens] : nil cache_creation = has_attribute?(:cache_creation_tokens) ? self[:cache_creation_tokens] : nil RubyLLM::Message.new( role: role.to_sym, - content: extract_content, + content: extract_content(include_attachments: include_attachments), tool_calls: extract_tool_calls, tool_call_id: extract_tool_call_id, input_tokens: input_tokens, @@ -46,11 +46,12 @@ def extract_tool_call_id parent_tool_call&.tool_call_id end - def extract_content + def extract_content(include_attachments: true) return RubyLLM::Content::Raw.new(content_raw) if has_attribute?(:content_raw) && content_raw.present? content_value = self[:content] + return content_value unless include_attachments return content_value unless respond_to?(:attachments) && attachments.attached? RubyLLM::Content.new(content_value).tap do |content_obj| diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index d03d872ca..f61474956 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -36,6 +36,14 @@ def ask(message = nil, with: nil, &) end alias say ask + + ## + # For memory-only chats, prompt behaves identically to ask. + # + # Since there's no persistence layer, both methods add the message to the + # in-memory chat and call the LLM. ActiveRecord chats override this to + # provide non-persisting behavior. 
+ alias prompt ask def with_instructions(instructions, replace: false) @messages = @messages.reject { |msg| msg.role == :system } if replace diff --git a/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_error_handling_does_not_leave_orphaned_messages_on_error.yml b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_error_handling_does_not_leave_orphaned_messages_on_error.yml new file mode 100644 index 000000000..5333339af --- /dev/null +++ b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_error_handling_does_not_leave_orphaned_messages_on_error.yml @@ -0,0 +1,68 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"Test"}],"stream":true,"stream_options":{"include_usage":true}}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer test + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 401 + message: Unauthorized + headers: + Date: + - Thu, 27 Nov 2025 20:33:51 GMT + Content-Type: + - application/json; charset=utf-8 + Content-Length: + - '254' + Connection: + - keep-alive + Vary: + - Origin + X-Request-Id: + - "" + X-Envoy-Upstream-Service-Time: + - '1' + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: | + { + "error": { + "message": "Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + recorded_at: Thu, 27 Nov 2025 20:33:51 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/ruby_llm/active_record/acts_as_attachment_spec.rb b/spec/ruby_llm/active_record/acts_as_attachment_spec.rb index 2a6033f03..1796cbec5 100644 --- a/spec/ruby_llm/active_record/acts_as_attachment_spec.rb +++ b/spec/ruby_llm/active_record/acts_as_attachment_spec.rb @@ -120,4 +120,156 @@ def uploaded_file(path, type) expect(attachment.type).to eq(:pdf) end end + + describe 'include_attachments parameter' do + describe 'basic functionality' do + it 'excludes attachments when include_attachments: false' do + chat = Chat.create!(model: model) + message = chat.messages.create!(role: 'user', content: 'Test message') + message.attachments.attach( + io: File.open(image_path), + filename: 'ruby.png', + content_type: 'image/png' + ) + + llm_message = message.to_llm(include_attachments: false) + + expect(llm_message.content).to be_a(String) + expect(llm_message.content).to eq('Test message') + expect(llm_message.content).not_to be_a(RubyLLM::Content) + end + + it 'includes attachments by default' do + chat = Chat.create!(model: model) + message = chat.messages.create!(role: 'user', content: 'Test message') + message.attachments.attach( + io: File.open(image_path), + filename: 'ruby.png', + content_type: 'image/png' + ) + + llm_message = message.to_llm + + expect(llm_message.content).to be_a(RubyLLM::Content) + expect(llm_message.content.attachments).not_to be_empty + end + + it 'includes attachments when include_attachments: true' do + chat = Chat.create!(model: model) + message = chat.messages.create!(role: 'user', content: 
'Test message') + message.attachments.attach( + io: File.open(image_path), + filename: 'ruby.png', + content_type: 'image/png' + ) + + llm_message = message.to_llm(include_attachments: true) + + expect(llm_message.content).to be_a(RubyLLM::Content) + expect(llm_message.content.attachments).not_to be_empty + end + end + + describe 'with multiple attachments' do + it 'excludes all attachments when false' do + chat = Chat.create!(model: model) + message = chat.messages.create!(role: 'user', content: 'Multiple files') + + message.attachments.attach( + io: File.open(image_path), + filename: 'ruby.png', + content_type: 'image/png' + ) + message.attachments.attach( + io: File.open(pdf_path), + filename: 'sample.pdf', + content_type: 'application/pdf' + ) + + llm_message = message.to_llm(include_attachments: false) + + expect(llm_message.content).to eq('Multiple files') + expect(llm_message.content).not_to be_a(RubyLLM::Content) + end + end + + describe 'with messages without attachments' do + it 'works normally when include_attachments: false and no attachments' do + chat = Chat.create!(model: model) + message = chat.messages.create!(role: 'user', content: 'Plain text') + + llm_message = message.to_llm(include_attachments: false) + + expect(llm_message.content).to eq('Plain text') + end + + it 'works normally when include_attachments: true and no attachments' do + chat = Chat.create!(model: model) + message = chat.messages.create!(role: 'user', content: 'Plain text') + + llm_message = message.to_llm(include_attachments: true) + + expect(llm_message.content).to eq('Plain text') + end + end + + describe 'performance use case' do + it 'enables skipping attachment downloads for old messages' do + chat = Chat.create!(model: model) + + # Create messages with attachments (stagger timestamps to ensure order) + message_ids = [] + 5.times do |i| + msg = chat.messages.create!(role: 'user', content: "Message #{i}") + msg.attachments.attach( + io: File.open(image_path), + filename: "image_#{i}.png", + content_type: 'image/png' + ) + message_ids << msg.id + sleep 0.01 if i < 4 # Small delay to ensure distinct timestamps + end + + # For performance testing: only download attachments for recent messages + # Treat last 2 messages as "recent" (by ID) + all_ids = chat.messages.order(id: :asc).pluck(:id) + recent_ids = all_ids.last(2) # Get the last 2 IDs + + llm_messages = chat.messages.order(id: :asc).map do |msg| + include_attachments = recent_ids.include?(msg.id) + msg.to_llm(include_attachments: include_attachments) + end + + # Old messages (first 3) should NOT have attachments (just strings) + old_messages = llm_messages.first(3) + expect(old_messages.all? { |m| m.content.is_a?(String) }).to be true + + # Recent messages (last 2) should have attachments (Content objects) + recent_messages = llm_messages.last(2) + expect(recent_messages.all? 
{ |m| m.content.is_a?(RubyLLM::Content) }).to be true + end + end + + describe 'with content_raw' do + it 'respects content_raw even when include_attachments: false' do + skip 'content_raw requires v1.9 migration' unless Chat.new.respond_to?(:content_raw=) + + chat = Chat.create!(model: model) + message = chat.messages.create!( + role: 'user', + content: 'Text', + content_raw: { type: 'complex', data: 'raw' } + ) + message.attachments.attach( + io: File.open(image_path), + filename: 'ruby.png' + ) + + llm_message = message.to_llm(include_attachments: false) + + # content_raw takes precedence + expect(llm_message.content).to be_a(RubyLLM::Content::Raw) + end + end + end end diff --git a/spec/ruby_llm/active_record/acts_as_spec.rb b/spec/ruby_llm/active_record/acts_as_spec.rb index a36c00b49..7f8be9763 100644 --- a/spec/ruby_llm/active_record/acts_as_spec.rb +++ b/spec/ruby_llm/active_record/acts_as_spec.rb @@ -764,4 +764,115 @@ def uploaded_file(path, type) expect(chat.provider).to eq('bedrock') end end + + describe 'prompt method' do + describe 'basic functionality' do + it 'does not persist user message', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + + it 'does not persist assistant response', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + + it 'returns valid RubyLLM::Message', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + + it 'works with existing message records', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + end + + describe 'prompt_messages accessor' do + it 'provides prompt_messages accessor', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + + it 'includes user and assistant messages', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + + it 'includes tool call messages when tools are used', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + + it 'only includes messages from this prompt call', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + end + + describe 'streaming support' do + it 'supports streaming blocks', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + end + + describe 'configuration preservation' do + it 'preserves tools configuration', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + + it 'preserves temperature configuration', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + + it 'works with system instructions', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + end + + describe 'with file attachments' do + it 'supports with: parameter for attachments', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + + it 'works with pre-created message with attachments', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + end + + describe 'error handling' do + it 'does not leave orphaned messages on error' do + # This test doesn't need API calls - just checks behavior + chat = Chat.create!(model: model) + initial_count = chat.messages.count + + # Mock the complete call to avoid API + allow_any_instance_of(RubyLLM::Chat).to receive(:complete).and_raise(StandardError, 'Simulated error') + + expect { + 
chat.prompt('Test') + }.to raise_error(StandardError) + + # No messages should be persisted + expect(chat.messages.reload.count).to eq(initial_count) + end + + it 'restores callbacks after exception' do + chat = Chat.create!(model: model) + + # Mock to avoid API call + allow_any_instance_of(RubyLLM::Chat).to receive(:complete).and_raise(StandardError, 'Error') + + begin + chat.prompt('Test') + rescue StandardError + # Callbacks should be restored + end + + # The key test: callbacks are restored (we can check this without API call) + llm_chat = chat.to_llm + on_hash = llm_chat.instance_variable_get(:@on) + expect(on_hash[:new_message]).not_to be_nil + expect(on_hash[:end_message]).not_to be_nil + end + end + + describe 'A/B testing use case' do + it 'enables A/B testing by generating multiple responses', :pending_vcr do + skip 'Requires VCR cassette recording with real API credentials' + end + end + end end From 071f26eb551f150af3c5088ff713ad150b33c3af Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 16:21:08 -0500 Subject: [PATCH 02/10] rubocop fixes --- lib/ruby_llm/active_record/acts_as_legacy.rb | 49 ++++++++++--------- lib/ruby_llm/active_record/chat_methods.rb | 46 +++++++++-------- lib/ruby_llm/active_record/message_methods.rb | 3 +- 3 files changed, 52 insertions(+), 46 deletions(-) diff --git a/lib/ruby_llm/active_record/acts_as_legacy.rb b/lib/ruby_llm/active_record/acts_as_legacy.rb index 794b3adc9..641534482 100644 --- a/lib/ruby_llm/active_record/acts_as_legacy.rb +++ b/lib/ruby_llm/active_record/acts_as_legacy.rb @@ -205,41 +205,29 @@ def ask(message, with: nil, &) def prompt(message, with: nil, &) # Get configured chat instance (preserves tools, temperature, schema, etc.) llm_chat = to_llm - + # Capture starting point to isolate messages from this prompt messages_before_count = llm_chat.messages.count - + # Temporarily disable persistence callbacks on_hash = llm_chat.instance_variable_get(:@on) original_new_message = on_hash[:new_message] original_end_message = on_hash[:end_message] - + on_hash[:new_message] = nil on_hash[:end_message] = nil - + # Add prompt message (not persisted) - if message.is_a?(::ActiveRecord::Base) && message.respond_to?(:to_llm) - llm_chat.add_message(message.to_llm) - else - content = if message.is_a?(RubyLLM::Content) || message.is_a?(RubyLLM::Content::Raw) - message - elsif with - RubyLLM::Content.new(message, with) - else - message - end - - llm_chat.add_message role: :user, content: content - end - + add_prompt_message(llm_chat, message, with) + # Complete without persistence - returns final response response = llm_chat.complete(&) - + # Attach all messages generated by this prompt (user + tools + assistant) # Make a frozen copy for memory safety - prompt_messages = llm_chat.messages[messages_before_count..-1].dup.freeze + prompt_messages = llm_chat.messages[messages_before_count..].dup.freeze response.define_singleton_method(:prompt_messages) { prompt_messages } - + response ensure # Restore persistence callbacks for subsequent ask()/complete() calls @@ -257,6 +245,22 @@ def complete(...) 
private + def add_prompt_message(llm_chat, message, with) + if message.is_a?(::ActiveRecord::Base) && message.respond_to?(:to_llm) + llm_chat.add_message(message.to_llm) + else + content = prepare_prompt_content(message, with) + llm_chat.add_message role: :user, content: content + end + end + + def prepare_prompt_content(message, with) + return message if message.is_a?(RubyLLM::Content) || message.is_a?(RubyLLM::Content::Raw) + return RubyLLM::Content.new(message, with) if with + + message + end + def cleanup_failed_messages RubyLLM.logger.warn "RubyLLM: API call failed, destroying message: #{@message.id}" @message.destroy @@ -417,8 +421,7 @@ def extract_tool_call_id end def extract_content(include_attachments: true) - return content unless include_attachments - return content unless respond_to?(:attachments) && attachments.attached? + return content unless include_attachments && respond_to?(:attachments) && attachments.attached? RubyLLM::Content.new(content).tap do |content_obj| @_tempfiles = [] diff --git a/lib/ruby_llm/active_record/chat_methods.rb b/lib/ruby_llm/active_record/chat_methods.rb index adaf00081..0a3113ca8 100644 --- a/lib/ruby_llm/active_record/chat_methods.rb +++ b/lib/ruby_llm/active_record/chat_methods.rb @@ -239,41 +239,29 @@ def ask(message, with: nil, &) def prompt(message, with: nil, &) # Get configured chat instance (preserves tools, temperature, schema, etc.) llm_chat = to_llm - + # Capture starting point to isolate messages from this prompt messages_before_count = llm_chat.messages.count - + # Temporarily disable persistence callbacks on_hash = llm_chat.instance_variable_get(:@on) original_new_message = on_hash[:new_message] original_end_message = on_hash[:end_message] - + on_hash[:new_message] = nil on_hash[:end_message] = nil - + # Add prompt message (not persisted) - if message.is_a?(::ActiveRecord::Base) && message.respond_to?(:to_llm) - llm_chat.add_message(message.to_llm) - else - content = if message.is_a?(RubyLLM::Content) || message.is_a?(RubyLLM::Content::Raw) - message - elsif with - RubyLLM::Content.new(message, with) - else - message - end - - llm_chat.add_message role: :user, content: content - end - + add_prompt_message(llm_chat, message, with) + # Complete without persistence - returns final response response = llm_chat.complete(&) - + # Attach all messages generated by this prompt (user + tools + assistant) # Make a frozen copy for memory safety - prompt_messages = llm_chat.messages[messages_before_count..-1].dup.freeze + prompt_messages = llm_chat.messages[messages_before_count..].dup.freeze response.define_singleton_method(:prompt_messages) { prompt_messages } - + response ensure # Restore persistence callbacks for subsequent ask()/complete() calls @@ -291,6 +279,22 @@ def complete(...) 
private + def add_prompt_message(llm_chat, message, with) + if message.is_a?(::ActiveRecord::Base) && message.respond_to?(:to_llm) + llm_chat.add_message(message.to_llm) + else + content = prepare_prompt_content(message, with) + llm_chat.add_message role: :user, content: content + end + end + + def prepare_prompt_content(message, with) + return message if message.is_a?(RubyLLM::Content) || message.is_a?(RubyLLM::Content::Raw) + return RubyLLM::Content.new(message, with) if with + + message + end + def cleanup_failed_messages RubyLLM.logger.warn "RubyLLM: API call failed, destroying message: #{@message.id}" @message.destroy diff --git a/lib/ruby_llm/active_record/message_methods.rb b/lib/ruby_llm/active_record/message_methods.rb index 3d4d42154..bb66f532b 100644 --- a/lib/ruby_llm/active_record/message_methods.rb +++ b/lib/ruby_llm/active_record/message_methods.rb @@ -51,8 +51,7 @@ def extract_content(include_attachments: true) content_value = self[:content] - return content_value unless include_attachments - return content_value unless respond_to?(:attachments) && attachments.attached? + return content_value unless include_attachments && respond_to?(:attachments) && attachments.attached? RubyLLM::Content.new(content_value).tap do |content_obj| @_tempfiles = [] From ac860e20ce112bb5b99214b8a2cf1f29a3b644d3 Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 16:51:06 -0500 Subject: [PATCH 03/10] Remove verbose prompt alias comment in memory chat --- lib/ruby_llm/chat.rb | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lib/ruby_llm/chat.rb b/lib/ruby_llm/chat.rb index f61474956..3d4a4de78 100644 --- a/lib/ruby_llm/chat.rb +++ b/lib/ruby_llm/chat.rb @@ -36,13 +36,6 @@ def ask(message = nil, with: nil, &) end alias say ask - - ## - # For memory-only chats, prompt behaves identically to ask. - # - # Since there's no persistence layer, both methods add the message to the - # in-memory chat and call the LLM. ActiveRecord chats override this to - # provide non-persisting behavior. 
alias prompt ask def with_instructions(instructions, replace: false) From 4aa4d31e9e19aa899d434371532f898430521606 Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 17:25:00 -0500 Subject: [PATCH 04/10] update --- spec/ruby_llm/active_record/acts_as_spec.rb | 94 ++++----------------- 1 file changed, 17 insertions(+), 77 deletions(-) diff --git a/spec/ruby_llm/active_record/acts_as_spec.rb b/spec/ruby_llm/active_record/acts_as_spec.rb index 7f8be9763..54efff8a6 100644 --- a/spec/ruby_llm/active_record/acts_as_spec.rb +++ b/spec/ruby_llm/active_record/acts_as_spec.rb @@ -766,71 +766,13 @@ def uploaded_file(path, type) end describe 'prompt method' do - describe 'basic functionality' do - it 'does not persist user message', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - - it 'does not persist assistant response', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - - it 'returns valid RubyLLM::Message', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - - it 'works with existing message records', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - end - - describe 'prompt_messages accessor' do - it 'provides prompt_messages accessor', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - - it 'includes user and assistant messages', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - - it 'includes tool call messages when tools are used', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - - it 'only includes messages from this prompt call', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - end - - describe 'streaming support' do - it 'supports streaming blocks', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - end - - describe 'configuration preservation' do - it 'preserves tools configuration', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - - it 'preserves temperature configuration', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - - it 'works with system instructions', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - end - - describe 'with file attachments' do - it 'supports with: parameter for attachments', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - - it 'works with pre-created message with attachments', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - end + # TODO: Add integration tests once VCR cassettes are recorded with real API credentials: + # - basic functionality (persistence, response format, existing message records) + # - prompt_messages accessor (user/assistant/tool messages, isolation) + # - streaming support + # - configuration preservation (tools, temperature, instructions) + # - file attachments (with: parameter, pre-created messages) + # - A/B testing use case describe 'error handling' do it 'does not leave orphaned messages on error' do @@ -839,11 +781,13 @@ def uploaded_file(path, type) initial_count = chat.messages.count # Mock the complete call to avoid API - allow_any_instance_of(RubyLLM::Chat).to receive(:complete).and_raise(StandardError, 'Simulated error') + llm_chat = chat.to_llm + 
allow(llm_chat).to receive(:complete).and_raise(StandardError, 'Simulated error') + allow(chat).to receive(:to_llm).and_return(llm_chat) - expect { + expect do chat.prompt('Test') - }.to raise_error(StandardError) + end.to raise_error(StandardError) # No messages should be persisted expect(chat.messages.reload.count).to eq(initial_count) @@ -853,7 +797,9 @@ def uploaded_file(path, type) chat = Chat.create!(model: model) # Mock to avoid API call - allow_any_instance_of(RubyLLM::Chat).to receive(:complete).and_raise(StandardError, 'Error') + llm_chat = chat.to_llm + allow(llm_chat).to receive(:complete).and_raise(StandardError, 'Error') + allow(chat).to receive(:to_llm).and_return(llm_chat) begin chat.prompt('Test') @@ -862,17 +808,11 @@ def uploaded_file(path, type) end # The key test: callbacks are restored (we can check this without API call) - llm_chat = chat.to_llm - on_hash = llm_chat.instance_variable_get(:@on) + llm_chat_after = chat.to_llm + on_hash = llm_chat_after.instance_variable_get(:@on) expect(on_hash[:new_message]).not_to be_nil expect(on_hash[:end_message]).not_to be_nil end end - - describe 'A/B testing use case' do - it 'enables A/B testing by generating multiple responses', :pending_vcr do - skip 'Requires VCR cassette recording with real API credentials' - end - end end end From 7141b88bb8911ac4ffcb06f48067c21869b173fd Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 18:11:29 -0500 Subject: [PATCH 05/10] update spec --- .../active_record/acts_as_attachment_spec.rb | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/spec/ruby_llm/active_record/acts_as_attachment_spec.rb b/spec/ruby_llm/active_record/acts_as_attachment_spec.rb index 1796cbec5..6e5494e21 100644 --- a/spec/ruby_llm/active_record/acts_as_attachment_spec.rb +++ b/spec/ruby_llm/active_record/acts_as_attachment_spec.rb @@ -271,5 +271,44 @@ def uploaded_file(path, type) expect(llm_message.content).to be_a(RubyLLM::Content::Raw) end end + + describe 'custom overrides' do + around do |example| + Message.class_eval do + attr_accessor :include_attachment_override + alias_method :__original_to_llm, :to_llm + + def to_llm(include_attachments: true) + include_attachments &&= @include_attachment_override != false + __original_to_llm(include_attachments:) + end + end + + example.run + ensure + Message.class_eval do + alias_method :to_llm, :__original_to_llm + remove_method :__original_to_llm + remove_method :include_attachment_override + remove_method :include_attachment_override= + end + end + + it 'allows message-level include_attachments customizations' do + chat = Chat.create!(model: model) + message = chat.messages.create!(role: 'user', content: 'Configurable') + message.attachments.attach( + io: File.open(image_path), + filename: 'ruby.png', + content_type: 'image/png' + ) + + message.include_attachment_override = false + + llm_message = message.to_llm + expect(llm_message.content).to eq('Configurable') + expect(llm_message.content).not_to be_a(RubyLLM::Content) + end + end end end From 0b71c9307b35aba50eaa563ebb323bc55802b22e Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 18:33:34 -0500 Subject: [PATCH 06/10] fix warnings --- spec/ruby_llm/active_record/acts_as_attachment_spec.rb | 2 +- spec/ruby_llm/active_record/acts_as_spec.rb | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/spec/ruby_llm/active_record/acts_as_attachment_spec.rb b/spec/ruby_llm/active_record/acts_as_attachment_spec.rb index 6e5494e21..bf76fe492 100644 --- 
a/spec/ruby_llm/active_record/acts_as_attachment_spec.rb +++ b/spec/ruby_llm/active_record/acts_as_attachment_spec.rb @@ -279,7 +279,7 @@ def uploaded_file(path, type) alias_method :__original_to_llm, :to_llm def to_llm(include_attachments: true) - include_attachments &&= @include_attachment_override != false + include_attachments &&= include_attachment_override != false __original_to_llm(include_attachments:) end end diff --git a/spec/ruby_llm/active_record/acts_as_spec.rb b/spec/ruby_llm/active_record/acts_as_spec.rb index 54efff8a6..7dbee8b51 100644 --- a/spec/ruby_llm/active_record/acts_as_spec.rb +++ b/spec/ruby_llm/active_record/acts_as_spec.rb @@ -770,9 +770,6 @@ def uploaded_file(path, type) # - basic functionality (persistence, response format, existing message records) # - prompt_messages accessor (user/assistant/tool messages, isolation) # - streaming support - # - configuration preservation (tools, temperature, instructions) - # - file attachments (with: parameter, pre-created messages) - # - A/B testing use case describe 'error handling' do it 'does not leave orphaned messages on error' do From 81b02e32fb8b10b9f225fec94c7c5a1c2752d5ec Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 18:52:23 -0500 Subject: [PATCH 07/10] specs --- ...ures_tool_calls_inside_prompt_messages.yml | 246 ++++++++++++++++++ ...a_response_without_persisting_messages.yml | 117 +++++++++ ...ion_behavior_supports_streaming_blocks.yml | 126 +++++++++ ...or_works_with_existing_message_records.yml | 118 +++++++++ spec/ruby_llm/active_record/acts_as_spec.rb | 51 +++- 5 files changed, 654 insertions(+), 4 deletions(-) create mode 100644 spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_captures_tool_calls_inside_prompt_messages.yml create mode 100644 spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_returns_a_response_without_persisting_messages.yml create mode 100644 spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_supports_streaming_blocks.yml create mode 100644 spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_works_with_existing_message_records.yml diff --git a/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_captures_tool_calls_inside_prompt_messages.yml b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_captures_tool_calls_inside_prompt_messages.yml new file mode 100644 index 000000000..5d0689735 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_captures_tool_calls_inside_prompt_messages.yml @@ -0,0 +1,246 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"What + is 7 * 8?"}],"stream":false,"tools":[{"type":"function","function":{"name":"calculator","description":"Performs + basic arithmetic","parameters":{"type":"object","properties":{"expression":{"type":"string","description":"Math + expression to evaluate"}},"required":["expression"],"additionalProperties":false,"strict":true}}}]}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 27 Nov 2025 23:20:35 GMT + 
Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - "" + Openai-Processing-Ms: + - '377' + Openai-Project: + - proj_8IvF223ClZDk5Wb6ow89q6az + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '488' + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Limit-Tokens: + - '10000000' + X-Ratelimit-Remaining-Requests: + - '9999' + X-Ratelimit-Remaining-Tokens: + - '9999994' + X-Ratelimit-Reset-Requests: + - 6ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - "" + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CgfZrBAWK0G8TY5MEe1zUDz8fNc5n", + "object": "chat.completion", + "created": 1764285635, + "model": "gpt-4.1-nano-2025-04-14", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_wgr66A6uhO1X6ax4ObqbDXcj", + "type": "function", + "function": { + "name": "calculator", + "arguments": "{\"expression\":\"7 * 8\"}" + } + } + ], + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 54, + "completion_tokens": 17, + "total_tokens": 71, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_eb30fd4545" + } + recorded_at: Thu, 27 Nov 2025 23:20:35 GMT +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"What + is 7 * 8?"},{"role":"assistant","tool_calls":[{"id":"call_wgr66A6uhO1X6ax4ObqbDXcj","type":"function","function":{"name":"calculator","arguments":"{\"expression\":\"7 + * 8\"}"}}]},{"role":"tool","content":"56","tool_call_id":"call_wgr66A6uhO1X6ax4ObqbDXcj"}],"stream":false,"tools":[{"type":"function","function":{"name":"calculator","description":"Performs + basic arithmetic","parameters":{"type":"object","properties":{"expression":{"type":"string","description":"Math + expression to evaluate"}},"required":["expression"],"additionalProperties":false,"strict":true}}}]}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 27 Nov 2025 23:20:36 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - "" + Openai-Processing-Ms: + - '220' + Openai-Project: + - proj_8IvF223ClZDk5Wb6ow89q6az + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '408' + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Limit-Tokens: + - '10000000' + X-Ratelimit-Remaining-Requests: + - '9999' + X-Ratelimit-Remaining-Tokens: + - '9999992' + X-Ratelimit-Reset-Requests: + - 6ms + X-Ratelimit-Reset-Tokens: + - 0s + 
X-Request-Id: + - "" + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CgfZsWrO7QQJgsmGNbdJq4kSLM3so", + "object": "chat.completion", + "created": 1764285636, + "model": "gpt-4.1-nano-2025-04-14", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "7 multiplied by 8 equals 56.", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 79, + "completion_tokens": 10, + "total_tokens": 89, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_eb30fd4545" + } + recorded_at: Thu, 27 Nov 2025 23:20:36 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_returns_a_response_without_persisting_messages.yml b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_returns_a_response_without_persisting_messages.yml new file mode 100644 index 000000000..ba55d8b8b --- /dev/null +++ b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_returns_a_response_without_persisting_messages.yml @@ -0,0 +1,117 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"Say hello + from prompt specs"}],"stream":false}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 27 Nov 2025 23:20:33 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - "" + Openai-Processing-Ms: + - '284' + Openai-Project: + - proj_8IvF223ClZDk5Wb6ow89q6az + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '437' + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Limit-Tokens: + - '10000000' + X-Ratelimit-Remaining-Requests: + - '9999' + X-Ratelimit-Remaining-Tokens: + - '9999991' + X-Ratelimit-Reset-Requests: + - 6ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - "" + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CgfZpKIR6GtGbruk9RbbaW1O3tClo", + "object": "chat.completion", + "created": 1764285633, + "model": "gpt-4.1-nano-2025-04-14", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello from prompt specs!", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 
12, + "completion_tokens": 5, + "total_tokens": 17, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_1a97b5aa6c" + } + recorded_at: Thu, 27 Nov 2025 23:20:33 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_supports_streaming_blocks.yml b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_supports_streaming_blocks.yml new file mode 100644 index 000000000..dc3364867 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_supports_streaming_blocks.yml @@ -0,0 +1,126 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"List + three gemstones"}],"stream":true,"stream_options":{"include_usage":true}}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 27 Nov 2025 23:20:37 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - "" + Openai-Processing-Ms: + - '225' + Openai-Project: + - proj_8IvF223ClZDk5Wb6ow89q6az + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '239' + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Limit-Tokens: + - '10000000' + X-Ratelimit-Remaining-Requests: + - '9999' + X-Ratelimit-Remaining-Tokens: + - '9999993' + X-Ratelimit-Reset-Requests: + - 6ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - "" + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |+ + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"6HUKK6cd"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":"Sure"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"WqhaQ4"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"tQg66yTf6"} + + data: 
{"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":" Here"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"k0Rhb"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":" are"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"g23kQh"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":" three"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xd94"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":" gemstones"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":""} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":":\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"sIhbXyv"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"8ROL6asDz"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Z4kRXJsx2"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":" Diamond"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"4U"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"fmiGam"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"JphMf4nHS"} + + data: 
{"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"bnmNH44Vi"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":" Sapphire"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"f"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":" \n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"WTLPUm"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wIckDtu5n"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"bKFfcYbMZ"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{"content":" Emerald"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Zu"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"dveH"} + + data: {"id":"chatcmpl-CgfZssYjtUGNsiIUR3vnd2E0fxos8","object":"chat.completion.chunk","created":1764285636,"model":"gpt-4.1-nano-2025-04-14","service_tier":"default","system_fingerprint":"fp_1a97b5aa6c","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":18,"total_tokens":28,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"gLbQEhSni"} + + data: [DONE] + + recorded_at: Thu, 27 Nov 2025 23:20:37 GMT +recorded_with: VCR 6.3.1 +... 
diff --git a/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_works_with_existing_message_records.yml b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_works_with_existing_message_records.yml new file mode 100644 index 000000000..b8cf02229 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_works_with_existing_message_records.yml @@ -0,0 +1,118 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"Use the + persisted message for prompt"},{"role":"user","content":"Use the persisted + message for prompt"}],"stream":false}' + headers: + User-Agent: + - Faraday v2.14.0 + Authorization: + - Bearer + Content-Type: + - application/json + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - "*/*" + response: + status: + code: 200 + message: OK + headers: + Date: + - Thu, 27 Nov 2025 23:20:34 GMT + Content-Type: + - application/json + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - "" + Openai-Processing-Ms: + - '406' + Openai-Project: + - proj_8IvF223ClZDk5Wb6ow89q6az + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '610' + X-Ratelimit-Limit-Requests: + - '10000' + X-Ratelimit-Limit-Tokens: + - '10000000' + X-Ratelimit-Remaining-Requests: + - '9999' + X-Ratelimit-Remaining-Tokens: + - '9999979' + X-Ratelimit-Reset-Requests: + - 6ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - "" + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - "" + - "" + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - "" + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: ASCII-8BIT + string: | + { + "id": "chatcmpl-CgfZqdax7FKa74jbileS5kXXJgVdC", + "object": "chat.completion", + "created": 1764285634, + "model": "gpt-4.1-nano-2025-04-14", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "I'm sorry, but I don't have access to any previous persisted messages. 
Could you please provide the message or specify how I can assist you?", + "refusal": null, + "annotations": [] + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 23, + "completion_tokens": 28, + "total_tokens": 51, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": "fp_1a97b5aa6c" + } + recorded_at: Thu, 27 Nov 2025 23:20:34 GMT +recorded_with: VCR 6.3.1 diff --git a/spec/ruby_llm/active_record/acts_as_spec.rb b/spec/ruby_llm/active_record/acts_as_spec.rb index 7dbee8b51..aab452fc5 100644 --- a/spec/ruby_llm/active_record/acts_as_spec.rb +++ b/spec/ruby_llm/active_record/acts_as_spec.rb @@ -766,10 +766,53 @@ def uploaded_file(path, type) end describe 'prompt method' do - # TODO: Add integration tests once VCR cassettes are recorded with real API credentials: - # - basic functionality (persistence, response format, existing message records) - # - prompt_messages accessor (user/assistant/tool messages, isolation) - # - streaming support + describe 'integration behavior' do + let(:chat) { Chat.create!(model: model) } + + it 'returns a response without persisting messages' do + response = chat.prompt('Say hello from prompt specs') + + expect(response).to be_a(RubyLLM::Message) + expect(response.content).to be_present + expect(chat.messages.count).to eq(0) + + roles = response.prompt_messages.map(&:role) + expect(roles).to include(:user, :assistant) + end + + it 'works with existing message records' do + message = chat.messages.create!(role: :user, content: 'Use the persisted message for prompt') + + response = chat.prompt(message) + + expect(response.content).to be_present + expect(chat.messages.count).to eq(1) + expect(response.prompt_messages.first.role).to eq(:user) + expect(response.prompt_messages.last.role).to eq(:assistant) + end + + it 'captures tool calls inside prompt_messages' do + chat.with_tool(Calculator) + + response = chat.prompt('What is 7 * 8?') + + expect(response.prompt_messages.any?(&:tool_call?)).to be true + expect(response.prompt_messages.map(&:role)).to include(:tool) + expect(chat.messages.count).to eq(0) + end + + it 'supports streaming blocks' do + collected_chunks = [] + + response = chat.prompt('List three gemstones') do |chunk| + collected_chunks << chunk.content if chunk.content + end + + expect(response.content).to be_present + expect(collected_chunks.join).to be_present + expect(chat.messages.count).to eq(0) + end + end describe 'error handling' do it 'does not leave orphaned messages on error' do From f9b03f03c2312a27720b4b6d43b72405eb2fa2ec Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 18:50:09 -0500 Subject: [PATCH 08/10] rm comments --- lib/ruby_llm/active_record/chat_methods.rb | 53 +--------------------- 1 file changed, 1 insertion(+), 52 deletions(-) diff --git a/lib/ruby_llm/active_record/chat_methods.rb b/lib/ruby_llm/active_record/chat_methods.rb index 0a3113ca8..d9c62c2f0 100644 --- a/lib/ruby_llm/active_record/chat_methods.rb +++ b/lib/ruby_llm/active_record/chat_methods.rb @@ -194,71 +194,20 @@ def ask(message, with: nil, &) alias say ask - ## - # Prompt the chat without persisting anything to the database. - # - # Like ask(), but nothing is saved to DB - no user message, no assistant response, - # no tool calls/results. 
Perfect for A/B testing, RAG, or speculative generation. - # - # @param message [String, Message, RubyLLM::Content] The prompt content or existing message record - # @param with [Array, nil] Optional file attachments - # @param block [Proc] Optional streaming block for real-time response chunks - # - # @return [RubyLLM::Message] Rich response object with: - # - content: The assistant's response text - # - tool_calls: Hash of tool calls (if LLM used tools on first response) - # - input_tokens, output_tokens: Token usage for final response - # - prompt_messages: Array of all messages generated by this prompt (user + tools + assistant) - # - role, model_id, etc. - # - # @note Tool side effects (API calls, DB writes) WILL happen, only messages don't persist - # - # @example A/B testing - # msg = chat.messages.create!(content: "Explain quantum computing") - # response_a = chat.with_temperature(0.3).prompt(msg) - # response_b = chat.with_temperature(0.9).prompt(msg) - # # User picks best, then: chat.messages.create!(role: :assistant, content: response_a.content) - # - # @example RAG without persistence - # docs = vector_search(query) - # response = chat.prompt("Context: #{docs}\n\nUser: #{query}") - # - # @example Pre-created message with attachments - # msg = chat.messages.create!(content: text, attachments: files) - # response = chat.prompt(msg) { |chunk| broadcast(chunk) } - # - # @example Access full prompt conversation (including tool calls) - # response = chat.with_tool(WeatherTool).prompt("What's the weather?") - # response.content # => "It's 72°F and sunny" - # response.prompt_messages.each { |m| puts "#{m.role}: #{m.content}" } - # # => user: What's the weather? - # # => assistant: [tool_call] - # # => tool: 72°F sunny - # # => assistant: It's 72°F and sunny - # def prompt(message, with: nil, &) - # Get configured chat instance (preserves tools, temperature, schema, etc.) llm_chat = to_llm - - # Capture starting point to isolate messages from this prompt messages_before_count = llm_chat.messages.count # Temporarily disable persistence callbacks on_hash = llm_chat.instance_variable_get(:@on) original_new_message = on_hash[:new_message] original_end_message = on_hash[:end_message] - on_hash[:new_message] = nil on_hash[:end_message] = nil - # Add prompt message (not persisted) + # Prompt response and expose prompt_messages without persistence add_prompt_message(llm_chat, message, with) - - # Complete without persistence - returns final response response = llm_chat.complete(&) - - # Attach all messages generated by this prompt (user + tools + assistant) - # Make a frozen copy for memory safety prompt_messages = llm_chat.messages[messages_before_count..].dup.freeze response.define_singleton_method(:prompt_messages) { prompt_messages } From efe01510726fe7d722fdebdc232bd4bf9b2e15b5 Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 18:58:06 -0500 Subject: [PATCH 09/10] rm comments --- lib/ruby_llm/active_record/acts_as_legacy.rb | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/lib/ruby_llm/active_record/acts_as_legacy.rb b/lib/ruby_llm/active_record/acts_as_legacy.rb index 641534482..b7f63b4b6 100644 --- a/lib/ruby_llm/active_record/acts_as_legacy.rb +++ b/lib/ruby_llm/active_record/acts_as_legacy.rb @@ -198,33 +198,20 @@ def ask(message, with: nil, &) alias say ask - ## - # Prompt the chat without persisting anything to the database. - # See ChatMethods#prompt for full documentation and examples. 
- # def prompt(message, with: nil, &) - # Get configured chat instance (preserves tools, temperature, schema, etc.) llm_chat = to_llm - - # Capture starting point to isolate messages from this prompt messages_before_count = llm_chat.messages.count # Temporarily disable persistence callbacks on_hash = llm_chat.instance_variable_get(:@on) original_new_message = on_hash[:new_message] original_end_message = on_hash[:end_message] - on_hash[:new_message] = nil on_hash[:end_message] = nil - # Add prompt message (not persisted) + # Prompt response and expose prompt_messages without persistence add_prompt_message(llm_chat, message, with) - - # Complete without persistence - returns final response response = llm_chat.complete(&) - - # Attach all messages generated by this prompt (user + tools + assistant) - # Make a frozen copy for memory safety prompt_messages = llm_chat.messages[messages_before_count..].dup.freeze response.define_singleton_method(:prompt_messages) { prompt_messages } From 0ed3b04646ae27029102d9f63d7170d86f9ea0fe Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Thu, 27 Nov 2025 23:05:04 -0500 Subject: [PATCH 10/10] cleanup --- lib/ruby_llm/active_record/acts_as_legacy.rb | 39 ++++++++++--------- lib/ruby_llm/active_record/chat_methods.rb | 39 ++++++++++--------- ...ures_tool_calls_inside_prompt_messages.yml | 30 +++++++------- 3 files changed, 57 insertions(+), 51 deletions(-) diff --git a/lib/ruby_llm/active_record/acts_as_legacy.rb b/lib/ruby_llm/active_record/acts_as_legacy.rb index b7f63b4b6..9d1ec3e76 100644 --- a/lib/ruby_llm/active_record/acts_as_legacy.rb +++ b/lib/ruby_llm/active_record/acts_as_legacy.rb @@ -202,24 +202,13 @@ def prompt(message, with: nil, &) llm_chat = to_llm messages_before_count = llm_chat.messages.count - # Temporarily disable persistence callbacks - on_hash = llm_chat.instance_variable_get(:@on) - original_new_message = on_hash[:new_message] - original_end_message = on_hash[:end_message] - on_hash[:new_message] = nil - on_hash[:end_message] = nil - - # Prompt response and expose prompt_messages without persistence - add_prompt_message(llm_chat, message, with) - response = llm_chat.complete(&) - prompt_messages = llm_chat.messages[messages_before_count..].dup.freeze - response.define_singleton_method(:prompt_messages) { prompt_messages } - - response - ensure - # Restore persistence callbacks for subsequent ask()/complete() calls - on_hash[:new_message] = original_new_message if on_hash - on_hash[:end_message] = original_end_message if on_hash + with_persistence_disabled(llm_chat) do + add_prompt_message(llm_chat, message, with) + response = llm_chat.complete(&) + prompt_messages = llm_chat.messages[messages_before_count..].dup.freeze + response.define_singleton_method(:prompt_messages) { prompt_messages } + response + end end def complete(...) 
@@ -283,6 +272,20 @@ def setup_persistence_callbacks @chat end + def with_persistence_disabled(llm_chat) + on_hash = llm_chat.instance_variable_get(:@on) + original_new_message = on_hash[:new_message] + original_end_message = on_hash[:end_message] + + on_hash[:new_message] = nil + on_hash[:end_message] = nil + + yield + ensure + on_hash[:new_message] = original_new_message if on_hash + on_hash[:end_message] = original_end_message if on_hash + end + def persist_new_message @message = messages.create!(role: :assistant, content: '') end diff --git a/lib/ruby_llm/active_record/chat_methods.rb b/lib/ruby_llm/active_record/chat_methods.rb index d9c62c2f0..8ca3b6aef 100644 --- a/lib/ruby_llm/active_record/chat_methods.rb +++ b/lib/ruby_llm/active_record/chat_methods.rb @@ -198,24 +198,13 @@ def prompt(message, with: nil, &) llm_chat = to_llm messages_before_count = llm_chat.messages.count - # Temporarily disable persistence callbacks - on_hash = llm_chat.instance_variable_get(:@on) - original_new_message = on_hash[:new_message] - original_end_message = on_hash[:end_message] - on_hash[:new_message] = nil - on_hash[:end_message] = nil - - # Prompt response and expose prompt_messages without persistence - add_prompt_message(llm_chat, message, with) - response = llm_chat.complete(&) - prompt_messages = llm_chat.messages[messages_before_count..].dup.freeze - response.define_singleton_method(:prompt_messages) { prompt_messages } - - response - ensure - # Restore persistence callbacks for subsequent ask()/complete() calls - on_hash[:new_message] = original_new_message if on_hash - on_hash[:end_message] = original_end_message if on_hash + with_persistence_disabled(llm_chat) do + add_prompt_message(llm_chat, message, with) + response = llm_chat.complete(&) + prompt_messages = llm_chat.messages[messages_before_count..].dup.freeze + response.define_singleton_method(:prompt_messages) { prompt_messages } + response + end end def complete(...) 
@@ -279,6 +268,20 @@ def setup_persistence_callbacks @chat end + def with_persistence_disabled(llm_chat) + on_hash = llm_chat.instance_variable_get(:@on) + original_new_message = on_hash[:new_message] + original_end_message = on_hash[:end_message] + + on_hash[:new_message] = nil + on_hash[:end_message] = nil + + yield + ensure + on_hash[:new_message] = original_new_message if on_hash + on_hash[:end_message] = original_end_message if on_hash + end + def persist_new_message @message = messages_association.create!(role: :assistant, content: '') end diff --git a/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_captures_tool_calls_inside_prompt_messages.yml b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_captures_tool_calls_inside_prompt_messages.yml index 5d0689735..18e65245c 100644 --- a/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_captures_tool_calls_inside_prompt_messages.yml +++ b/spec/fixtures/vcr_cassettes/activerecord_actsas_prompt_method_integration_behavior_captures_tool_calls_inside_prompt_messages.yml @@ -26,7 +26,7 @@ http_interactions: message: OK headers: Date: - - Thu, 27 Nov 2025 23:20:35 GMT + - Fri, 28 Nov 2025 03:58:41 GMT Content-Type: - application/json Transfer-Encoding: @@ -38,13 +38,13 @@ http_interactions: Openai-Organization: - "" Openai-Processing-Ms: - - '377' + - '233' Openai-Project: - proj_8IvF223ClZDk5Wb6ow89q6az Openai-Version: - '2020-10-01' X-Envoy-Upstream-Service-Time: - - '488' + - '247' X-Ratelimit-Limit-Requests: - '10000' X-Ratelimit-Limit-Tokens: @@ -80,9 +80,9 @@ http_interactions: encoding: ASCII-8BIT string: | { - "id": "chatcmpl-CgfZrBAWK0G8TY5MEe1zUDz8fNc5n", + "id": "chatcmpl-CgjuyVgfbrldG3glCDzJEuWc3m5ek", "object": "chat.completion", - "created": 1764285635, + "created": 1764302320, "model": "gpt-4.1-nano-2025-04-14", "choices": [ { @@ -92,7 +92,7 @@ http_interactions: "content": null, "tool_calls": [ { - "id": "call_wgr66A6uhO1X6ax4ObqbDXcj", + "id": "call_VpuwOJy4d0CubwKiJeAol32s", "type": "function", "function": { "name": "calculator", @@ -125,15 +125,15 @@ http_interactions: "service_tier": "default", "system_fingerprint": "fp_eb30fd4545" } - recorded_at: Thu, 27 Nov 2025 23:20:35 GMT + recorded_at: Fri, 28 Nov 2025 03:58:41 GMT - request: method: post uri: https://api.openai.com/v1/chat/completions body: encoding: UTF-8 string: '{"model":"gpt-4.1-nano","messages":[{"role":"user","content":"What - is 7 * 8?"},{"role":"assistant","tool_calls":[{"id":"call_wgr66A6uhO1X6ax4ObqbDXcj","type":"function","function":{"name":"calculator","arguments":"{\"expression\":\"7 - * 8\"}"}}]},{"role":"tool","content":"56","tool_call_id":"call_wgr66A6uhO1X6ax4ObqbDXcj"}],"stream":false,"tools":[{"type":"function","function":{"name":"calculator","description":"Performs + is 7 * 8?"},{"role":"assistant","tool_calls":[{"id":"call_VpuwOJy4d0CubwKiJeAol32s","type":"function","function":{"name":"calculator","arguments":"{\"expression\":\"7 + * 8\"}"}}]},{"role":"tool","content":"56","tool_call_id":"call_VpuwOJy4d0CubwKiJeAol32s"}],"stream":false,"tools":[{"type":"function","function":{"name":"calculator","description":"Performs basic arithmetic","parameters":{"type":"object","properties":{"expression":{"type":"string","description":"Math expression to evaluate"}},"required":["expression"],"additionalProperties":false,"strict":true}}}]}' headers: @@ -153,7 +153,7 @@ http_interactions: message: OK headers: Date: - - Thu, 27 Nov 2025 23:20:36 GMT + - Fri, 28 
Nov 2025 03:58:41 GMT Content-Type: - application/json Transfer-Encoding: @@ -165,13 +165,13 @@ http_interactions: Openai-Organization: - "" Openai-Processing-Ms: - - '220' + - '163' Openai-Project: - proj_8IvF223ClZDk5Wb6ow89q6az Openai-Version: - '2020-10-01' X-Envoy-Upstream-Service-Time: - - '408' + - '305' X-Ratelimit-Limit-Requests: - '10000' X-Ratelimit-Limit-Tokens: @@ -207,9 +207,9 @@ http_interactions: encoding: ASCII-8BIT string: | { - "id": "chatcmpl-CgfZsWrO7QQJgsmGNbdJq4kSLM3so", + "id": "chatcmpl-Cgjuz30ITfhJWOq6kauAYeTGTvtIU", "object": "chat.completion", - "created": 1764285636, + "created": 1764302321, "model": "gpt-4.1-nano-2025-04-14", "choices": [ { @@ -242,5 +242,5 @@ http_interactions: "service_tier": "default", "system_fingerprint": "fp_eb30fd4545" } - recorded_at: Thu, 27 Nov 2025 23:20:36 GMT + recorded_at: Fri, 28 Nov 2025 03:58:41 GMT recorded_with: VCR 6.3.1
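
---

A minimal usage sketch of the non-persisting prompt flow this series converges on. The names are taken from the specs and cassettes above (`Chat`, `Calculator`, the "gpt-4.1-nano" model string); the snippet assumes a Rails app whose Chat model includes these chat methods, and it is illustrative only, not part of the patch:

  # Nothing below writes to chat.messages: with_persistence_disabled nils
  # out the :new_message/:end_message callbacks for the duration of the
  # call and restores them in its ensure clause.
  chat = Chat.create!(model: 'gpt-4.1-nano')
  chat.with_tool(Calculator)

  response = chat.prompt('What is 7 * 8?')

  response.content       # final assistant answer, e.g. "7 * 8 = 56."
  chat.messages.count    # => 0 (no rows persisted)

  # prompt_messages is a frozen copy of every message the call produced;
  # with a tool call in play that is typically the user prompt, the
  # assistant tool call, the tool result, and the final assistant reply:
  response.prompt_messages.map(&:role)
  # => e.g. [:user, :assistant, :tool, :assistant]

  # Streaming behaves like ask(), still without persistence:
  chat.prompt('List three gemstones') { |chunk| print chunk.content }

Because the callback swap is wrapped in an ensure, a completion that raises restores persistence for later ask()/complete() calls and leaves no half-written rows behind, which is what the "does not leave orphaned messages on error" spec pins down.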