From d11cd0131f87f46c48ccf550aa21632b9a47ebbe Mon Sep 17 00:00:00 2001
From: Noel Blaschke <noelblaschke@users.noreply.github.com>
Date: Tue, 24 Mar 2026 12:00:13 +0100
Subject: [PATCH 1/6] [FIX] slug method should return string instead of symbol
 to allow Model.refresh!

---
 lib/ruby_llm/providers/openai_responses.rb       | 2 +-
 spec/ruby_llm/providers/openai_responses_spec.rb | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/ruby_llm/providers/openai_responses.rb b/lib/ruby_llm/providers/openai_responses.rb
index 5b8afba..526b669 100644
--- a/lib/ruby_llm/providers/openai_responses.rb
+++ b/lib/ruby_llm/providers/openai_responses.rb
@@ -214,7 +214,7 @@ def configuration_requirements
         end
 
         def slug
-          :openai_responses
+          'openai_responses' # String, not Symbol: matches the base Provider contract
         end
       end
     end
diff --git a/spec/ruby_llm/providers/openai_responses_spec.rb b/spec/ruby_llm/providers/openai_responses_spec.rb
index 45a7f18..11d7b2b 100644
--- a/spec/ruby_llm/providers/openai_responses_spec.rb
+++ b/spec/ruby_llm/providers/openai_responses_spec.rb
@@ -18,8 +18,8 @@
   end
 
   describe '.slug' do
-    it 'returns :openai_responses' do
-      expect(described_class.slug).to eq(:openai_responses)
+    it 'returns "openai_responses"' do
+      expect(described_class.slug).to eq('openai_responses')
     end
   end
 

From a3892b76a15a5dd7f9141a1574ab2d0ab60fce39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Chris=20Hasi=C5=84ski?= <krzysztof.hasinski@gmail.com>
Date: Fri, 27 Mar 2026 00:51:47 +0100
Subject: [PATCH 2/6] v0.5.3: Fix slug return type for Model.refresh!
 compatibility

Return string from slug instead of symbol to match the base Provider
contract, fixing ArgumentError when RubyLLM sorts models by provider.
---
 CHANGELOG.md                   | 6 ++++++
 lib/rubyllm_responses_api.rb   | 2 +-
 ruby_llm-responses_api.gemspec | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d0971cd..1989600 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.5.3] - 2026-03-27
+
+### Fixed
+
+- Return string from `slug` instead of symbol to fix `Model.refresh!` sorting crash (PR #6 by @noelblaschke)
+
 ## [0.5.2] - 2026-03-18
 
 ### Fixed
diff --git a/lib/rubyllm_responses_api.rb b/lib/rubyllm_responses_api.rb
index 3d8a009..65fc942 100644
--- a/lib/rubyllm_responses_api.rb
+++ b/lib/rubyllm_responses_api.rb
@@ -39,7 +39,7 @@
 module RubyLLM
   # ResponsesAPI namespace for direct access to helpers and version
   module ResponsesAPI
-    VERSION = '0.5.2'
+    VERSION = '0.5.3'
 
     # Shorthand access to built-in tool helpers
     BuiltInTools = Providers::OpenAIResponses::BuiltInTools
diff --git a/ruby_llm-responses_api.gemspec b/ruby_llm-responses_api.gemspec
index e061f8f..dda8eea 100644
--- a/ruby_llm-responses_api.gemspec
+++ b/ruby_llm-responses_api.gemspec
@@ -2,7 +2,7 @@
 
 Gem::Specification.new do |spec|
   spec.name = 'ruby_llm-responses_api'
-  spec.version = '0.5.2'
+  spec.version = '0.5.3'
   spec.authors = ['Chris Hasinski']
   spec.email = ['krzysztof.hasinski@gmail.com']
 

From 2ec882f4914ec5facc929c3cc0e98f58da3e61df Mon Sep 17 00:00:00 2001
From: Alexander Popov <sasho@hey.com>
Date: Thu, 19 Mar 2026 11:11:58 +0200
Subject: [PATCH 3/6] Preserve shell tool results in streaming

Accumulate final shell done events during Responses API streaming and rebuild the final output payload for the returned message. This preserves shell metadata such as call ids, action settings, environment, and stdout/stderr/outcome for both HTTP and WebSocket streaming.

Normalize shell result parsing around the provider-shaped fields and add coverage for streaming preservation, multi-call joining, sync parsing, and function-call regression safety. This keeps built-in shell results inspectable after streamed responses complete.
---
 .../openai_responses/built_in_tools.rb        |  71 ++++-
 .../providers/openai_responses/streaming.rb   | 186 ++++++++++++
 .../providers/openai_responses/web_socket.rb  |   5 +-
 .../openai_responses/shell_tool_spec.rb       |  96 +++++-
 .../openai_responses/streaming_spec.rb        | 275 ++++++++++++++++++
 5 files changed, 617 insertions(+), 16 deletions(-)

diff --git a/lib/ruby_llm/providers/openai_responses/built_in_tools.rb b/lib/ruby_llm/providers/openai_responses/built_in_tools.rb
index 5168c4f..91e7ce2 100644
--- a/lib/ruby_llm/providers/openai_responses/built_in_tools.rb
+++ b/lib/ruby_llm/providers/openai_responses/built_in_tools.rb
@@ -189,19 +189,31 @@ def parse_apply_patch_results(output)
 
         # Parse shell call results from output
         # @param output [Array] Response output array
-        # @return [Array<Hash>] Parsed shell call results
+        # @return [Array<Hash>] Parsed shell call results joined with output by call_id
         def parse_shell_call_results(output)
-          output
-            .select { |item| item['type'] == 'shell_call' }
-            .map do |item|
-              {
-                id: item['id'],
-                call_id: item['call_id'],
-                status: item['status'],
-                action: item['action'],
-                container_id: item['container_id']
-              }
-            end
+          items = Array(output) # nil output is treated as an empty list
+          call_order = shell_call_order(items) # unique call_ids in first-seen order
+          shell_calls_by_call_id = shell_call_items_by_call_id(items)
+          shell_outputs_by_call_id = shell_output_items_by_call_id(items)
+
+          call_order.map do |call_id| # one result Hash per call_id
+            build_shell_call_result(
+              call_id,
+              shell_call: shell_calls_by_call_id[call_id],
+              shell_outputs: shell_outputs_by_call_id[call_id]
+            )
+          end
+        end
+
+        # Parse shell call results from a final RubyLLM::Message (raw body may be a Hash or a JSON String)
+        # @param message [RubyLLM::Message, nil] Final message returned by chat completion
+        # @return [Array<Hash>] Parsed shell call results; [] if the body is missing, not a Hash, or invalid JSON
+        def parse_shell_call_results_from_message(message)
+          body = message&.raw&.body # nil-safe: a nil message or nil raw gives a nil body
+          body = JSON.parse(body) if body.is_a?(String)
+          parse_shell_call_results(body.is_a?(Hash) ? body['output'] : nil)
+        rescue JSON::ParserError
+          []
         end
 
         # Extract all citations from message content
@@ -236,6 +248,41 @@ def extract_citations(content)
             }.compact
           end
         end
+
+        private_class_method def shell_call_order(items)
+          items.filter_map do |item| # keeps call_ids of shell_call / shell_call_output items only
+            item['call_id'] if %w[shell_call shell_call_output].include?(item['type'])
+          end.uniq # first occurrence wins, so the order follows the output array
+        end
+
+        private_class_method def shell_call_items_by_call_id(items)
+          items
+            .select { |item| item['type'] == 'shell_call' }
+            .to_h { |item| [item['call_id'], item] } # on duplicate call_ids the last item wins
+        end
+
+        private_class_method def shell_output_items_by_call_id(items)
+          items
+            .select { |item| item['type'] == 'shell_call_output' }
+            .each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |item, result|
+              result[item['call_id']] << item # groups output items per call_id, in encounter order
+            end
+        end
+
+        private_class_method def build_shell_call_result(call_id, shell_call:, shell_outputs:)
+          shell_call ||= {} # nil when only shell_call_output items carry this call_id
+          shell_outputs ||= []
+          last_shell_output = shell_outputs.last
+
+          {
+            id: shell_call['id'],
+            call_id: call_id,
+            status: shell_call['status'] || last_shell_output&.dig('status'), # else the last output's status
+            environment: RubyLLM::Utils.deep_dup(shell_call['environment']),
+            action: RubyLLM::Utils.deep_dup(shell_call['action']),
+            output: shell_outputs.flat_map { |item| RubyLLM::Utils.deep_dup(item['output'] || []) }
+          }.compact # omits keys whose value is nil; :output is always an Array
+        end
       end
     end
   end
diff --git a/lib/ruby_llm/providers/openai_responses/streaming.rb b/lib/ruby_llm/providers/openai_responses/streaming.rb
index d697677..70a5343 100644
--- a/lib/ruby_llm/providers/openai_responses/streaming.rb
+++ b/lib/ruby_llm/providers/openai_responses/streaming.rb
@@ -1,17 +1,173 @@
 # frozen_string_literal: true
 
+require 'delegate'
+
 module RubyLLM
   module Providers
     class OpenAIResponses
       # Streaming methods for the OpenAI Responses API.
       # Handles SSE events with typed event format.
       module Streaming
+        class StreamRawResponse < SimpleDelegator # forwards to the wrapped response except for #body
+          attr_reader :body # defined here, so it takes precedence over the delegated #body
+
+          def initialize(response, body)
+            super(response) # response becomes the delegation target
+            @body = body
+          end
+        end
+
+        class CompletedResponseAccumulator
+          def initialize
+            @completed_response = nil
+            @output_items_by_index = {}
+            @shell_commands_by_output_index = Hash.new { |hash, key| hash[key] = {} }
+            @shell_outputs_by_item_id = {}
+            @shell_outputs_by_output_index = {}
+          end
+
+          def add(event)
+            return unless event.is_a?(Hash) # ignore anything that is not a parsed event
+
+            case event['type'] # event types without a branch below are ignored
+            when 'response.output_item.done'
+              add_output_item(event)
+            when 'response.shell_call_command.done'
+              add_shell_command(event)
+            when 'response.shell_call_output_content.done'
+              add_shell_output_content(event)
+            when 'response.completed'
+              @completed_response = RubyLLM::Utils.deep_dup(event['response'] || {}) # {} if absent
+            end
+          end
+
+          def build_response(raw_response)
+            body = build_body
+            base_response = raw_response || build_default_response # stub when no response is given
+            StreamRawResponse.new(base_response, body) # same response, #body replaced by the rebuilt one
+          end
+
+          private
+
+          def add_output_item(event)
+            output_index = event['output_index']
+            return if output_index.nil? # items are stored by index, so one is required
+
+            item = RubyLLM::Utils.deep_dup(event['item'] || {})
+
+            case item['type'] # fold in command/output events that were already recorded
+            when 'shell_call'
+              merge_shell_commands!(item, output_index)
+            when 'shell_call_output'
+              merge_shell_output!(item, output_index)
+            end
+
+            @output_items_by_index[output_index] = item # replaces any earlier item at this index
+          end
+
+          def add_shell_command(event)
+            output_index = event['output_index']
+            command_index = event['command_index']
+            command = event['command']
+            return if output_index.nil? || command_index.nil? || command.nil? # incomplete event
+
+            @shell_commands_by_output_index[output_index][command_index] = command
+
+            item = @output_items_by_index[output_index] # nil until the item's done event arrives
+            merge_shell_commands!(item, output_index) if item&.dig('type') == 'shell_call'
+          end
+
+          def add_shell_output_content(event)
+            output_index = event['output_index']
+            item_id = event['item_id']
+            output = RubyLLM::Utils.deep_dup(event['output'] || [])
+
+            @shell_outputs_by_item_id[item_id] = output if item_id
+            @shell_outputs_by_output_index[output_index] = output unless output_index.nil?
+
+            item = @output_items_by_index[output_index]
+            merge_shell_output!(item, output_index) if item&.dig('type') == 'shell_call_output'
+          end
+
+          def merge_shell_commands!(item, output_index)
+            return unless item
+
+            commands_by_index = @shell_commands_by_output_index[output_index]
+            return if commands_by_index.empty? # no command events recorded for this index
+
+            action = item['action'] ||= {}
+            commands = Array(action['commands']) # start from the commands the item already lists
+
+            commands_by_index.each do |command_index, command|
+              commands[command_index] = command # recorded command replaces the entry at its index
+            end
+
+            action['commands'] = commands
+          end
+
+          def merge_shell_output!(item, output_index)
+            return unless item
+
+            output = @shell_outputs_by_item_id[item['id']] || @shell_outputs_by_output_index[output_index]
+            item['output'] = RubyLLM::Utils.deep_dup(output) if output
+          end
+
+          def build_body
+            body = RubyLLM::Utils.deep_dup(@completed_response || {}) # {} when no response.completed was seen
+            output = merged_output(body['output'])
+            body['output'] = output if output.any? || body.key?('output') # never adds an empty 'output' key
+            body
+          end
+
+          def build_default_response
+            Class.new do # anonymous stand-in exposing only #status and #success?
+              def status
+                200
+              end
+
+              def success?
+                true
+              end
+            end.new
+          end
+
+          def merged_output(base_output)
+            output_by_index = {}
+
+            Array(base_output).each_with_index do |item, index| # seed with the given output, by position
+              output_by_index[index] = RubyLLM::Utils.deep_dup(item)
+            end
+
+            @output_items_by_index.each do |index, item| # recorded items override the same index
+              output_by_index[index] = RubyLLM::Utils.deep_dup(item)
+            end
+
+            output_by_index.sort_by(&:first).map(&:last)
+          end
+        end
+
         module_function
 
         def stream_url
           'responses'
         end
 
+        def stream_response(connection, payload, additional_headers = {}, &block)
+          accumulator = StreamAccumulator.new
+          completed_response = CompletedResponseAccumulator.new # collects done events for the final body
+
+          response = connection.post stream_url, payload do |req|
+            req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
+            apply_stream_on_data_handler(req, accumulator, completed_response, &block)
+          end
+
+          raw_response = completed_response.build_response(response) # response with the rebuilt body
+          message = accumulator.to_message(raw_response)
+          assign_response_id(message, raw_response)
+          RubyLLM.logger.debug { "Stream completed: #{message.content}" }
+          message
+        end
+
         def build_chunk(data) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
           event_type = data['type']
 
@@ -126,6 +282,36 @@ def parse_streaming_error(data)
         rescue JSON::ParserError
           [500, data]
         end
+
+        private
+
+        def apply_stream_on_data_handler(req, accumulator, completed_response, &block)
+          on_data = build_on_data_handler do |data|
+            handle_stream_event(data, accumulator, completed_response, &block)
+          end
+
+          if faraday_1? # on_data is set via []= here, via the on_data= setter otherwise
+            req.options[:on_data] = on_data
+          else
+            req.options.on_data = on_data
+          end
+        end
+
+        def handle_stream_event(data, accumulator, completed_response)
+          return unless data.is_a?(Hash)
+
+          completed_response.add(data) # record the raw event before reducing it to a chunk
+          chunk = build_chunk(data)
+          accumulator.add(chunk)
+          yield chunk if block_given? # forward the chunk to the caller's block, if any
+        end
+
+        def assign_response_id(message, raw_response)
+          return unless message.respond_to?(:response_id=) # skip messages without a response_id writer
+          return unless raw_response.body.is_a?(Hash)
+
+          message.response_id = raw_response.body['id'] # 'id' of the rebuilt response body
+        end
       end
     end
   end
diff --git a/lib/ruby_llm/providers/openai_responses/web_socket.rb b/lib/ruby_llm/providers/openai_responses/web_socket.rb
index 432137d..cdb2662 100644
--- a/lib/ruby_llm/providers/openai_responses/web_socket.rb
+++ b/lib/ruby_llm/providers/openai_responses/web_socket.rb
@@ -241,6 +241,7 @@ def send_json(payload)
 
         def accumulate_response(queue, &block)
           accumulator = StreamAccumulator.new
+          completed_response = Streaming::CompletedResponseAccumulator.new
 
           loop do
             raw = queue.pop
@@ -249,6 +250,7 @@ def accumulate_response(queue, &block)
             data = JSON.parse(raw)
             event_type = data['type']
 
+            completed_response.add(data)
             chunk = Streaming.build_chunk(data)
             accumulator.add(chunk)
             block&.call(chunk)
@@ -259,7 +261,8 @@ def accumulate_response(queue, &block)
             end
           end
 
-          message = accumulator.to_message(nil)
+          raw_response = completed_response.build_response(nil)
+          message = accumulator.to_message(raw_response)
           message.response_id = @last_response_id
           message
         end
diff --git a/spec/ruby_llm/providers/openai_responses/shell_tool_spec.rb b/spec/ruby_llm/providers/openai_responses/shell_tool_spec.rb
index 2c47c3b..0ba2374 100644
--- a/spec/ruby_llm/providers/openai_responses/shell_tool_spec.rb
+++ b/spec/ruby_llm/providers/openai_responses/shell_tool_spec.rb
@@ -71,7 +71,7 @@
     end
 
     describe '.parse_shell_call_results' do
-      it 'extracts shell_call items from output' do
+      it 'joins shell_call metadata with shell_call_output items by call_id' do
         output = [
           {
             'type' => 'shell_call',
@@ -79,7 +79,20 @@
             'call_id' => 'call_123',
             'status' => 'completed',
             'action' => { 'commands' => ['ls -la'], 'timeout_ms' => 120_000 },
-            'container_id' => 'cntr_abc'
+            'environment' => { 'type' => 'container_reference', 'container_id' => 'cntr_abc' }
+          },
+          {
+            'type' => 'shell_call_output',
+            'id' => 'sho_123',
+            'call_id' => 'call_123',
+            'status' => 'completed',
+            'output' => [
+              {
+                'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+                'stderr' => '',
+                'stdout' => "total 1\n"
+              }
+            ]
           },
           {
             'type' => 'message',
@@ -94,13 +107,90 @@
         expect(results.first[:call_id]).to eq('call_123')
         expect(results.first[:status]).to eq('completed')
         expect(results.first[:action]['commands']).to eq(['ls -la'])
-        expect(results.first[:container_id]).to eq('cntr_abc')
+        expect(results.first[:environment]).to eq(
+          { 'type' => 'container_reference', 'container_id' => 'cntr_abc' }
+        )
+        expect(results.first[:output]).to eq(
+          [
+            {
+              'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+              'stderr' => '',
+              'stdout' => "total 1\n"
+            }
+          ]
+        )
       end
 
       it 'returns empty array when no shell calls' do
         output = [{ 'type' => 'message', 'content' => [] }]
         expect(built_in.parse_shell_call_results(output)).to be_empty
       end
+
+      it 'parses shell results from a final message in sync mode' do
+        response = mock_response(
+          {
+            'id' => 'resp_shell',
+            'model' => 'gpt-5.2',
+            'output' => [
+              {
+                'type' => 'shell_call',
+                'id' => 'sh_1',
+                'call_id' => 'call_shell_1',
+                'status' => 'completed',
+                'action' => {
+                  'commands' => ['bundle exec ruby -v'],
+                  'timeout_ms' => nil
+                },
+                'environment' => {
+                  'type' => 'container_reference',
+                  'container_id' => 'cntr_sync'
+                }
+              },
+              {
+                'type' => 'shell_call_output',
+                'id' => 'sho_1',
+                'call_id' => 'call_shell_1',
+                'status' => 'completed',
+                'output' => [
+                  {
+                    'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+                    'stderr' => '',
+                    'stdout' => "ruby 3.4.8\n"
+                  }
+                ]
+              }
+            ]
+          }
+        )
+
+        message = RubyLLM::Providers::OpenAIResponses::Chat.parse_completion_response(response)
+        results = built_in.parse_shell_call_results_from_message(message)
+
+        expect(results).to eq(
+          [
+            {
+              id: 'sh_1',
+              call_id: 'call_shell_1',
+              status: 'completed',
+              environment: {
+                'type' => 'container_reference',
+                'container_id' => 'cntr_sync'
+              },
+              action: {
+                'commands' => ['bundle exec ruby -v'],
+                'timeout_ms' => nil
+              },
+              output: [
+                {
+                  'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+                  'stderr' => '',
+                  'stdout' => "ruby 3.4.8\n"
+                }
+              ]
+            }
+          ]
+        )
+      end
     end
   end
 end
diff --git a/spec/ruby_llm/providers/openai_responses/streaming_spec.rb b/spec/ruby_llm/providers/openai_responses/streaming_spec.rb
index 5493749..b99d1ea 100644
--- a/spec/ruby_llm/providers/openai_responses/streaming_spec.rb
+++ b/spec/ruby_llm/providers/openai_responses/streaming_spec.rb
@@ -212,4 +212,279 @@
       expect(result).to be_nil
     end
   end
+
+  describe '.stream_response' do
+    let(:provider) { RubyLLM::Providers::OpenAIResponses.new(RubyLLM.config) }
+    let(:payload) { { model: 'gpt-5.2', input: [], stream: true } }
+
+    def build_stream_connection(events, response: mock_response('', status: 200))
+      connection = instance_double(RubyLLM::Connection)
+
+      allow(connection).to receive(:post) do |_url, _payload, &block|
+        request = Struct.new(:headers, :options).new({}, Struct.new(:on_data).new)
+        block.call(request) # lets the code under test install its on_data handler
+
+        env = Struct.new(:status).new(200)
+        sse_body = build_sse_body(events)
+        request.options.on_data.call(sse_body, sse_body.bytesize, env) # replay all events in one call
+
+        response
+      end
+
+      connection
+    end
+
+    it 'preserves completed shell executions on the final message raw output' do
+      events = [
+        { 'type' => 'response.output_text.delta', 'delta' => 'ruby 3.4.8' },
+        {
+          'type' => 'response.output_item.done',
+          'item' => {
+            'id' => 'sh_1',
+            'type' => 'shell_call',
+            'status' => 'completed',
+            'action' => { 'commands' => ['placeholder'], 'timeout_ms' => nil },
+            'call_id' => 'call_shell_1',
+            'environment' => {
+              'type' => 'container_reference',
+              'container_id' => 'cntr_1'
+            }
+          },
+          'output_index' => 0
+        },
+        {
+          'type' => 'response.shell_call_command.done',
+          'command' => 'cd /mnt/data/project && bundle exec ruby -v',
+          'command_index' => 0,
+          'output_index' => 0
+        },
+        {
+          'type' => 'response.output_item.done',
+          'item' => {
+            'id' => 'sho_1',
+            'type' => 'shell_call_output',
+            'status' => 'completed',
+            'call_id' => 'call_shell_1',
+            'output' => []
+          },
+          'output_index' => 1
+        },
+        {
+          'type' => 'response.shell_call_output_content.done',
+          'command_index' => 0,
+          'item_id' => 'sho_1',
+          'output' => [
+            {
+              'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+              'stderr' => '',
+              'stdout' => "ruby 3.4.8\n"
+            }
+          ],
+          'output_index' => 1
+        },
+        {
+          'type' => 'response.output_item.done',
+          'item' => {
+            'id' => 'msg_1',
+            'type' => 'message',
+            'status' => 'completed',
+            'role' => 'assistant',
+            'content' => [
+              {
+                'type' => 'output_text',
+                'text' => 'ruby 3.4.8'
+              }
+            ]
+          },
+          'output_index' => 2
+        },
+        {
+          'type' => 'response.completed',
+          'response' => {
+            'id' => 'resp_shell',
+            'model' => 'gpt-5.2',
+            'output' => [
+              {
+                'id' => 'msg_1',
+                'type' => 'message',
+                'status' => 'completed',
+                'role' => 'assistant',
+                'content' => [
+                  {
+                    'type' => 'output_text',
+                    'text' => 'ruby 3.4.8'
+                  }
+                ]
+              }
+            ],
+            'usage' => { 'input_tokens' => 12, 'output_tokens' => 8 }
+          }
+        }
+      ]
+
+      message = provider.send(:stream_response, build_stream_connection(events), payload)
+      results = RubyLLM::Providers::OpenAIResponses::BuiltInTools.parse_shell_call_results_from_message(message)
+
+      expect(message.response_id).to eq('resp_shell')
+      expect(message.raw.body['output'].map { |item| item['type'] }).to eq(
+        %w[shell_call shell_call_output message]
+      )
+      expect(results).to eq(
+        [
+          {
+            id: 'sh_1',
+            call_id: 'call_shell_1',
+            status: 'completed',
+            environment: {
+              'type' => 'container_reference',
+              'container_id' => 'cntr_1'
+            },
+            action: {
+              'commands' => ['cd /mnt/data/project && bundle exec ruby -v'],
+              'timeout_ms' => nil
+            },
+            output: [
+              {
+                'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+                'stderr' => '',
+                'stdout' => "ruby 3.4.8\n"
+              }
+            ]
+          }
+        ]
+      )
+    end
+
+    it 'handles multiple completed shell calls in one streamed response' do
+      events = [
+        {
+          'type' => 'response.output_item.done',
+          'item' => {
+            'id' => 'sh_1',
+            'type' => 'shell_call',
+            'status' => 'completed',
+            'action' => { 'commands' => ['echo one'] },
+            'call_id' => 'call_shell_1',
+            'environment' => { 'type' => 'container_reference', 'container_id' => 'cntr_1' }
+          },
+          'output_index' => 0
+        },
+        {
+          'type' => 'response.output_item.done',
+          'item' => {
+            'id' => 'sho_1',
+            'type' => 'shell_call_output',
+            'status' => 'completed',
+            'call_id' => 'call_shell_1',
+            'output' => [
+              {
+                'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+                'stderr' => '',
+                'stdout' => "one\n"
+              }
+            ]
+          },
+          'output_index' => 1
+        },
+        {
+          'type' => 'response.output_item.done',
+          'item' => {
+            'id' => 'sh_2',
+            'type' => 'shell_call',
+            'status' => 'completed',
+            'action' => { 'commands' => ['echo two'] },
+            'call_id' => 'call_shell_2',
+            'environment' => { 'type' => 'container_reference', 'container_id' => 'cntr_2' }
+          },
+          'output_index' => 2
+        },
+        {
+          'type' => 'response.output_item.done',
+          'item' => {
+            'id' => 'sho_2',
+            'type' => 'shell_call_output',
+            'status' => 'completed',
+            'call_id' => 'call_shell_2',
+            'output' => [
+              {
+                'outcome' => { 'type' => 'timeout' },
+                'stderr' => 'timed out',
+                'stdout' => ''
+              }
+            ]
+          },
+          'output_index' => 3
+        },
+        {
+          'type' => 'response.completed',
+          'response' => {
+            'id' => 'resp_shell_multi',
+            'model' => 'gpt-5.2',
+            'output' => [],
+            'usage' => { 'input_tokens' => 10, 'output_tokens' => 5 }
+          }
+        }
+      ]
+
+      message = provider.send(:stream_response, build_stream_connection(events), payload)
+      results = RubyLLM::Providers::OpenAIResponses::BuiltInTools.parse_shell_call_results_from_message(message)
+
+      expect(results.map { |result| result[:call_id] }).to eq(%w[call_shell_1 call_shell_2])
+      expect(results.map { |result| result[:action]['commands'] }).to eq([['echo one'], ['echo two']])
+      expect(results.last[:output]).to eq(
+        [
+          {
+            'outcome' => { 'type' => 'timeout' },
+            'stderr' => 'timed out',
+            'stdout' => ''
+          }
+        ]
+      )
+    end
+
+    it 'keeps function-call streaming reconstruction intact' do
+      events = [
+        {
+          'type' => 'response.output_item.added',
+          'item' => {
+            'type' => 'function_call',
+            'call_id' => 'call_fn_1',
+            'name' => 'get_weather'
+          }
+        },
+        {
+          'type' => 'response.function_call_arguments.delta',
+          'call_id' => 'call_fn_1',
+          'delta' => '{"city":"'
+        },
+        {
+          'type' => 'response.function_call_arguments.delta',
+          'call_id' => 'call_fn_1',
+          'delta' => 'Berlin"}'
+        },
+        {
+          'type' => 'response.completed',
+          'response' => {
+            'id' => 'resp_fn_1',
+            'model' => 'gpt-4o',
+            'output' => [
+              {
+                'type' => 'function_call',
+                'call_id' => 'call_fn_1',
+                'name' => 'get_weather',
+                'arguments' => '{"city":"Berlin"}'
+              }
+            ],
+            'usage' => { 'input_tokens' => 6, 'output_tokens' => 4 }
+          }
+        }
+      ]
+
+      message = provider.send(:stream_response, build_stream_connection(events), payload)
+
+      expect(message.tool_calls['call_fn_1'].name).to eq('get_weather')
+      expect(message.tool_calls['call_fn_1'].arguments).to eq({ 'city' => 'Berlin' })
+      expect(message.raw.body['output'].first['type']).to eq('function_call')
+    end
+  end
 end

From cd34151c56716066433e70ad12a7abe57ea346f6 Mon Sep 17 00:00:00 2001
From: Alexander Popov <sasho@hey.com>
Date: Thu, 19 Mar 2026 11:59:17 +0200
Subject: [PATCH 4/6] Keep all shell command outputs in streaming

Accumulate final shell output content by command index instead of overwriting prior entries for the same shell call output item. This preserves stdout, stderr, and outcomes for multi-command shell executions in the final streamed message.

Add a regression spec covering a streamed shell call with multiple commands so earlier command results are retained in the rebuilt output payload and parser results.
---
 .../providers/openai_responses/streaming.rb   | 22 ++++-
 .../openai_responses/streaming_spec.rb        | 95 +++++++++++++++++++
 2 files changed, 112 insertions(+), 5 deletions(-)

diff --git a/lib/ruby_llm/providers/openai_responses/streaming.rb b/lib/ruby_llm/providers/openai_responses/streaming.rb
index 70a5343..460f61a 100644
--- a/lib/ruby_llm/providers/openai_responses/streaming.rb
+++ b/lib/ruby_llm/providers/openai_responses/streaming.rb
@@ -22,8 +22,8 @@ def initialize
             @completed_response = nil
             @output_items_by_index = {}
             @shell_commands_by_output_index = Hash.new { |hash, key| hash[key] = {} }
-            @shell_outputs_by_item_id = {}
-            @shell_outputs_by_output_index = {}
+            @shell_outputs_by_item_id = Hash.new { |hash, key| hash[key] = {} }
+            @shell_outputs_by_output_index = Hash.new { |hash, key| hash[key] = {} }
           end
 
           def add(event)
@@ -78,12 +78,14 @@ def add_shell_command(event)
           end
 
           def add_shell_output_content(event)
+            command_index = event['command_index']
             output_index = event['output_index']
             item_id = event['item_id']
             output = RubyLLM::Utils.deep_dup(event['output'] || [])
 
-            @shell_outputs_by_item_id[item_id] = output if item_id
-            @shell_outputs_by_output_index[output_index] = output unless output_index.nil?
+            @shell_outputs_by_item_id[item_id][command_index] = output if item_id
+
+            @shell_outputs_by_output_index[output_index][command_index] = output unless output_index.nil?
 
             item = @output_items_by_index[output_index]
             merge_shell_output!(item, output_index) if item&.dig('type') == 'shell_call_output'
@@ -108,7 +110,7 @@ def merge_shell_commands!(item, output_index)
           def merge_shell_output!(item, output_index)
             return unless item
 
-            output = @shell_outputs_by_item_id[item['id']] || @shell_outputs_by_output_index[output_index]
+            output = flattened_shell_output(item['id'], output_index)
             item['output'] = RubyLLM::Utils.deep_dup(output) if output
           end
 
@@ -144,6 +146,16 @@ def merged_output(base_output)
 
             output_by_index.sort_by(&:first).map(&:last)
           end
+
+          def flattened_shell_output(item_id, output_index)
+            output_by_command_index = @shell_outputs_by_item_id[item_id]
+            output_by_command_index = @shell_outputs_by_output_index[output_index] if output_by_command_index.empty?
+            return if output_by_command_index.empty?
+
+            output_by_command_index
+              .sort_by { |command_index, _| command_index || -1 }
+              .flat_map(&:last)
+          end
         end
 
         module_function
diff --git a/spec/ruby_llm/providers/openai_responses/streaming_spec.rb b/spec/ruby_llm/providers/openai_responses/streaming_spec.rb
index b99d1ea..8090bbd 100644
--- a/spec/ruby_llm/providers/openai_responses/streaming_spec.rb
+++ b/spec/ruby_llm/providers/openai_responses/streaming_spec.rb
@@ -442,6 +442,101 @@ def build_stream_connection(events, response: mock_response('', status: 200))
       )
     end
 
+    it 'preserves output for each command in a multi-command shell call' do
+      events = [
+        {
+          'type' => 'response.output_item.done',
+          'item' => {
+            'id' => 'sh_multi',
+            'type' => 'shell_call',
+            'status' => 'completed',
+            'action' => { 'commands' => ['echo one', 'echo two'] },
+            'call_id' => 'call_shell_multi',
+            'environment' => { 'type' => 'container_reference', 'container_id' => 'cntr_multi' }
+          },
+          'output_index' => 0
+        },
+        {
+          'type' => 'response.output_item.done',
+          'item' => {
+            'id' => 'sho_multi',
+            'type' => 'shell_call_output',
+            'status' => 'completed',
+            'call_id' => 'call_shell_multi',
+            'output' => []
+          },
+          'output_index' => 1
+        },
+        {
+          'type' => 'response.shell_call_output_content.done',
+          'command_index' => 0,
+          'item_id' => 'sho_multi',
+          'output' => [
+            {
+              'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+              'stderr' => '',
+              'stdout' => "one\n"
+            }
+          ],
+          'output_index' => 1
+        },
+        {
+          'type' => 'response.shell_call_output_content.done',
+          'command_index' => 1,
+          'item_id' => 'sho_multi',
+          'output' => [
+            {
+              'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+              'stderr' => '',
+              'stdout' => "two\n"
+            }
+          ],
+          'output_index' => 1
+        },
+        {
+          'type' => 'response.completed',
+          'response' => {
+            'id' => 'resp_shell_multi_command',
+            'model' => 'gpt-5.2',
+            'output' => [],
+            'usage' => { 'input_tokens' => 8, 'output_tokens' => 4 }
+          }
+        }
+      ]
+
+      message = provider.send(:stream_response, build_stream_connection(events), payload)
+      results = RubyLLM::Providers::OpenAIResponses::BuiltInTools.parse_shell_call_results_from_message(message)
+
+      expect(results).to eq(
+        [
+          {
+            id: 'sh_multi',
+            call_id: 'call_shell_multi',
+            status: 'completed',
+            environment: {
+              'type' => 'container_reference',
+              'container_id' => 'cntr_multi'
+            },
+            action: {
+              'commands' => ['echo one', 'echo two']
+            },
+            output: [
+              {
+                'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+                'stderr' => '',
+                'stdout' => "one\n"
+              },
+              {
+                'outcome' => { 'type' => 'exit', 'exit_code' => 0 },
+                'stderr' => '',
+                'stdout' => "two\n"
+              }
+            ]
+          }
+        ]
+      )
+    end
+
     it 'keeps function-call streaming reconstruction intact' do
       events = [
         {

From b8db35a9f38d2a80b00d9823ecfd0c9a794e9e06 Mon Sep 17 00:00:00 2001
From: Alexander Popov <sasho@hey.com>
Date: Thu, 16 Apr 2026 15:54:23 +0300
Subject: [PATCH 5/6] Support local shell Responses lifecycle

Parse Responses API shell_call output items that target a local shell environment as executable RubyLLM tool calls, including in streamed completions and when the item's environment is nil but a local shell tool is declared.

Serialize executor results as shell_call_output continuations with previous_response_id so function and local shell tool results continue incrementally without resending prior conversation state. Keep local_shell_executor params out of API payloads for both symbol and string keys.

Preserve Responses function_call_output string values when regular tools return Content::Raw, and add lifecycle, streaming, and regression coverage plus README guidance for local shell executors.
---
 README.md                                     |  61 ++-
 lib/ruby_llm/providers/openai_responses.rb    |   2 +
 .../providers/openai_responses/chat.rb        | 118 +++--
 .../openai_responses/local_shell_executor.rb  | 151 +++++++
 .../providers/openai_responses/streaming.rb   |  18 +-
 lib/rubyllm_responses_api.rb                  |   1 +
 .../function_calling_complex_args.yml         |   4 +-
 spec/cassettes/function_calling_multiple.yml  |   4 +-
 spec/cassettes/function_calling_single.yml    |   3 +-
 .../providers/openai_responses/chat_spec.rb   |  16 +
 .../local_shell_lifecycle_spec.rb             | 405 ++++++++++++++++++
 .../openai_responses/streaming_spec.rb        |  63 +++
 12 files changed, 799 insertions(+), 47 deletions(-)
 create mode 100644 lib/ruby_llm/providers/openai_responses/local_shell_executor.rb
 create mode 100644 spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb

diff --git a/README.md b/README.md
index a9af0ea..98e7946 100644
--- a/README.md
+++ b/README.md
@@ -122,10 +122,69 @@ tool = RubyLLM::ResponsesAPI::BuiltInTools.shell(
 # With memory limit
 tool = RubyLLM::ResponsesAPI::BuiltInTools.shell(memory_limit: '4g')
 
-# Local execution (you handle running commands yourself)
+# Local execution
 tool = RubyLLM::ResponsesAPI::BuiltInTools.shell(environment_type: 'local')
 ```
 
+For local shell environments, provide an executor with `local_shell_executor`. The executor can be any object that responds to `#call(shell_call)`, such as a lambda, service object, adapter, or class instance. It can run commands in a local process, a sandbox, a remote worker, or any other environment your application controls.
+
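+The gem passes each shell call to the executor and wraps its result; it does not sandbox or limit commands itself. The executor is responsible for security, the working directory (cwd), sandboxing, timeout behavior, output truncation, and permissions.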
+
+The executor receives the raw `shell_call` hash from the Responses API. Common fields include:
+
+```ruby
+shell_call['call_id']
+shell_call.dig('action', 'commands')
+shell_call.dig('action', 'timeout_ms')
+shell_call.dig('action', 'max_output_length')
+shell_call['environment']
+```
+
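+Return an array of command result hashes; any other return value, or a non-hash element, raises `RubyLLM::Error`. Keys may be strings or symbols (they are converted to strings). Each result should include `stdout`, `stderr`, and `outcome`: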
+
+```ruby
+[
+  {
+    'stdout' => "output\n",
+    'stderr' => '',
+    'outcome' => { 'type' => 'exit', 'exit_code' => 0 }
+  }
+]
+```
+
+For a timeout, return an outcome like:
+
+```ruby
+{ 'type' => 'timeout' }
+```
+
+Example (`run_command` is a placeholder for your own command runner):
+
+```ruby
+chat = RubyLLM.chat(model: 'gpt-5.2', provider: :openai_responses)
+
+chat.with_params(
+  tools: [
+    RubyLLM::ResponsesAPI::BuiltInTools.shell(environment_type: 'local')
+  ],
+  local_shell_executor: lambda do |shell_call|
+    commands = Array(shell_call.dig('action', 'commands'))
+
+    commands.map do |command|
+      result = run_command(command)
+
+      {
+        'stdout' => result.stdout,
+        'stderr' => result.stderr,
+        'outcome' => { 'type' => 'exit', 'exit_code' => result.exit_code }
+      }
+    end
+  end
+)
+
+chat.ask('Inspect the repo')
+```
+
 ### Apply Patch
 
 Structured diff-based file editing. Requires GPT-5 family models.
diff --git a/lib/ruby_llm/providers/openai_responses.rb b/lib/ruby_llm/providers/openai_responses.rb
index 526b669..6ad9b7c 100644
--- a/lib/ruby_llm/providers/openai_responses.rb
+++ b/lib/ruby_llm/providers/openai_responses.rb
@@ -20,6 +20,8 @@ def api_base
       # rubocop:disable Metrics/ParameterLists
       def complete(messages, tools:, temperature:, model:, params: {}, headers: {},
                    schema: nil, thinking: nil, tool_prefs: nil, &block)
+        params = params.except(:local_shell_executor, 'local_shell_executor')
+
         if params[:transport]&.to_sym == :websocket
           ws_complete(messages, tools: tools, temperature: temperature, model: model,
                                 params: params.except(:transport), schema: schema,
diff --git a/lib/ruby_llm/providers/openai_responses/chat.rb b/lib/ruby_llm/providers/openai_responses/chat.rb
index e62f4e0..ffdd0c0 100644
--- a/lib/ruby_llm/providers/openai_responses/chat.rb
+++ b/lib/ruby_llm/providers/openai_responses/chat.rb
@@ -19,14 +19,15 @@ def render_payload(messages, tools:, temperature:, model:, stream: false,
           system_messages, non_system_messages = messages.partition { |m| m.role == :system }
 
           instructions = system_messages.map { |m| extract_text_content(m.content) }.join("\n\n")
+          continuation_input = continuation_input_messages(non_system_messages)
 
           payload = {
             model: model.id,
-            input: format_input(non_system_messages),
+            input: format_input(continuation_input || non_system_messages),
             stream: stream
           }
 
-          payload[:instructions] = instructions unless instructions.empty?
+          payload[:instructions] = instructions unless instructions.empty? || continuation_input
           payload[:temperature] = temperature unless temperature.nil?
           apply_tools(payload, tools, tool_prefs)
           payload[:text] = build_schema_format(schema) if schema
@@ -85,6 +86,19 @@ def extract_last_response_id(messages)
             .last
         end
 
+        def continuation_input_messages(messages)
+          last_response_index = messages.rindex do |message|
+            message.role == :assistant && message.respond_to?(:response_id) && message.response_id
+          end
+          return nil unless last_response_index
+
+          trailing_messages = messages[(last_response_index + 1)..]
+          return nil unless trailing_messages&.any?
+          return nil unless trailing_messages.all? { |message| message.role == :tool }
+
+          trailing_messages
+        end
+
         def parse_completion_response(response)
           data = response.body
           return if data.nil? || data.empty?
@@ -98,8 +112,8 @@ def parse_completion_response(response)
           # Extract text content from output
           content = extract_output_text(output)
 
-          # Extract tool calls from function_call outputs
-          tool_calls = extract_tool_calls(output)
+          # Extract executable tool calls from function_call and local shell_call outputs
+          tool_calls = extract_tool_calls(output, response_tools: data['tools'])
 
           usage = data['usage'] || {}
           cached_tokens = usage.dig('input_tokens_details', 'cached_tokens')
@@ -118,17 +132,12 @@ def parse_completion_response(response)
           )
         end
 
-        def format_input(messages) # rubocop:disable Metrics/MethodLength
+        def format_input(messages)
           result = []
 
           messages.each do |msg|
             if msg.tool_call_id
-              # Tool result message - function_call_output type
-              result << {
-                type: 'function_call_output',
-                call_id: msg.tool_call_id,
-                output: extract_text_content(msg.content)
-              }
+              result << format_tool_result(msg)
             elsif msg.tool_calls&.any?
               # Assistant message with tool calls
               # First add any text content as a message
@@ -143,12 +152,7 @@ def format_input(messages) # rubocop:disable Metrics/MethodLength
 
               # Then add each function call as a separate item
               msg.tool_calls.each_value do |tc|
-                result << {
-                  type: 'function_call',
-                  call_id: tc.id,
-                  name: tc.name,
-                  arguments: tc.arguments.is_a?(String) ? tc.arguments : JSON.generate(tc.arguments)
-                }
+                result << format_tool_call(tc)
               end
             else
               # Regular message
@@ -163,6 +167,38 @@ def format_input(messages) # rubocop:disable Metrics/MethodLength
           result
         end
 
+        def format_tool_result(msg)
+          content = msg.content
+          return content.value if LocalShellExecutor.shell_call_output?(content)
+
+          {
+            type: 'function_call_output',
+            call_id: msg.tool_call_id,
+            output: format_function_tool_output(content)
+          }
+        end
+
+        def format_function_tool_output(content)
+          return raw_tool_output(content.value) if content.is_a?(RubyLLM::Content::Raw)
+
+          extract_text_content(content)
+        end
+
+        def raw_tool_output(value)
+          value.is_a?(String) ? value : JSON.generate(value)
+        end
+
+        def format_tool_call(tool_call)
+          return tool_call.shell_call if tool_call.is_a?(LocalShellToolCall)
+
+          {
+            type: 'function_call',
+            call_id: tool_call.id,
+            name: tool_call.name,
+            arguments: tool_call.arguments.is_a?(String) ? tool_call.arguments : JSON.generate(tool_call.arguments)
+          }
+        end
+
         def format_message_content(content, tool_calls = nil)
           parts = []
 
@@ -180,12 +216,7 @@ def format_message_content(content, tool_calls = nil)
           # Add tool calls if present (for assistant messages)
           if tool_calls&.any?
             tool_calls.each_value do |tc|
-              parts << {
-                type: 'function_call',
-                call_id: tc.id,
-                name: tc.name,
-                arguments: tc.arguments.is_a?(String) ? tc.arguments : JSON.generate(tc.arguments)
-              }
+              parts << format_tool_call(tc)
             end
           end
 
@@ -263,22 +294,37 @@ def extract_output_text(output)
             .join
         end
 
-        def extract_tool_calls(output)
-          function_calls = output.select { |item| item['type'] == 'function_call' }
-          return nil if function_calls.empty?
-
-          function_calls.to_h do |fc|
-            [
-              fc['call_id'],
-              ToolCall.new(
-                id: fc['call_id'],
-                name: fc['name'],
-                arguments: parse_arguments(fc['arguments'])
-              )
-            ]
+        def extract_tool_calls(output, response_tools: nil)
+          executable_calls = output.select do |item|
+            item['type'] == 'function_call' || local_shell_call?(item, response_tools: response_tools)
+          end
+          return nil if executable_calls.empty?
+
+          executable_calls.to_h do |item|
+            tool_call = if local_shell_call?(item, response_tools: response_tools)
+                          LocalShellToolCall.new(item)
+                        else
+                          ToolCall.new(
+                            id: item['call_id'],
+                            name: item['name'],
+                            arguments: parse_arguments(item['arguments'])
+                          )
+                        end
+            [tool_call.id, tool_call]
           end
         end
 
+        def local_shell_call?(item, response_tools: nil)
+          return false unless item['type'] == 'shell_call'
+          return true if item.dig('environment', 'type') == 'local'
+
+          item['environment'].nil? && local_shell_tool_configured?(response_tools)
+        end
+
+        def local_shell_tool_configured?(tools)
+          Array(tools).any? { |tool| tool['type'] == 'shell' && tool.dig('environment', 'type') == 'local' }
+        end
+
         def parse_arguments(arguments)
           return {} if arguments.nil? || arguments.empty?
           return arguments if arguments.is_a?(Hash)
diff --git a/lib/ruby_llm/providers/openai_responses/local_shell_executor.rb b/lib/ruby_llm/providers/openai_responses/local_shell_executor.rb
new file mode 100644
index 0000000..3697424
--- /dev/null
+++ b/lib/ruby_llm/providers/openai_responses/local_shell_executor.rb
@@ -0,0 +1,151 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAIResponses
+      # Executes Responses API local shell calls through RubyLLM's normal tool lifecycle.
+      class LocalShellExecutor
+        TOOL_NAME = 'openai_responses_local_shell'
+
+        attr_reader :executor
+
+        def initialize(executor = nil, &block)
+          @executor = executor || block
+          return if @executor.nil? || @executor.respond_to?(:call)
+
+          raise ArgumentError, 'local_shell_executor must respond to #call.'
+        end
+
+        def name
+          TOOL_NAME
+        end
+
+        def description
+          'Execute an OpenAI Responses API local shell call.'
+        end
+
+        def parameters
+          []
+        end
+
+        def params_schema
+          {
+            'type' => 'object',
+            'properties' => {},
+            'additionalProperties' => true
+          }
+        end
+
+        def provider_params
+          {}
+        end
+
+        def call(shell_call)
+          raise RubyLLM::Error, 'OpenAI Responses local shell call requires a local_shell_executor.' unless executor
+
+          RubyLLM::Content::Raw.new(normalize_output(shell_call, executor.call(shell_call)))
+        end
+
+        def self.local_shell_tool_call?(tool_call)
+          tool_call.name == TOOL_NAME
+        end
+
+        def self.shell_call_output?(content)
+          content.is_a?(RubyLLM::Content::Raw) && content.value.is_a?(Hash) &&
+            content.value['type'] == 'shell_call_output'
+        end
+
+        def self.shell_call_id(shell_call)
+          shell_call['call_id'] || shell_call['id']
+        end
+
+        private
+
+        def normalize_output(shell_call, result)
+          normalized = wrap_output(shell_call, normalize_command_results(result))
+          validate_output!(normalized)
+          normalized
+        end
+
+        def normalize_command_results(result)
+          unless result.is_a?(Array)
+            raise RubyLLM::Error, 'local_shell_executor must return an array of command result hashes.'
+          end
+
+          result.map do |item|
+            raise RubyLLM::Error, 'local_shell_executor command results must be hashes.' unless item.is_a?(Hash)
+
+            stringify_keys(item)
+          end
+        end
+
+        def wrap_output(shell_call, command_results)
+          action = shell_call['action'] || {}
+          {
+            'type' => 'shell_call_output',
+            'call_id' => self.class.shell_call_id(shell_call),
+            'max_output_length' => action['max_output_length'],
+            'output' => command_results
+          }.compact
+        end
+
+        def validate_output!(output)
+          raise RubyLLM::Error, 'local shell calls must include a call_id.' if blank?(output['call_id'])
+
+          return if output['output'].is_a?(Array)
+
+          raise RubyLLM::Error, 'local shell output must include an output array.'
+        end
+
+        def blank?(value)
+          value.nil? || (value.respond_to?(:empty?) && value.empty?)
+        end
+
+        def stringify_keys(value)
+          case value
+          when Hash
+            value.each_with_object({}) do |(key, val), result|
+              result[key.to_s] = stringify_keys(val)
+            end
+          when Array
+            value.map { |item| stringify_keys(item) }
+          else
+            value
+          end
+        end
+      end
+
+      # ToolCall subtype carrying the original Responses API shell_call item.
+      class LocalShellToolCall < RubyLLM::ToolCall
+        attr_reader :shell_call
+
+        def initialize(shell_call)
+          @shell_call = shell_call
+          super(
+            id: LocalShellExecutor.shell_call_id(shell_call),
+            name: LocalShellExecutor::TOOL_NAME,
+            arguments: shell_call
+          )
+        end
+      end
+
+      # Lets with_params(local_shell_executor: ...) execute local shell calls without
+      # exposing the executor as a function tool in the model request.
+      module ChatExtension
+        def execute_tool(tool_call)
+          return super unless LocalShellExecutor.local_shell_tool_call?(tool_call)
+
+          registered_tool = tools[tool_call.name.to_sym]
+          return registered_tool.call(tool_call.arguments) if registered_tool
+
+          executor = @params[:local_shell_executor] || @params['local_shell_executor']
+          LocalShellExecutor.new(executor).call(tool_call.arguments)
+        end
+
+        private :execute_tool
+      end
+    end
+  end
+end
+
+RubyLLM::Chat.prepend(RubyLLM::Providers::OpenAIResponses::ChatExtension)
diff --git a/lib/ruby_llm/providers/openai_responses/streaming.rb b/lib/ruby_llm/providers/openai_responses/streaming.rb
index 460f61a..7f38eb1 100644
--- a/lib/ruby_llm/providers/openai_responses/streaming.rb
+++ b/lib/ruby_llm/providers/openai_responses/streaming.rb
@@ -174,12 +174,26 @@ def stream_response(connection, payload, additional_headers = {}, &block)
           end
 
           raw_response = completed_response.build_response(response)
-          message = accumulator.to_message(raw_response)
+          message = message_from_stream(accumulator, raw_response)
           assign_response_id(message, raw_response)
-          RubyLLM.logger.debug { "Stream completed: #{message.content}" }
+          log_stream_completion(message)
           message
         end
 
+        def log_stream_completion(message)
+          content = message.content.to_s
+          return if content.empty?
+
+          RubyLLM.logger.debug { "Stream completed: #{content}" }
+        end
+
+        def message_from_stream(accumulator, raw_response)
+          parsed_message = Chat.parse_completion_response(raw_response)
+          return parsed_message if parsed_message&.tool_call?
+
+          accumulator.to_message(raw_response)
+        end
+
         def build_chunk(data) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength
           event_type = data['type']
 
diff --git a/lib/rubyllm_responses_api.rb b/lib/rubyllm_responses_api.rb
index 65fc942..35b0642 100644
--- a/lib/rubyllm_responses_api.rb
+++ b/lib/rubyllm_responses_api.rb
@@ -9,6 +9,7 @@
 require_relative 'ruby_llm/providers/openai_responses/capabilities'
 require_relative 'ruby_llm/providers/openai_responses/media'
 require_relative 'ruby_llm/providers/openai_responses/tools'
+require_relative 'ruby_llm/providers/openai_responses/local_shell_executor'
 require_relative 'ruby_llm/providers/openai_responses/models'
 require_relative 'ruby_llm/providers/openai_responses/streaming'
 require_relative 'ruby_llm/providers/openai_responses/chat'
diff --git a/spec/cassettes/function_calling_complex_args.yml b/spec/cassettes/function_calling_complex_args.yml
index f6697ba..b769ead 100644
--- a/spec/cassettes/function_calling_complex_args.yml
+++ b/spec/cassettes/function_calling_complex_args.yml
@@ -166,9 +166,7 @@ http_interactions:
     uri: https://api.openai.com/v1/responses
     body:
       encoding: UTF-8
-      string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"Calculate
-        (100 + 50) / 3"},{"type":"function_call","call_id":"call_ItxW0AA5Q8deQPzZfPxyJVYQ","name":"test_calculator","arguments":"{\"expression\":\"(100
-        + 50) / 3\"}"},{"type":"function_call_output","call_id":"call_ItxW0AA5Q8deQPzZfPxyJVYQ","output":"50"}],"stream":false,"tools":[{"type":"function","name":"test_calculator","description":"Perform
+      string: '{"model":"gpt-4o-mini","input":[{"type":"function_call_output","call_id":"call_ItxW0AA5Q8deQPzZfPxyJVYQ","output":"50"}],"stream":false,"tools":[{"type":"function","name":"test_calculator","description":"Perform
         basic math calculations","parameters":{"type":"object","properties":{"expression":{"type":"string","description":"Math
         expression to evaluate"}},"required":["expression"],"additionalProperties":false,"strict":true},"strict":true}],"previous_response_id":"resp_01a29ecb13810a7800699593b44cb481909e469516314f9bdc"}'
     headers:
diff --git a/spec/cassettes/function_calling_multiple.yml b/spec/cassettes/function_calling_multiple.yml
index 3fb08d3..b1ee4c5 100644
--- a/spec/cassettes/function_calling_multiple.yml
+++ b/spec/cassettes/function_calling_multiple.yml
@@ -196,9 +196,7 @@ http_interactions:
     uri: https://api.openai.com/v1/responses
     body:
       encoding: UTF-8
-      string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"What''s
-        the weather in Paris and what is 15 * 7?"},{"type":"function_call","call_id":"call_0vUxT1QXrfMbvAjuPGLhSNd9","name":"test_weather","arguments":"{\"location\":\"Paris\"}"},{"type":"function_call","call_id":"call_0uT1bW92IvkGQe0dPne6WBYM","name":"test_calculator","arguments":"{\"expression\":\"15
-        * 7\"}"},{"type":"function_call_output","call_id":"call_0vUxT1QXrfMbvAjuPGLhSNd9","output":"The
+      string: '{"model":"gpt-4o-mini","input":[{"type":"function_call_output","call_id":"call_0vUxT1QXrfMbvAjuPGLhSNd9","output":"The
         weather in Paris is sunny, 72°F"},{"type":"function_call_output","call_id":"call_0uT1bW92IvkGQe0dPne6WBYM","output":"105"}],"stream":false,"tools":[{"type":"function","name":"test_weather","description":"Get
         the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The
         city name"}},"required":["location"],"additionalProperties":false,"strict":true},"strict":true},{"type":"function","name":"test_calculator","description":"Perform
diff --git a/spec/cassettes/function_calling_single.yml b/spec/cassettes/function_calling_single.yml
index 850d1e9..88f0f9d 100644
--- a/spec/cassettes/function_calling_single.yml
+++ b/spec/cassettes/function_calling_single.yml
@@ -166,8 +166,7 @@ http_interactions:
     uri: https://api.openai.com/v1/responses
     body:
       encoding: UTF-8
-      string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"What''s
-        the weather in Tokyo?"},{"type":"function_call","call_id":"call_7xVIaW6HuwHg3hlPdSV6JVtJ","name":"test_weather","arguments":"{\"location\":\"Tokyo\"}"},{"type":"function_call_output","call_id":"call_7xVIaW6HuwHg3hlPdSV6JVtJ","output":"The
+      string: '{"model":"gpt-4o-mini","input":[{"type":"function_call_output","call_id":"call_7xVIaW6HuwHg3hlPdSV6JVtJ","output":"The
         weather in Tokyo is sunny, 72°F"}],"stream":false,"tools":[{"type":"function","name":"test_weather","description":"Get
         the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The
         city name"}},"required":["location"],"additionalProperties":false,"strict":true},"strict":true}],"previous_response_id":"resp_0adbe5450d1f3f4200699593b21dbc819694ad619e8649c8c9"}'
diff --git a/spec/ruby_llm/providers/openai_responses/chat_spec.rb b/spec/ruby_llm/providers/openai_responses/chat_spec.rb
index 30b870c..82c3de2 100644
--- a/spec/ruby_llm/providers/openai_responses/chat_spec.rb
+++ b/spec/ruby_llm/providers/openai_responses/chat_spec.rb
@@ -159,5 +159,21 @@
       expect(input.first[:call_id]).to eq('call_123')
       expect(input.first[:output]).to eq('{"result": "success"}')
     end
+
+    it 'serializes non-shell raw tool results as function output strings' do
+      messages = [
+        RubyLLM::Message.new(
+          role: :tool,
+          content: RubyLLM::Content::Raw.new({ 'result' => ['success'] }),
+          tool_call_id: 'call_123'
+        )
+      ]
+
+      input = chat_module.format_input(messages)
+
+      expect(input.first[:type]).to eq('function_call_output')
+      expect(input.first[:call_id]).to eq('call_123')
+      expect(input.first[:output]).to eq('{"result":["success"]}')
+    end
   end
 end
diff --git a/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb b/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb
new file mode 100644
index 0000000..e8a655b
--- /dev/null
+++ b/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb
@@ -0,0 +1,405 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+class LocalShellLifecycleEchoTool < RubyLLM::Tool
+  description 'Echo a value'
+  param :value, type: 'string', desc: 'Value to echo'
+
+  def execute(value:)
+    value
+  end
+end
+
+RSpec.describe 'OpenAI Responses local shell lifecycle' do
+  let(:endpoint) { 'https://api.openai.com/v1/responses' }
+  let(:model) { 'gpt-5.4' }
+  let(:shell_tool) { RubyLLM::ResponsesAPI::BuiltInTools.shell(environment_type: 'local') }
+
+  def build_chat
+    RubyLLM.chat(model: model, provider: :openai_responses, assume_model_exists: true)
+  end
+
+  def stub_responses(*bodies)
+    requests = []
+    response_index = 0
+
+    stub_request(:post, endpoint).to_return do |request|
+      requests << JSON.parse(request.body)
+      body = bodies.fetch(response_index)
+      response_index += 1
+      {
+        status: 200,
+        body: JSON.generate(body),
+        headers: { 'Content-Type' => 'application/json' }
+      }
+    end
+
+    requests
+  end
+
+  def stub_streaming_responses(*event_groups)
+    requests = []
+    response_index = 0
+
+    stub_request(:post, endpoint).to_return do |request|
+      requests << JSON.parse(request.body)
+      events = event_groups.fetch(response_index)
+      response_index += 1
+      {
+        status: 200,
+        body: build_sse_body(events),
+        headers: { 'Content-Type' => 'text/event-stream' }
+      }
+    end
+
+    requests
+  end
+
+  def shell_call_response(environment: { 'type' => 'local' })
+    {
+      'id' => 'resp_shell_1',
+      'model' => model,
+      'output' => [
+        {
+          'type' => 'shell_call',
+          'id' => 'sh_1',
+          'call_id' => 'call_shell_1',
+          'status' => 'in_progress',
+          'environment' => environment,
+          'action' => {
+            'commands' => ['pwd'],
+            'timeout_ms' => 10_000,
+            'max_output_length' => 2_000
+          }
+        }
+      ],
+      'tools' => [shell_tool],
+      'usage' => { 'input_tokens' => 12, 'output_tokens' => 6 }
+    }
+  end
+
+  def final_response
+    {
+      'id' => 'resp_final_1',
+      'model' => model,
+      'output' => [
+        {
+          'type' => 'message',
+          'role' => 'assistant',
+          'content' => [{ 'type' => 'output_text', 'text' => 'Done from shell.' }]
+        }
+      ],
+      'usage' => { 'input_tokens' => 8, 'output_tokens' => 4 }
+    }
+  end
+
+  def completed_event(response)
+    {
+      'type' => 'response.completed',
+      'response' => response
+    }
+  end
+
+  def final_response_stream_events
+    [
+      { 'type' => 'response.output_text.delta', 'delta' => 'Done ' },
+      { 'type' => 'response.output_text.delta', 'delta' => 'from shell.' },
+      completed_event(final_response)
+    ]
+  end
+
+  it 'executes local shell calls and continues with only shell_call_output' do
+    requests = stub_responses(shell_call_response, final_response)
+    executor_calls = []
+    tool_calls = []
+    tool_results = []
+    end_messages = []
+
+    local_shell_executor = lambda do |shell_call|
+      executor_calls << shell_call
+      [
+        {
+          'stdout' => "/repo\n",
+          'stderr' => '',
+          'outcome' => { 'type' => 'exit', 'exit_code' => 0 }
+        }
+      ]
+    end
+
+    chat = build_chat
+    chat.with_instructions('Be brief.')
+    chat.with_params(tools: [shell_tool], local_shell_executor: local_shell_executor)
+    chat.on_tool_call { |tool_call| tool_calls << tool_call }
+    chat.on_tool_result { |result| tool_results << result }
+    chat.on_end_message { |message| end_messages << message }
+
+    response = chat.ask('Inspect the repo')
+
+    expect(response.content).to eq('Done from shell.')
+    expect(executor_calls.length).to eq(1)
+    expect(executor_calls.first['call_id']).to eq('call_shell_1')
+    expect(tool_calls.first).to be_a(RubyLLM::Providers::OpenAIResponses::LocalShellToolCall)
+    expect(tool_results.first).to be_a(RubyLLM::Content::Raw)
+    expect(end_messages.map(&:role)).to eq(%i[assistant tool assistant])
+
+    expect(requests.length).to eq(2)
+    expect(requests.first['input'].first['content']).to eq('Inspect the repo')
+    expect(requests.first['instructions']).to eq('Be brief.')
+    expect(requests.first).not_to have_key('local_shell_executor')
+
+    continuation = requests.last
+    expect(continuation['previous_response_id']).to eq('resp_shell_1')
+    expect(continuation).not_to have_key('instructions')
+    expect(continuation).not_to have_key('local_shell_executor')
+    expect(continuation['input']).to eq(
+      [
+        {
+          'type' => 'shell_call_output',
+          'call_id' => 'call_shell_1',
+          'max_output_length' => 2_000,
+          'output' => [
+            {
+              'stdout' => "/repo\n",
+              'stderr' => '',
+              'outcome' => { 'type' => 'exit', 'exit_code' => 0 }
+            }
+          ]
+        }
+      ]
+    )
+  end
+
+  it 'streams final assistant content after executing a local shell call' do
+    requests = stub_streaming_responses(
+      [completed_event(shell_call_response(environment: nil))],
+      final_response_stream_events
+    )
+    executor_calls = []
+    streamed_content = []
+
+    chat = build_chat
+    chat.with_params(
+      tools: [shell_tool],
+      local_shell_executor: lambda do |shell_call|
+        executor_calls << shell_call
+        [{ 'stdout' => "/repo\n", 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }]
+      end
+    )
+
+    response = chat.ask('Inspect the repo') do |chunk|
+      streamed_content << chunk.content if chunk.content
+    end
+
+    expect(response.content).to eq('Done from shell.')
+    expect(streamed_content).to eq(['Done ', 'from shell.'])
+    expect(executor_calls.length).to eq(1)
+    expect(requests.length).to eq(2)
+    expect(requests.map { |request| request['stream'] }).to eq([true, true])
+    expect(requests.last['previous_response_id']).to eq('resp_shell_1')
+    expect(requests.last['input'].first['type']).to eq('shell_call_output')
+  end
+
+  it 'fails clearly when a local shell call has no executor' do
+    stub_responses(shell_call_response)
+
+    chat = build_chat
+    chat.with_params(tools: [shell_tool])
+
+    expect { chat.ask('Inspect the repo') }
+      .to raise_error(RubyLLM::Error, /local_shell_executor/)
+  end
+
+  it 'keeps string-keyed local shell executor params local-only' do
+    requests = stub_responses(shell_call_response, final_response)
+    executor_calls = []
+
+    chat = build_chat
+    chat.with_params(
+      **{
+        tools: [shell_tool],
+        'local_shell_executor' => lambda do |shell_call|
+          executor_calls << shell_call
+          [{ 'stdout' => "/repo\n", 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }]
+        end
+      }
+    )
+
+    response = chat.ask('Inspect the repo')
+
+    expect(response.content).to eq('Done from shell.')
+    expect(executor_calls.length).to eq(1)
+    expect(requests.length).to eq(2)
+    expect(requests.first).not_to have_key('local_shell_executor')
+    expect(requests.last).not_to have_key('local_shell_executor')
+  end
+
+  it 'uses shell call id as the output call_id when call_id is missing' do
+    shell_response = shell_call_response
+    shell_response['output'].first.delete('call_id')
+    requests = stub_responses(shell_response, final_response)
+
+    chat = build_chat
+    chat.with_params(
+      tools: [shell_tool],
+      local_shell_executor: lambda do |_shell_call|
+        [{ 'stdout' => "/repo\n", 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }]
+      end
+    )
+
+    response = chat.ask('Inspect the repo')
+
+    expect(response.content).to eq('Done from shell.')
+    expect(requests.last['input'].first['type']).to eq('shell_call_output')
+    expect(requests.last['input'].first['call_id']).to eq('sh_1')
+  end
+
+  it 'leaves hosted shell calls to existing built-in tool behavior' do
+    hosted_shell_tool = RubyLLM::ResponsesAPI::BuiltInTools.shell
+    response = shell_call_response(environment: { 'type' => 'container_auto' }).merge('tools' => [hosted_shell_tool])
+    requests = stub_responses(response)
+    executor_calls = []
+
+    chat = build_chat
+    chat.with_params(
+      tools: [RubyLLM::ResponsesAPI::BuiltInTools.shell],
+      local_shell_executor: ->(shell_call) { executor_calls << shell_call }
+    )
+
+    response = chat.ask('Inspect the repo')
+
+    expect(response.tool_call?).to be false
+    expect(executor_calls).to be_empty
+    expect(requests.length).to eq(1)
+  end
+
+  it 'executes shell calls with nil environment when the response declares a local shell tool' do
+    response = shell_call_response(environment: nil)
+    requests = stub_responses(response, final_response)
+    executor_calls = []
+
+    chat = build_chat
+    chat.with_params(
+      tools: [shell_tool],
+      local_shell_executor: lambda do |shell_call|
+        executor_calls << shell_call
+        [{ 'stdout' => "/repo\n", 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }]
+      end
+    )
+
+    final_message = chat.ask('Inspect the repo')
+
+    expect(final_message.content).to eq('Done from shell.')
+    expect(executor_calls.length).to eq(1)
+    expect(executor_calls.first['environment']).to be_nil
+    expect(requests.last['previous_response_id']).to eq('resp_shell_1')
+    expect(requests.last['input'].first['type']).to eq('shell_call_output')
+  end
+
+  it 'leaves nil-environment shell calls alone when the response does not declare a local shell tool' do
+    hosted_shell_tool = RubyLLM::ResponsesAPI::BuiltInTools.shell
+    response = shell_call_response(environment: nil).merge('tools' => [hosted_shell_tool])
+    requests = stub_responses(response)
+    executor_calls = []
+
+    chat = build_chat
+    chat.with_params(
+      tools: [hosted_shell_tool],
+      local_shell_executor: ->(shell_call) { executor_calls << shell_call }
+    )
+
+    response_message = chat.ask('Inspect the repo')
+
+    expect(response_message.tool_call?).to be false
+    expect(executor_calls).to be_empty
+    expect(requests.length).to eq(1)
+  end
+
+  it 'rejects non-array executor results' do
+    stub_responses(shell_call_response)
+
+    chat = build_chat
+    chat.with_params(
+      tools: [shell_tool],
+      local_shell_executor: ->(_shell_call) { { 'stdout' => 'ok' } }
+    )
+
+    expect { chat.ask('Inspect the repo') }
+      .to raise_error(RubyLLM::Error, /array of command result hashes/)
+  end
+
+  it 'mixes function tool calls and local shell calls in the same continuation' do
+    first_response = shell_call_response.merge(
+      'output' => [
+        {
+          'type' => 'function_call',
+          'call_id' => 'call_echo_1',
+          'name' => 'local_shell_lifecycle_echo',
+          'arguments' => '{"value":"hello"}'
+        },
+        shell_call_response['output'].first
+      ]
+    )
+    requests = stub_responses(first_response, final_response)
+
+    chat = build_chat
+    chat.with_tool(LocalShellLifecycleEchoTool)
+    function_tool = RubyLLM::Providers::OpenAIResponses::Tools.tool_for(chat.tools.fetch(:local_shell_lifecycle_echo))
+    chat.with_params(
+      tools: [function_tool, shell_tool],
+      local_shell_executor: lambda do |_shell_call|
+        [{ 'stdout' => 'ok', 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }]
+      end
+    )
+
+    response = chat.ask('Use both tools')
+
+    expect(response.content).to eq('Done from shell.')
+    expect(requests.last['previous_response_id']).to eq('resp_shell_1')
+    expect(requests.last['input'].map { |item| item['type'] }).to eq(%w[function_call_output shell_call_output])
+    expect(requests.last['input'].first['output']).to eq('hello')
+    expect(requests.last['input'].last['call_id']).to eq('call_shell_1')
+  end
+
+  it 'continues function-only tool calls with incremental input' do
+    first_response = {
+      'id' => 'resp_function_1',
+      'model' => model,
+      'output' => [
+        {
+          'type' => 'function_call',
+          'call_id' => 'call_echo_1',
+          'name' => 'local_shell_lifecycle_echo',
+          'arguments' => '{"value":"hello"}'
+        }
+      ],
+      'usage' => { 'input_tokens' => 12, 'output_tokens' => 6 }
+    }
+    requests = stub_responses(first_response, final_response)
+
+    chat = build_chat
+    chat.with_tool(LocalShellLifecycleEchoTool)
+    function_tool = RubyLLM::Providers::OpenAIResponses::Tools.tool_for(chat.tools.fetch(:local_shell_lifecycle_echo))
+    chat.with_params(
+      tools: [function_tool, shell_tool],
+      local_shell_executor: lambda do |_shell_call|
+        [{ 'stdout' => 'ok', 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }]
+      end
+    )
+
+    response = chat.ask('Use the function tool')
+
+    expect(response.content).to eq('Done from shell.')
+    expect(requests.last['previous_response_id']).to eq('resp_function_1')
+    expect(requests.last).not_to have_key('instructions')
+    expect(requests.last['input']).to eq(
+      [
+        {
+          'type' => 'function_call_output',
+          'call_id' => 'call_echo_1',
+          'output' => 'hello'
+        }
+      ]
+    )
+  end
+end
diff --git a/spec/ruby_llm/providers/openai_responses/streaming_spec.rb b/spec/ruby_llm/providers/openai_responses/streaming_spec.rb
index 8090bbd..b7a2e56 100644
--- a/spec/ruby_llm/providers/openai_responses/streaming_spec.rb
+++ b/spec/ruby_llm/providers/openai_responses/streaming_spec.rb
@@ -581,5 +581,68 @@ def build_stream_connection(events, response: mock_response('', status: 200))
       expect(message.tool_calls['call_fn_1'].arguments).to eq({ 'city' => 'Berlin' })
       expect(message.raw.body['output'].first['type']).to eq('function_call')
     end
+
+    it 'returns executable local shell tool calls from completed streamed output' do
+      events = [
+        {
+          'type' => 'response.completed',
+          'response' => {
+            'id' => 'resp_shell_local',
+            'model' => 'gpt-5.4',
+            'output' => [
+              {
+                'id' => 'sh_local_1',
+                'type' => 'shell_call',
+                'status' => 'completed',
+                'action' => { 'commands' => ['pwd'], 'timeout_ms' => 10_000 },
+                'call_id' => 'call_shell_local_1',
+                'environment' => nil
+              }
+            ],
+            'tools' => [
+              {
+                'type' => 'shell',
+                'environment' => { 'type' => 'local' }
+              }
+            ],
+            'usage' => { 'input_tokens' => 8, 'output_tokens' => 3 }
+          }
+        }
+      ]
+
+      message = provider.send(:stream_response, build_stream_connection(events), payload)
+
+      expect(message.response_id).to eq('resp_shell_local')
+      expect(message.tool_calls['call_shell_local_1']).to be_a(
+        RubyLLM::Providers::OpenAIResponses::LocalShellToolCall
+      )
+      expect(message.tool_calls['call_shell_local_1'].arguments['action']['commands']).to eq(['pwd'])
+    end
+  end
+
+  describe '.log_stream_completion' do
+    let(:logger) { instance_double(Logger) }
+
+    before do
+      allow(RubyLLM).to receive(:logger).and_return(logger)
+    end
+
+    it 'skips empty tool-call completions' do
+      message = instance_double(RubyLLM::Message, content: '')
+
+      expect(logger).not_to receive(:debug)
+
+      described_class.log_stream_completion(message)
+    end
+
+    it 'logs streamed assistant content' do
+      message = instance_double(RubyLLM::Message, content: 'Hello')
+
+      expect(logger).to receive(:debug) do |&block|
+        expect(block.call).to eq('Stream completed: Hello')
+      end
+
+      described_class.log_stream_completion(message)
+    end
   end
 end

From 13faa159c394d510f29033e523c03d64b5d77524 Mon Sep 17 00:00:00 2001
From: Alexander Popov <sasho@hey.com>
Date: Tue, 21 Apr 2026 12:32:29 +0300
Subject: [PATCH 6/6] Preserve instructions in Responses API state

OpenAI's Responses API treats the top-level instructions parameter as request-local when a request is chained with previous_response_id: instructions from the previous response are not carried over to the next one. This provider is intentionally stateful and relies on previous_response_id for follow-up requests, including tool-result continuation calls, so sending RubyLLM with_instructions content only through top-level instructions made those high-authority instructions fragile across the response chain. In particular, tool-result continuation requests omitted the parameter, so the instructions did not apply to them.

Match the core ruby_llm OpenAI provider more closely by leaving system messages in the normal input list and relying on format_role(:system) to render them as developer messages. Initial requests now persist those instructions as part of the conversation state, while tool-only continuation requests still detect trailing tool results from the non-system messages and submit only the incremental function or shell outputs.

Update the focused specs to assert that system instructions become developer input messages, that the top-level instructions parameter is omitted from chat payloads, that multiple system messages keep their order, and that local shell continuations keep using previous_response_id without resending instructions or non-tool input.
---
 .../providers/openai_responses/chat.rb        |  7 +--
 spec/cassettes/multiple_instructions.yml      | 54 ++++++++---------
 spec/cassettes/system_instructions.yml        | 58 +++++++++----------
 .../providers/openai_responses/chat_spec.rb   | 29 +++++++++-
 .../local_shell_lifecycle_spec.rb             | 11 +++-
 5 files changed, 93 insertions(+), 66 deletions(-)

diff --git a/lib/ruby_llm/providers/openai_responses/chat.rb b/lib/ruby_llm/providers/openai_responses/chat.rb
index ffdd0c0..469bb37 100644
--- a/lib/ruby_llm/providers/openai_responses/chat.rb
+++ b/lib/ruby_llm/providers/openai_responses/chat.rb
@@ -16,18 +16,15 @@ def completion_url
         def render_payload(messages, tools:, temperature:, model:, stream: false,
                            schema: nil, thinking: nil, tool_prefs: nil) # rubocop:disable Lint/UnusedMethodArgument
           tool_prefs ||= {}
-          system_messages, non_system_messages = messages.partition { |m| m.role == :system }
-
-          instructions = system_messages.map { |m| extract_text_content(m.content) }.join("\n\n")
+          non_system_messages = messages.reject { |m| m.role == :system }
           continuation_input = continuation_input_messages(non_system_messages)
 
           payload = {
             model: model.id,
-            input: format_input(continuation_input || non_system_messages),
+            input: format_input(continuation_input || messages),
             stream: stream
           }
 
-          payload[:instructions] = instructions unless instructions.empty? || continuation_input
           payload[:temperature] = temperature unless temperature.nil?
           apply_tools(payload, tools, tool_prefs)
           payload[:text] = build_schema_format(schema) if schema
diff --git a/spec/cassettes/multiple_instructions.yml b/spec/cassettes/multiple_instructions.yml
index 5f8022d..1430e38 100644
--- a/spec/cassettes/multiple_instructions.yml
+++ b/spec/cassettes/multiple_instructions.yml
@@ -5,11 +5,11 @@ http_interactions:
     uri: https://api.openai.com/v1/responses
     body:
       encoding: UTF-8
-      string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"What
-        is Ruby?"}],"stream":false,"instructions":"Always be concise."}'
+      string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"developer","content":"Always
+        be concise."},{"type":"message","role":"user","content":"What is Ruby?"}],"stream":false}'
     headers:
       User-Agent:
-      - Faraday v2.14.0
+      - Faraday v2.14.1
       Authorization:
       - Bearer <OPENAI_API_KEY>
       Content-Type:
@@ -24,7 +24,7 @@ http_interactions:
       message: OK
     headers:
       Date:
-      - Wed, 18 Feb 2026 10:26:21 GMT
+      - Tue, 21 Apr 2026 10:08:25 GMT
       Content-Type:
       - application/json
       Transfer-Encoding:
@@ -34,64 +34,64 @@ http_interactions:
       Server:
       - cloudflare
       X-Ratelimit-Limit-Requests:
-      - '5000'
+      - '30000'
       X-Ratelimit-Limit-Tokens:
-      - '4000000'
+      - '150000000'
       X-Ratelimit-Remaining-Requests:
-      - '4999'
+      - '29999'
       X-Ratelimit-Remaining-Tokens:
-      - '3999961'
+      - '149999960'
       X-Ratelimit-Reset-Requests:
-      - 12ms
+      - 2ms
       X-Ratelimit-Reset-Tokens:
       - 0s
       Openai-Version:
       - '2020-10-01'
       Openai-Organization:
-      - user-h7m2t30jnyqzec1thkvt5rcd
+      - zipchat
       Openai-Project:
-      - proj_yBBvayevSgRR3SVjSmBgm0so
+      - proj_tor65x8ddwYUBmnOFz47CzNA
       X-Request-Id:
-      - req_c2611485e638467f8d99e639a2340253
+      - req_b31618922b4c460db4be53bafb2d2e32
       Openai-Processing-Ms:
-      - '1630'
+      - '1800'
       Cf-Cache-Status:
       - DYNAMIC
       Set-Cookie:
-      - __cf_bm=vmIcw8dS4jtu8nkYU1hHgk75dBOiB62C98FnPW481P8-1771410380.039279-1.0.1.1-DOwKKstDvjCjT0Wul_M04mVoc5dGKnGCCLg0cBz8ICCNAgLeD8SG4Z8fkhRWXHtcw7zXlEESlDS8oP_8A2sOG_n_mIdnfvcfh3Mltw5wFR.4j0HQkNz.z9Pzuhf0LVgX;
-        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 18 Feb 2026
-        10:56:21 GMT
+      - __cf_bm=a_I8PUgYXnYdTH19jacwD_MEDNOY5rswxrImrVj2sfE-1776766102.725094-1.0.1.1-lt1V_qYIHlKtKgrhA03tHyfwBVOcGLK0MYrsbI_i3iHfHfUmM.0wRzrF9fX4rENvrYzFQH07Y5Rc7dzuaNI_8qM6TI7PNbUZ2.MA2nFrrb1Kxnokbv98MPM8WrD7LCGC;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 21 Apr 2026
+        10:38:25 GMT
       Strict-Transport-Security:
       - max-age=31536000; includeSubDomains; preload
       X-Content-Type-Options:
       - nosniff
       Cf-Ray:
-      - 9cfcd35b39273572-WAW
+      - 9efb964e0aab9e0c-SOF
       Alt-Svc:
       - h3=":443"; ma=86400
     body:
       encoding: ASCII-8BIT
       string: |-
         {
-          "id": "resp_0c507c033809d5e300699593cc23d081968e6506bb7ca3419d",
+          "id": "resp_05a31f415c9584410069e74c972980819ead042eac7fd719b9",
           "object": "response",
-          "created_at": 1771410380,
+          "created_at": 1776766103,
           "status": "completed",
           "background": false,
           "billing": {
             "payer": "developer"
           },
-          "completed_at": 1771410381,
+          "completed_at": 1776766104,
           "error": null,
           "frequency_penalty": 0.0,
           "incomplete_details": null,
-          "instructions": "Always be concise.",
+          "instructions": null,
           "max_output_tokens": null,
           "max_tool_calls": null,
           "model": "gpt-4o-mini-2024-07-18",
           "output": [
             {
-              "id": "msg_0c507c033809d5e300699593cc6ab881969ed5ba3dc8c8a2fa",
+              "id": "msg_05a31f415c9584410069e74c97f02c819eb3ed5ab894ff85d0",
               "type": "message",
               "status": "completed",
               "content": [
@@ -99,7 +99,7 @@ http_interactions:
                   "type": "output_text",
                   "annotations": [],
                   "logprobs": [],
-                  "text": "Ruby is a dynamic, open-source programming language known for its simplicity and productivity. It emphasizes object-oriented programming and features elegant syntax that is easy to read and write. Ruby is often used for web development, particularly with the Ruby on Rails framework, which streamlines the process of building web applications. The language supports various paradigms, including functional and imperative programming."
+                  "text": "Ruby is a dynamic, object-oriented programming language known for its simplicity and productivity. Created by Yukihiro \"Matz\" Matsumoto in the mid-1990s, Ruby emphasizes readability and allows developers to express ideas naturally. It's widely used in web development, particularly with the Ruby on Rails framework, which enables rapid application development. Ruby supports multiple programming paradigms, including functional and imperative styles."
                 }
               ],
               "role": "assistant"
@@ -109,7 +109,7 @@ http_interactions:
           "presence_penalty": 0.0,
           "previous_response_id": null,
           "prompt_cache_key": null,
-          "prompt_cache_retention": null,
+          "prompt_cache_retention": "in_memory",
           "reasoning": {
             "effort": null,
             "summary": null
@@ -134,14 +134,14 @@ http_interactions:
             "input_tokens_details": {
               "cached_tokens": 0
             },
-            "output_tokens": 73,
+            "output_tokens": 82,
             "output_tokens_details": {
               "reasoning_tokens": 0
             },
-            "total_tokens": 92
+            "total_tokens": 101
           },
           "user": null,
           "metadata": {}
         }
-  recorded_at: Wed, 18 Feb 2026 10:26:21 GMT
+  recorded_at: Tue, 21 Apr 2026 10:08:24 GMT
 recorded_with: VCR 6.4.0
diff --git a/spec/cassettes/system_instructions.yml b/spec/cassettes/system_instructions.yml
index 8311445..ee5b49d 100644
--- a/spec/cassettes/system_instructions.yml
+++ b/spec/cassettes/system_instructions.yml
@@ -5,12 +5,12 @@ http_interactions:
     uri: https://api.openai.com/v1/responses
     body:
       encoding: UTF-8
-      string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"Say
-        hello"}],"stream":false,"instructions":"You are a pirate. Always respond like
-        a pirate. Use arr and matey."}'
+      string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"developer","content":"You
+        are a pirate. Always respond like a pirate. Use arr and matey."},{"type":"message","role":"user","content":"Say
+        hello"}],"stream":false}'
     headers:
       User-Agent:
-      - Faraday v2.14.0
+      - Faraday v2.14.1
       Authorization:
       - Bearer <OPENAI_API_KEY>
       Content-Type:
@@ -25,7 +25,7 @@ http_interactions:
       message: OK
     headers:
       Date:
-      - Wed, 18 Feb 2026 10:26:19 GMT
+      - Tue, 21 Apr 2026 10:08:22 GMT
       Content-Type:
       - application/json
       Transfer-Encoding:
@@ -35,64 +35,64 @@ http_interactions:
       Server:
       - cloudflare
       X-Ratelimit-Limit-Requests:
-      - '5000'
+      - '30000'
       X-Ratelimit-Limit-Tokens:
-      - '4000000'
+      - '150000000'
       X-Ratelimit-Remaining-Requests:
-      - '4999'
+      - '29999'
       X-Ratelimit-Remaining-Tokens:
-      - '3999951'
+      - '149999950'
       X-Ratelimit-Reset-Requests:
-      - 12ms
+      - 2ms
       X-Ratelimit-Reset-Tokens:
       - 0s
       Openai-Version:
       - '2020-10-01'
       Openai-Organization:
-      - user-h7m2t30jnyqzec1thkvt5rcd
+      - zipchat
       Openai-Project:
-      - proj_yBBvayevSgRR3SVjSmBgm0so
+      - proj_tor65x8ddwYUBmnOFz47CzNA
       X-Request-Id:
-      - req_574d853dfe60413a80c29a5679b3cf72
+      - req_0a77a65a1120455083b9b49eee73ecbe
       Openai-Processing-Ms:
-      - '1110'
+      - '1761'
       Cf-Cache-Status:
       - DYNAMIC
       Set-Cookie:
-      - __cf_bm=zQx5QSDyeV.6DDJC7cIvjyAUtZzsqPvrYcwuBP4lytQ-1771410378.7320893-1.0.1.1-D_hOL2Ytee5MrfSC9u5ByG0CAAhh73eBCeSKGra0Q0oVCIncteDVyLxUYMlfHgjsqFgrhvI71ceH4olpw1sU.RGNrKvNk_Bdf3HwkvRTL6sgpJTS3CwLMxlSOCc8joxt;
-        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 18 Feb 2026
-        10:56:19 GMT
+      - __cf_bm=J97FjzXR4dWHvPrj.F2fLnKHHEqWDhLDSHzoa8pnZuw-1776766100.3974354-1.0.1.1-nu3o4jkVQamamVlBf7xsFjFjeMxJAoYXwlaqIAY0J1C9HKQDnHpw.Rx7bx2MKvkR1UfY4mnYFxJemHkYO_bGhRZe4CzIlndGSMFu95IM2okl3Yzw_Nl0yQy54M9PWzzj;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 21 Apr 2026
+        10:38:22 GMT
       Strict-Transport-Security:
       - max-age=31536000; includeSubDomains; preload
       X-Content-Type-Options:
       - nosniff
       Cf-Ray:
-      - 9cfcd35318e5ee48-WAW
+      - 9efb963f7819bda6-SOF
       Alt-Svc:
       - h3=":443"; ma=86400
     body:
       encoding: ASCII-8BIT
       string: |-
         {
-          "id": "resp_0f7853d04f51fbf700699593cad1308195afb021c508857bb3",
+          "id": "resp_09ce6a7b72e1e5980069e74c94dca881909706cc9ebfb3d8ed",
           "object": "response",
-          "created_at": 1771410378,
+          "created_at": 1776766100,
           "status": "completed",
           "background": false,
           "billing": {
-            "payer": "developer"
+            "payer": "openai"
           },
-          "completed_at": 1771410379,
+          "completed_at": 1776766102,
           "error": null,
           "frequency_penalty": 0.0,
           "incomplete_details": null,
-          "instructions": "You are a pirate. Always respond like a pirate. Use arr and matey.",
+          "instructions": null,
           "max_output_tokens": null,
           "max_tool_calls": null,
           "model": "gpt-4o-mini-2024-07-18",
           "output": [
             {
-              "id": "msg_0f7853d04f51fbf700699593cb73048195b8edf56a0b2c1e85",
+              "id": "msg_09ce6a7b72e1e5980069e74c9625dc8190a8375c07f30aa3d9",
               "type": "message",
               "status": "completed",
               "content": [
@@ -100,7 +100,7 @@ http_interactions:
                   "type": "output_text",
                   "annotations": [],
                   "logprobs": [],
-                  "text": "Ahoy there, matey! A hearty hello to ye! Arr! How be ye on this fine day?"
+                  "text": "Ahoy, matey! A hearty hello to ye! What be yer desire on this fine day upon the high seas? Arr!"
                 }
               ],
               "role": "assistant"
@@ -110,7 +110,7 @@ http_interactions:
           "presence_penalty": 0.0,
           "previous_response_id": null,
           "prompt_cache_key": null,
-          "prompt_cache_retention": null,
+          "prompt_cache_retention": "in_memory",
           "reasoning": {
             "effort": null,
             "summary": null
@@ -135,14 +135,14 @@ http_interactions:
             "input_tokens_details": {
               "cached_tokens": 0
             },
-            "output_tokens": 24,
+            "output_tokens": 28,
             "output_tokens_details": {
               "reasoning_tokens": 0
             },
-            "total_tokens": 54
+            "total_tokens": 58
           },
           "user": null,
           "metadata": {}
         }
-  recorded_at: Wed, 18 Feb 2026 10:26:19 GMT
+  recorded_at: Tue, 21 Apr 2026 10:08:22 GMT
 recorded_with: VCR 6.4.0
diff --git a/spec/ruby_llm/providers/openai_responses/chat_spec.rb b/spec/ruby_llm/providers/openai_responses/chat_spec.rb
index 82c3de2..f5d1d5e 100644
--- a/spec/ruby_llm/providers/openai_responses/chat_spec.rb
+++ b/spec/ruby_llm/providers/openai_responses/chat_spec.rb
@@ -29,7 +29,7 @@
       expect(payload[:stream]).to be false
     end
 
-    it 'extracts system messages to instructions' do
+    it 'formats system messages as developer input messages' do
       payload = chat_module.render_payload(
         [system_message, user_message],
         tools: {},
@@ -38,8 +38,31 @@
         stream: false
       )
 
-      expect(payload[:instructions]).to eq('You are a helpful assistant')
-      expect(payload[:input].length).to eq(1)
+      expect(payload).not_to have_key(:instructions)
+      expect(payload[:input]).to eq(
+        [
+          { type: 'message', role: 'developer', content: 'You are a helpful assistant' },
+          { type: 'message', role: 'user', content: 'Hello' }
+        ]
+      )
+    end
+
+    it 'preserves multiple system messages as developer input messages' do
+      second_system_message = RubyLLM::Message.new(role: :system, content: 'Always be concise')
+
+      payload = chat_module.render_payload(
+        [system_message, second_system_message, user_message],
+        tools: {},
+        temperature: nil,
+        model: model,
+        stream: false
+      )
+
+      expect(payload).not_to have_key(:instructions)
+      expect(payload[:input].map { |item| item[:role] }).to eq(%w[developer developer user])
+      expect(payload[:input].map { |item| item[:content] }).to eq(
+        ['You are a helpful assistant', 'Always be concise', 'Hello']
+      )
     end
 
     it 'includes temperature when provided' do
diff --git a/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb b/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb
index e8a655b..2ce824b 100644
--- a/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb
+++ b/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb
@@ -144,8 +144,15 @@ def final_response_stream_events
     expect(end_messages.map(&:role)).to eq(%i[assistant tool assistant])
 
     expect(requests.length).to eq(2)
-    expect(requests.first['input'].first['content']).to eq('Inspect the repo')
-    expect(requests.first['instructions']).to eq('Be brief.')
+    expect(requests.first['input'].first).to eq(
+      {
+        'type' => 'message',
+        'role' => 'developer',
+        'content' => 'Be brief.'
+      }
+    )
+    expect(requests.first['input'].last['content']).to eq('Inspect the repo')
+    expect(requests.first).not_to have_key('instructions')
     expect(requests.first).not_to have_key('local_shell_executor')
 
     continuation = requests.last