diff --git a/CHANGELOG.md b/CHANGELOG.md index d0971cd..1989600 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.3] - 2026-03-27 + +### Fixed + +- Return string from `slug` instead of symbol to fix `Model.refresh!` sorting crash (PR #6 by @noelblaschke) + ## [0.5.2] - 2026-03-18 ### Fixed diff --git a/README.md b/README.md index a9af0ea..98e7946 100644 --- a/README.md +++ b/README.md @@ -122,10 +122,69 @@ tool = RubyLLM::ResponsesAPI::BuiltInTools.shell( # With memory limit tool = RubyLLM::ResponsesAPI::BuiltInTools.shell(memory_limit: '4g') -# Local execution (you handle running commands yourself) +# Local execution tool = RubyLLM::ResponsesAPI::BuiltInTools.shell(environment_type: 'local') ``` +For local shell environments, provide an executor with `local_shell_executor`. The executor can be any object that responds to `#call(shell_call)`, such as a lambda, service object, adapter, or class instance. It can run commands in a local process, a sandbox, a remote worker, or any other environment your application controls. + +The executor is responsible for security, cwd, sandboxing, timeout behavior, output truncation, and permissions. + +The executor receives the raw `shell_call` hash from the Responses API. Common fields include: + +```ruby +shell_call['call_id'] +shell_call.dig('action', 'commands') +shell_call.dig('action', 'timeout_ms') +shell_call.dig('action', 'max_output_length') +shell_call['environment'] +``` + +Return an array of command result hashes. Each result should include `stdout`, `stderr`, and `outcome`: + +```ruby +[ + { + 'stdout' => "output\n", + 'stderr' => '', + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } + } +] +``` + +For a timeout, return an outcome like: + +```ruby +{ 'type' => 'timeout' } +``` + +Example: + +```ruby +chat = RubyLLM.chat(model: 'gpt-5.2', provider: :openai_responses) + +chat.with_params( + tools: [ + RubyLLM::ResponsesAPI::BuiltInTools.shell(environment_type: 'local') + ], + local_shell_executor: lambda do |shell_call| + commands = Array(shell_call.dig('action', 'commands')) + + commands.map do |command| + result = run_command(command) + + { + 'stdout' => result.stdout, + 'stderr' => result.stderr, + 'outcome' => { 'type' => 'exit', 'exit_code' => result.exit_code } + } + end + end +) + +chat.ask('Inspect the repo') +``` + ### Apply Patch Structured diff-based file editing. Requires GPT-5 family models. diff --git a/lib/ruby_llm/providers/openai_responses.rb b/lib/ruby_llm/providers/openai_responses.rb index 5b8afba..6ad9b7c 100644 --- a/lib/ruby_llm/providers/openai_responses.rb +++ b/lib/ruby_llm/providers/openai_responses.rb @@ -20,6 +20,8 @@ def api_base # rubocop:disable Metrics/ParameterLists def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, tool_prefs: nil, &block) + params = params.except(:local_shell_executor, 'local_shell_executor') + if params[:transport]&.to_sym == :websocket ws_complete(messages, tools: tools, temperature: temperature, model: model, params: params.except(:transport), schema: schema, @@ -214,7 +216,7 @@ def configuration_requirements end def slug - :openai_responses + 'openai_responses' end end end diff --git a/lib/ruby_llm/providers/openai_responses/built_in_tools.rb b/lib/ruby_llm/providers/openai_responses/built_in_tools.rb index 5168c4f..91e7ce2 100644 --- a/lib/ruby_llm/providers/openai_responses/built_in_tools.rb +++ b/lib/ruby_llm/providers/openai_responses/built_in_tools.rb @@ -189,19 +189,31 @@ def parse_apply_patch_results(output) # Parse shell call results from output # @param output [Array] Response output array - # @return [Array] Parsed shell call results + # @return [Array] Parsed shell call results joined with output by call_id def parse_shell_call_results(output) - output - .select { |item| item['type'] == 'shell_call' } - .map do |item| - { - id: item['id'], - call_id: item['call_id'], - status: item['status'], - action: item['action'], - container_id: item['container_id'] - } - end + items = Array(output) + call_order = shell_call_order(items) + shell_calls_by_call_id = shell_call_items_by_call_id(items) + shell_outputs_by_call_id = shell_output_items_by_call_id(items) + + call_order.map do |call_id| + build_shell_call_result( + call_id, + shell_call: shell_calls_by_call_id[call_id], + shell_outputs: shell_outputs_by_call_id[call_id] + ) + end + end + + # Parse shell call results from a final RubyLLM::Message + # @param message [RubyLLM::Message] Final message returned by chat completion + # @return [Array] Parsed shell call results + def parse_shell_call_results_from_message(message) + body = message&.raw&.body + body = JSON.parse(body) if body.is_a?(String) + parse_shell_call_results(body.is_a?(Hash) ? body['output'] : nil) + rescue JSON::ParserError + [] end # Extract all citations from message content @@ -236,6 +248,41 @@ def extract_citations(content) }.compact end end + + private_class_method def shell_call_order(items) + items.filter_map do |item| + item['call_id'] if %w[shell_call shell_call_output].include?(item['type']) + end.uniq + end + + private_class_method def shell_call_items_by_call_id(items) + items + .select { |item| item['type'] == 'shell_call' } + .to_h { |item| [item['call_id'], item] } + end + + private_class_method def shell_output_items_by_call_id(items) + items + .select { |item| item['type'] == 'shell_call_output' } + .each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |item, result| + result[item['call_id']] << item + end + end + + private_class_method def build_shell_call_result(call_id, shell_call:, shell_outputs:) + shell_call ||= {} + shell_outputs ||= [] + last_shell_output = shell_outputs.last + + { + id: shell_call['id'], + call_id: call_id, + status: shell_call['status'] || last_shell_output&.dig('status'), + environment: RubyLLM::Utils.deep_dup(shell_call['environment']), + action: RubyLLM::Utils.deep_dup(shell_call['action']), + output: shell_outputs.flat_map { |item| RubyLLM::Utils.deep_dup(item['output'] || []) } + }.compact + end end end end diff --git a/lib/ruby_llm/providers/openai_responses/chat.rb b/lib/ruby_llm/providers/openai_responses/chat.rb index e62f4e0..469bb37 100644 --- a/lib/ruby_llm/providers/openai_responses/chat.rb +++ b/lib/ruby_llm/providers/openai_responses/chat.rb @@ -16,17 +16,15 @@ def completion_url def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil, tool_prefs: nil) # rubocop:disable Lint/UnusedMethodArgument tool_prefs ||= {} - system_messages, non_system_messages = messages.partition { |m| m.role == :system } - - instructions = system_messages.map { |m| extract_text_content(m.content) }.join("\n\n") + non_system_messages = messages.reject { |m| m.role == :system } + continuation_input = continuation_input_messages(non_system_messages) payload = { model: model.id, - input: format_input(non_system_messages), + input: format_input(continuation_input || messages), stream: stream } - payload[:instructions] = instructions unless instructions.empty? payload[:temperature] = temperature unless temperature.nil? apply_tools(payload, tools, tool_prefs) payload[:text] = build_schema_format(schema) if schema @@ -85,6 +83,19 @@ def extract_last_response_id(messages) .last end + def continuation_input_messages(messages) + last_response_index = messages.rindex do |message| + message.role == :assistant && message.respond_to?(:response_id) && message.response_id + end + return nil unless last_response_index + + trailing_messages = messages[(last_response_index + 1)..] + return nil unless trailing_messages&.any? + return nil unless trailing_messages.all? { |message| message.role == :tool } + + trailing_messages + end + def parse_completion_response(response) data = response.body return if data.nil? || data.empty? @@ -98,8 +109,8 @@ def parse_completion_response(response) # Extract text content from output content = extract_output_text(output) - # Extract tool calls from function_call outputs - tool_calls = extract_tool_calls(output) + # Extract executable tool calls from function_call and local shell_call outputs + tool_calls = extract_tool_calls(output, response_tools: data['tools']) usage = data['usage'] || {} cached_tokens = usage.dig('input_tokens_details', 'cached_tokens') @@ -118,17 +129,12 @@ def parse_completion_response(response) ) end - def format_input(messages) # rubocop:disable Metrics/MethodLength + def format_input(messages) result = [] messages.each do |msg| if msg.tool_call_id - # Tool result message - function_call_output type - result << { - type: 'function_call_output', - call_id: msg.tool_call_id, - output: extract_text_content(msg.content) - } + result << format_tool_result(msg) elsif msg.tool_calls&.any? # Assistant message with tool calls # First add any text content as a message @@ -143,12 +149,7 @@ def format_input(messages) # rubocop:disable Metrics/MethodLength # Then add each function call as a separate item msg.tool_calls.each_value do |tc| - result << { - type: 'function_call', - call_id: tc.id, - name: tc.name, - arguments: tc.arguments.is_a?(String) ? tc.arguments : JSON.generate(tc.arguments) - } + result << format_tool_call(tc) end else # Regular message @@ -163,6 +164,38 @@ def format_input(messages) # rubocop:disable Metrics/MethodLength result end + def format_tool_result(msg) + content = msg.content + return content.value if LocalShellExecutor.shell_call_output?(content) + + { + type: 'function_call_output', + call_id: msg.tool_call_id, + output: format_function_tool_output(content) + } + end + + def format_function_tool_output(content) + return raw_tool_output(content.value) if content.is_a?(RubyLLM::Content::Raw) + + extract_text_content(content) + end + + def raw_tool_output(value) + value.is_a?(String) ? value : JSON.generate(value) + end + + def format_tool_call(tool_call) + return tool_call.shell_call if tool_call.is_a?(LocalShellToolCall) + + { + type: 'function_call', + call_id: tool_call.id, + name: tool_call.name, + arguments: tool_call.arguments.is_a?(String) ? tool_call.arguments : JSON.generate(tool_call.arguments) + } + end + def format_message_content(content, tool_calls = nil) parts = [] @@ -180,12 +213,7 @@ def format_message_content(content, tool_calls = nil) # Add tool calls if present (for assistant messages) if tool_calls&.any? tool_calls.each_value do |tc| - parts << { - type: 'function_call', - call_id: tc.id, - name: tc.name, - arguments: tc.arguments.is_a?(String) ? tc.arguments : JSON.generate(tc.arguments) - } + parts << format_tool_call(tc) end end @@ -263,20 +291,35 @@ def extract_output_text(output) .join end - def extract_tool_calls(output) - function_calls = output.select { |item| item['type'] == 'function_call' } - return nil if function_calls.empty? - - function_calls.to_h do |fc| - [ - fc['call_id'], - ToolCall.new( - id: fc['call_id'], - name: fc['name'], - arguments: parse_arguments(fc['arguments']) - ) - ] + def extract_tool_calls(output, response_tools: nil) + executable_calls = output.select do |item| + item['type'] == 'function_call' || local_shell_call?(item, response_tools: response_tools) end + return nil if executable_calls.empty? + + executable_calls.to_h do |item| + tool_call = if local_shell_call?(item, response_tools: response_tools) + LocalShellToolCall.new(item) + else + ToolCall.new( + id: item['call_id'], + name: item['name'], + arguments: parse_arguments(item['arguments']) + ) + end + [tool_call.id, tool_call] + end + end + + def local_shell_call?(item, response_tools: nil) + return false unless item['type'] == 'shell_call' + return true if item.dig('environment', 'type') == 'local' + + item['environment'].nil? && local_shell_tool_configured?(response_tools) + end + + def local_shell_tool_configured?(tools) + Array(tools).any? { |tool| tool['type'] == 'shell' && tool.dig('environment', 'type') == 'local' } end def parse_arguments(arguments) diff --git a/lib/ruby_llm/providers/openai_responses/local_shell_executor.rb b/lib/ruby_llm/providers/openai_responses/local_shell_executor.rb new file mode 100644 index 0000000..3697424 --- /dev/null +++ b/lib/ruby_llm/providers/openai_responses/local_shell_executor.rb @@ -0,0 +1,151 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class OpenAIResponses + # Executes Responses API local shell calls through RubyLLM's normal tool lifecycle. + class LocalShellExecutor + TOOL_NAME = 'openai_responses_local_shell' + + attr_reader :executor + + def initialize(executor = nil, &block) + @executor = executor || block + return if @executor.nil? || @executor.respond_to?(:call) + + raise ArgumentError, 'local_shell_executor must respond to #call.' + end + + def name + TOOL_NAME + end + + def description + 'Execute an OpenAI Responses API local shell call.' + end + + def parameters + [] + end + + def params_schema + { + 'type' => 'object', + 'properties' => {}, + 'additionalProperties' => true + } + end + + def provider_params + {} + end + + def call(shell_call) + raise RubyLLM::Error, 'OpenAI Responses local shell call requires a local_shell_executor.' unless executor + + RubyLLM::Content::Raw.new(normalize_output(shell_call, executor.call(shell_call))) + end + + def self.local_shell_tool_call?(tool_call) + tool_call.name == TOOL_NAME + end + + def self.shell_call_output?(content) + content.is_a?(RubyLLM::Content::Raw) && content.value.is_a?(Hash) && + content.value['type'] == 'shell_call_output' + end + + def self.shell_call_id(shell_call) + shell_call['call_id'] || shell_call['id'] + end + + private + + def normalize_output(shell_call, result) + normalized = wrap_output(shell_call, normalize_command_results(result)) + validate_output!(normalized) + normalized + end + + def normalize_command_results(result) + unless result.is_a?(Array) + raise RubyLLM::Error, 'local_shell_executor must return an array of command result hashes.' + end + + result.map do |item| + raise RubyLLM::Error, 'local_shell_executor command results must be hashes.' unless item.is_a?(Hash) + + stringify_keys(item) + end + end + + def wrap_output(shell_call, command_results) + action = shell_call['action'] || {} + { + 'type' => 'shell_call_output', + 'call_id' => self.class.shell_call_id(shell_call), + 'max_output_length' => action['max_output_length'], + 'output' => command_results + }.compact + end + + def validate_output!(output) + raise RubyLLM::Error, 'local shell calls must include a call_id.' if blank?(output['call_id']) + + return if output['output'].is_a?(Array) + + raise RubyLLM::Error, 'local shell output must include an output array.' + end + + def blank?(value) + value.nil? || (value.respond_to?(:empty?) && value.empty?) + end + + def stringify_keys(value) + case value + when Hash + value.each_with_object({}) do |(key, val), result| + result[key.to_s] = stringify_keys(val) + end + when Array + value.map { |item| stringify_keys(item) } + else + value + end + end + end + + # ToolCall subtype carrying the original Responses API shell_call item. + class LocalShellToolCall < RubyLLM::ToolCall + attr_reader :shell_call + + def initialize(shell_call) + @shell_call = shell_call + super( + id: LocalShellExecutor.shell_call_id(shell_call), + name: LocalShellExecutor::TOOL_NAME, + arguments: shell_call + ) + end + end + + # Lets with_params(local_shell_executor: ...) execute local shell calls without + # exposing the executor as a function tool in the model request. + module ChatExtension + def execute_tool(tool_call) + return super unless LocalShellExecutor.local_shell_tool_call?(tool_call) + + registered_tool = tools[tool_call.name.to_sym] + return registered_tool.call(tool_call.arguments) if registered_tool + + executor = @params[:local_shell_executor] || @params['local_shell_executor'] + LocalShellExecutor.new(executor).call(tool_call.arguments) + end + + private :execute_tool + end + end + end +end + +RubyLLM::Chat.prepend(RubyLLM::Providers::OpenAIResponses::ChatExtension) diff --git a/lib/ruby_llm/providers/openai_responses/streaming.rb b/lib/ruby_llm/providers/openai_responses/streaming.rb index d697677..7f38eb1 100644 --- a/lib/ruby_llm/providers/openai_responses/streaming.rb +++ b/lib/ruby_llm/providers/openai_responses/streaming.rb @@ -1,17 +1,199 @@ # frozen_string_literal: true +require 'delegate' + module RubyLLM module Providers class OpenAIResponses # Streaming methods for the OpenAI Responses API. # Handles SSE events with typed event format. module Streaming + class StreamRawResponse < SimpleDelegator + attr_reader :body + + def initialize(response, body) + super(response) + @body = body + end + end + + class CompletedResponseAccumulator + def initialize + @completed_response = nil + @output_items_by_index = {} + @shell_commands_by_output_index = Hash.new { |hash, key| hash[key] = {} } + @shell_outputs_by_item_id = Hash.new { |hash, key| hash[key] = {} } + @shell_outputs_by_output_index = Hash.new { |hash, key| hash[key] = {} } + end + + def add(event) + return unless event.is_a?(Hash) + + case event['type'] + when 'response.output_item.done' + add_output_item(event) + when 'response.shell_call_command.done' + add_shell_command(event) + when 'response.shell_call_output_content.done' + add_shell_output_content(event) + when 'response.completed' + @completed_response = RubyLLM::Utils.deep_dup(event['response'] || {}) + end + end + + def build_response(raw_response) + body = build_body + base_response = raw_response || build_default_response + StreamRawResponse.new(base_response, body) + end + + private + + def add_output_item(event) + output_index = event['output_index'] + return if output_index.nil? + + item = RubyLLM::Utils.deep_dup(event['item'] || {}) + + case item['type'] + when 'shell_call' + merge_shell_commands!(item, output_index) + when 'shell_call_output' + merge_shell_output!(item, output_index) + end + + @output_items_by_index[output_index] = item + end + + def add_shell_command(event) + output_index = event['output_index'] + command_index = event['command_index'] + command = event['command'] + return if output_index.nil? || command_index.nil? || command.nil? + + @shell_commands_by_output_index[output_index][command_index] = command + + item = @output_items_by_index[output_index] + merge_shell_commands!(item, output_index) if item&.dig('type') == 'shell_call' + end + + def add_shell_output_content(event) + command_index = event['command_index'] + output_index = event['output_index'] + item_id = event['item_id'] + output = RubyLLM::Utils.deep_dup(event['output'] || []) + + @shell_outputs_by_item_id[item_id][command_index] = output if item_id + + @shell_outputs_by_output_index[output_index][command_index] = output unless output_index.nil? + + item = @output_items_by_index[output_index] + merge_shell_output!(item, output_index) if item&.dig('type') == 'shell_call_output' + end + + def merge_shell_commands!(item, output_index) + return unless item + + commands_by_index = @shell_commands_by_output_index[output_index] + return if commands_by_index.empty? + + action = item['action'] ||= {} + commands = Array(action['commands']) + + commands_by_index.each do |command_index, command| + commands[command_index] = command + end + + action['commands'] = commands + end + + def merge_shell_output!(item, output_index) + return unless item + + output = flattened_shell_output(item['id'], output_index) + item['output'] = RubyLLM::Utils.deep_dup(output) if output + end + + def build_body + body = RubyLLM::Utils.deep_dup(@completed_response || {}) + output = merged_output(body['output']) + body['output'] = output if output.any? || body.key?('output') + body + end + + def build_default_response + Class.new do + def status + 200 + end + + def success? + true + end + end.new + end + + def merged_output(base_output) + output_by_index = {} + + Array(base_output).each_with_index do |item, index| + output_by_index[index] = RubyLLM::Utils.deep_dup(item) + end + + @output_items_by_index.each do |index, item| + output_by_index[index] = RubyLLM::Utils.deep_dup(item) + end + + output_by_index.sort_by(&:first).map(&:last) + end + + def flattened_shell_output(item_id, output_index) + output_by_command_index = @shell_outputs_by_item_id[item_id] + output_by_command_index = @shell_outputs_by_output_index[output_index] if output_by_command_index.empty? + return if output_by_command_index.empty? + + output_by_command_index + .sort_by { |command_index, _| command_index || -1 } + .flat_map(&:last) + end + end + module_function def stream_url 'responses' end + def stream_response(connection, payload, additional_headers = {}, &block) + accumulator = StreamAccumulator.new + completed_response = CompletedResponseAccumulator.new + + response = connection.post stream_url, payload do |req| + req.headers = additional_headers.merge(req.headers) unless additional_headers.empty? + apply_stream_on_data_handler(req, accumulator, completed_response, &block) + end + + raw_response = completed_response.build_response(response) + message = message_from_stream(accumulator, raw_response) + assign_response_id(message, raw_response) + log_stream_completion(message) + message + end + + def log_stream_completion(message) + content = message.content.to_s + return if content.empty? + + RubyLLM.logger.debug { "Stream completed: #{content}" } + end + + def message_from_stream(accumulator, raw_response) + parsed_message = Chat.parse_completion_response(raw_response) + return parsed_message if parsed_message&.tool_call? + + accumulator.to_message(raw_response) + end + def build_chunk(data) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength event_type = data['type'] @@ -126,6 +308,36 @@ def parse_streaming_error(data) rescue JSON::ParserError [500, data] end + + private + + def apply_stream_on_data_handler(req, accumulator, completed_response, &block) + on_data = build_on_data_handler do |data| + handle_stream_event(data, accumulator, completed_response, &block) + end + + if faraday_1? + req.options[:on_data] = on_data + else + req.options.on_data = on_data + end + end + + def handle_stream_event(data, accumulator, completed_response) + return unless data.is_a?(Hash) + + completed_response.add(data) + chunk = build_chunk(data) + accumulator.add(chunk) + yield chunk if block_given? + end + + def assign_response_id(message, raw_response) + return unless message.respond_to?(:response_id=) + return unless raw_response.body.is_a?(Hash) + + message.response_id = raw_response.body['id'] + end end end end diff --git a/lib/ruby_llm/providers/openai_responses/web_socket.rb b/lib/ruby_llm/providers/openai_responses/web_socket.rb index 432137d..cdb2662 100644 --- a/lib/ruby_llm/providers/openai_responses/web_socket.rb +++ b/lib/ruby_llm/providers/openai_responses/web_socket.rb @@ -241,6 +241,7 @@ def send_json(payload) def accumulate_response(queue, &block) accumulator = StreamAccumulator.new + completed_response = Streaming::CompletedResponseAccumulator.new loop do raw = queue.pop @@ -249,6 +250,7 @@ def accumulate_response(queue, &block) data = JSON.parse(raw) event_type = data['type'] + completed_response.add(data) chunk = Streaming.build_chunk(data) accumulator.add(chunk) block&.call(chunk) @@ -259,7 +261,8 @@ def accumulate_response(queue, &block) end end - message = accumulator.to_message(nil) + raw_response = completed_response.build_response(nil) + message = accumulator.to_message(raw_response) message.response_id = @last_response_id message end diff --git a/lib/rubyllm_responses_api.rb b/lib/rubyllm_responses_api.rb index 3d8a009..35b0642 100644 --- a/lib/rubyllm_responses_api.rb +++ b/lib/rubyllm_responses_api.rb @@ -9,6 +9,7 @@ require_relative 'ruby_llm/providers/openai_responses/capabilities' require_relative 'ruby_llm/providers/openai_responses/media' require_relative 'ruby_llm/providers/openai_responses/tools' +require_relative 'ruby_llm/providers/openai_responses/local_shell_executor' require_relative 'ruby_llm/providers/openai_responses/models' require_relative 'ruby_llm/providers/openai_responses/streaming' require_relative 'ruby_llm/providers/openai_responses/chat' @@ -39,7 +40,7 @@ module RubyLLM # ResponsesAPI namespace for direct access to helpers and version module ResponsesAPI - VERSION = '0.5.2' + VERSION = '0.5.3' # Shorthand access to built-in tool helpers BuiltInTools = Providers::OpenAIResponses::BuiltInTools diff --git a/ruby_llm-responses_api.gemspec b/ruby_llm-responses_api.gemspec index e061f8f..dda8eea 100644 --- a/ruby_llm-responses_api.gemspec +++ b/ruby_llm-responses_api.gemspec @@ -2,7 +2,7 @@ Gem::Specification.new do |spec| spec.name = 'ruby_llm-responses_api' - spec.version = '0.5.2' + spec.version = '0.5.3' spec.authors = ['Chris Hasinski'] spec.email = ['krzysztof.hasinski@gmail.com'] diff --git a/spec/cassettes/function_calling_complex_args.yml b/spec/cassettes/function_calling_complex_args.yml index f6697ba..b769ead 100644 --- a/spec/cassettes/function_calling_complex_args.yml +++ b/spec/cassettes/function_calling_complex_args.yml @@ -166,9 +166,7 @@ http_interactions: uri: https://api.openai.com/v1/responses body: encoding: UTF-8 - string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"Calculate - (100 + 50) / 3"},{"type":"function_call","call_id":"call_ItxW0AA5Q8deQPzZfPxyJVYQ","name":"test_calculator","arguments":"{\"expression\":\"(100 - + 50) / 3\"}"},{"type":"function_call_output","call_id":"call_ItxW0AA5Q8deQPzZfPxyJVYQ","output":"50"}],"stream":false,"tools":[{"type":"function","name":"test_calculator","description":"Perform + string: '{"model":"gpt-4o-mini","input":[{"type":"function_call_output","call_id":"call_ItxW0AA5Q8deQPzZfPxyJVYQ","output":"50"}],"stream":false,"tools":[{"type":"function","name":"test_calculator","description":"Perform basic math calculations","parameters":{"type":"object","properties":{"expression":{"type":"string","description":"Math expression to evaluate"}},"required":["expression"],"additionalProperties":false,"strict":true},"strict":true}],"previous_response_id":"resp_01a29ecb13810a7800699593b44cb481909e469516314f9bdc"}' headers: diff --git a/spec/cassettes/function_calling_multiple.yml b/spec/cassettes/function_calling_multiple.yml index 3fb08d3..b1ee4c5 100644 --- a/spec/cassettes/function_calling_multiple.yml +++ b/spec/cassettes/function_calling_multiple.yml @@ -196,9 +196,7 @@ http_interactions: uri: https://api.openai.com/v1/responses body: encoding: UTF-8 - string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"What''s - the weather in Paris and what is 15 * 7?"},{"type":"function_call","call_id":"call_0vUxT1QXrfMbvAjuPGLhSNd9","name":"test_weather","arguments":"{\"location\":\"Paris\"}"},{"type":"function_call","call_id":"call_0uT1bW92IvkGQe0dPne6WBYM","name":"test_calculator","arguments":"{\"expression\":\"15 - * 7\"}"},{"type":"function_call_output","call_id":"call_0vUxT1QXrfMbvAjuPGLhSNd9","output":"The + string: '{"model":"gpt-4o-mini","input":[{"type":"function_call_output","call_id":"call_0vUxT1QXrfMbvAjuPGLhSNd9","output":"The weather in Paris is sunny, 72°F"},{"type":"function_call_output","call_id":"call_0uT1bW92IvkGQe0dPne6WBYM","output":"105"}],"stream":false,"tools":[{"type":"function","name":"test_weather","description":"Get the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The city name"}},"required":["location"],"additionalProperties":false,"strict":true},"strict":true},{"type":"function","name":"test_calculator","description":"Perform diff --git a/spec/cassettes/function_calling_single.yml b/spec/cassettes/function_calling_single.yml index 850d1e9..88f0f9d 100644 --- a/spec/cassettes/function_calling_single.yml +++ b/spec/cassettes/function_calling_single.yml @@ -166,8 +166,7 @@ http_interactions: uri: https://api.openai.com/v1/responses body: encoding: UTF-8 - string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"What''s - the weather in Tokyo?"},{"type":"function_call","call_id":"call_7xVIaW6HuwHg3hlPdSV6JVtJ","name":"test_weather","arguments":"{\"location\":\"Tokyo\"}"},{"type":"function_call_output","call_id":"call_7xVIaW6HuwHg3hlPdSV6JVtJ","output":"The + string: '{"model":"gpt-4o-mini","input":[{"type":"function_call_output","call_id":"call_7xVIaW6HuwHg3hlPdSV6JVtJ","output":"The weather in Tokyo is sunny, 72°F"}],"stream":false,"tools":[{"type":"function","name":"test_weather","description":"Get the current weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The city name"}},"required":["location"],"additionalProperties":false,"strict":true},"strict":true}],"previous_response_id":"resp_0adbe5450d1f3f4200699593b21dbc819694ad619e8649c8c9"}' diff --git a/spec/cassettes/multiple_instructions.yml b/spec/cassettes/multiple_instructions.yml index 5f8022d..1430e38 100644 --- a/spec/cassettes/multiple_instructions.yml +++ b/spec/cassettes/multiple_instructions.yml @@ -5,11 +5,11 @@ http_interactions: uri: https://api.openai.com/v1/responses body: encoding: UTF-8 - string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"What - is Ruby?"}],"stream":false,"instructions":"Always be concise."}' + string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"developer","content":"Always + be concise."},{"type":"message","role":"user","content":"What is Ruby?"}],"stream":false}' headers: User-Agent: - - Faraday v2.14.0 + - Faraday v2.14.1 Authorization: - Bearer Content-Type: @@ -24,7 +24,7 @@ http_interactions: message: OK headers: Date: - - Wed, 18 Feb 2026 10:26:21 GMT + - Tue, 21 Apr 2026 10:08:25 GMT Content-Type: - application/json Transfer-Encoding: @@ -34,64 +34,64 @@ http_interactions: Server: - cloudflare X-Ratelimit-Limit-Requests: - - '5000' + - '30000' X-Ratelimit-Limit-Tokens: - - '4000000' + - '150000000' X-Ratelimit-Remaining-Requests: - - '4999' + - '29999' X-Ratelimit-Remaining-Tokens: - - '3999961' + - '149999960' X-Ratelimit-Reset-Requests: - - 12ms + - 2ms X-Ratelimit-Reset-Tokens: - 0s Openai-Version: - '2020-10-01' Openai-Organization: - - user-h7m2t30jnyqzec1thkvt5rcd + - zipchat Openai-Project: - - proj_yBBvayevSgRR3SVjSmBgm0so + - proj_tor65x8ddwYUBmnOFz47CzNA X-Request-Id: - - req_c2611485e638467f8d99e639a2340253 + - req_b31618922b4c460db4be53bafb2d2e32 Openai-Processing-Ms: - - '1630' + - '1800' Cf-Cache-Status: - DYNAMIC Set-Cookie: - - __cf_bm=vmIcw8dS4jtu8nkYU1hHgk75dBOiB62C98FnPW481P8-1771410380.039279-1.0.1.1-DOwKKstDvjCjT0Wul_M04mVoc5dGKnGCCLg0cBz8ICCNAgLeD8SG4Z8fkhRWXHtcw7zXlEESlDS8oP_8A2sOG_n_mIdnfvcfh3Mltw5wFR.4j0HQkNz.z9Pzuhf0LVgX; - HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 18 Feb 2026 - 10:56:21 GMT + - __cf_bm=a_I8PUgYXnYdTH19jacwD_MEDNOY5rswxrImrVj2sfE-1776766102.725094-1.0.1.1-lt1V_qYIHlKtKgrhA03tHyfwBVOcGLK0MYrsbI_i3iHfHfUmM.0wRzrF9fX4rENvrYzFQH07Y5Rc7dzuaNI_8qM6TI7PNbUZ2.MA2nFrrb1Kxnokbv98MPM8WrD7LCGC; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 21 Apr 2026 + 10:38:25 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload X-Content-Type-Options: - nosniff Cf-Ray: - - 9cfcd35b39273572-WAW + - 9efb964e0aab9e0c-SOF Alt-Svc: - h3=":443"; ma=86400 body: encoding: ASCII-8BIT string: |- { - "id": "resp_0c507c033809d5e300699593cc23d081968e6506bb7ca3419d", + "id": "resp_05a31f415c9584410069e74c972980819ead042eac7fd719b9", "object": "response", - "created_at": 1771410380, + "created_at": 1776766103, "status": "completed", "background": false, "billing": { "payer": "developer" }, - "completed_at": 1771410381, + "completed_at": 1776766104, "error": null, "frequency_penalty": 0.0, "incomplete_details": null, - "instructions": "Always be concise.", + "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "model": "gpt-4o-mini-2024-07-18", "output": [ { - "id": "msg_0c507c033809d5e300699593cc6ab881969ed5ba3dc8c8a2fa", + "id": "msg_05a31f415c9584410069e74c97f02c819eb3ed5ab894ff85d0", "type": "message", "status": "completed", "content": [ @@ -99,7 +99,7 @@ http_interactions: "type": "output_text", "annotations": [], "logprobs": [], - "text": "Ruby is a dynamic, open-source programming language known for its simplicity and productivity. It emphasizes object-oriented programming and features elegant syntax that is easy to read and write. Ruby is often used for web development, particularly with the Ruby on Rails framework, which streamlines the process of building web applications. The language supports various paradigms, including functional and imperative programming." + "text": "Ruby is a dynamic, object-oriented programming language known for its simplicity and productivity. Created by Yukihiro \"Matz\" Matsumoto in the mid-1990s, Ruby emphasizes readability and allows developers to express ideas naturally. It's widely used in web development, particularly with the Ruby on Rails framework, which enables rapid application development. Ruby supports multiple programming paradigms, including functional and imperative styles." } ], "role": "assistant" @@ -109,7 +109,7 @@ http_interactions: "presence_penalty": 0.0, "previous_response_id": null, "prompt_cache_key": null, - "prompt_cache_retention": null, + "prompt_cache_retention": "in_memory", "reasoning": { "effort": null, "summary": null @@ -134,14 +134,14 @@ http_interactions: "input_tokens_details": { "cached_tokens": 0 }, - "output_tokens": 73, + "output_tokens": 82, "output_tokens_details": { "reasoning_tokens": 0 }, - "total_tokens": 92 + "total_tokens": 101 }, "user": null, "metadata": {} } - recorded_at: Wed, 18 Feb 2026 10:26:21 GMT + recorded_at: Tue, 21 Apr 2026 10:08:24 GMT recorded_with: VCR 6.4.0 diff --git a/spec/cassettes/system_instructions.yml b/spec/cassettes/system_instructions.yml index 8311445..ee5b49d 100644 --- a/spec/cassettes/system_instructions.yml +++ b/spec/cassettes/system_instructions.yml @@ -5,12 +5,12 @@ http_interactions: uri: https://api.openai.com/v1/responses body: encoding: UTF-8 - string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"user","content":"Say - hello"}],"stream":false,"instructions":"You are a pirate. Always respond like - a pirate. Use arr and matey."}' + string: '{"model":"gpt-4o-mini","input":[{"type":"message","role":"developer","content":"You + are a pirate. Always respond like a pirate. Use arr and matey."},{"type":"message","role":"user","content":"Say + hello"}],"stream":false}' headers: User-Agent: - - Faraday v2.14.0 + - Faraday v2.14.1 Authorization: - Bearer Content-Type: @@ -25,7 +25,7 @@ http_interactions: message: OK headers: Date: - - Wed, 18 Feb 2026 10:26:19 GMT + - Tue, 21 Apr 2026 10:08:22 GMT Content-Type: - application/json Transfer-Encoding: @@ -35,64 +35,64 @@ http_interactions: Server: - cloudflare X-Ratelimit-Limit-Requests: - - '5000' + - '30000' X-Ratelimit-Limit-Tokens: - - '4000000' + - '150000000' X-Ratelimit-Remaining-Requests: - - '4999' + - '29999' X-Ratelimit-Remaining-Tokens: - - '3999951' + - '149999950' X-Ratelimit-Reset-Requests: - - 12ms + - 2ms X-Ratelimit-Reset-Tokens: - 0s Openai-Version: - '2020-10-01' Openai-Organization: - - user-h7m2t30jnyqzec1thkvt5rcd + - zipchat Openai-Project: - - proj_yBBvayevSgRR3SVjSmBgm0so + - proj_tor65x8ddwYUBmnOFz47CzNA X-Request-Id: - - req_574d853dfe60413a80c29a5679b3cf72 + - req_0a77a65a1120455083b9b49eee73ecbe Openai-Processing-Ms: - - '1110' + - '1761' Cf-Cache-Status: - DYNAMIC Set-Cookie: - - __cf_bm=zQx5QSDyeV.6DDJC7cIvjyAUtZzsqPvrYcwuBP4lytQ-1771410378.7320893-1.0.1.1-D_hOL2Ytee5MrfSC9u5ByG0CAAhh73eBCeSKGra0Q0oVCIncteDVyLxUYMlfHgjsqFgrhvI71ceH4olpw1sU.RGNrKvNk_Bdf3HwkvRTL6sgpJTS3CwLMxlSOCc8joxt; - HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 18 Feb 2026 - 10:56:19 GMT + - __cf_bm=J97FjzXR4dWHvPrj.F2fLnKHHEqWDhLDSHzoa8pnZuw-1776766100.3974354-1.0.1.1-nu3o4jkVQamamVlBf7xsFjFjeMxJAoYXwlaqIAY0J1C9HKQDnHpw.Rx7bx2MKvkR1UfY4mnYFxJemHkYO_bGhRZe4CzIlndGSMFu95IM2okl3Yzw_Nl0yQy54M9PWzzj; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 21 Apr 2026 + 10:38:22 GMT Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload X-Content-Type-Options: - nosniff Cf-Ray: - - 9cfcd35318e5ee48-WAW + - 9efb963f7819bda6-SOF Alt-Svc: - h3=":443"; ma=86400 body: encoding: ASCII-8BIT string: |- { - "id": "resp_0f7853d04f51fbf700699593cad1308195afb021c508857bb3", + "id": "resp_09ce6a7b72e1e5980069e74c94dca881909706cc9ebfb3d8ed", "object": "response", - "created_at": 1771410378, + "created_at": 1776766100, "status": "completed", "background": false, "billing": { - "payer": "developer" + "payer": "openai" }, - "completed_at": 1771410379, + "completed_at": 1776766102, "error": null, "frequency_penalty": 0.0, "incomplete_details": null, - "instructions": "You are a pirate. Always respond like a pirate. Use arr and matey.", + "instructions": null, "max_output_tokens": null, "max_tool_calls": null, "model": "gpt-4o-mini-2024-07-18", "output": [ { - "id": "msg_0f7853d04f51fbf700699593cb73048195b8edf56a0b2c1e85", + "id": "msg_09ce6a7b72e1e5980069e74c9625dc8190a8375c07f30aa3d9", "type": "message", "status": "completed", "content": [ @@ -100,7 +100,7 @@ http_interactions: "type": "output_text", "annotations": [], "logprobs": [], - "text": "Ahoy there, matey! A hearty hello to ye! Arr! How be ye on this fine day?" + "text": "Ahoy, matey! A hearty hello to ye! What be yer desire on this fine day upon the high seas? Arr!" } ], "role": "assistant" @@ -110,7 +110,7 @@ http_interactions: "presence_penalty": 0.0, "previous_response_id": null, "prompt_cache_key": null, - "prompt_cache_retention": null, + "prompt_cache_retention": "in_memory", "reasoning": { "effort": null, "summary": null @@ -135,14 +135,14 @@ http_interactions: "input_tokens_details": { "cached_tokens": 0 }, - "output_tokens": 24, + "output_tokens": 28, "output_tokens_details": { "reasoning_tokens": 0 }, - "total_tokens": 54 + "total_tokens": 58 }, "user": null, "metadata": {} } - recorded_at: Wed, 18 Feb 2026 10:26:19 GMT + recorded_at: Tue, 21 Apr 2026 10:08:22 GMT recorded_with: VCR 6.4.0 diff --git a/spec/ruby_llm/providers/openai_responses/chat_spec.rb b/spec/ruby_llm/providers/openai_responses/chat_spec.rb index 30b870c..f5d1d5e 100644 --- a/spec/ruby_llm/providers/openai_responses/chat_spec.rb +++ b/spec/ruby_llm/providers/openai_responses/chat_spec.rb @@ -29,7 +29,7 @@ expect(payload[:stream]).to be false end - it 'extracts system messages to instructions' do + it 'formats system messages as developer input messages' do payload = chat_module.render_payload( [system_message, user_message], tools: {}, @@ -38,8 +38,31 @@ stream: false ) - expect(payload[:instructions]).to eq('You are a helpful assistant') - expect(payload[:input].length).to eq(1) + expect(payload).not_to have_key(:instructions) + expect(payload[:input]).to eq( + [ + { type: 'message', role: 'developer', content: 'You are a helpful assistant' }, + { type: 'message', role: 'user', content: 'Hello' } + ] + ) + end + + it 'preserves multiple system messages as developer input messages' do + second_system_message = RubyLLM::Message.new(role: :system, content: 'Always be concise') + + payload = chat_module.render_payload( + [system_message, second_system_message, user_message], + tools: {}, + temperature: nil, + model: model, + stream: false + ) + + expect(payload).not_to have_key(:instructions) + expect(payload[:input].map { |item| item[:role] }).to eq(%w[developer developer user]) + expect(payload[:input].map { |item| item[:content] }).to eq( + ['You are a helpful assistant', 'Always be concise', 'Hello'] + ) end it 'includes temperature when provided' do @@ -159,5 +182,21 @@ expect(input.first[:call_id]).to eq('call_123') expect(input.first[:output]).to eq('{"result": "success"}') end + + it 'serializes non-shell raw tool results as function output strings' do + messages = [ + RubyLLM::Message.new( + role: :tool, + content: RubyLLM::Content::Raw.new({ 'result' => ['success'] }), + tool_call_id: 'call_123' + ) + ] + + input = chat_module.format_input(messages) + + expect(input.first[:type]).to eq('function_call_output') + expect(input.first[:call_id]).to eq('call_123') + expect(input.first[:output]).to eq('{"result":["success"]}') + end end end diff --git a/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb b/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb new file mode 100644 index 0000000..2ce824b --- /dev/null +++ b/spec/ruby_llm/providers/openai_responses/local_shell_lifecycle_spec.rb @@ -0,0 +1,412 @@ +# frozen_string_literal: true + +require 'spec_helper' + +class LocalShellLifecycleEchoTool < RubyLLM::Tool + description 'Echo a value' + param :value, type: 'string', desc: 'Value to echo' + + def execute(value:) + value + end +end + +RSpec.describe 'OpenAI Responses local shell lifecycle' do + let(:endpoint) { 'https://api.openai.com/v1/responses' } + let(:model) { 'gpt-5.4' } + let(:shell_tool) { RubyLLM::ResponsesAPI::BuiltInTools.shell(environment_type: 'local') } + + def build_chat + RubyLLM.chat(model: model, provider: :openai_responses, assume_model_exists: true) + end + + def stub_responses(*bodies) + requests = [] + response_index = 0 + + stub_request(:post, endpoint).to_return do |request| + requests << JSON.parse(request.body) + body = bodies.fetch(response_index) + response_index += 1 + { + status: 200, + body: JSON.generate(body), + headers: { 'Content-Type' => 'application/json' } + } + end + + requests + end + + def stub_streaming_responses(*event_groups) + requests = [] + response_index = 0 + + stub_request(:post, endpoint).to_return do |request| + requests << JSON.parse(request.body) + events = event_groups.fetch(response_index) + response_index += 1 + { + status: 200, + body: build_sse_body(events), + headers: { 'Content-Type' => 'text/event-stream' } + } + end + + requests + end + + def shell_call_response(environment: { 'type' => 'local' }) + { + 'id' => 'resp_shell_1', + 'model' => model, + 'output' => [ + { + 'type' => 'shell_call', + 'id' => 'sh_1', + 'call_id' => 'call_shell_1', + 'status' => 'in_progress', + 'environment' => environment, + 'action' => { + 'commands' => ['pwd'], + 'timeout_ms' => 10_000, + 'max_output_length' => 2_000 + } + } + ], + 'tools' => [shell_tool], + 'usage' => { 'input_tokens' => 12, 'output_tokens' => 6 } + } + end + + def final_response + { + 'id' => 'resp_final_1', + 'model' => model, + 'output' => [ + { + 'type' => 'message', + 'role' => 'assistant', + 'content' => [{ 'type' => 'output_text', 'text' => 'Done from shell.' }] + } + ], + 'usage' => { 'input_tokens' => 8, 'output_tokens' => 4 } + } + end + + def completed_event(response) + { + 'type' => 'response.completed', + 'response' => response + } + end + + def final_response_stream_events + [ + { 'type' => 'response.output_text.delta', 'delta' => 'Done ' }, + { 'type' => 'response.output_text.delta', 'delta' => 'from shell.' }, + completed_event(final_response) + ] + end + + it 'executes local shell calls and continues with only shell_call_output' do + requests = stub_responses(shell_call_response, final_response) + executor_calls = [] + tool_calls = [] + tool_results = [] + end_messages = [] + + local_shell_executor = lambda do |shell_call| + executor_calls << shell_call + [ + { + 'stdout' => "/repo\n", + 'stderr' => '', + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } + } + ] + end + + chat = build_chat + chat.with_instructions('Be brief.') + chat.with_params(tools: [shell_tool], local_shell_executor: local_shell_executor) + chat.on_tool_call { |tool_call| tool_calls << tool_call } + chat.on_tool_result { |result| tool_results << result } + chat.on_end_message { |message| end_messages << message } + + response = chat.ask('Inspect the repo') + + expect(response.content).to eq('Done from shell.') + expect(executor_calls.length).to eq(1) + expect(executor_calls.first['call_id']).to eq('call_shell_1') + expect(tool_calls.first).to be_a(RubyLLM::Providers::OpenAIResponses::LocalShellToolCall) + expect(tool_results.first).to be_a(RubyLLM::Content::Raw) + expect(end_messages.map(&:role)).to eq(%i[assistant tool assistant]) + + expect(requests.length).to eq(2) + expect(requests.first['input'].first).to eq( + { + 'type' => 'message', + 'role' => 'developer', + 'content' => 'Be brief.' + } + ) + expect(requests.first['input'].last['content']).to eq('Inspect the repo') + expect(requests.first).not_to have_key('instructions') + expect(requests.first).not_to have_key('local_shell_executor') + + continuation = requests.last + expect(continuation['previous_response_id']).to eq('resp_shell_1') + expect(continuation).not_to have_key('instructions') + expect(continuation).not_to have_key('local_shell_executor') + expect(continuation['input']).to eq( + [ + { + 'type' => 'shell_call_output', + 'call_id' => 'call_shell_1', + 'max_output_length' => 2_000, + 'output' => [ + { + 'stdout' => "/repo\n", + 'stderr' => '', + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } + } + ] + } + ] + ) + end + + it 'streams final assistant content after executing a local shell call' do + requests = stub_streaming_responses( + [completed_event(shell_call_response(environment: nil))], + final_response_stream_events + ) + executor_calls = [] + streamed_content = [] + + chat = build_chat + chat.with_params( + tools: [shell_tool], + local_shell_executor: lambda do |shell_call| + executor_calls << shell_call + [{ 'stdout' => "/repo\n", 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }] + end + ) + + response = chat.ask('Inspect the repo') do |chunk| + streamed_content << chunk.content if chunk.content + end + + expect(response.content).to eq('Done from shell.') + expect(streamed_content).to eq(['Done ', 'from shell.']) + expect(executor_calls.length).to eq(1) + expect(requests.length).to eq(2) + expect(requests.map { |request| request['stream'] }).to eq([true, true]) + expect(requests.last['previous_response_id']).to eq('resp_shell_1') + expect(requests.last['input'].first['type']).to eq('shell_call_output') + end + + it 'fails clearly when a local shell call has no executor' do + stub_responses(shell_call_response) + + chat = build_chat + chat.with_params(tools: [shell_tool]) + + expect { chat.ask('Inspect the repo') } + .to raise_error(RubyLLM::Error, /local_shell_executor/) + end + + it 'keeps string-keyed local shell executor params local-only' do + requests = stub_responses(shell_call_response, final_response) + executor_calls = [] + + chat = build_chat + chat.with_params( + **{ + tools: [shell_tool], + 'local_shell_executor' => lambda do |shell_call| + executor_calls << shell_call + [{ 'stdout' => "/repo\n", 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }] + end + } + ) + + response = chat.ask('Inspect the repo') + + expect(response.content).to eq('Done from shell.') + expect(executor_calls.length).to eq(1) + expect(requests.length).to eq(2) + expect(requests.first).not_to have_key('local_shell_executor') + expect(requests.last).not_to have_key('local_shell_executor') + end + + it 'uses shell call id as the output call_id when call_id is missing' do + shell_response = shell_call_response + shell_response['output'].first.delete('call_id') + requests = stub_responses(shell_response, final_response) + + chat = build_chat + chat.with_params( + tools: [shell_tool], + local_shell_executor: lambda do |_shell_call| + [{ 'stdout' => "/repo\n", 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }] + end + ) + + response = chat.ask('Inspect the repo') + + expect(response.content).to eq('Done from shell.') + expect(requests.last['input'].first['type']).to eq('shell_call_output') + expect(requests.last['input'].first['call_id']).to eq('sh_1') + end + + it 'leaves hosted shell calls to existing built-in tool behavior' do + hosted_shell_tool = RubyLLM::ResponsesAPI::BuiltInTools.shell + response = shell_call_response(environment: { 'type' => 'container_auto' }).merge('tools' => [hosted_shell_tool]) + requests = stub_responses(response) + executor_calls = [] + + chat = build_chat + chat.with_params( + tools: [RubyLLM::ResponsesAPI::BuiltInTools.shell], + local_shell_executor: ->(shell_call) { executor_calls << shell_call } + ) + + response = chat.ask('Inspect the repo') + + expect(response.tool_call?).to be false + expect(executor_calls).to be_empty + expect(requests.length).to eq(1) + end + + it 'executes shell calls with nil environment when the response declares a local shell tool' do + response = shell_call_response(environment: nil) + requests = stub_responses(response, final_response) + executor_calls = [] + + chat = build_chat + chat.with_params( + tools: [shell_tool], + local_shell_executor: lambda do |shell_call| + executor_calls << shell_call + [{ 'stdout' => "/repo\n", 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }] + end + ) + + final_message = chat.ask('Inspect the repo') + + expect(final_message.content).to eq('Done from shell.') + expect(executor_calls.length).to eq(1) + expect(executor_calls.first['environment']).to be_nil + expect(requests.last['previous_response_id']).to eq('resp_shell_1') + expect(requests.last['input'].first['type']).to eq('shell_call_output') + end + + it 'leaves nil-environment shell calls alone when the response does not declare a local shell tool' do + hosted_shell_tool = RubyLLM::ResponsesAPI::BuiltInTools.shell + response = shell_call_response(environment: nil).merge('tools' => [hosted_shell_tool]) + requests = stub_responses(response) + executor_calls = [] + + chat = build_chat + chat.with_params( + tools: [hosted_shell_tool], + local_shell_executor: ->(shell_call) { executor_calls << shell_call } + ) + + response_message = chat.ask('Inspect the repo') + + expect(response_message.tool_call?).to be false + expect(executor_calls).to be_empty + expect(requests.length).to eq(1) + end + + it 'rejects non-array executor results' do + stub_responses(shell_call_response) + + chat = build_chat + chat.with_params( + tools: [shell_tool], + local_shell_executor: ->(_shell_call) { { 'stdout' => 'ok' } } + ) + + expect { chat.ask('Inspect the repo') } + .to raise_error(RubyLLM::Error, /array of command result hashes/) + end + + it 'mixes function tool calls and local shell calls in the same continuation' do + first_response = shell_call_response.merge( + 'output' => [ + { + 'type' => 'function_call', + 'call_id' => 'call_echo_1', + 'name' => 'local_shell_lifecycle_echo', + 'arguments' => '{"value":"hello"}' + }, + shell_call_response['output'].first + ] + ) + requests = stub_responses(first_response, final_response) + + chat = build_chat + chat.with_tool(LocalShellLifecycleEchoTool) + function_tool = RubyLLM::Providers::OpenAIResponses::Tools.tool_for(chat.tools.fetch(:local_shell_lifecycle_echo)) + chat.with_params( + tools: [function_tool, shell_tool], + local_shell_executor: lambda do |_shell_call| + [{ 'stdout' => 'ok', 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }] + end + ) + + response = chat.ask('Use both tools') + + expect(response.content).to eq('Done from shell.') + expect(requests.last['previous_response_id']).to eq('resp_shell_1') + expect(requests.last['input'].map { |item| item['type'] }).to eq(%w[function_call_output shell_call_output]) + expect(requests.last['input'].first['output']).to eq('hello') + expect(requests.last['input'].last['call_id']).to eq('call_shell_1') + end + + it 'continues function-only tool calls with incremental input' do + first_response = { + 'id' => 'resp_function_1', + 'model' => model, + 'output' => [ + { + 'type' => 'function_call', + 'call_id' => 'call_echo_1', + 'name' => 'local_shell_lifecycle_echo', + 'arguments' => '{"value":"hello"}' + } + ], + 'usage' => { 'input_tokens' => 12, 'output_tokens' => 6 } + } + requests = stub_responses(first_response, final_response) + + chat = build_chat + chat.with_tool(LocalShellLifecycleEchoTool) + function_tool = RubyLLM::Providers::OpenAIResponses::Tools.tool_for(chat.tools.fetch(:local_shell_lifecycle_echo)) + chat.with_params( + tools: [function_tool, shell_tool], + local_shell_executor: lambda do |_shell_call| + [{ 'stdout' => 'ok', 'stderr' => '', 'outcome' => { 'type' => 'exit', 'exit_code' => 0 } }] + end + ) + + response = chat.ask('Use the function tool') + + expect(response.content).to eq('Done from shell.') + expect(requests.last['previous_response_id']).to eq('resp_function_1') + expect(requests.last).not_to have_key('instructions') + expect(requests.last['input']).to eq( + [ + { + 'type' => 'function_call_output', + 'call_id' => 'call_echo_1', + 'output' => 'hello' + } + ] + ) + end +end diff --git a/spec/ruby_llm/providers/openai_responses/shell_tool_spec.rb b/spec/ruby_llm/providers/openai_responses/shell_tool_spec.rb index 2c47c3b..0ba2374 100644 --- a/spec/ruby_llm/providers/openai_responses/shell_tool_spec.rb +++ b/spec/ruby_llm/providers/openai_responses/shell_tool_spec.rb @@ -71,7 +71,7 @@ end describe '.parse_shell_call_results' do - it 'extracts shell_call items from output' do + it 'joins shell_call metadata with shell_call_output items by call_id' do output = [ { 'type' => 'shell_call', @@ -79,7 +79,20 @@ 'call_id' => 'call_123', 'status' => 'completed', 'action' => { 'commands' => ['ls -la'], 'timeout_ms' => 120_000 }, - 'container_id' => 'cntr_abc' + 'environment' => { 'type' => 'container_reference', 'container_id' => 'cntr_abc' } + }, + { + 'type' => 'shell_call_output', + 'id' => 'sho_123', + 'call_id' => 'call_123', + 'status' => 'completed', + 'output' => [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "total 1\n" + } + ] }, { 'type' => 'message', @@ -94,13 +107,90 @@ expect(results.first[:call_id]).to eq('call_123') expect(results.first[:status]).to eq('completed') expect(results.first[:action]['commands']).to eq(['ls -la']) - expect(results.first[:container_id]).to eq('cntr_abc') + expect(results.first[:environment]).to eq( + { 'type' => 'container_reference', 'container_id' => 'cntr_abc' } + ) + expect(results.first[:output]).to eq( + [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "total 1\n" + } + ] + ) end it 'returns empty array when no shell calls' do output = [{ 'type' => 'message', 'content' => [] }] expect(built_in.parse_shell_call_results(output)).to be_empty end + + it 'parses shell results from a final message in sync mode' do + response = mock_response( + { + 'id' => 'resp_shell', + 'model' => 'gpt-5.2', + 'output' => [ + { + 'type' => 'shell_call', + 'id' => 'sh_1', + 'call_id' => 'call_shell_1', + 'status' => 'completed', + 'action' => { + 'commands' => ['bundle exec ruby -v'], + 'timeout_ms' => nil + }, + 'environment' => { + 'type' => 'container_reference', + 'container_id' => 'cntr_sync' + } + }, + { + 'type' => 'shell_call_output', + 'id' => 'sho_1', + 'call_id' => 'call_shell_1', + 'status' => 'completed', + 'output' => [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "ruby 3.4.8\n" + } + ] + } + ] + } + ) + + message = RubyLLM::Providers::OpenAIResponses::Chat.parse_completion_response(response) + results = built_in.parse_shell_call_results_from_message(message) + + expect(results).to eq( + [ + { + id: 'sh_1', + call_id: 'call_shell_1', + status: 'completed', + environment: { + 'type' => 'container_reference', + 'container_id' => 'cntr_sync' + }, + action: { + 'commands' => ['bundle exec ruby -v'], + 'timeout_ms' => nil + }, + output: [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "ruby 3.4.8\n" + } + ] + } + ] + ) + end end end end diff --git a/spec/ruby_llm/providers/openai_responses/streaming_spec.rb b/spec/ruby_llm/providers/openai_responses/streaming_spec.rb index 5493749..b7a2e56 100644 --- a/spec/ruby_llm/providers/openai_responses/streaming_spec.rb +++ b/spec/ruby_llm/providers/openai_responses/streaming_spec.rb @@ -212,4 +212,437 @@ expect(result).to be_nil end end + + describe '.stream_response' do + let(:provider) { RubyLLM::Providers::OpenAIResponses.new(RubyLLM.config) } + let(:payload) { { model: 'gpt-5.2', input: [], stream: true } } + + def build_stream_connection(events, response: mock_response('', status: 200)) + connection = instance_double(RubyLLM::Connection) + + allow(connection).to receive(:post) do |_url, _payload, &block| + request = Struct.new(:headers, :options).new({}, Struct.new(:on_data).new) + block.call(request) + + env = Struct.new(:status).new(200) + sse_body = build_sse_body(events) + request.options.on_data.call(sse_body, sse_body.bytesize, env) + + response + end + + connection + end + + it 'preserves completed shell executions on the final message raw output' do + events = [ + { 'type' => 'response.output_text.delta', 'delta' => 'ruby 3.4.8' }, + { + 'type' => 'response.output_item.done', + 'item' => { + 'id' => 'sh_1', + 'type' => 'shell_call', + 'status' => 'completed', + 'action' => { 'commands' => ['placeholder'], 'timeout_ms' => nil }, + 'call_id' => 'call_shell_1', + 'environment' => { + 'type' => 'container_reference', + 'container_id' => 'cntr_1' + } + }, + 'output_index' => 0 + }, + { + 'type' => 'response.shell_call_command.done', + 'command' => 'cd /mnt/data/project && bundle exec ruby -v', + 'command_index' => 0, + 'output_index' => 0 + }, + { + 'type' => 'response.output_item.done', + 'item' => { + 'id' => 'sho_1', + 'type' => 'shell_call_output', + 'status' => 'completed', + 'call_id' => 'call_shell_1', + 'output' => [] + }, + 'output_index' => 1 + }, + { + 'type' => 'response.shell_call_output_content.done', + 'command_index' => 0, + 'item_id' => 'sho_1', + 'output' => [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "ruby 3.4.8\n" + } + ], + 'output_index' => 1 + }, + { + 'type' => 'response.output_item.done', + 'item' => { + 'id' => 'msg_1', + 'type' => 'message', + 'status' => 'completed', + 'role' => 'assistant', + 'content' => [ + { + 'type' => 'output_text', + 'text' => 'ruby 3.4.8' + } + ] + }, + 'output_index' => 2 + }, + { + 'type' => 'response.completed', + 'response' => { + 'id' => 'resp_shell', + 'model' => 'gpt-5.2', + 'output' => [ + { + 'id' => 'msg_1', + 'type' => 'message', + 'status' => 'completed', + 'role' => 'assistant', + 'content' => [ + { + 'type' => 'output_text', + 'text' => 'ruby 3.4.8' + } + ] + } + ], + 'usage' => { 'input_tokens' => 12, 'output_tokens' => 8 } + } + } + ] + + message = provider.send(:stream_response, build_stream_connection(events), payload) + results = RubyLLM::Providers::OpenAIResponses::BuiltInTools.parse_shell_call_results_from_message(message) + + expect(message.response_id).to eq('resp_shell') + expect(message.raw.body['output'].map { |item| item['type'] }).to eq( + %w[shell_call shell_call_output message] + ) + expect(results).to eq( + [ + { + id: 'sh_1', + call_id: 'call_shell_1', + status: 'completed', + environment: { + 'type' => 'container_reference', + 'container_id' => 'cntr_1' + }, + action: { + 'commands' => ['cd /mnt/data/project && bundle exec ruby -v'], + 'timeout_ms' => nil + }, + output: [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "ruby 3.4.8\n" + } + ] + } + ] + ) + end + + it 'handles multiple completed shell calls in one streamed response' do + events = [ + { + 'type' => 'response.output_item.done', + 'item' => { + 'id' => 'sh_1', + 'type' => 'shell_call', + 'status' => 'completed', + 'action' => { 'commands' => ['echo one'] }, + 'call_id' => 'call_shell_1', + 'environment' => { 'type' => 'container_reference', 'container_id' => 'cntr_1' } + }, + 'output_index' => 0 + }, + { + 'type' => 'response.output_item.done', + 'item' => { + 'id' => 'sho_1', + 'type' => 'shell_call_output', + 'status' => 'completed', + 'call_id' => 'call_shell_1', + 'output' => [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "one\n" + } + ] + }, + 'output_index' => 1 + }, + { + 'type' => 'response.output_item.done', + 'item' => { + 'id' => 'sh_2', + 'type' => 'shell_call', + 'status' => 'completed', + 'action' => { 'commands' => ['echo two'] }, + 'call_id' => 'call_shell_2', + 'environment' => { 'type' => 'container_reference', 'container_id' => 'cntr_2' } + }, + 'output_index' => 2 + }, + { + 'type' => 'response.output_item.done', + 'item' => { + 'id' => 'sho_2', + 'type' => 'shell_call_output', + 'status' => 'completed', + 'call_id' => 'call_shell_2', + 'output' => [ + { + 'outcome' => { 'type' => 'timeout' }, + 'stderr' => 'timed out', + 'stdout' => '' + } + ] + }, + 'output_index' => 3 + }, + { + 'type' => 'response.completed', + 'response' => { + 'id' => 'resp_shell_multi', + 'model' => 'gpt-5.2', + 'output' => [], + 'usage' => { 'input_tokens' => 10, 'output_tokens' => 5 } + } + } + ] + + message = provider.send(:stream_response, build_stream_connection(events), payload) + results = RubyLLM::Providers::OpenAIResponses::BuiltInTools.parse_shell_call_results_from_message(message) + + expect(results.map { |result| result[:call_id] }).to eq(%w[call_shell_1 call_shell_2]) + expect(results.map { |result| result[:action]['commands'] }).to eq([['echo one'], ['echo two']]) + expect(results.last[:output]).to eq( + [ + { + 'outcome' => { 'type' => 'timeout' }, + 'stderr' => 'timed out', + 'stdout' => '' + } + ] + ) + end + + it 'preserves output for each command in a multi-command shell call' do + events = [ + { + 'type' => 'response.output_item.done', + 'item' => { + 'id' => 'sh_multi', + 'type' => 'shell_call', + 'status' => 'completed', + 'action' => { 'commands' => ['echo one', 'echo two'] }, + 'call_id' => 'call_shell_multi', + 'environment' => { 'type' => 'container_reference', 'container_id' => 'cntr_multi' } + }, + 'output_index' => 0 + }, + { + 'type' => 'response.output_item.done', + 'item' => { + 'id' => 'sho_multi', + 'type' => 'shell_call_output', + 'status' => 'completed', + 'call_id' => 'call_shell_multi', + 'output' => [] + }, + 'output_index' => 1 + }, + { + 'type' => 'response.shell_call_output_content.done', + 'command_index' => 0, + 'item_id' => 'sho_multi', + 'output' => [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "one\n" + } + ], + 'output_index' => 1 + }, + { + 'type' => 'response.shell_call_output_content.done', + 'command_index' => 1, + 'item_id' => 'sho_multi', + 'output' => [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "two\n" + } + ], + 'output_index' => 1 + }, + { + 'type' => 'response.completed', + 'response' => { + 'id' => 'resp_shell_multi_command', + 'model' => 'gpt-5.2', + 'output' => [], + 'usage' => { 'input_tokens' => 8, 'output_tokens' => 4 } + } + } + ] + + message = provider.send(:stream_response, build_stream_connection(events), payload) + results = RubyLLM::Providers::OpenAIResponses::BuiltInTools.parse_shell_call_results_from_message(message) + + expect(results).to eq( + [ + { + id: 'sh_multi', + call_id: 'call_shell_multi', + status: 'completed', + environment: { + 'type' => 'container_reference', + 'container_id' => 'cntr_multi' + }, + action: { + 'commands' => ['echo one', 'echo two'] + }, + output: [ + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "one\n" + }, + { + 'outcome' => { 'type' => 'exit', 'exit_code' => 0 }, + 'stderr' => '', + 'stdout' => "two\n" + } + ] + } + ] + ) + end + + it 'keeps function-call streaming reconstruction intact' do + events = [ + { + 'type' => 'response.output_item.added', + 'item' => { + 'type' => 'function_call', + 'call_id' => 'call_fn_1', + 'name' => 'get_weather' + } + }, + { + 'type' => 'response.function_call_arguments.delta', + 'call_id' => 'call_fn_1', + 'delta' => '{"city":"' + }, + { + 'type' => 'response.function_call_arguments.delta', + 'call_id' => 'call_fn_1', + 'delta' => 'Berlin"}' + }, + { + 'type' => 'response.completed', + 'response' => { + 'id' => 'resp_fn_1', + 'model' => 'gpt-4o', + 'output' => [ + { + 'type' => 'function_call', + 'call_id' => 'call_fn_1', + 'name' => 'get_weather', + 'arguments' => '{"city":"Berlin"}' + } + ], + 'usage' => { 'input_tokens' => 6, 'output_tokens' => 4 } + } + } + ] + + message = provider.send(:stream_response, build_stream_connection(events), payload) + + expect(message.tool_calls['call_fn_1'].name).to eq('get_weather') + expect(message.tool_calls['call_fn_1'].arguments).to eq({ 'city' => 'Berlin' }) + expect(message.raw.body['output'].first['type']).to eq('function_call') + end + + it 'returns executable local shell tool calls from completed streamed output' do + events = [ + { + 'type' => 'response.completed', + 'response' => { + 'id' => 'resp_shell_local', + 'model' => 'gpt-5.4', + 'output' => [ + { + 'id' => 'sh_local_1', + 'type' => 'shell_call', + 'status' => 'completed', + 'action' => { 'commands' => ['pwd'], 'timeout_ms' => 10_000 }, + 'call_id' => 'call_shell_local_1', + 'environment' => nil + } + ], + 'tools' => [ + { + 'type' => 'shell', + 'environment' => { 'type' => 'local' } + } + ], + 'usage' => { 'input_tokens' => 8, 'output_tokens' => 3 } + } + } + ] + + message = provider.send(:stream_response, build_stream_connection(events), payload) + + expect(message.response_id).to eq('resp_shell_local') + expect(message.tool_calls['call_shell_local_1']).to be_a( + RubyLLM::Providers::OpenAIResponses::LocalShellToolCall + ) + expect(message.tool_calls['call_shell_local_1'].arguments['action']['commands']).to eq(['pwd']) + end + end + + describe '.log_stream_completion' do + let(:logger) { instance_double(Logger) } + + before do + allow(RubyLLM).to receive(:logger).and_return(logger) + end + + it 'skips empty tool-call completions' do + message = instance_double(RubyLLM::Message, content: '') + + expect(logger).not_to receive(:debug) + + described_class.log_stream_completion(message) + end + + it 'logs streamed assistant content' do + message = instance_double(RubyLLM::Message, content: 'Hello') + + expect(logger).to receive(:debug) do |&block| + expect(block.call).to eq('Stream completed: Hello') + end + + described_class.log_stream_completion(message) + end + end end diff --git a/spec/ruby_llm/providers/openai_responses_spec.rb b/spec/ruby_llm/providers/openai_responses_spec.rb index 45a7f18..11d7b2b 100644 --- a/spec/ruby_llm/providers/openai_responses_spec.rb +++ b/spec/ruby_llm/providers/openai_responses_spec.rb @@ -18,8 +18,8 @@ end describe '.slug' do - it 'returns :openai_responses' do - expect(described_class.slug).to eq(:openai_responses) + it 'returns "openai_responses"' do + expect(described_class.slug).to eq('openai_responses') end end