Skip to content
Open
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.5.3] - 2026-03-27

### Fixed

- Return string from `slug` instead of symbol to fix `Model.refresh!` sorting crash (PR #6 by @noelblaschke)

## [0.5.2] - 2026-03-18

### Fixed
Expand Down
61 changes: 60 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,69 @@ tool = RubyLLM::ResponsesAPI::BuiltInTools.shell(
# With memory limit
tool = RubyLLM::ResponsesAPI::BuiltInTools.shell(memory_limit: '4g')

# Local execution (you handle running commands yourself)
# Local execution
tool = RubyLLM::ResponsesAPI::BuiltInTools.shell(environment_type: 'local')
```

For local shell environments, provide an executor with `local_shell_executor`. The executor can be any object that responds to `#call(shell_call)`, such as a lambda, service object, adapter, or class instance. It can run commands in a local process, a sandbox, a remote worker, or any other environment your application controls.

The executor is responsible for security, the working directory (cwd), sandboxing, timeout behavior, output truncation, and permissions.

The executor receives the raw `shell_call` hash from the Responses API. Common fields include:

```ruby
shell_call['call_id']
shell_call.dig('action', 'commands')
shell_call.dig('action', 'timeout_ms')
shell_call.dig('action', 'max_output_length')
shell_call['environment']
```

Return an array of command result hashes. Each result should include `stdout`, `stderr`, and `outcome`:

```ruby
[
{
'stdout' => "output\n",
'stderr' => '',
'outcome' => { 'type' => 'exit', 'exit_code' => 0 }
}
]
```

For a timeout, return an outcome like:

```ruby
{ 'type' => 'timeout' }
```

Example (here `run_command` is a placeholder for your own code that runs a single command):

```ruby
chat = RubyLLM.chat(model: 'gpt-5.2', provider: :openai_responses)

chat.with_params(
tools: [
RubyLLM::ResponsesAPI::BuiltInTools.shell(environment_type: 'local')
],
local_shell_executor: lambda do |shell_call|
commands = Array(shell_call.dig('action', 'commands'))

commands.map do |command|
result = run_command(command)

{
'stdout' => result.stdout,
'stderr' => result.stderr,
'outcome' => { 'type' => 'exit', 'exit_code' => result.exit_code }
}
end
end
)

chat.ask('Inspect the repo')
```

### Apply Patch

Structured diff-based file editing. Requires GPT-5 family models.
Expand Down
4 changes: 3 additions & 1 deletion lib/ruby_llm/providers/openai_responses.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def api_base
# rubocop:disable Metrics/ParameterLists
def complete(messages, tools:, temperature:, model:, params: {}, headers: {},
schema: nil, thinking: nil, tool_prefs: nil, &block)
params = params.except(:local_shell_executor, 'local_shell_executor')

if params[:transport]&.to_sym == :websocket
ws_complete(messages, tools: tools, temperature: temperature, model: model,
params: params.except(:transport), schema: schema,
Expand Down Expand Up @@ -214,7 +216,7 @@ def configuration_requirements
end

# Provider identifier.
# @return [String] 'openai_responses' (the method's value is its last expression)
def slug
# NOTE(review): this Symbol literal is evaluated and discarded; it looks like the
# pre-change line retained by the diff view — confirm before relying on it.
:openai_responses
'openai_responses'
end
end
end
Expand Down
71 changes: 59 additions & 12 deletions lib/ruby_llm/providers/openai_responses/built_in_tools.rb
Original file line number Diff line number Diff line change
Expand Up @@ -189,19 +189,31 @@ def parse_apply_patch_results(output)

# Parse shell call results from output
# @param output [Array] Response output array
# @return [Array<Hash>] Parsed shell call results
# @return [Array<Hash>] Parsed shell call results joined with output by call_id
def parse_shell_call_results(output)
# NOTE(review): the value of the select/map chain below is neither assigned nor
# returned; it looks like the pre-change body retained by the diff view — confirm.
output
.select { |item| item['type'] == 'shell_call' }
.map do |item|
{
id: item['id'],
call_id: item['call_id'],
status: item['status'],
action: item['action'],
container_id: item['container_id']
}
end
# Array() turns a nil output into an empty list
items = Array(output)
# Distinct call_ids of shell_call / shell_call_output items, in first-seen order
call_order = shell_call_order(items)
shell_calls_by_call_id = shell_call_items_by_call_id(items)
shell_outputs_by_call_id = shell_output_items_by_call_id(items)

# One result hash per call_id, pairing the shell_call item with its output items
call_order.map do |call_id|
build_shell_call_result(
call_id,
shell_call: shell_calls_by_call_id[call_id],
shell_outputs: shell_outputs_by_call_id[call_id]
)
end
end

# Parse shell call results out of the raw response attached to a message
# @param message [RubyLLM::Message, nil] Message whose `raw.body` is read; the body
#   may be a JSON String or an already-decoded Hash
# @return [Array<Hash>] Result of parse_shell_call_results for the body's 'output'
#   entry (nil is passed when the body is not a Hash); [] when a String body is
#   not valid JSON
def parse_shell_call_results_from_message(message)
  payload = message&.raw&.body
  payload = JSON.parse(payload) if payload.is_a?(String)

  output_items = payload.is_a?(Hash) ? payload['output'] : nil
  parse_shell_call_results(output_items)
rescue JSON::ParserError
  []
end

# Extract all citations from message content
Expand Down Expand Up @@ -236,6 +248,41 @@ def extract_citations(content)
}.compact
end
end

# Distinct call_ids of shell_call / shell_call_output items, in first-seen order.
# Items of other types, and matching items with a nil/false call_id, are ignored.
# @param items [Array<Hash>] Response output items
# @return [Array] Unique call_ids
private_class_method def shell_call_order(items)
  shell_types = %w[shell_call shell_call_output]

  items
    .select { |entry| shell_types.include?(entry['type']) }
    .filter_map { |entry| entry['call_id'] }
    .uniq
end

# Index shell_call items by their call_id.
# When several items share a call_id, the last one wins.
# @param items [Array<Hash>] Response output items
# @return [Hash] call_id => shell_call item
private_class_method def shell_call_items_by_call_id(items)
  items.each_with_object({}) do |entry, by_call_id|
    by_call_id[entry['call_id']] = entry if entry['type'] == 'shell_call'
  end
end

# Group shell_call_output items by call_id, preserving their order.
# The returned Hash has a default block, so reading an unknown call_id stores
# and returns a fresh empty Array.
# @param items [Array<Hash>] Response output items
# @return [Hash] call_id => Array of shell_call_output items
private_class_method def shell_output_items_by_call_id(items)
  grouped = Hash.new { |store, call_id| store[call_id] = [] }

  items.each do |entry|
    next unless entry['type'] == 'shell_call_output'

    grouped[entry['call_id']] << entry
  end

  grouped
end

# Build one result hash for a call_id from its shell_call item and output items.
# Keys whose value is nil are dropped from the result.
# @param call_id [String] Identifier shared by the call and its outputs
# @param shell_call [Hash, nil] The shell_call item, if one was present
# @param shell_outputs [Array<Hash>, nil] shell_call_output items for this call
# @return [Hash] :id, :call_id, :status, :environment, :action, :output
private_class_method def build_shell_call_result(call_id, shell_call:, shell_outputs:)
  call_item = shell_call || {}
  output_items = shell_outputs || []

  # Concatenate the 'output' entries of every output item (missing => nothing)
  merged_output = output_items.flat_map do |output_item|
    RubyLLM::Utils.deep_dup(output_item['output'] || [])
  end

  result = {
    id: call_item['id'],
    call_id: call_id,
    # Prefer the call's own status; otherwise use the last output item's status
    status: call_item['status'] || output_items.last&.dig('status'),
    environment: RubyLLM::Utils.deep_dup(call_item['environment']),
    action: RubyLLM::Utils.deep_dup(call_item['action']),
    output: merged_output
  }

  result.compact
end
end
end
end
Expand Down
121 changes: 82 additions & 39 deletions lib/ruby_llm/providers/openai_responses/chat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,15 @@ def completion_url
def render_payload(messages, tools:, temperature:, model:, stream: false,
schema: nil, thinking: nil, tool_prefs: nil) # rubocop:disable Lint/UnusedMethodArgument
tool_prefs ||= {}
system_messages, non_system_messages = messages.partition { |m| m.role == :system }

instructions = system_messages.map { |m| extract_text_content(m.content) }.join("\n\n")
non_system_messages = messages.reject { |m| m.role == :system }
continuation_input = continuation_input_messages(non_system_messages)

payload = {
model: model.id,
input: format_input(non_system_messages),
input: format_input(continuation_input || messages),
stream: stream
}

payload[:instructions] = instructions unless instructions.empty?
payload[:temperature] = temperature unless temperature.nil?
apply_tools(payload, tools, tool_prefs)
payload[:text] = build_schema_format(schema) if schema
Expand Down Expand Up @@ -85,6 +83,19 @@ def extract_last_response_id(messages)
.last
end

# Select the messages to send when continuing from a previous response.
# Finds the last assistant message that carries a response_id and returns the
# messages after it, but only when at least one follows and all of them are
# tool messages.
# @param messages [Array] Conversation messages (each responds to #role)
# @return [Array, nil] Trailing tool messages, or nil when the condition is not met
def continuation_input_messages(messages)
  anchor = messages.rindex do |candidate|
    next false unless candidate.role == :assistant

    candidate.respond_to?(:response_id) && candidate.response_id
  end
  return nil if anchor.nil?

  tail = messages.drop(anchor + 1)
  return nil if tail.empty?

  tail.all? { |candidate| candidate.role == :tool } ? tail : nil
end

def parse_completion_response(response)
data = response.body
return if data.nil? || data.empty?
Expand All @@ -98,8 +109,8 @@ def parse_completion_response(response)
# Extract text content from output
content = extract_output_text(output)

# Extract tool calls from function_call outputs
tool_calls = extract_tool_calls(output)
# Extract executable tool calls from function_call and local shell_call outputs
tool_calls = extract_tool_calls(output, response_tools: data['tools'])

usage = data['usage'] || {}
cached_tokens = usage.dig('input_tokens_details', 'cached_tokens')
Expand All @@ -118,17 +129,12 @@ def parse_completion_response(response)
)
end

def format_input(messages) # rubocop:disable Metrics/MethodLength
def format_input(messages)
result = []

messages.each do |msg|
if msg.tool_call_id
# Tool result message - function_call_output type
result << {
type: 'function_call_output',
call_id: msg.tool_call_id,
output: extract_text_content(msg.content)
}
result << format_tool_result(msg)
elsif msg.tool_calls&.any?
# Assistant message with tool calls
# First add any text content as a message
Expand All @@ -143,12 +149,7 @@ def format_input(messages) # rubocop:disable Metrics/MethodLength

# Then add each function call as a separate item
msg.tool_calls.each_value do |tc|
result << {
type: 'function_call',
call_id: tc.id,
name: tc.name,
arguments: tc.arguments.is_a?(String) ? tc.arguments : JSON.generate(tc.arguments)
}
result << format_tool_call(tc)
end
else
# Regular message
Expand All @@ -163,6 +164,38 @@ def format_input(messages) # rubocop:disable Metrics/MethodLength
result
end

# Format a tool result message as a Responses API input item.
# Content that LocalShellExecutor recognises as shell call output is passed
# through via its #value; anything else becomes a function_call_output item.
# @param msg [RubyLLM::Message] Tool result message
# @return [Object] The content's value, or a function_call_output Hash
def format_tool_result(msg)
  payload = msg.content

  if LocalShellExecutor.shell_call_output?(payload)
    payload.value
  else
    {
      type: 'function_call_output',
      call_id: msg.tool_call_id,
      output: format_function_tool_output(payload)
    }
  end
end

# Produce the `output` value for a function_call_output item.
# Raw content is serialized from its #value; other content goes through
# extract_text_content.
# @param content [Object] Tool result content
# @return [Object] Serialized raw value, or the result of extract_text_content
def format_function_tool_output(content)
  if content.is_a?(RubyLLM::Content::Raw)
    raw_tool_output(content.value)
  else
    extract_text_content(content)
  end
end

# Serialize a raw tool output value.
# @param value [Object] Raw value
# @return [String] The value itself when it is already a String, otherwise its JSON encoding
def raw_tool_output(value)
  return value if value.is_a?(String)

  JSON.generate(value)
end

# Format a tool call as a Responses API input item.
# A LocalShellToolCall is represented by its stored shell_call; any other tool
# call becomes a function_call item whose arguments are a JSON string.
# @param tool_call [Object] Tool call responding to #id, #name and #arguments
# @return [Object] The shell_call payload, or a function_call Hash
def format_tool_call(tool_call)
  if tool_call.is_a?(LocalShellToolCall)
    tool_call.shell_call
  else
    args = tool_call.arguments

    {
      type: 'function_call',
      call_id: tool_call.id,
      name: tool_call.name,
      # String arguments are sent unchanged; anything else is JSON-encoded
      arguments: args.is_a?(String) ? args : JSON.generate(args)
    }
  end
end

def format_message_content(content, tool_calls = nil)
parts = []

Expand All @@ -180,12 +213,7 @@ def format_message_content(content, tool_calls = nil)
# Add tool calls if present (for assistant messages)
if tool_calls&.any?
tool_calls.each_value do |tc|
parts << {
type: 'function_call',
call_id: tc.id,
name: tc.name,
arguments: tc.arguments.is_a?(String) ? tc.arguments : JSON.generate(tc.arguments)
}
parts << format_tool_call(tc)
end
end

Expand Down Expand Up @@ -263,20 +291,35 @@ def extract_output_text(output)
.join
end

def extract_tool_calls(output)
function_calls = output.select { |item| item['type'] == 'function_call' }
return nil if function_calls.empty?

function_calls.to_h do |fc|
[
fc['call_id'],
ToolCall.new(
id: fc['call_id'],
name: fc['name'],
arguments: parse_arguments(fc['arguments'])
)
]
# Collect the executable tool calls from a response's output items.
# Local shell calls become LocalShellToolCall objects and function_call items
# become ToolCall objects; other items are ignored.
# @param output [Array<Hash>] Response output items
# @param response_tools [Array<Hash>, nil] Tools echoed back in the response
# @return [Hash, nil] Tool calls keyed by their id, or nil when there are none
def extract_tool_calls(output, response_tools: nil)
  collected = output.each_with_object({}) do |entry, calls|
    built =
      if local_shell_call?(entry, response_tools: response_tools)
        LocalShellToolCall.new(entry)
      elsif entry['type'] == 'function_call'
        ToolCall.new(
          id: entry['call_id'],
          name: entry['name'],
          arguments: parse_arguments(entry['arguments'])
        )
      end
    next unless built

    calls[built.id] = built
  end

  collected.empty? ? nil : collected
end

# Decide whether an output item is a shell call meant for local execution.
# A shell_call is local when its environment type is 'local', or when it has no
# environment at all and a local shell tool is configured in response_tools.
# @param item [Hash] Response output item
# @param response_tools [Array<Hash>, nil] Tools echoed back in the response
# @return [Boolean]
def local_shell_call?(item, response_tools: nil)
  if item['type'] != 'shell_call'
    false
  elsif item.dig('environment', 'type') == 'local'
    true
  elsif item['environment'].nil?
    local_shell_tool_configured?(response_tools)
  else
    false
  end
end

# Check whether any tool is a shell tool whose environment type is 'local'.
# @param tools [Array<Hash>, nil] Tool definitions; nil is treated as no tools
# @return [Boolean]
def local_shell_tool_configured?(tools)
  Array(tools).any? do |tool|
    next false unless tool['type'] == 'shell'

    tool.dig('environment', 'type') == 'local'
  end
end

def parse_arguments(arguments)
Expand Down
Loading