Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@
SendMessageRequest,
SetConfirmationPolicyRequest,
SetSecurityAnalyzerRequest,
StartACPConversationRequest,
StartConversationRequest,
Success,
UpdateConversationRequest,
UpdateSecretsRequest,
)
from openhands.sdk import LLM, Agent, TextContent
from openhands.sdk.conversation.state import ConversationExecutionStatus
from openhands.sdk.utils.seatbelt import is_seatbelt_supported
from openhands.sdk.workspace import LocalWorkspace
from openhands.tools.preset.default import get_default_tools

Expand Down Expand Up @@ -149,9 +151,34 @@ async def batch_get_conversations(
# Write Methods


def _ensure_seatbelt_available(
request: StartConversationRequest | StartACPConversationRequest,
) -> None:
"""Reject the request if seatbelt is requested but unavailable.

Seatbelt (`sandbox-exec`) only ships with macOS, so we fail fast at the API
boundary instead of letting the conversation start in an unsandboxed state
on a host that cannot honor the request. Used by both the legacy and ACP
conversation routers.
"""
if not request.seatbelt:
return
if not is_seatbelt_supported():
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=(
"seatbelt=true is only supported on macOS hosts with "
"`sandbox-exec` available on PATH."
),
)


@conversation_router.post(
"",
responses={409: {"description": "Conversation contract mismatch"}},
responses={
400: {"description": "Seatbelt requested but not available on this host"},
409: {"description": "Conversation contract mismatch"},
},
)
async def start_conversation(
request: Annotated[
Expand All @@ -161,6 +188,7 @@ async def start_conversation(
conversation_service: ConversationService = Depends(get_conversation_service),
) -> ConversationInfo:
"""Start a conversation in the local environment."""
_ensure_seatbelt_available(request)
try:
info, is_new = await conversation_service.start_conversation(request)
except ConversationContractMismatchError as e:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from fastapi import APIRouter, Body, Depends, HTTPException, Query, Response, status
from pydantic import SecretStr

from openhands.agent_server.conversation_router import _ensure_seatbelt_available
from openhands.agent_server.conversation_service import ConversationService
from openhands.agent_server.dependencies import get_conversation_service
from openhands.agent_server.models import (
Expand Down Expand Up @@ -118,7 +119,12 @@ async def batch_get_acp_conversations(
return await conversation_service.batch_get_acp_conversations(ids)


@conversation_router_acp.post("")
@conversation_router_acp.post(
"",
responses={
400: {"description": "Seatbelt requested but not available on this host"},
},
)
async def start_acp_conversation(
request: Annotated[
StartACPConversationRequest,
Expand All @@ -128,6 +134,7 @@ async def start_acp_conversation(
conversation_service: ConversationService = Depends(get_conversation_service),
) -> ACPConversationInfo:
"""Start a conversation using the ACP-capable contract."""
_ensure_seatbelt_available(request)
info, is_new = await conversation_service.start_acp_conversation(request)
response.status_code = status.HTTP_201_CREATED if is_new else status.HTTP_200_OK
return info
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,7 @@ def _token_streaming_callback(chunk: LLMStreamChunk) -> None:
cipher=self.cipher,
hook_config=self.stored.hook_config,
tags=self.stored.tags,
seatbelt=self.stored.seatbelt,
)

conversation.set_confirmation_policy(self.stored.confirmation_policy)
Expand Down
7 changes: 7 additions & 0 deletions openhands-agent-server/openhands/agent_server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,13 @@ class _ConversationInfoBase(BaseModel):
"alphanumeric. Values are arbitrary strings up to 256 characters."
),
)
seatbelt: bool = Field(
default=False,
description=(
"If true, the conversation's shell tools run inside macOS' "
"Seatbelt (`sandbox-exec`) sandbox."
),
)


class ConversationInfo(_ConversationInfoBase):
Expand Down
5 changes: 5 additions & 0 deletions openhands-sdk/openhands/sdk/conversation/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def __new__(
secrets: dict[str, SecretValue] | dict[str, str] | None = None,
delete_on_close: bool = True,
tags: dict[str, str] | None = None,
seatbelt: bool = False,
) -> "LocalConversation": ...

@overload
Expand All @@ -104,6 +105,7 @@ def __new__(
secrets: dict[str, SecretValue] | dict[str, str] | None = None,
delete_on_close: bool = True,
tags: dict[str, str] | None = None,
seatbelt: bool = False,
) -> "RemoteConversation": ...

def __new__(
Expand All @@ -128,6 +130,7 @@ def __new__(
secrets: dict[str, SecretValue] | dict[str, str] | None = None,
delete_on_close: bool = True,
tags: dict[str, str] | None = None,
seatbelt: bool = False,
) -> BaseConversation:
from openhands.sdk.conversation.impl.local_conversation import LocalConversation
from openhands.sdk.conversation.impl.remote_conversation import (
Expand Down Expand Up @@ -181,6 +184,7 @@ def __new__(
secrets=secrets,
delete_on_close=delete_on_close,
tags=effective_tags if effective_tags else None,
seatbelt=seatbelt,
)

return LocalConversation(
Expand All @@ -199,4 +203,5 @@ def __new__(
secrets=secrets,
delete_on_close=delete_on_close,
tags=tags,
seatbelt=seatbelt,
)
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def __init__(
delete_on_close: bool = True,
cipher: Cipher | None = None,
tags: dict[str, str] | None = None,
seatbelt: bool = False,
**_: object,
):
"""Initialize the conversation.
Expand Down Expand Up @@ -186,6 +187,7 @@ def __init__(
else None,
max_iterations=max_iteration_per_run,
stuck_detection=stuck_detection,
seatbelt=seatbelt,
cipher=cipher,
tags=tags,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ def __init__(
secrets: Mapping[str, SecretValue] | None = None,
delete_on_close: bool = False,
tags: dict[str, str] | None = None,
seatbelt: bool = False,
**_: object,
) -> None:
"""Remote conversation proxy that talks to an agent server.
Expand Down Expand Up @@ -768,6 +769,8 @@ def __init__(
"hook_config": hook_config.model_dump() if hook_config else None,
# Include tags if provided
"tags": tags or {},
# Opt-in macOS Seatbelt sandbox for shell tools
"seatbelt": seatbelt,
}
if stuck_detection_thresholds is not None:
# Convert to StuckDetectionThresholds if dict, then serialize
Expand Down
11 changes: 11 additions & 0 deletions openhands-sdk/openhands/sdk/conversation/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,17 @@ class _StartConversationRequestBase(BaseModel):
"loads) → agent.llm → message truncation."
),
)
seatbelt: bool = Field(
default=False,
description=(
"If true, run shell commands spawned by this conversation inside "
"macOS' Seatbelt sandbox via `sandbox-exec`. Only supported when "
"the agent server is running on macOS with `sandbox-exec` available; "
"the server will reject the request otherwise. The default sandbox "
"profile permits reads anywhere, network access, and writes only "
"to the conversation's workspace and the standard temp directories."
),
)
title_llm_profile: str | None = Field(
default=None,
description=(
Expand Down
11 changes: 11 additions & 0 deletions openhands-sdk/openhands/sdk/conversation/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@ class ConversationState(OpenHandsModel):
default=True,
description="Whether to enable stuck detection for the agent.",
)
seatbelt: bool = Field(
default=False,
description=(
"If true, shell tools spawned by this conversation are wrapped "
"with macOS' Seatbelt sandbox (`sandbox-exec`). Tools opt in by "
"consulting this flag on the conversation state."
),
)

# Enum-based state management
execution_status: ConversationExecutionStatus = Field(
Expand Down Expand Up @@ -280,6 +288,7 @@ def create(
persistence_dir: str | None = None,
max_iterations: int = 500,
stuck_detection: bool = True,
seatbelt: bool = False,
cipher: Cipher | None = None,
tags: dict[str, str] | None = None,
) -> "ConversationState":
Expand Down Expand Up @@ -361,6 +370,7 @@ def create(
state.agent = agent
state.workspace = workspace
state.max_iterations = max_iterations
state.seatbelt = seatbelt

# Note: stats are already deserialized from base_state.json above.
# Do NOT reset stats here - this would lose accumulated metrics.
Expand All @@ -385,6 +395,7 @@ def create(
persistence_dir=persistence_dir,
max_iterations=max_iterations,
stuck_detection=stuck_detection,
seatbelt=seatbelt,
tags=tags or {},
)
state._fs = file_store
Expand Down
93 changes: 93 additions & 0 deletions openhands-sdk/openhands/sdk/utils/seatbelt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Helpers for running shells inside macOS' Seatbelt (`sandbox-exec`) sandbox.

Seatbelt is the informal name for the macOS process sandbox facility. It is
configured through a TinyScheme profile and applied to a child process via
``/usr/bin/sandbox-exec``. This module centralises the small surface we need:

* a reasonable default profile that allows agent-style work in a workspace
while restricting writes elsewhere on disk, and
* the helper used by terminal backends to wrap a shell command in
``sandbox-exec -p <profile> ...``.

The agent-server is responsible for validating availability up-front (macOS +
``sandbox-exec`` on ``PATH``); this module is intentionally side-effect free.
"""

from __future__ import annotations

import platform
import shutil


# Absolute path tried as a fallback when `sandbox-exec` cannot be resolved
# via PATH.
SANDBOX_EXEC_BIN = "/usr/bin/sandbox-exec"


def is_seatbelt_supported() -> bool:
    """Report whether this host can run commands under `sandbox-exec`.

    Returns:
        True only when the platform reports ``Darwin`` and a `sandbox-exec`
        binary is found, either on ``PATH`` or at ``SANDBOX_EXEC_BIN``.
    """
    running_on_macos = platform.system() == "Darwin"
    if not running_on_macos:
        return False
    # Prefer whatever PATH resolves to; only then probe the fixed location.
    if shutil.which("sandbox-exec") is not None:
        return True
    return _binary_exists(SANDBOX_EXEC_BIN)


def _binary_exists(path: str) -> bool:
import os

return os.path.isfile(path) and os.access(path, os.X_OK)


def default_profile(workspace_dir: str) -> str:
    """Return a Seatbelt profile string for an agent operating in a workspace.

    The profile denies everything by default and then allows process
    fork/exec, signalling itself, sysctl reads, Mach lookups, POSIX shared
    memory, arbitrary file reads, and network access. File writes are
    limited to the workspace, the temp locations (``/tmp``,
    ``/private/tmp``, ``/var/folders``, ``/private/var/folders``), and
    ``/dev``.

    Args:
        workspace_dir: Path of the workspace that should be writable. It is
            embedded in the profile as a string literal.

    Returns:
        The rendered profile text, suitable for ``sandbox-exec -p``.

    Raises:
        ValueError: If ``workspace_dir`` contains a double-quote, which
            would terminate the string literal inside the profile.
    """
    # Seatbelt profiles are TinyScheme; an embedded double-quote would end
    # the string literal early. Reject it rather than silently producing an
    # invalid profile.
    if '"' in workspace_dir:
        raise ValueError(
            f"workspace_dir cannot contain a double-quote: {workspace_dir!r}"
        )

    # Backslash is the escape character inside TinyScheme string literals,
    # so a raw backslash in the path would be consumed (or would alter the
    # following character). Double it so the profile names the exact path.
    escaped_workspace = workspace_dir.replace("\\", "\\\\")

    return f"""(version 1)
(deny default)
(allow process-fork)
(allow process-exec)
(allow signal (target self))
(allow sysctl-read)
(allow mach-lookup)
(allow ipc-posix-shm)
(allow file-read*)
(allow file-write*
    (subpath "{escaped_workspace}")
    (subpath "/tmp")
    (subpath "/private/tmp")
    (subpath "/var/folders")
    (subpath "/private/var/folders")
    (subpath "/dev")
)
(allow network*)
"""


def wrap_with_sandbox_exec(
    command: list[str],
    workspace_dir: str,
    profile: str | None = None,
) -> list[str]:
    """Build an argv that runs ``command`` under ``sandbox-exec -p <profile>``.

    Args:
        command: The argv to execute inside the sandbox.
        workspace_dir: Workspace path used to render the default profile;
            only consulted when ``profile`` is not supplied.
        profile: Explicit profile text. When ``None``, the profile from
            ``default_profile(workspace_dir)`` is used.

    Returns:
        A new list: the `sandbox-exec` executable, ``-p``, the profile text,
        followed by the items of ``command``.
    """
    if profile is None:
        profile = default_profile(workspace_dir)

    # Use whatever PATH resolves to, falling back to the fixed location.
    executable = shutil.which("sandbox-exec")
    if not executable:
        executable = SANDBOX_EXEC_BIN

    return [executable, "-p", profile] + list(command)
1 change: 1 addition & 0 deletions openhands-tools/openhands/tools/terminal/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ def create(
terminal_type=terminal_type,
shell_path=shell_path,
full_output_save_dir=conv_state.env_observation_persistence_dir,
seatbelt=getattr(conv_state, "seatbelt", False),
)

tool_description = (
Expand Down
9 changes: 9 additions & 0 deletions openhands-tools/openhands/tools/terminal/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def __init__(
shell_path: str | None = None,
full_output_save_dir: str | None = None,
max_panes: int = DEFAULT_MAX_PANES,
seatbelt: bool = False,
):
"""Initialize TerminalExecutor with auto-detected or specified session type.

Expand All @@ -86,13 +87,18 @@ def __init__(
full_output_save_dir: Path to directory to save full output
logs and files, used when truncation is needed.
max_panes: Maximum number of concurrent panes in pool mode.
seatbelt: If True, wrap shell processes with macOS' `sandbox-exec`
so reads are unrestricted but writes are confined to the
working directory and standard temp paths. Only valid on
macOS; the caller is expected to validate availability.
"""
self.shell_path = shell_path
self._working_dir = working_dir
self._username = username
self._no_change_timeout_seconds = no_change_timeout_seconds
self._terminal_type = terminal_type
self._max_panes = max_panes
self._seatbelt = seatbelt
self.full_output_save_dir: str | None = full_output_save_dir

# Pool mode: use TmuxPanePool for parallel execution
Expand All @@ -113,6 +119,7 @@ def __init__(
no_change_timeout_seconds=no_change_timeout_seconds,
terminal_type=terminal_type,
shell_path=shell_path,
seatbelt=seatbelt,
)
self._session.initialize()
logger.info(
Expand All @@ -133,6 +140,7 @@ def _initialize_pool(self) -> None:
self._working_dir,
self._username,
max_panes=self._max_panes,
seatbelt=self._seatbelt,
)
self._pool.initialize()
logger.info(
Expand Down Expand Up @@ -398,6 +406,7 @@ def _reset_single_session(self) -> TerminalObservation:
no_change_timeout_seconds=original_no_change_timeout,
terminal_type=None,
shell_path=self.shell_path,
seatbelt=self._seatbelt,
)
self._session.initialize()

Expand Down
Loading
Loading