vllm-project · daniel-salib · Dec 6, 2025 · Dec 7, 2025 · chatgpt-codex-connector · Dec 7, 2025
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
+from openai.types.responses.response_output_item import McpCall
 from openai_harmony import Author, Message, Role, TextContent
 
 from vllm.entrypoints.openai.parser.harmony_utils import (
@@ -400,17 +401,19 @@ def test_commentary_with_multiple_function_calls(self):
         assert output_items[0].arguments == '{"location": "San Francisco"}'
         assert output_items[1].arguments == '{"location": "New York"}'
 
-    def test_commentary_with_unknown_recipient_raises_error(self):
-        """Test that commentary with unknown recipient raises ValueError."""
-        message = Message.from_role_and_content(Role.ASSISTANT, "some content")
+    def test_commentary_with_unknown_recipient_creates_mcp_call(self):
+        """Test that commentary with unknown recipient creates MCP call."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
         message = message.with_channel("commentary")
-        message = message.with_recipient("unknown_recipient")
+        message = message.with_recipient("custom_tool")
 
-        try:
-            parse_output_message(message)
-            raise AssertionError("Expected ValueError to be raised")
-        except ValueError as e:
-            assert "Unknown recipient: unknown_recipient" in str(e)
+        output_items = parse_output_message(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], McpCall)
+        assert output_items[0].type == "mcp_call"
+        assert output_items[0].name == "custom_tool"
+        assert output_items[0].server_label == "custom_tool"
 
     def test_analysis_channel_creates_reasoning(self):
         """Test that analysis channel creates reasoning items."""
@@ -451,3 +454,167 @@ def test_has_custom_tools() -> None:
     assert has_custom_tools(
         {"web_search_preview", "code_interpreter", "container", "others"}
     )
+
+
+def test_parse_mcp_call_basic() -> None:
+    """Test that MCP calls are parsed with correct type and server_label."""
+    message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
+    message = message.with_recipient("filesystem")
+    message = message.with_channel("commentary")
+
+    output_items = parse_output_message(message)
+
+    assert len(output_items) == 1
+    assert isinstance(output_items[0], McpCall)
+    assert output_items[0].type == "mcp_call"
+    assert output_items[0].name == "filesystem"
+    assert output_items[0].server_label == "filesystem"
+    assert output_items[0].arguments == '{"path": "/tmp"}'
+    assert output_items[0].status == "completed"
+
+
+def test_parse_mcp_call_dotted_recipient() -> None:
+    """Test that dotted recipients extract the tool name correctly."""
+    message = Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
+    message = message.with_recipient("repo_browser.list")
+    message = message.with_channel("commentary")
+
+    output_items = parse_output_message(message)
+
+    assert len(output_items) == 1
+    assert isinstance(output_items[0], McpCall)
+    assert output_items[0].name == "list"
+    assert output_items[0].server_label == "repo_browser"
+
+
+def test_mcp_vs_function_call() -> None:
+    """Test that function calls are not parsed as MCP calls."""
+    func_message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
+    func_message = func_message.with_recipient("functions.my_tool")
+    func_message = func_message.with_channel("commentary")
+
+    func_items = parse_output_message(func_message)
+
+    assert len(func_items) == 1
+    assert not isinstance(func_items[0], McpCall)
+    assert func_items[0].type == "function_call"
+
+
+def test_mcp_vs_builtin_tools() -> None:
+    """Test that built-in tools (python, container) are not parsed as MCP calls."""
+    # Test python (built-in tool) - should be reasoning, not MCP
+    python_message = Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
+    python_message = python_message.with_recipient("python")
+    python_message = python_message.with_channel("commentary")
+
+    python_items = parse_output_message(python_message)
+
+    assert len(python_items) == 1
+    assert not isinstance(python_items[0], McpCall)
+    assert python_items[0].type == "reasoning"
+
+
+def test_parse_remaining_state_commentary_channel() -> None:
+    """Test parse_remaining_state with commentary channel and various recipients."""
+    from unittest.mock import Mock
+
+    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state
+
+    # Test 1: functions.* recipient → should return function tool call
+    parser_func = Mock()
+    parser_func.current_content = '{"arg": "value"}'
+    parser_func.current_role = Role.ASSISTANT
+    parser_func.current_channel = "commentary"
+    parser_func.current_recipient = "functions.my_tool"
+
+    func_items = parse_remaining_state(parser_func)
+
+    assert len(func_items) == 1
+    assert not isinstance(func_items[0], McpCall)
+    assert func_items[0].type == "function_call"
+    assert func_items[0].name == "my_tool"
+    assert func_items[0].status == "in_progress"
+
+    # Test 2: MCP tool (not builtin) → should return MCP call
+    parser_mcp = Mock()
+    parser_mcp.current_content = '{"path": "/tmp"}'
+    parser_mcp.current_role = Role.ASSISTANT
+    parser_mcp.current_channel = "commentary"
+    parser_mcp.current_recipient = "filesystem"
+
+    mcp_items = parse_remaining_state(parser_mcp)
+
+    assert len(mcp_items) == 1
+    assert isinstance(mcp_items[0], McpCall)
+    assert mcp_items[0].type == "mcp_call"
+    assert mcp_items[0].name == "filesystem"
+    assert mcp_items[0].server_label == "filesystem"
+    assert mcp_items[0].status == "in_progress"
+
+    # Test 3: Built-in tool (python)
+    # should NOT return MCP call, falls through to reasoning
+    parser_builtin = Mock()
+    parser_builtin.current_content = "print('hello')"
+    parser_builtin.current_role = Role.ASSISTANT
+    parser_builtin.current_channel = "commentary"
+    parser_builtin.current_recipient = "python"
+
+    builtin_items = parse_remaining_state(parser_builtin)
+
+    # Should fall through to reasoning logic
+    assert len(builtin_items) == 1
+    assert not isinstance(builtin_items[0], McpCall)
+    assert builtin_items[0].type == "reasoning"
+
+
+def test_parse_remaining_state_analysis_channel() -> None:
+    """Test parse_remaining_state with analysis channel and various recipients."""
+    from unittest.mock import Mock
+
+    from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state
+
+    # Test 1: functions.* recipient → should return function tool call
+    parser_func = Mock()
+    parser_func.current_content = '{"arg": "value"}'
+    parser_func.current_role = Role.ASSISTANT
+    parser_func.current_channel = "analysis"
+    parser_func.current_recipient = "functions.my_tool"
+
+    func_items = parse_remaining_state(parser_func)
+
+    assert len(func_items) == 1
+    assert not isinstance(func_items[0], McpCall)
+    assert func_items[0].type == "function_call"
+    assert func_items[0].name == "my_tool"
+    assert func_items[0].status == "in_progress"
+
+    # Test 2: MCP tool (not builtin) → should return MCP call
+    parser_mcp = Mock()
+    parser_mcp.current_content = '{"query": "test"}'
+    parser_mcp.current_role = Role.ASSISTANT
+    parser_mcp.current_channel = "analysis"
+    parser_mcp.current_recipient = "database"
+
+    mcp_items = parse_remaining_state(parser_mcp)
+
+    assert len(mcp_items) == 1
+    assert isinstance(mcp_items[0], McpCall)
+    assert mcp_items[0].type == "mcp_call"
+    assert mcp_items[0].name == "database"
+    assert mcp_items[0].server_label == "database"
+    assert mcp_items[0].status == "in_progress"
+
+    # Test 3: Built-in tool (container)
+    # should NOT return MCP call, falls through to reasoning
+    parser_builtin = Mock()
+    parser_builtin.current_content = "docker run"
+    parser_builtin.current_role = Role.ASSISTANT
+    parser_builtin.current_channel = "analysis"
+    parser_builtin.current_recipient = "container"
+
+    builtin_items = parse_remaining_state(parser_builtin)
+
+    # Should fall through to reasoning logic
+    assert len(builtin_items) == 1
+    assert not isinstance(builtin_items[0], McpCall)
+    assert builtin_items[0].type == "reasoning"
@@ -3,9 +3,9 @@
 
 import pytest
 import pytest_asyncio
-from openai import OpenAI
 from openai_harmony import ToolDescription, ToolNamespaceConfig
 
+from openai import OpenAI
 from vllm.entrypoints.tool_server import MCPToolServer
 
 from ...utils import RemoteOpenAIServer
@@ -206,6 +206,65 @@ async def test_mcp_tool_env_flag_disabled(mcp_disabled_client: OpenAI, model_nam
         )
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_mcp_tool_calling_streaming_types(
+    mcp_enabled_client: OpenAI, model_name: str
+):
+    pairs_of_event_types = {
+        "response.completed": "response.created",
+        "response.output_item.done": "response.output_item.added",
+        "response.content_part.done": "response.content_part.added",
+        "response.output_text.done": "response.output_text.delta",
+        "response.reasoning_text.done": "response.reasoning_text.delta",
+        "response.reasoning_part.done": "response.reasoning_part.added",
+        "response.mcp_call_arguments.done": "response.mcp_call_arguments.delta",
+        "response.mcp_call.completed": "response.mcp_call.in_progress",
+    }
+
+    tools = [
+        {
+            "type": "mcp",
+            "server_label": "code_interpreter",
+        }
+    ]
+    input_text = "What is 13 * 24? Use python to calculate the result."
+
+    stream_response = await mcp_enabled_client.responses.create(
+        model=model_name,
+        input=input_text,
+        tools=tools,
+        stream=True,
+        instructions=(
+            "You must use the Python tool to execute code. Never simulate execution."
+        ),
+    )
+
+    stack_of_event_types = []
+    async for event in stream_response:
+        assert "mcp_call" in event.type
+
+        if event.type == "response.created":
+            stack_of_event_types.append(event.type)
+        elif event.type == "response.completed":
+            assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
+            stack_of_event_types.pop()
+        if (
+            event.type.endswith("added")
+            or event.type == "response.mcp_call.in_progress"
+        ):
+            stack_of_event_types.append(event.type)
+        elif event.type.endswith("delta"):
+            if stack_of_event_types[-1] == event.type:
+                continue
+            stack_of_event_types.append(event.type)
+        elif event.type.endswith("done") or event.type == "response.mcp_call.completed":
+            assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
+            stack_of_event_types.pop()
+
+    assert len(stack_of_event_types) == 0
+
+
 def test_get_tool_description():
     """Test MCPToolServer.get_tool_description filtering logic.