Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 176 additions & 9 deletions tests/entrypoints/openai/parser/test_harmony_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
from openai.types.responses.response_output_item import McpCall
from openai_harmony import Author, Message, Role, TextContent

from vllm.entrypoints.openai.parser.harmony_utils import (
Expand Down Expand Up @@ -400,17 +401,19 @@ def test_commentary_with_multiple_function_calls(self):
assert output_items[0].arguments == '{"location": "San Francisco"}'
assert output_items[1].arguments == '{"location": "New York"}'

def test_commentary_with_unknown_recipient_raises_error(self):
"""Test that commentary with unknown recipient raises ValueError."""
message = Message.from_role_and_content(Role.ASSISTANT, "some content")
def test_commentary_with_unknown_recipient_creates_mcp_call(self):
"""Test that commentary with unknown recipient creates MCP call."""
message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
message = message.with_channel("commentary")
message = message.with_recipient("unknown_recipient")
message = message.with_recipient("custom_tool")

try:
parse_output_message(message)
raise AssertionError("Expected ValueError to be raised")
except ValueError as e:
assert "Unknown recipient: unknown_recipient" in str(e)
output_items = parse_output_message(message)

assert len(output_items) == 1
assert isinstance(output_items[0], McpCall)
assert output_items[0].type == "mcp_call"
assert output_items[0].name == "custom_tool"
assert output_items[0].server_label == "custom_tool"

def test_analysis_channel_creates_reasoning(self):
"""Test that analysis channel creates reasoning items."""
Expand Down Expand Up @@ -451,3 +454,167 @@ def test_has_custom_tools() -> None:
assert has_custom_tools(
{"web_search_preview", "code_interpreter", "container", "others"}
)


def test_parse_mcp_call_basic() -> None:
"""Test that MCP calls are parsed with correct type and server_label."""
message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
message = message.with_recipient("filesystem")
message = message.with_channel("commentary")

output_items = parse_output_message(message)

assert len(output_items) == 1
assert isinstance(output_items[0], McpCall)
assert output_items[0].type == "mcp_call"
assert output_items[0].name == "filesystem"
assert output_items[0].server_label == "filesystem"
assert output_items[0].arguments == '{"path": "/tmp"}'
assert output_items[0].status == "completed"


def test_parse_mcp_call_dotted_recipient() -> None:
"""Test that dotted recipients extract the tool name correctly."""
message = Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
message = message.with_recipient("repo_browser.list")
message = message.with_channel("commentary")

output_items = parse_output_message(message)

assert len(output_items) == 1
assert isinstance(output_items[0], McpCall)
assert output_items[0].name == "list"
assert output_items[0].server_label == "repo_browser"


def test_mcp_vs_function_call() -> None:
"""Test that function calls are not parsed as MCP calls."""
func_message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
func_message = func_message.with_recipient("functions.my_tool")
func_message = func_message.with_channel("commentary")

func_items = parse_output_message(func_message)

assert len(func_items) == 1
assert not isinstance(func_items[0], McpCall)
assert func_items[0].type == "function_call"


def test_mcp_vs_builtin_tools() -> None:
"""Test that built-in tools (python, container) are not parsed as MCP calls."""
# Test python (built-in tool) - should be reasoning, not MCP
python_message = Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
python_message = python_message.with_recipient("python")
python_message = python_message.with_channel("commentary")

python_items = parse_output_message(python_message)

assert len(python_items) == 1
assert not isinstance(python_items[0], McpCall)
assert python_items[0].type == "reasoning"


def test_parse_remaining_state_commentary_channel() -> None:
"""Test parse_remaining_state with commentary channel and various recipients."""
from unittest.mock import Mock

from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

# Test 1: functions.* recipient → should return function tool call
parser_func = Mock()
parser_func.current_content = '{"arg": "value"}'
parser_func.current_role = Role.ASSISTANT
parser_func.current_channel = "commentary"
parser_func.current_recipient = "functions.my_tool"

func_items = parse_remaining_state(parser_func)

assert len(func_items) == 1
assert not isinstance(func_items[0], McpCall)
assert func_items[0].type == "function_call"
assert func_items[0].name == "my_tool"
assert func_items[0].status == "in_progress"

# Test 2: MCP tool (not builtin) → should return MCP call
parser_mcp = Mock()
parser_mcp.current_content = '{"path": "/tmp"}'
parser_mcp.current_role = Role.ASSISTANT
parser_mcp.current_channel = "commentary"
parser_mcp.current_recipient = "filesystem"

mcp_items = parse_remaining_state(parser_mcp)

assert len(mcp_items) == 1
assert isinstance(mcp_items[0], McpCall)
assert mcp_items[0].type == "mcp_call"
assert mcp_items[0].name == "filesystem"
assert mcp_items[0].server_label == "filesystem"
assert mcp_items[0].status == "in_progress"

# Test 3: Built-in tool (python)
# should NOT return MCP call, falls through to reasoning
parser_builtin = Mock()
parser_builtin.current_content = "print('hello')"
parser_builtin.current_role = Role.ASSISTANT
parser_builtin.current_channel = "commentary"
parser_builtin.current_recipient = "python"

builtin_items = parse_remaining_state(parser_builtin)

# Should fall through to reasoning logic
assert len(builtin_items) == 1
assert not isinstance(builtin_items[0], McpCall)
assert builtin_items[0].type == "reasoning"


def test_parse_remaining_state_analysis_channel() -> None:
"""Test parse_remaining_state with analysis channel and various recipients."""
from unittest.mock import Mock

from vllm.entrypoints.openai.parser.harmony_utils import parse_remaining_state

# Test 1: functions.* recipient → should return function tool call
parser_func = Mock()
parser_func.current_content = '{"arg": "value"}'
parser_func.current_role = Role.ASSISTANT
parser_func.current_channel = "analysis"
parser_func.current_recipient = "functions.my_tool"

func_items = parse_remaining_state(parser_func)

assert len(func_items) == 1
assert not isinstance(func_items[0], McpCall)
assert func_items[0].type == "function_call"
assert func_items[0].name == "my_tool"
assert func_items[0].status == "in_progress"

# Test 2: MCP tool (not builtin) → should return MCP call
parser_mcp = Mock()
parser_mcp.current_content = '{"query": "test"}'
parser_mcp.current_role = Role.ASSISTANT
parser_mcp.current_channel = "analysis"
parser_mcp.current_recipient = "database"

mcp_items = parse_remaining_state(parser_mcp)

assert len(mcp_items) == 1
assert isinstance(mcp_items[0], McpCall)
assert mcp_items[0].type == "mcp_call"
assert mcp_items[0].name == "database"
assert mcp_items[0].server_label == "database"
assert mcp_items[0].status == "in_progress"

# Test 3: Built-in tool (container)
# should NOT return MCP call, falls through to reasoning
parser_builtin = Mock()
parser_builtin.current_content = "docker run"
parser_builtin.current_role = Role.ASSISTANT
parser_builtin.current_channel = "analysis"
parser_builtin.current_recipient = "container"

builtin_items = parse_remaining_state(parser_builtin)

# Should fall through to reasoning logic
assert len(builtin_items) == 1
assert not isinstance(builtin_items[0], McpCall)
assert builtin_items[0].type == "reasoning"
61 changes: 60 additions & 1 deletion tests/entrypoints/openai/test_response_api_mcp_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

import pytest
import pytest_asyncio
from openai import OpenAI
from openai_harmony import ToolDescription, ToolNamespaceConfig

from openai import OpenAI
from vllm.entrypoints.tool_server import MCPToolServer

from ...utils import RemoteOpenAIServer
Expand Down Expand Up @@ -206,6 +206,65 @@ async def test_mcp_tool_env_flag_disabled(mcp_disabled_client: OpenAI, model_nam
)


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_mcp_tool_calling_streaming_types(
mcp_enabled_client: OpenAI, model_name: str
):
pairs_of_event_types = {
"response.completed": "response.created",
"response.output_item.done": "response.output_item.added",
"response.content_part.done": "response.content_part.added",
"response.output_text.done": "response.output_text.delta",
"response.reasoning_text.done": "response.reasoning_text.delta",
"response.reasoning_part.done": "response.reasoning_part.added",
"response.mcp_call_arguments.done": "response.mcp_call_arguments.delta",
"response.mcp_call.completed": "response.mcp_call.in_progress",
}

tools = [
{
"type": "mcp",
"server_label": "code_interpreter",
}
]
input_text = "What is 13 * 24? Use python to calculate the result."

stream_response = await mcp_enabled_client.responses.create(
model=model_name,
input=input_text,
tools=tools,
stream=True,
instructions=(
"You must use the Python tool to execute code. Never simulate execution."
),
)

stack_of_event_types = []
async for event in stream_response:
assert "mcp_call" in event.type
Comment on lines +243 to +245

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Fix blanket mcp_call assertion in streaming test

In test_mcp_tool_calling_streaming_types every streamed event is immediately asserted to contain the substring "mcp_call" before any dispatch logic runs. The Responses API stream always begins with response.created (and response.in_progress) which do not include that substring, so this assertion fails on the first event and the rest of the test logic never executes. As written the test cannot pass even when the streaming implementation is correct; the assertion needs to be limited to the MCP events it is intended to check.

Useful? React with 👍 / 👎.


if event.type == "response.created":
stack_of_event_types.append(event.type)
elif event.type == "response.completed":
assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
stack_of_event_types.pop()
if (
event.type.endswith("added")
or event.type == "response.mcp_call.in_progress"
):
stack_of_event_types.append(event.type)
elif event.type.endswith("delta"):
if stack_of_event_types[-1] == event.type:
continue
stack_of_event_types.append(event.type)
elif event.type.endswith("done") or event.type == "response.mcp_call.completed":
assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
stack_of_event_types.pop()

assert len(stack_of_event_types) == 0
Comment on lines +244 to +265
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The logic in this test has a couple of issues:

  1. The assertion assert "mcp_call" in event.type on line 245 is too strict. The event stream includes many events unrelated to MCP calls (e.g., response.created, response.reasoning_text.delta), which will cause this assertion to fail.
  2. The event handling logic uses an if followed by an elif chain, and then another if (line 252). This second if should be an elif to form a single conditional block. Otherwise, an event matching endswith("added") will be processed by the second if block, and then the subsequent elif blocks for delta and done will be skipped for that event, which is not the intended logic for pairing events.

I've suggested a fix that addresses both points by introducing a flag to check for MCP events and correcting the conditional logic.

    mcp_event_seen = False
    stack_of_event_types = []
    async for event in stream_response:
        if "mcp_call" in event.type:
            mcp_event_seen = True

        if event.type == "response.created":
            stack_of_event_types.append(event.type)
        elif event.type == "response.completed":
            assert stack_of_event_types.pop() == pairs_of_event_types[event.type]
        elif (
            event.type.endswith("added")
            or event.type == "response.mcp_call.in_progress"
        ):
            stack_of_event_types.append(event.type)
        elif event.type.endswith("delta"):
            if not stack_of_event_types or stack_of_event_types[-1] != event.type:
                stack_of_event_types.append(event.type)
        elif event.type.endswith("done") or event.type == "response.mcp_call.completed":
            assert stack_of_event_types.pop() == pairs_of_event_types[event.type]

    assert mcp_event_seen, "No MCP call events were observed in the stream."
    assert len(stack_of_event_types) == 0



def test_get_tool_description():
"""Test MCPToolServer.get_tool_description filtering logic.

Expand Down
Loading
Loading