Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
f73bafb
[Renderer] Introduce Renderer
DarkLight1337 Dec 7, 2025
b7222cb
Simplify
DarkLight1337 Dec 7, 2025
4c81f01
Move simplify
DarkLight1337 Dec 7, 2025
4a37cda
Typo
DarkLight1337 Dec 7, 2025
3d11a96
Set the tokenizer name
DarkLight1337 Dec 7, 2025
e66ebd8
Simplify
DarkLight1337 Dec 7, 2025
02e64f5
Fix init
DarkLight1337 Dec 7, 2025
d8177a1
Reuse code
DarkLight1337 Dec 7, 2025
a00c164
Type checking
DarkLight1337 Dec 7, 2025
8a931d7
Cached
DarkLight1337 Dec 7, 2025
6909fe7
Fix mypy
DarkLight1337 Dec 7, 2025
942c94d
Allow `tokenizer=None`
DarkLight1337 Dec 7, 2025
86d1104
Handle tokenize
DarkLight1337 Dec 7, 2025
4be9517
Ignore mypy
DarkLight1337 Dec 7, 2025
bab3ff9
Fix
DarkLight1337 Dec 7, 2025
a40c5e4
Fix mypy
DarkLight1337 Dec 7, 2025
7652853
Fix
DarkLight1337 Dec 7, 2025
c709ef9
Fix
DarkLight1337 Dec 7, 2025
7b9aa93
mypy
DarkLight1337 Dec 7, 2025
4555841
Fix tests
DarkLight1337 Dec 7, 2025
8ebde23
Simplify
DarkLight1337 Dec 7, 2025
2ff7532
Fix mypy
DarkLight1337 Dec 7, 2025
2f2aaf7
Update imports
DarkLight1337 Dec 7, 2025
05e9b46
Fix
DarkLight1337 Dec 7, 2025
10460d6
Fix
DarkLight1337 Dec 7, 2025
49d10ff
Simplify
DarkLight1337 Dec 7, 2025
41d5ffa
Rename
DarkLight1337 Dec 7, 2025
06ff811
Fix
DarkLight1337 Dec 7, 2025
cd8c8fb
Fix test
DarkLight1337 Dec 7, 2025
fe2fbc9
Fix wrong truncation side
DarkLight1337 Dec 8, 2025
f7c1b2b
Update
DarkLight1337 Dec 8, 2025
ddd9590
Remove `RequestPrompt`
DarkLight1337 Dec 8, 2025
ed3fd86
Simplify
DarkLight1337 Dec 8, 2025
10814a7
Fix
DarkLight1337 Dec 8, 2025
67e5f7b
Comment
DarkLight1337 Dec 8, 2025
88700ee
Reword
DarkLight1337 Dec 8, 2025
e8787f7
Unnecessary lazy import
DarkLight1337 Dec 8, 2025
9dca5eb
Simplify
DarkLight1337 Dec 8, 2025
854ac77
Reduce diff
DarkLight1337 Dec 8, 2025
06d1f87
Fix
DarkLight1337 Dec 8, 2025
461dc61
Fix
DarkLight1337 Dec 8, 2025
0a86c22
Fix mypy and test
DarkLight1337 Dec 8, 2025
35d36fe
Merge branch 'main' into init-renderer-model
DarkLight1337 Dec 8, 2025
5991f7b
Update tests
DarkLight1337 Dec 8, 2025
38ad20b
Update tests
DarkLight1337 Dec 8, 2025
dc6ed06
Merge branch 'main' into init-renderer-model
DarkLight1337 Dec 8, 2025
89cbe7e
Move more tests
DarkLight1337 Dec 9, 2025
07aefa1
Merge branch 'main' into init-renderer-model
DarkLight1337 Dec 9, 2025
63d6961
Update misc
DarkLight1337 Dec 9, 2025
478b89e
Update
DarkLight1337 Dec 9, 2025
ef183e8
Merge branch 'main' into init-renderer-model
DarkLight1337 Dec 10, 2025
8f619da
Merge branch 'main' into init-renderer-model
DarkLight1337 Dec 10, 2025
e6d5703
Merge branch 'main' into init-renderer-model
DarkLight1337 Dec 10, 2025
bd62632
Merge branch 'main' into init-renderer-model
DarkLight1337 Dec 11, 2025
e4b89bf
Merge branch 'main' into init-renderer-model
DarkLight1337 Dec 13, 2025
5728381
Delay deprecation
DarkLight1337 Dec 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .buildkite/test-amd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ steps:
- pytest -v -s -m 'not cpu_test' multimodal
- pytest -v -s utils_

- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 15min
timeout_in_minutes: 20
- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 20min
timeout_in_minutes: 30
mirror_hardwares: [amdexperimental, amdproduction, amdtentative]
agent_pool: mi325_1
grade: Blocking
Expand All @@ -71,6 +71,7 @@ steps:
- tests/test_inputs.py
- tests/test_outputs.py
- tests/multimodal
- tests/renderers
- tests/standalone_tests/lazy_imports.py
- tests/tokenizers_
- tests/transformers_utils
Expand All @@ -81,6 +82,7 @@ steps:
- pytest -v -s test_inputs.py
- pytest -v -s test_outputs.py
- pytest -v -s -m 'cpu_test' multimodal
- pytest -v -s renderers
- pytest -v -s tokenizers_
- pytest -v -s transformers_utils
- pytest -v -s config
Expand Down
6 changes: 4 additions & 2 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,14 @@ steps:
- pytest -v -s -m 'not cpu_test' multimodal
- pytest -v -s utils_

- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 15min
timeout_in_minutes: 20
- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 20min
timeout_in_minutes: 30
source_file_dependencies:
- vllm/
- tests/test_inputs.py
- tests/test_outputs.py
- tests/multimodal
- tests/renderers
- tests/standalone_tests/lazy_imports.py
- tests/tokenizers_
- tests/transformers_utils
Expand All @@ -74,6 +75,7 @@ steps:
- pytest -v -s test_inputs.py
- pytest -v -s test_outputs.py
- pytest -v -s -m 'cpu_test' multimodal
- pytest -v -s renderers
- pytest -v -s tokenizers_
- pytest -v -s transformers_utils
- pytest -v -s config
Expand Down
4 changes: 3 additions & 1 deletion .buildkite/test_areas/misc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,13 @@ steps:

- label: Async Engine, Inputs, Utils, Worker, Config (CPU)
depends_on: ~
timeout_in_minutes: 20
timeout_in_minutes: 30
source_file_dependencies:
- vllm/
- tests/test_inputs.py
- tests/test_outputs.py
- tests/multimodal
- tests/renderers
- tests/standalone_tests/lazy_imports.py
- tests/tokenizers_
- tests/transformers_utils
Expand All @@ -131,6 +132,7 @@ steps:
- pytest -v -s test_inputs.py
- pytest -v -s test_outputs.py
- pytest -v -s -m 'cpu_test' multimodal
- pytest -v -s renderers
- pytest -v -s tokenizers_
- pytest -v -s transformers_utils
- pytest -v -s config
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/backend_request_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ def get_tokenizer(
kwargs["use_fast"] = False
if tokenizer_mode == "mistral":
try:
from vllm.tokenizers import MistralTokenizer
from vllm.tokenizers.mistral import MistralTokenizer
except ImportError as e:
raise ImportError(
"MistralTokenizer requires vllm package.\n"
Expand Down
156 changes: 0 additions & 156 deletions tests/entrypoints/openai/test_chat_template.py

This file was deleted.

26 changes: 19 additions & 7 deletions tests/entrypoints/openai/test_serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.outputs import CompletionOutput, RequestOutput
from vllm.renderers.hf import HfRenderer
from vllm.tokenizers import get_tokenizer
from vllm.tokenizers.registry import tokenizer_args_from_config
from vllm.v1.engine.async_llm import AsyncLLM

from ...utils import RemoteOpenAIServer
Expand Down Expand Up @@ -379,6 +381,15 @@ def get_diff_sampling_param(self):
return self.diff_sampling_param or {}


def _build_renderer(model_config: MockModelConfig):
    """Build an ``HfRenderer`` test fixture from a mock model config.

    Replaces the old ``mock_engine.get_tokenizer`` pattern: instead of
    attaching a pre-built tokenizer to the mock engine, the renderer is
    given the tokenizer name and kwargs so it can construct the tokenizer
    itself.
    """
    # NOTE(review): tokenizer_args_from_config appears to return a 4-tuple
    # of which only the tokenizer name (2nd) and extra kwargs (4th) are
    # needed here — confirm the tuple layout against vllm.tokenizers.registry.
    _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)

    # HfRenderer expects the tokenizer name folded into tokenizer_kwargs
    # under the "tokenizer_name" key, alongside any config-derived kwargs.
    return HfRenderer(
        model_config,
        tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
    )


def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
models = OpenAIServingModels(
engine_client=engine,
Expand Down Expand Up @@ -413,6 +424,7 @@ class MockEngine:
model_config: MockModelConfig = field(default_factory=MockModelConfig)
input_processor: MagicMock = field(default_factory=MagicMock)
io_processor: MagicMock = field(default_factory=MagicMock)
renderer: MagicMock = field(default_factory=MagicMock)


async def _async_serving_chat_init():
Expand All @@ -438,11 +450,11 @@ def test_async_serving_chat_init():
@pytest.mark.asyncio
async def test_serving_chat_returns_correct_model_name():
mock_engine = MagicMock(spec=AsyncLLM)
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = MockModelConfig()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()
mock_engine.renderer = _build_renderer(mock_engine.model_config)

serving_chat = _build_serving_chat(mock_engine)
messages = [{"role": "user", "content": "what is 1+1?"}]
Expand All @@ -468,11 +480,11 @@ async def return_model_name(*args):
@pytest.mark.asyncio
async def test_serving_chat_should_set_correct_max_tokens():
mock_engine = MagicMock(spec=AsyncLLM)
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = MockModelConfig()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()
mock_engine.renderer = _build_renderer(mock_engine.model_config)

serving_chat = _build_serving_chat(mock_engine)

Expand Down Expand Up @@ -501,11 +513,11 @@ async def test_serving_chat_should_set_correct_max_tokens():

# Reinitialize the engine with new settings
mock_engine = MagicMock(spec=AsyncLLM)
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = mock_model_config
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()
mock_engine.renderer = _build_renderer(mock_engine.model_config)

# Initialize the serving chat
serving_chat = _build_serving_chat(mock_engine)
Expand Down Expand Up @@ -546,11 +558,11 @@ async def test_serving_chat_should_set_correct_max_tokens():

# Reinitialize the engine with new settings
mock_engine = MagicMock(spec=AsyncLLM)
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = mock_model_config
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()
mock_engine.renderer = _build_renderer(mock_engine.model_config)

# Initialize the serving chat
serving_chat = _build_serving_chat(mock_engine)
Expand Down Expand Up @@ -592,11 +604,11 @@ async def test_serving_chat_could_load_correct_generation_config():
}

mock_engine = MagicMock(spec=AsyncLLM)
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = mock_model_config
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()
mock_engine.renderer = _build_renderer(mock_engine.model_config)

# Initialize the serving chat
serving_chat = _build_serving_chat(mock_engine)
Expand Down Expand Up @@ -638,11 +650,11 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
mock_model_config.hf_config.model_type = model_type

mock_engine = MagicMock(spec=AsyncLLM)
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = mock_model_config
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()
mock_engine.renderer = _build_renderer(mock_engine.model_config)

serving_chat = _build_serving_chat(mock_engine)

Expand Down Expand Up @@ -671,11 +683,11 @@ async def test_serving_chat_data_parallel_rank_extraction():
"""Test that data_parallel_rank is properly extracted from header and
passed to engine."""
mock_engine = MagicMock(spec=AsyncLLM)
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
mock_engine.errored = False
mock_engine.model_config = MockModelConfig()
mock_engine.input_processor = MagicMock()
mock_engine.io_processor = MagicMock()
mock_engine.renderer = _build_renderer(mock_engine.model_config)

# Mock the generate method to return an async generator
async def mock_generate(*args, **kwargs):
Expand Down
Loading