From 1d2c3fadb50e11c14e5161979e836d4c328e06cb Mon Sep 17 00:00:00 2001 From: Aditya Singh Date: Sat, 23 May 2026 07:18:14 -0700 Subject: [PATCH] fix(streaming): guard against parameters.stream=true on model init Closes #1325. When users set parameters.stream: true directly on a model in config.yml, providers like OpenAI forward the flag to the HTTP client, which returns an AsyncStream on every call. The non-streaming completion path then crashes with 'AsyncStream' object has no attribute 'model_dump'. The _prepare_model_kwargs helper now strips the stream flag at init time and logs a clear warning pointing users at the right opt-in (streaming flag on the rails config, or per-call via the API). stream_options is left untouched, while an explicit stream: false is dropped silently (it is a no-op, so no warning is logged). The kwargs dict is also defensively copied so the strip does not mutate the user's RailsConfig in place. Added regression tests covering the misconfiguration, the stream_options pass-through, and the silent drop of stream: false. Signed-off-by: Aditya Singh --- nemoguardrails/rails/llm/llmrails.py | 20 +++++++- tests/test_llmrails.py | 74 ++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/nemoguardrails/rails/llm/llmrails.py b/nemoguardrails/rails/llm/llmrails.py index 24a768f30d..b6423ca258 100644 --- a/nemoguardrails/rails/llm/llmrails.py +++ b/nemoguardrails/rails/llm/llmrails.py @@ -406,7 +406,25 @@ def _prepare_model_kwargs(self, model_config): Returns: dict: The prepared kwargs for model initialization """ - kwargs = model_config.parameters or {} + kwargs = dict(model_config.parameters) if model_config.parameters else {} + + # Setting ``stream`` directly in ``parameters`` is a foot-gun: providers + # like OpenAI forward it verbatim to the HTTP client, which returns an + # ``AsyncStream`` object on every call. The non-streaming completion path + # then tries to ``.model_dump()`` that stream and crashes the server. 
+ # See https://github.com/NVIDIA-NeMo/Guardrails/issues/1325. Streaming is + # opted in via the ``streaming`` rails-config flag (and requested per + # call via the API), not by baking ``stream: true`` into model params. + if kwargs.pop("stream", None): + log.warning( + "Ignoring `stream: true` set in `parameters` for model %r (engine %r). " + "Setting `stream` directly on a model causes the provider to return an " + "AsyncStream object on every call, which breaks the non-streaming path. " + "Request streaming via the API or set the `streaming` flag on the rails " + "config instead.", + getattr(model_config, "model", None), + getattr(model_config, "engine", None), + ) # If the optional API Key Environment Variable is set, add it to kwargs if model_config.api_key_env_var: diff --git a/tests/test_llmrails.py b/tests/test_llmrails.py index 8dc727d10d..63e71439fa 100644 --- a/tests/test_llmrails.py +++ b/tests/test_llmrails.py @@ -1004,6 +1004,80 @@ def __init__(self): assert kwargs["temperature"] == 0.3 +def test_prepare_model_kwargs_strips_stream_flag(caplog): + """Regression test for #1325. + + Setting ``stream: true`` directly on a model's parameters causes the + provider client to return an ``AsyncStream`` on every call, which the + non-streaming completion path then crashes on with + ``'AsyncStream' object has no attribute 'model_dump'``. The helper must + strip the flag and log a clear warning instead of forwarding it. 
+ """ + config = RailsConfig(models=[Model(type="main", engine="fake", model="fake")]) + rails = LLMRails(config=config, llm=FakeLLMModel(responses=[])) + + class ModelWithStream: + def __init__(self): + self.api_key_env_var = None + self.model = "gpt-4.1-2025-04-14" + self.engine = "openai" + self.parameters = {"temperature": 0.2, "stream": True} + + model = ModelWithStream() + with caplog.at_level(logging.WARNING, logger="nemoguardrails.rails.llm.llmrails"): + kwargs = rails._prepare_model_kwargs(model) + + assert "stream" not in kwargs + assert kwargs["temperature"] == 0.2 + assert any("stream" in record.message.lower() for record in caplog.records), ( + "Expected a warning that the `stream` parameter was stripped." + ) + + # Confirm the original config dict was not mutated. + assert model.parameters == {"temperature": 0.2, "stream": True} + + +def test_prepare_model_kwargs_preserves_stream_options(): + """The strip is narrow: only the literal ``stream`` flag is removed. + + Provider-specific knobs like ``stream_options`` (used to opt into usage + accounting on OpenAI streaming responses) must pass through untouched. 
+ """ + config = RailsConfig(models=[Model(type="main", engine="fake", model="fake")]) + rails = LLMRails(config=config, llm=FakeLLMModel(responses=[])) + + class ModelWithStreamOptions: + def __init__(self): + self.api_key_env_var = None + self.model = "gpt-4" + self.engine = "openai" + self.parameters = {"stream_options": {"include_usage": True}} + + kwargs = rails._prepare_model_kwargs(ModelWithStreamOptions()) + assert kwargs == {"stream_options": {"include_usage": True}} + + +def test_prepare_model_kwargs_does_not_warn_on_falsy_stream(caplog): + """An explicit ``stream: false`` is a no-op and should not warn.""" + config = RailsConfig(models=[Model(type="main", engine="fake", model="fake")]) + rails = LLMRails(config=config, llm=FakeLLMModel(responses=[])) + + class ModelWithFalseStream: + def __init__(self): + self.api_key_env_var = None + self.model = "gpt-4" + self.engine = "openai" + self.parameters = {"stream": False, "temperature": 0.1} + + with caplog.at_level(logging.WARNING, logger="nemoguardrails.rails.llm.llmrails"): + kwargs = rails._prepare_model_kwargs(ModelWithFalseStream()) + + # ``stream: false`` is harmless, so we silently drop it without a warning. + assert "stream" not in kwargs + assert kwargs["temperature"] == 0.1 + assert not any("stream" in record.message.lower() for record in caplog.records) + + def test_register_methods_return_self(): """Test that all register_* methods return self for method chaining.""" config = RailsConfig.from_content(config={"models": []})