From 1d2c3fadb50e11c14e5161979e836d4c328e06cb Mon Sep 17 00:00:00 2001
From: Aditya Singh <adisin650@gmail.com>
Date: Sat, 23 May 2026 07:18:14 -0700
Subject: [PATCH] fix(streaming): guard against parameters.stream=true on model
 init

Closes #1325.

When users set parameters.stream: true directly on a model in
config.yml, providers like OpenAI forward the flag to the HTTP
client, which returns an AsyncStream on every call. The
non-streaming completion path then crashes with
'AsyncStream' object has no attribute 'model_dump'.

The _prepare_model_kwargs helper now strips the stream flag at
init time and logs a clear warning pointing users at the right
opt-in (streaming flag on the rails config, or per-call via the
API). stream_options is passed through untouched; an explicit
stream: false is also removed, but silently (no warning), since
it is harmless. The kwargs dict is also defensively copied so
the strip does not mutate the user's RailsConfig in place.

Added regression tests covering the misconfiguration, the
stream_options pass-through, and the silent drop of stream:
false.

Signed-off-by: Aditya Singh <adisin650@gmail.com>
---
 nemoguardrails/rails/llm/llmrails.py | 20 +++++++-
 tests/test_llmrails.py               | 74 ++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/nemoguardrails/rails/llm/llmrails.py b/nemoguardrails/rails/llm/llmrails.py
index 24a768f30d..b6423ca258 100644
--- a/nemoguardrails/rails/llm/llmrails.py
+++ b/nemoguardrails/rails/llm/llmrails.py
@@ -406,7 +406,25 @@ def _prepare_model_kwargs(self, model_config):
         Returns:
             dict: The prepared kwargs for model initialization
         """
-        kwargs = model_config.parameters or {}
+        kwargs = dict(model_config.parameters) if model_config.parameters else {}
+
+        # Setting ``stream`` directly in ``parameters`` is a foot-gun: providers
+        # like OpenAI forward it verbatim to the HTTP client, which returns an
+        # ``AsyncStream`` object on every call. The non-streaming completion path
+        # then tries to ``.model_dump()`` that stream and crashes the server.
+        # See https://github.com/NVIDIA-NeMo/Guardrails/issues/1325. Streaming is
+        # opted in via the ``streaming`` rails-config flag (and requested per
+        # call via the API), not by baking ``stream: true`` into model params.
+        if kwargs.pop("stream", None):
+            log.warning(
+                "Ignoring `stream: true` set in `parameters` for model %r (engine %r). "
+                "Setting `stream` directly on a model causes the provider to return an "
+                "AsyncStream object on every call, which breaks the non-streaming path. "
+                "Request streaming via the API or set the `streaming` flag on the rails "
+                "config instead.",
+                getattr(model_config, "model", None),
+                getattr(model_config, "engine", None),
+            )
 
         # If the optional API Key Environment Variable is set, add it to kwargs
         if model_config.api_key_env_var:
diff --git a/tests/test_llmrails.py b/tests/test_llmrails.py
index 8dc727d10d..63e71439fa 100644
--- a/tests/test_llmrails.py
+++ b/tests/test_llmrails.py
@@ -1004,6 +1004,80 @@ def __init__(self):
     assert kwargs["temperature"] == 0.3
 
 
+def test_prepare_model_kwargs_strips_stream_flag(caplog):
+    """Regression test for #1325.
+
+    Setting ``stream: true`` directly on a model's parameters causes the
+    provider client to return an ``AsyncStream`` on every call, which the
+    non-streaming completion path then crashes on with
+    ``'AsyncStream' object has no attribute 'model_dump'``. The helper must
+    strip the flag and log a clear warning instead of forwarding it.
+    """
+    config = RailsConfig(models=[Model(type="main", engine="fake", model="fake")])
+    rails = LLMRails(config=config, llm=FakeLLMModel(responses=[]))
+
+    class ModelWithStream:
+        def __init__(self):
+            self.api_key_env_var = None
+            self.model = "gpt-4.1-2025-04-14"
+            self.engine = "openai"
+            self.parameters = {"temperature": 0.2, "stream": True}
+
+    model = ModelWithStream()
+    with caplog.at_level(logging.WARNING, logger="nemoguardrails.rails.llm.llmrails"):
+        kwargs = rails._prepare_model_kwargs(model)
+
+    assert "stream" not in kwargs
+    assert kwargs["temperature"] == 0.2
+    assert any("stream" in record.message.lower() for record in caplog.records), (
+        "Expected a warning that the `stream` parameter was stripped."
+    )
+
+    # Confirm the original config dict was not mutated.
+    assert model.parameters == {"temperature": 0.2, "stream": True}
+
+
+def test_prepare_model_kwargs_preserves_stream_options():
+    """The strip is narrow: only the literal ``stream`` flag is removed.
+
+    Provider-specific knobs like ``stream_options`` (used to opt into usage
+    accounting on OpenAI streaming responses) must pass through untouched.
+    """
+    config = RailsConfig(models=[Model(type="main", engine="fake", model="fake")])
+    rails = LLMRails(config=config, llm=FakeLLMModel(responses=[]))
+
+    class ModelWithStreamOptions:
+        def __init__(self):
+            self.api_key_env_var = None
+            self.model = "gpt-4"
+            self.engine = "openai"
+            self.parameters = {"stream_options": {"include_usage": True}}
+
+    kwargs = rails._prepare_model_kwargs(ModelWithStreamOptions())
+    assert kwargs == {"stream_options": {"include_usage": True}}
+
+
+def test_prepare_model_kwargs_does_not_warn_on_falsy_stream(caplog):
+    """An explicit ``stream: false`` is a no-op and should not warn."""
+    config = RailsConfig(models=[Model(type="main", engine="fake", model="fake")])
+    rails = LLMRails(config=config, llm=FakeLLMModel(responses=[]))
+
+    class ModelWithFalseStream:
+        def __init__(self):
+            self.api_key_env_var = None
+            self.model = "gpt-4"
+            self.engine = "openai"
+            self.parameters = {"stream": False, "temperature": 0.1}
+
+    with caplog.at_level(logging.WARNING, logger="nemoguardrails.rails.llm.llmrails"):
+        kwargs = rails._prepare_model_kwargs(ModelWithFalseStream())
+
+    # ``stream: false`` is harmless, so we silently drop it without a warning.
+    assert "stream" not in kwargs
+    assert kwargs["temperature"] == 0.1
+    assert not any("stream" in record.message.lower() for record in caplog.records)
+
+
 def test_register_methods_return_self():
     """Test that all register_* methods return self for method chaining."""
     config = RailsConfig.from_content(config={"models": []})