Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion nemoguardrails/rails/llm/llmrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,25 @@ def _prepare_model_kwargs(self, model_config):
Returns:
dict: The prepared kwargs for model initialization
"""
kwargs = model_config.parameters or {}
kwargs = dict(model_config.parameters) if model_config.parameters else {}

# Setting ``stream`` directly in ``parameters`` is a foot-gun: providers
# like OpenAI forward it verbatim to the HTTP client, which returns an
# ``AsyncStream`` object on every call. The non-streaming completion path
# then tries to ``.model_dump()`` that stream and crashes the server.
# See https://github.com/NVIDIA-NeMo/Guardrails/issues/1325. Streaming is
# opted in via the ``streaming`` rails-config flag (and requested per
# call via the API), not by baking ``stream: true`` into model params.
if kwargs.pop("stream", None):
log.warning(
"Ignoring `stream: true` set in `parameters` for model %r (engine %r). "
"Setting `stream` directly on a model causes the provider to return an "
"AsyncStream object on every call, which breaks the non-streaming path. "
"Request streaming via the API or set the `streaming` flag on the rails "
"config instead.",
getattr(model_config, "model", None),
getattr(model_config, "engine", None),
)
Comment on lines +418 to +427
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Silent drop of stream: false may confuse users

kwargs.pop("stream", None) always removes the stream key regardless of its value, but the warning branch fires only when the value is truthy. This means a user who explicitly writes stream: false in their config will see the key disappear from the effective kwargs with no log at any level. If they ever try to debug why the parameter is missing, there is nothing in the logs to explain it. A logging.debug(...) line for the falsy path would make the behaviour fully observable without adding noise to normal operation.

Prompt To Fix With AI
This is a comment left during a code review.
Path: nemoguardrails/rails/llm/llmrails.py
Line: 418-427

Comment:
**Silent drop of `stream: false` may confuse users**

`kwargs.pop("stream", None)` always removes the `stream` key regardless of its value, but the warning branch fires only when the value is truthy. This means a user who explicitly writes `stream: false` in their config will see the key disappear from the effective kwargs with no log at any level. If they ever try to debug why the parameter is missing, there is nothing in the logs to explain it. A `logging.debug(...)` line for the falsy path would make the behaviour fully observable without adding noise to normal operation.

How can I resolve this? If you propose a fix, please make it concise.

Comment on lines +418 to +427
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The warning message hardcodes the literal string `stream: true` but the condition fires for any truthy value (e.g. 1, a non-empty string). Embedding the actual value makes the message more actionable when a user sets a non-boolean truthy value.

Suggested change
if kwargs.pop("stream", None):
log.warning(
"Ignoring `stream: true` set in `parameters` for model %r (engine %r). "
"Setting `stream` directly on a model causes the provider to return an "
"AsyncStream object on every call, which breaks the non-streaming path. "
"Request streaming via the API or set the `streaming` flag on the rails "
"config instead.",
getattr(model_config, "model", None),
getattr(model_config, "engine", None),
)
stream_val = kwargs.pop("stream", None)
if stream_val:
log.warning(
"Ignoring `stream: %r` set in `parameters` for model %r (engine %r). "
"Setting `stream` directly on a model causes the provider to return an "
"AsyncStream object on every call, which breaks the non-streaming path. "
"Request streaming via the API or set the `streaming` flag on the rails "
"config instead.",
stream_val,
getattr(model_config, "model", None),
getattr(model_config, "engine", None),
)
Prompt To Fix With AI
This is a comment left during a code review.
Path: nemoguardrails/rails/llm/llmrails.py
Line: 418-427

Comment:
The warning message hardcodes the literal string `` `stream: true` `` but the condition fires for any truthy value (e.g. `1`, a non-empty string). Embedding the actual value makes the message more actionable when a user sets a non-boolean truthy value.

```suggestion
        stream_val = kwargs.pop("stream", None)
        if stream_val:
            log.warning(
                "Ignoring `stream: %r` set in `parameters` for model %r (engine %r). "
                "Setting `stream` directly on a model causes the provider to return an "
                "AsyncStream object on every call, which breaks the non-streaming path. "
                "Request streaming via the API or set the `streaming` flag on the rails "
                "config instead.",
                stream_val,
                getattr(model_config, "model", None),
                getattr(model_config, "engine", None),
            )
```

How can I resolve this? If you propose a fix, please make it concise.


# If the optional API Key Environment Variable is set, add it to kwargs
if model_config.api_key_env_var:
Expand Down
74 changes: 74 additions & 0 deletions tests/test_llmrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,80 @@ def __init__(self):
assert kwargs["temperature"] == 0.3


def test_prepare_model_kwargs_strips_stream_flag(caplog):
"""Regression test for #1325.

Setting ``stream: true`` directly on a model's parameters causes the
provider client to return an ``AsyncStream`` on every call, which the
non-streaming completion path then crashes on with
``'AsyncStream' object has no attribute 'model_dump'``. The helper must
strip the flag and log a clear warning instead of forwarding it.
"""
config = RailsConfig(models=[Model(type="main", engine="fake", model="fake")])
rails = LLMRails(config=config, llm=FakeLLMModel(responses=[]))

class ModelWithStream:
def __init__(self):
self.api_key_env_var = None
self.model = "gpt-4.1-2025-04-14"
self.engine = "openai"
self.parameters = {"temperature": 0.2, "stream": True}

model = ModelWithStream()
with caplog.at_level(logging.WARNING, logger="nemoguardrails.rails.llm.llmrails"):
kwargs = rails._prepare_model_kwargs(model)

assert "stream" not in kwargs
assert kwargs["temperature"] == 0.2
assert any("stream" in record.message.lower() for record in caplog.records), (
"Expected a warning that the `stream` parameter was stripped."
)

# Confirm the original config dict was not mutated.
assert model.parameters == {"temperature": 0.2, "stream": True}


def test_prepare_model_kwargs_preserves_stream_options():
"""The strip is narrow: only the literal ``stream`` flag is removed.

Provider-specific knobs like ``stream_options`` (used to opt into usage
accounting on OpenAI streaming responses) must pass through untouched.
"""
config = RailsConfig(models=[Model(type="main", engine="fake", model="fake")])
rails = LLMRails(config=config, llm=FakeLLMModel(responses=[]))

class ModelWithStreamOptions:
def __init__(self):
self.api_key_env_var = None
self.model = "gpt-4"
self.engine = "openai"
self.parameters = {"stream_options": {"include_usage": True}}

kwargs = rails._prepare_model_kwargs(ModelWithStreamOptions())
assert kwargs == {"stream_options": {"include_usage": True}}


def test_prepare_model_kwargs_does_not_warn_on_falsy_stream(caplog):
"""An explicit ``stream: false`` is a no-op and should not warn."""
config = RailsConfig(models=[Model(type="main", engine="fake", model="fake")])
rails = LLMRails(config=config, llm=FakeLLMModel(responses=[]))

class ModelWithFalseStream:
def __init__(self):
self.api_key_env_var = None
self.model = "gpt-4"
self.engine = "openai"
self.parameters = {"stream": False, "temperature": 0.1}

with caplog.at_level(logging.WARNING, logger="nemoguardrails.rails.llm.llmrails"):
kwargs = rails._prepare_model_kwargs(ModelWithFalseStream())

# ``stream: false`` is harmless, so we silently drop it without a warning.
assert "stream" not in kwargs
assert kwargs["temperature"] == 0.1
assert not any("stream" in record.message.lower() for record in caplog.records)


def test_register_methods_return_self():
"""Test that all register_* methods return self for method chaining."""
config = RailsConfig.from_content(config={"models": []})
Expand Down
Loading