Skip to content

Commit 0b67b64

Browse files
Harshit28j and claude committed
Fix OTEL span redundancy, orphaned guardrail traces, and missing response IDs
Addresses 4 critical OpenTelemetry span issues in LiteLLM:

Issue #3: Remove redundant attributes from raw_gen_ai_request spans
- Removed self.set_attributes() call that was duplicating all parent span attributes (gen_ai.*, metadata.*) onto the raw span
- Raw span now only contains provider-specific llm.{provider}.* attributes
- Reduces storage and eliminates search confusion from duplicate data

Issue #4: Prevent attribute duplication on litellm_proxy_request parent span
- When litellm_request child span exists, removed redundant set_attributes() call on the parent proxy span
- Child span already carries all attributes; parent duplication doubles storage and complicates search

Issue #5: Fix orphaned guardrail traces
- Guardrail spans were created with context=None when no parent proxy span existed, resulting in orphaned root spans (separate trace_id)
- Added _resolve_guardrail_context() helper to ensure guardrails always have a valid parent (litellm_request or proxy span)
- Applied fix to both _handle_success and _handle_failure paths

Issue #8: Add gen_ai.response.id for embeddings and image generation
- EmbeddingResponse and ImageResponse types don't have provider response IDs
- Added fallback to standard_logging_payload["id"] (litellm call ID) for correlation across LiteLLM UI, Phoenix traces, and provider logs
- Completions still use provider ID (e.g. "chatcmpl-xxx") when available

Tests added:
- TestRawSpanAttributeIsolation: Verify raw span has no gen_ai/metadata attrs
- TestNoParentSpanDuplication: Verify parent span doesn't get duplicated attrs
- TestGuardrailSpanParenting: Verify guardrails are children (not orphaned)
- TestResponseIdFallback: Verify response ID set for all call types

All existing OTEL tests pass (73 passed, 14 pre-existing protocol failures).

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
1 parent 3a2cba4 commit 0b67b64

2 files changed

Lines changed: 390 additions & 16 deletions

File tree

litellm/integrations/opentelemetry.py

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -735,13 +735,10 @@ def _handle_success(self, kwargs, response_obj, start_time, end_time):
735735
self._maybe_log_raw_request(
736736
kwargs, response_obj, start_time, end_time, span
737737
)
738-
# Ensure proxy-request parent span is annotated with the actual operation kind
739-
if (
740-
parent_span is not None
741-
and hasattr(parent_span, "name")
742-
and parent_span.name == LITELLM_PROXY_REQUEST_SPAN_NAME
743-
):
744-
self.set_attributes(parent_span, kwargs, response_obj)
738+
# Do NOT duplicate attributes onto the parent proxy-request span.
739+
# The child litellm_request span already carries all attributes;
740+
# copying them to the parent doubles storage and complicates
741+
# search (Issue #4).
745742
else:
746743
# Do not create primary span (keep hierarchy shallow when parent exists)
747744
from opentelemetry.trace import Status, StatusCode
@@ -757,8 +754,12 @@ def _handle_success(self, kwargs, response_obj, start_time, end_time):
757754
kwargs, response_obj, start_time, end_time, parent_span
758755
)
759756

760-
# 3. Guardrail span
761-
self._create_guardrail_span(kwargs=kwargs, context=ctx)
757+
# 3. Guardrail span — ensure guardrails are always parented to an
758+
# existing span so they never become orphaned root spans (Issue #5).
759+
guardrail_ctx = self._resolve_guardrail_context(
760+
span=span, parent_span=parent_span, fallback_ctx=ctx
761+
)
762+
self._create_guardrail_span(kwargs=kwargs, context=guardrail_ctx)
762763

763764
# 4. Metrics & cost recording
764765
self._record_metrics(kwargs, response_obj, start_time, end_time)
@@ -1145,6 +1146,27 @@ def _emit_semantic_logs(self, kwargs, response_obj, span: Span):
11451146
)
11461147
otel_logger.emit(log_record)
11471148

1149+
@staticmethod
1150+
def _resolve_guardrail_context(
1151+
span: Optional[Any],
1152+
parent_span: Optional[Any],
1153+
fallback_ctx: Optional[Any],
1154+
) -> Optional[Any]:
1155+
"""
1156+
Return a valid OTEL context for guardrail child spans so they are
1157+
never orphaned (Issue #5). Priority:
1158+
1. The litellm_request span that was just created
1159+
2. The parent proxy-request span
1160+
3. The original fallback context (may be None — last resort)
1161+
"""
1162+
from opentelemetry import trace as _trace
1163+
1164+
if span is not None:
1165+
return _trace.set_span_in_context(span)
1166+
if parent_span is not None:
1167+
return _trace.set_span_in_context(parent_span)
1168+
return fallback_ctx
1169+
11481170
def _create_guardrail_span(
11491171
self, kwargs: Optional[dict], context: Optional[Context]
11501172
):
@@ -1250,6 +1272,7 @@ def _handle_failure(self, kwargs, response_obj, start_time, end_time):
12501272
"USE_OTEL_LITELLM_REQUEST_SPAN"
12511273
)
12521274

1275+
span = None
12531276
if should_create_primary_span:
12541277
# Span 1: Request sent to litellm SDK
12551278
otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs)
@@ -1275,8 +1298,11 @@ def _handle_failure(self, kwargs, response_obj, start_time, end_time):
12751298
self.set_attributes(parent_otel_span, kwargs, response_obj)
12761299
self._record_exception_on_span(span=parent_otel_span, kwargs=kwargs)
12771300

1278-
# Create span for guardrail information
1279-
self._create_guardrail_span(kwargs=kwargs, context=_parent_context)
1301+
# Create span for guardrail information — ensure proper parenting (Issue #5)
1302+
guardrail_ctx = self._resolve_guardrail_context(
1303+
span=span, parent_span=parent_otel_span, fallback_ctx=_parent_context
1304+
)
1305+
self._create_guardrail_span(kwargs=kwargs, context=guardrail_ctx)
12801306

12811307
# Do NOT end parent span - it should be managed by its creator
12821308
# External spans (from Langfuse, user code, HTTP headers, global context) must not be closed by LiteLLM
@@ -1579,12 +1605,20 @@ def set_attributes( # noqa: PLR0915
15791605
value=optional_params.get("user"),
15801606
)
15811607

1582-
# The unique identifier for the completion.
1583-
if response_obj and response_obj.get("id"):
1608+
# The unique identifier for the LLM call.
1609+
# Completions have a provider response ID (e.g. "chatcmpl-xxx"),
1610+
# but Embeddings and Image-gen responses do not. Fall back to
1611+
# the litellm call ID so every call type can be correlated
1612+
# across LiteLLM UI, Phoenix traces, and provider logs (Issue #8).
1613+
response_id = (
1614+
(response_obj.get("id") if response_obj else None)
1615+
or standard_logging_payload.get("id")
1616+
)
1617+
if response_id:
15841618
self.safe_set_attribute(
15851619
span=span,
15861620
key="gen_ai.response.id",
1587-
value=response_obj.get("id"),
1621+
value=response_id,
15881622
)
15891623

15901624
# The model used to generate the response.
@@ -1808,8 +1842,10 @@ def _transform_choices_to_otel_semantic_conventions(
18081842

18091843
def set_raw_request_attributes(self, span: Span, kwargs, response_obj):
18101844
try:
1811-
self.set_attributes(span, kwargs, response_obj)
1812-
kwargs.get("optional_params", {})
1845+
# Only set provider-specific raw payload attributes on this span.
1846+
# The parent litellm_request span already carries the standard
1847+
# gen_ai.* / metadata.* attributes — duplicating them here doubles
1848+
# storage and adds noise (Issue #3).
18131849
litellm_params = kwargs.get("litellm_params", {}) or {}
18141850
custom_llm_provider = litellm_params.get("custom_llm_provider", "Unknown")
18151851

0 commit comments

Comments
 (0)