From 689f4ce1a9d1b5e398daca6b5ae8948fa0ddb5fe Mon Sep 17 00:00:00 2001 From: Maximilien Cuony Date: Thu, 11 Jun 2026 14:51:39 +0200 Subject: [PATCH 1/2] [opentelemetry] Split enable_opentelemetry into enable_metrics and enable_tracing --- NEXT_RELEASE_NOTES.md | 2 ++ cmds/core-service/main.go | 21 ++++++++++++------- cmds/core-service/otel.go | 38 ++++++++++++++++++++++++----------- docs/operations/monitoring.md | 10 ++++++--- 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/NEXT_RELEASE_NOTES.md b/NEXT_RELEASE_NOTES.md index 0fdef6de6..a93a6c4fa 100644 --- a/NEXT_RELEASE_NOTES.md +++ b/NEXT_RELEASE_NOTES.md @@ -46,6 +46,8 @@ The release notes should contain at least the following sections: ## Optional migration tasks +* `enable_opentelemetry` has been splited into two flags: `enable_metrics` and `enable_tracing` + ## Important information * Fixed a bug where the `evict` command ignored entries without a locality. If your DSS instance does not have a locality set, the next `evict` run may be slow while it processes the backlog of old entries. 
diff --git a/cmds/core-service/main.go b/cmds/core-service/main.go index 7db8be387..fa8fcd39d 100644 --- a/cmds/core-service/main.go +++ b/cmds/core-service/main.go @@ -55,8 +55,10 @@ var ( logLevel = flag.String("log_level", logging.DefaultLevel.String(), "The log level") dumpRequests = flag.Bool("dump_requests", false, "Log full HTTP request and response (note: will dump sensitive information to logs; intended only for debugging and/or development)") profServiceName = flag.String("gcp_prof_service_name", "", "Service name for the Go profiler") - enableOpenTelemetry = flag.Bool("enable_opentelemetry", false, "Enable OpenTelemetry, including traces and activation metric endpoint") - metricsListeningAddress = flag.String("metrics_addr", ":8079", "Address and port that the OpenTelemetry prometheus service binds to and listens on for incoming connections") + enableOpenTelemetry = flag.Bool("enable_opentelemetry", false, "DEPRECATED (replaced by enable_tracing) Enable tracing") + enableMetrics = flag.Bool("enable_metrics", false, "Enable metric endpoint") + enableTracing = flag.Bool("enable_tracing", false, "Enable tracing") + metricsListeningAddress = flag.String("metrics_addr", ":8079", "Address and port that the prometheus-compatible metric service binds to and listens on for incoming connections") pkFile = flag.String("public_key_files", "", "Path to public Keys to use for JWT decoding, separated by commas.") jwksEndpoint = flag.String("jwks_endpoint", "", "URL pointing to an endpoint serving JWKS") @@ -120,7 +122,7 @@ func createRIDServers(ctx context.Context, locality string, logger *zap.Logger) return nil, nil, stacktrace.Propagate(err, "Unable to interact with store") } - if *enableOpenTelemetry { + if *enableMetrics { err = registerRIDMetrics(ctx, ridStore) if err != nil { @@ -147,7 +149,7 @@ func createSCDServer(ctx context.Context, logger *zap.Logger) (*scd.Server, erro return nil, err } - if *enableOpenTelemetry { + if *enableMetrics { err = 
registerSCDMetrics(ctx, scdStore) if err != nil { @@ -339,7 +341,7 @@ func RunHTTPServer(ctx context.Context, ctxCanceler func(), address, locality st handler = authorizer.TokenMiddleware(handler) handler = timeoutMiddleware(*timeout, handler) - if *enableOpenTelemetry { + if *enableMetrics || *enableTracing { // We use the default settings; the APIRouter handler will override the span value accordingly, as it has more information. handler = otelhttp.NewHandler(handler, "http") } @@ -447,6 +449,11 @@ func main() { SetDeprecatingHttpFlag(logger, &allowHTTPBaseUrls, &enableHTTP) + if *enableOpenTelemetry { + logger.Warn("'enable_opentelemetry' has been renamed to 'enable_tracing'") + *enableTracing = true + } + if *profServiceName != "" { if err := profiler.Start(profiler.Config{Service: *profServiceName}); err != nil { logger.Panic("Failed to start the profiler ", zap.Error(err)) @@ -454,8 +461,8 @@ func main() { } // Set up OpenTelemetry. - if *enableOpenTelemetry { - otelShutdown, err := setupOTelSDK(ctx, *metricsListeningAddress) + if *enableMetrics || *enableTracing { + otelShutdown, err := setupOTelSDK(ctx, *enableMetrics, *enableTracing, *metricsListeningAddress) if err != nil { logger.Panic("Failed to initialize OpenTelemetry", zap.Error(err)) } diff --git a/cmds/core-service/otel.go b/cmds/core-service/otel.go index 811fe0258..f2269a1f0 100644 --- a/cmds/core-service/otel.go +++ b/cmds/core-service/otel.go @@ -23,28 +23,42 @@ import ( // setupOTelSDK bootstraps the OpenTelemetry pipeline. // If it does not return an error, make sure to call shutdown for proper cleanup. -func setupOTelSDK(ctx context.Context, metricsListeningAddress string) (func(context.Context) error, error) { +func setupOTelSDK(ctx context.Context, enableMetrics bool, enableTracing bool, metricsListeningAddress string) (func(context.Context) error, error) { // Set up propagator. prop := newPropagator() otel.SetTextMapPropagator(prop) - // Set up trace provider. 
- tracerProvider, err := newTracerProvider(ctx) - if err != nil { - return nil, err + var tracerProvider *trace.TracerProvider + var meterProvider *metric.MeterProvider + var err error + if enableTracing { + // Set up trace provider; plain assignment (not :=) so the shutdown closure sees the outer variable. + tracerProvider, err = newTracerProvider(ctx) + if err != nil { + return nil, err + } + otel.SetTracerProvider(tracerProvider) } - otel.SetTracerProvider(tracerProvider) - // Set up metrics exporter - meterProvider, err := newMeterProvider(ctx, metricsListeningAddress) - if err != nil { - return nil, err + if enableMetrics { + // Set up metrics exporter; plain assignment (not :=) so the shutdown closure sees the outer variable. + meterProvider, err = newMeterProvider(ctx, metricsListeningAddress) + if err != nil { + return nil, err + } + otel.SetMeterProvider(meterProvider) } - otel.SetMeterProvider(meterProvider) shutdown := func(ctx context.Context) error { - return errors.Join(tracerProvider.Shutdown(ctx), meterProvider.Shutdown(ctx)) + var err error + if tracerProvider != nil { + err = errors.Join(err, tracerProvider.Shutdown(ctx)) + } + if meterProvider != nil { + err = errors.Join(err, meterProvider.Shutdown(ctx)) + } + return err } return shutdown, nil } diff --git a/docs/operations/monitoring.md b/docs/operations/monitoring.md index 25a650b8c..0193d25ee 100644 --- a/docs/operations/monitoring.md +++ b/docs/operations/monitoring.md @@ -137,15 +137,17 @@ You can enable it on the DSS server to get: * Tracing for all queries * A Prometheus endpoint with some metrics -Currently, this setting is not yet available in Terraform, Helm or Tanka. +Currently, these settings are not yet available in Terraform, Helm or Tanka. !!! warning - By default, when OpenTelemetry is enabled, the metrics service listens on all addresses. + By default, when metrics are enabled, the metrics service listens on all addresses. ### Metrics +Use flag `--enable_metrics` to enable metrics. + Point any Prometheus server to the endpoint (by default on port 8079). You can use the `--metrics_addr` flag to change the listening port and address. 
@@ -154,6 +156,8 @@ No dashboard has been created yet, but one is planned. ### Tracing +Use flag `--enable_tracing` to enable tracing. + Traces can be sent to any OpenTelemetry-compliant service. Self-hostable examples include [Jaeger](https://www.jaegertracing.io/), [OpenObserve](https://github.com/openobserve/openobserve), [Grafana Tempo](https://grafana.com/docs/tempo/latest/), and [SigNoz](https://github.com/SigNoz/signoz). Multiple SaaS solutions are also available (including some of the previously mentioned tools). You need to use the `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable to configure it. Point it toward your server by following its specific documentation. @@ -202,6 +206,6 @@ index f09bda59..8c96cf3e 100755 -locality local_dev \ - -public_endpoint http://127.0.0.1:8082 + -public_endpoint http://127.0.0.1:8082 \ -+ -enable_opentelemetry ++ -enable_tracing fi ``` From 05899119d0fc41b0ff8be6c0c341c0a4042c7701 Mon Sep 17 00:00:00 2001 From: Maximilien Cuony Date: Thu, 11 Jun 2026 14:51:51 +0200 Subject: [PATCH 2/2] Update NEXT_RELEASE_NOTES.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mickaël Misbach --- NEXT_RELEASE_NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEXT_RELEASE_NOTES.md b/NEXT_RELEASE_NOTES.md index a93a6c4fa..c423f84cf 100644 --- a/NEXT_RELEASE_NOTES.md +++ b/NEXT_RELEASE_NOTES.md @@ -46,7 +46,7 @@ The release notes should contain at least the following sections: ## Optional migration tasks -* `enable_opentelemetry` has been splited into two flags: `enable_metrics` and `enable_tracing` +* `enable_opentelemetry` has been split into two flags: `enable_metrics` and `enable_tracing` ## Important information