From 471cd55c1956e693995e90c1157d94e6e28a2e71 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Wed, 3 Dec 2025 16:08:14 +0100 Subject: [PATCH 01/16] feat(tracing): add OpenTelemetry tracing support Add distributed tracing with OTLP HTTP exporter for observability. Components instrumented: - HTTP server (Echo middleware) - HTTP clients (outgoing requests) - GORM database queries - HashiCorp Vault client requests - External crypto storage client Features: - W3C Trace Context propagation (traceparent headers) - Logs enriched with trace_id/span_id for correlation - Logs forwarded to OTLP endpoint when tracing enabled - Audit logs included in trace context Configuration: - tracing.endpoint: OTLP collector endpoint (host:port) - tracing.insecure: use HTTP instead of HTTPS Known limitations: - gRPC connections not instrumented (v5 legacy functionality) - Azure Key Vault uses Azure SDK which requires separate instrumentation via azotel package --- audit/audit.go | 13 +- cmd/root.go | 2 +- core/engine.go | 21 +- core/http_client.go | 17 +- core/server_config.go | 14 +- core/tracing.go | 256 +++++++++++++++++++++++ core/tracing_test.go | 114 ++++++++++ crypto/storage/external/client.go | 16 +- crypto/storage/vault/vault.go | 18 +- docs/pages/deployment/monitoring.rst | 55 +++++ docs/pages/deployment/server_options.rst | 2 + go.mod | 31 ++- go.sum | 73 +++++-- http/client/client.go | 28 ++- http/client/client_test.go | 55 ++++- http/engine.go | 15 ++ pki/validator.go | 16 +- storage/engine.go | 9 + 18 files changed, 710 insertions(+), 45 deletions(-) create mode 100644 core/tracing.go create mode 100644 core/tracing_test.go diff --git a/audit/audit.go b/audit/audit.go index 1733792635..35237f1165 100644 --- a/audit/audit.go +++ b/audit/audit.go @@ -23,9 +23,11 @@ import ( "context" "encoding/json" "fmt" - "github.com/sirupsen/logrus" "strings" "sync" + + "github.com/nuts-foundation/nuts-node/core" + "github.com/sirupsen/logrus" ) const ( @@ -61,6 +63,15 @@ const 
auditLogLevel = "audit" var auditLoggerInstance *logrus.Logger var initAuditLoggerOnce = &sync.Once{} +func init() { + // Register callback so core.SetupTracing can add hooks to the audit logger. + // This is needed because the audit logger is a separate logrus instance, + // and we can't import audit from core due to circular dependencies. + core.RegisterAuditLogHook = func(hook logrus.Hook) { + auditLogger().AddHook(hook) + } +} + // auditLogger returns the initialized logger instance intended for audit logging. func auditLogger() *logrus.Logger { initAuditLoggerOnce.Do(func() { diff --git a/cmd/root.go b/cmd/root.go index a14feab268..493e138ac8 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -122,7 +122,7 @@ func startServer(ctx context.Context, system *core.System) error { logrus.Info(fmt.Sprintf("Build info: \n%s", core.BuildInfo())) logrus.Info(fmt.Sprintf("Config: \n%s", system.Config.PrintConfig())) - // check config on all engines + // check config on all engines (also initializes tracing) if err := system.Configure(); err != nil { return err } diff --git a/core/engine.go b/core/engine.go index e209664c01..3402c8cf8e 100644 --- a/core/engine.go +++ b/core/engine.go @@ -22,10 +22,11 @@ package core import ( "context" "fmt" - "github.com/sirupsen/logrus" - "github.com/spf13/pflag" "os" "strings" + + "github.com/sirupsen/logrus" + "github.com/spf13/pflag" ) // Routable enables connecting a REST API to the echo server. The API wrappers should implement this interface @@ -57,6 +58,8 @@ type System struct { Context context.Context // ContextCancel is a function to signal the system should shut down. 
ContextCancel context.CancelFunc + // tracingShutdown is the shutdown function for OpenTelemetry tracing + tracingShutdown func(context.Context) error } var coreLogger = logrus.StandardLogger().WithField(LogFieldModule, "core") @@ -111,13 +114,25 @@ func (system *System) Shutdown() error { } coreLogger.Infof("Stopped %s", name) } + // Shutdown tracing last to ensure all logs are flushed + if system.tracingShutdown != nil { + if err := system.tracingShutdown(context.Background()); err != nil { + coreLogger.WithError(err).Error("Failed to shutdown tracing") + } + } return nil } // Configure configures all engines in the system. func (system *System) Configure() error { + // Set up tracing first, so all logs (including engine configuration) go to the configured destination + tracingShutdown, err := SetupTracing(system.Config.Tracing) + if err != nil { + return fmt.Errorf("failed to setup tracing: %w", err) + } + system.tracingShutdown = tracingShutdown + coreLogger.Debugf("Creating datadir: %s", system.Config.Datadir) - var err error if err = os.MkdirAll(system.Config.Datadir, os.ModePerm); err != nil { return fmt.Errorf("unable to create datadir (dir=%s): %w", system.Config.Datadir, err) } diff --git a/core/http_client.go b/core/http_client.go index 53dbc01918..2960fa327f 100644 --- a/core/http_client.go +++ b/core/http_client.go @@ -22,9 +22,11 @@ package core import ( "context" "fmt" - "github.com/sirupsen/logrus" "io" "net/http" + + "github.com/sirupsen/logrus" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) // HttpResponseBodyLogClipAt is the maximum length of a response body to log. @@ -98,8 +100,17 @@ func (w httpRequestDoerAdapter) Do(req *http.Request) (*http.Response, error) { // If the given authorization token builder is non-nil, it calls it and passes the resulting token as bearer token with requests. 
func CreateHTTPInternalClient(cfg ClientConfig, generator AuthorizationTokenGenerator) (HTTPRequestDoer, error) { var result *httpRequestDoerAdapter - client := &http.Client{} - client.Timeout = cfg.Timeout + var transport http.RoundTripper = http.DefaultTransport + if TracingEnabled() { + transport = otelhttp.NewTransport(http.DefaultTransport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "internal-api: " + r.Method + " " + r.URL.Path + })) + } + client := &http.Client{ + Transport: transport, + Timeout: cfg.Timeout, + } result = &httpRequestDoerAdapter{ fn: client.Do, diff --git a/core/server_config.go b/core/server_config.go index c4474a7c45..cd833f3518 100644 --- a/core/server_config.go +++ b/core/server_config.go @@ -72,7 +72,8 @@ type ServerConfig struct { LegacyTLS TLSConfig `koanf:"network"` // HTTP exists to expose http.clientipheader to the nuts-network layer. // This header should contaisn the client IP address for logging. Can be removed together with the nuts-network - HTTP HTTPConfig `koanf:"http"` + HTTP HTTPConfig `koanf:"http"` + Tracing TracingConfig `koanf:"tracing"` configMap *koanf.Koanf } @@ -87,6 +88,15 @@ type HTTPClientConfig struct { Timeout time.Duration `koanf:"timeout"` } +// TracingConfig contains settings for OpenTelemetry tracing. +type TracingConfig struct { + // Endpoint is the OTLP collector endpoint (e.g., "localhost:4318" for HTTP). + // When empty, tracing is disabled. When set, logs are sent to both stdout and the OTLP endpoint. + Endpoint string `koanf:"endpoint"` + // Insecure disables TLS for the OTLP connection. + Insecure bool `koanf:"insecure"` +} + // TLSConfig specifies how TLS should be configured for connections. type TLSConfig struct { // Offload specifies the TLS offloading mode for incoming/outgoing traffic. 
@@ -274,6 +284,8 @@ func FlagSet() *pflag.FlagSet { flagSet.String("tls.offload", string(defaultCfg.TLS.Offload), fmt.Sprintf("Whether to enable TLS offloading for incoming gRPC connections. "+ "Enable by setting it to '%s'. If enabled 'tls.certheader' must be configured as well.", OffloadIncomingTLS)) flagSet.String("tls.certheader", defaultCfg.TLS.ClientCertHeaderName, "Name of the HTTP header that will contain the client certificate when TLS is offloaded for gRPC.") + flagSet.String("tracing.endpoint", defaultCfg.Tracing.Endpoint, "OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). When empty, tracing is disabled.") + flagSet.Bool("tracing.insecure", defaultCfg.Tracing.Insecure, "Disable TLS for the OTLP connection.") return flagSet } diff --git a/core/tracing.go b/core/tracing.go new file mode 100644 index 0000000000..bc000770b3 --- /dev/null +++ b/core/tracing.go @@ -0,0 +1,256 @@ +/* + * Nuts node + * Copyright (C) 2025 Nuts community + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ *
+ */
+
+package core
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"sync/atomic"
+	"time"
+
+	"github.com/sirupsen/logrus"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
+	otellog "go.opentelemetry.io/otel/log"
+	"go.opentelemetry.io/otel/propagation"
+	"go.opentelemetry.io/otel/sdk/log"
+	"go.opentelemetry.io/otel/sdk/resource"
+	"go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
+	oteltrace "go.opentelemetry.io/otel/trace"
+)
+
+// serviceName is reported as the service name resource attribute and is also used
+// as the name of the OTLP logger created by SetupTracing.
+const serviceName = "nuts-node"
+
+// tracingEnabled is set to true by SetupTracing when an OTLP endpoint is configured.
+// Components read it through TracingEnabled to decide whether to wrap their HTTP
+// transport with OpenTelemetry instrumentation.
+var tracingEnabled atomic.Bool
+
+// TracingEnabled returns true if OpenTelemetry tracing is configured.
+func TracingEnabled() bool {
+	return tracingEnabled.Load()
+}
+
+// SetTracingEnabled overrides the tracing enabled flag without configuring any exporter.
+// Exported for testing only; do not call from production code.
+func SetTracingEnabled(enabled bool) {
+	tracingEnabled.Store(enabled)
+}
+
+// RegisterAuditLogHook is a function that registers a logrus hook with the audit logger.
+// It is set by the audit package during initialization to avoid circular imports.
+// The default is a no-op, so SetupTracing can call it unconditionally.
+var RegisterAuditLogHook func(hook logrus.Hook) = func(logrus.Hook) {}
+
+// SetupTracing initializes OpenTelemetry tracing with the given configuration.
+// Returns a shutdown function that should be called on application exit.
+// If cfg.Endpoint is empty, tracing is disabled and a no-op shutdown function is returned.
+// When tracing is enabled, logs are sent to both stdout and the OTLP endpoint.
+func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err error) {
+	if cfg.Endpoint == "" {
+		logrus.Info("Tracing disabled (no endpoint configured)")
+		return func(context.Context) error { return nil }, nil
+	}
+
+	ctx := context.Background()
+	var shutdownFuncs []func(context.Context) error
+
+	// shutdown runs the registered functions in reverse registration order (LIFO).
+	// Providers are registered after the exporter they write to, so a provider gets to
+	// flush its batch while the exporter is still open. Closing the exporter first would
+	// make that final flush fail and silently drop the buffered spans and log records.
+	shutdown = func(ctx context.Context) error {
+		var errs error
+		for i := len(shutdownFuncs) - 1; i >= 0; i-- {
+			if err := shutdownFuncs[i](ctx); err != nil {
+				errs = errors.Join(errs, err)
+			}
+		}
+		return errs
+	}
+
+	// handleErr releases whatever was created so far (bounded by a timeout) and returns
+	// the setup error, joined with any error raised during that cleanup.
+	handleErr := func(err error) (func(context.Context) error, error) {
+		shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+		defer cancel()
+		return nil, errors.Join(err, shutdown(shutdownCtx))
+	}
+
+	// Report OpenTelemetry SDK errors (e.g. failed exports) through logrus.
+	otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) {
+		logrus.WithError(err).Error("OpenTelemetry SDK error")
+	}))
+
+	// Set up propagator (W3C Trace Context + Baggage)
+	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
+		propagation.TraceContext{},
+		propagation.Baggage{},
+	))
+
+	// The resource attaches the service name and version to every span and log record.
+	version := Version()
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceNameKey.String(serviceName),
+			semconv.ServiceVersionKey.String(version),
+		),
+	)
+	if err != nil {
+		return handleErr(fmt.Errorf("creating OpenTelemetry resource: %w", err))
+	}
+
+	// Set up OTLP HTTP trace exporter
+	opts := []otlptracehttp.Option{
+		otlptracehttp.WithEndpoint(cfg.Endpoint),
+	}
+	if cfg.Insecure {
+		opts = append(opts, otlptracehttp.WithInsecure())
+	}
+	traceExporter, err := otlptracehttp.New(ctx, opts...)
+	if err != nil {
+		return handleErr(fmt.Errorf("creating OTLP trace exporter: %w", err))
+	}
+	shutdownFuncs = append(shutdownFuncs, traceExporter.Shutdown)
+
+	// Set up trace provider with batch exporter
+	tracerProvider := trace.NewTracerProvider(
+		trace.WithBatcher(traceExporter),
+		trace.WithResource(res),
+	)
+	shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown)
+	otel.SetTracerProvider(tracerProvider)
+
+	// Set up OTLP HTTP log exporter
+	logOpts := []otlploghttp.Option{
+		otlploghttp.WithEndpoint(cfg.Endpoint),
+	}
+	if cfg.Insecure {
+		logOpts = append(logOpts, otlploghttp.WithInsecure())
+	}
+	logExporter, err := otlploghttp.New(ctx, logOpts...)
+	if err != nil {
+		return handleErr(fmt.Errorf("creating OTLP log exporter: %w", err))
+	}
+	shutdownFuncs = append(shutdownFuncs, logExporter.Shutdown)
+
+	// Set up log provider
+	loggerProvider := log.NewLoggerProvider(
+		log.WithProcessor(log.NewBatchProcessor(logExporter)),
+		log.WithResource(res),
+	)
+	shutdownFuncs = append(shutdownFuncs, loggerProvider.Shutdown)
+
+	// Create OTEL hook for sending logs via OTLP (logs go to both stdout and OTLP)
+	otelHook := &OtelLogrusHook{logger: loggerProvider.Logger(serviceName)}
+	logrus.AddHook(otelHook)
+
+	// Also add trace context to stdout logs
+	logrus.AddHook(&tracingLogrusHook{})
+
+	// Register hook with audit logger (which uses its own logger instance)
+	RegisterAuditLogHook(otelHook)
+
+	// Only report tracing as enabled once everything is wired up, so a failed setup
+	// does not leave TracingEnabled() returning true.
+	tracingEnabled.Store(true)
+
+	logrus.WithFields(logrus.Fields{
+		"endpoint": cfg.Endpoint,
+		"version":  version,
+	}).Info("OpenTelemetry tracing initialized")
+
+	return shutdown, nil
+}
+
+// tracingLogrusHook is a logrus hook that injects trace context into log entries.
+type tracingLogrusHook struct{}
+
+// Levels makes the hook fire for every logrus level.
+func (h *tracingLogrusHook) Levels() []logrus.Level {
+	return logrus.AllLevels
+}
+
+// Fire copies the trace and span ID of the span carried by the entry's context into
+// the entry's fields. Entries without a context, or without a valid span context,
+// are left untouched.
+func (h *tracingLogrusHook) Fire(entry *logrus.Entry) error {
+	if entry.Context != nil {
+		if sc := oteltrace.SpanContextFromContext(entry.Context); sc.IsValid() {
+			entry.Data["trace_id"] = sc.TraceID().String()
+			entry.Data["span_id"] = sc.SpanID().String()
+		}
+	}
+	return nil
+}
+
+// OtelLogrusHook is a logrus hook that sends logs to an OTLP endpoint.
+// It is exported so other loggers (like the audit logger) can use it.
+type OtelLogrusHook struct {
+	logger otellog.Logger
+}
+
+// Levels makes the hook fire for every logrus level.
+func (h *OtelLogrusHook) Levels() []logrus.Level {
+	return logrus.AllLevels
+}
+
+// Fire converts the logrus entry into an OpenTelemetry log record (timestamp, severity,
+// message body and one string attribute per logrus field) and emits it on the hook's
+// logger, using the entry's context when it has one.
+func (h *OtelLogrusHook) Fire(entry *logrus.Entry) error {
+	var record otellog.Record
+	record.SetTimestamp(entry.Time)
+	record.SetSeverity(otelSeverityFor(entry.Level))
+	record.SetBody(otellog.StringValue(entry.Message))
+
+	// Every logrus field becomes a string attribute.
+	attrs := make([]otellog.KeyValue, 0, len(entry.Data))
+	for key, value := range entry.Data {
+		attrs = append(attrs, otellog.String(key, formatValue(value)))
+	}
+	record.AddAttributes(attrs...)
+
+	emitCtx := entry.Context
+	if emitCtx == nil {
+		emitCtx = context.Background()
+	}
+	h.logger.Emit(emitCtx, record)
+	return nil
+}
+
+// otelSeverityFor maps a logrus level to the matching OpenTelemetry severity.
+// Fatal and panic both map to fatal; unknown levels map to info.
+func otelSeverityFor(level logrus.Level) otellog.Severity {
+	switch level {
+	case logrus.TraceLevel:
+		return otellog.SeverityTrace
+	case logrus.DebugLevel:
+		return otellog.SeverityDebug
+	case logrus.WarnLevel:
+		return otellog.SeverityWarn
+	case logrus.ErrorLevel:
+		return otellog.SeverityError
+	case logrus.FatalLevel, logrus.PanicLevel:
+		return otellog.SeverityFatal
+	}
+	return otellog.SeverityInfo
+}
+
+// formatValue renders a logrus field value as a string: errors through Error(),
+// everything else through fmt's %v verb.
+func formatValue(v any) string {
+	switch val := v.(type) {
+	case error:
+		return val.Error()
+	default:
+		return fmt.Sprintf("%v", val)
+	}
+}
diff --git a/core/tracing_test.go b/core/tracing_test.go
new file mode 100644
index 0000000000..4e1dd04632
--- /dev/null
+++ b/core/tracing_test.go
@@ -0,0 +1,114 @@
+/*
+ * Nuts node
+ * Copyright (C) 2025 Nuts community
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+package core
+
+import (
+	"context"
+	"testing"
+
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/otel/trace"
+)
+
+func TestSetupTracing(t *testing.T) {
+	t.Run("disabled when endpoint is empty", func(t *testing.T) {
+		cfg := TracingConfig{Endpoint: ""}
+
+		shutdown, err := SetupTracing(cfg)
+
+		require.NoError(t, err)
+		assert.NotNil(t, shutdown)
+		// Shutdown should be a no-op
+		assert.NoError(t, shutdown(context.Background()))
+	})
+}
+
+func TestTracingLogrusHook(t *testing.T) {
+	hook := &tracingLogrusHook{}
+
+	t.Run("no-op when context is nil", func(t *testing.T) {
+		entry := &logrus.Entry{
+			Data: make(logrus.Fields),
+		}
+		err := hook.Fire(entry)
+		assert.NoError(t, err)
+		assert.NotContains(t, entry.Data, "trace_id")
+		assert.NotContains(t, entry.Data, "span_id")
+	})
+
+	t.Run("no-op when span context is invalid", func(t *testing.T) {
+		entry := &logrus.Entry{
+			Context: context.Background(),
+			Data:    make(logrus.Fields),
+		}
+		err := hook.Fire(entry)
+		assert.NoError(t, err)
+		assert.NotContains(t, entry.Data, "trace_id")
+		assert.NotContains(t, entry.Data, "span_id")
+	})
+
+	t.Run("adds trace context when span is valid", func(t *testing.T) {
+		// Create a valid span context
+		traceID, _ := trace.TraceIDFromHex("0102030405060708090a0b0c0d0e0f10")
+		spanID, _ := trace.SpanIDFromHex("0102030405060708")
+		spanCtx := trace.NewSpanContext(trace.SpanContextConfig{
+			TraceID:    traceID,
+			SpanID:     spanID,
+			TraceFlags: trace.FlagsSampled,
+		})
+
+		// Use noop tracer but with our span context
+		ctx := trace.ContextWithSpanContext(context.Background(), spanCtx)
+
+		entry := &logrus.Entry{
+			Context: ctx,
+			Data:    make(logrus.Fields),
+		}
+		err := hook.Fire(entry)
+		assert.NoError(t, err)
+		assert.Equal(t, "0102030405060708090a0b0c0d0e0f10", entry.Data["trace_id"])
+		assert.Equal(t, "0102030405060708", entry.Data["span_id"])
+	})
+}
+
+
+func TestFormatValue(t *testing.T) {
+	t.Run("string value", func(t *testing.T) {
+		result := formatValue("test")
+		assert.Equal(t, "test", result)
+	})
+
+	t.Run("error value", func(t *testing.T) {
+		result := formatValue(assert.AnError)
+		assert.Equal(t, assert.AnError.Error(), result)
+	})
+
+	t.Run("int value", func(t *testing.T) {
+		result := formatValue(42)
+		assert.Equal(t, "42", result)
+	})
+
+	t.Run("nil value", func(t *testing.T) {
+		result := formatValue(nil)
+		assert.Equal(t, "<nil>", result) // fmt's %v renders a nil interface value as "<nil>", not ""
+	})
+}
diff --git a/crypto/storage/external/client.go b/crypto/storage/external/client.go
index e810b8a2df..23a1e84599 100644
--- a/crypto/storage/external/client.go
+++ b/crypto/storage/external/client.go
@@ -30,6 +30,7 @@ import (
 	"github.com/nuts-foundation/nuts-node/core"
 	"github.com/nuts-foundation/nuts-node/crypto/storage/spi"
 	"github.com/nuts-foundation/nuts-node/crypto/util"
+	"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
 )
 
 // StorageType is the name of this storage type, used in health check reports and configuration.
@@ -82,8 +83,19 @@ func NewAPIClient(config Config) (spi.Storage, error) { if _, err := url.ParseRequestURI(config.Address); err != nil { return nil, err } - client, _ := NewClientWithResponses(config.Address, WithHTTPClient(&http.Client{Timeout: config.Timeout})) - return &APIClient{httpClient: client}, nil + var transport http.RoundTripper = http.DefaultTransport + if core.TracingEnabled() { + transport = otelhttp.NewTransport(http.DefaultTransport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "crypto-storage: " + r.Method + " " + r.URL.Path + })) + } + httpClient := &http.Client{ + Transport: transport, + Timeout: config.Timeout, + } + apiClient, _ := NewClientWithResponses(config.Address, WithHTTPClient(httpClient)) + return &APIClient{httpClient: apiClient}, nil } func (c APIClient) GetPrivateKey(ctx context.Context, keyName string, _ string) (crypto.Signer, error) { diff --git a/crypto/storage/vault/vault.go b/crypto/storage/vault/vault.go index 4dc0909e4c..117e6e6870 100644 --- a/crypto/storage/vault/vault.go +++ b/crypto/storage/vault/vault.go @@ -23,13 +23,16 @@ import ( "crypto" "errors" "fmt" + "net/http" + "path/filepath" + "time" + vault "github.com/hashicorp/vault/api" "github.com/nuts-foundation/nuts-node/core" "github.com/nuts-foundation/nuts-node/crypto/log" "github.com/nuts-foundation/nuts-node/crypto/storage/spi" "github.com/nuts-foundation/nuts-node/crypto/util" - "path/filepath" - "time" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) const privateKeyPathName = "nuts-private-keys" @@ -110,6 +113,17 @@ func (v vaultKVStorage) NewPrivateKey(ctx context.Context, keyPath string) (cryp func configureVaultClient(cfg Config) (*vault.Client, error) { vaultConfig := vault.DefaultConfig() vaultConfig.Timeout = cfg.Timeout + + // Add tracing if enabled + if core.TracingEnabled() { + vaultConfig.HttpClient.Transport = otelhttp.NewTransport( + vaultConfig.HttpClient.Transport, + 
otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "vault: " + r.Method + " " + r.URL.Path + }), + ) + } + client, err := vault.NewClient(vaultConfig) if err != nil { return nil, fmt.Errorf("unable to initialize Vault client: %w", err) diff --git a/docs/pages/deployment/monitoring.rst b/docs/pages/deployment/monitoring.rst index 7529dca086..d670c62d6c 100644 --- a/docs/pages/deployment/monitoring.rst +++ b/docs/pages/deployment/monitoring.rst @@ -178,6 +178,61 @@ The Nuts service executable exports the following metric namespaces: * ``go_`` contains Go metrics related to the process * ``promhttp_`` contains metrics related to HTTP calls to the Nuts node's ``/metrics`` endpoint +Tracing +******* + +The Nuts node supports distributed tracing via OpenTelemetry. When enabled, it exports traces to an OTLP-compatible backend +(e.g., Jaeger, Zipkin, .NET Aspire Dashboard, Grafana Tempo). + +Configuration +============= + +Enable tracing by configuring the OTLP endpoint: + +.. code-block:: yaml + + tracing: + endpoint: localhost:4318 + +Or via environment variables: + +.. code-block:: shell + + NUTS_TRACING_ENDPOINT=localhost:4318 + +Configuration options: + +* ``tracing.endpoint`` - OTLP HTTP endpoint (e.g., ``localhost:4318``). Tracing is disabled when empty. +* ``tracing.insecure`` - Disable TLS for the OTLP connection (default: ``false``). Only use in trusted networks or development environments, as trace data may contain sensitive information. 
+ +What is traced +============== + +The following are automatically instrumented: + +* **Inbound HTTP requests** - All API calls to the Nuts node create spans (except ``/health``, ``/metrics``, ``/status``) +* **Outbound HTTP requests** - HTTP calls to external services (e.g., fetching DID documents, OAuth flows) +* **SQL database** - Database queries via GORM +* **HashiCorp Vault** - Key storage operations when using Vault backend +* **Log correlation** - Log entries include ``trace_id`` and ``span_id`` fields when tracing is enabled +* **OTLP log export** - Logs are also exported to the OTLP backend for unified observability + +Trace context propagation +========================= + +The Nuts node uses W3C Trace Context (``traceparent`` header) for propagating trace context across service boundaries. +When calling the Nuts node from another traced service, include the ``traceparent`` header to link spans. + +Known limitations +================= + +The following components are not yet instrumented: + +* **Azure Key Vault** - Azure managed keys backend is not instrumented. The Azure SDK supports OpenTelemetry via the ``azotel`` package (see `Azure SDK tracing <https://pkg.go.dev/github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel>`_). +* **gRPC network layer** - P2P communication between nodes (``did:nuts``) does not include tracing as it's for v5 and deprecated + +These limitations may be addressed in future releases. + CPU profiling ************* diff --git a/docs/pages/deployment/server_options.rst b/docs/pages/deployment/server_options.rst index a93ea21e31..207b399b64 100755 --- a/docs/pages/deployment/server_options.rst +++ b/docs/pages/deployment/server_options.rst @@ -15,6 +15,8 @@ url Public facing URL of the server (required). Must be HTTPS when strictmode is set. verbosity info Log level (trace, debug, info, warn, error) httpclient.timeout 30s Request time-out for HTTP clients, such as '10s'. Refer to Golang's 'time.Duration' syntax for a more elaborate description of the syntax.
+ tracing.endpoint OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). When empty, tracing is disabled. + tracing.insecure false Disable TLS for the OTLP connection. **Auth** auth.authorizationendpoint.enabled false enables the v2 API's OAuth2 Authorization Endpoint, used by OpenID4VP and OpenID4VCI. This flag might be removed in a future version (or its default become 'true') as the use cases and implementation of OpenID4VP and OpenID4VCI mature. **Crypto** diff --git a/go.mod b/go.mod index 38a918243e..b19d733a73 100644 --- a/go.mod +++ b/go.mod @@ -47,7 +47,7 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/spf13/cobra v1.9.1 github.com/spf13/pflag v1.0.7 - github.com/stretchr/testify v1.10.0 + github.com/stretchr/testify v1.11.1 github.com/twmb/murmur3 v1.1.8 go.etcd.io/bbolt v1.4.3 go.uber.org/atomic v1.11.0 @@ -56,7 +56,7 @@ require ( golang.org/x/crypto v0.41.0 golang.org/x/time v0.12.0 google.golang.org/grpc v1.75.0 - google.golang.org/protobuf v1.36.6 + google.golang.org/protobuf v1.36.8 gopkg.in/Regis24GmbH/go-phonetics.v2 v2.0.3 gopkg.in/yaml.v3 v3.0.1 gorm.io/driver/mysql v1.6.0 @@ -184,12 +184,12 @@ require ( github.com/x448/float16 v0.8.4 // indirect github.com/yuin/gopher-lua v1.1.1 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/net v0.42.0 // indirect + golang.org/x/net v0.43.0 // indirect golang.org/x/sync v0.16.0 // indirect golang.org/x/sys v0.35.0 // indirect golang.org/x/term v0.34.0 // indirect golang.org/x/text v0.28.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect gopkg.in/Regis24GmbH/go-diacritics.v2 v2.0.3 // indirect gorm.io/gorm v1.30.2 modernc.org/mathutil v1.7.1 // indirect @@ -206,16 +206,37 @@ require ( github.com/eko/gocache/store/memcache/v4 v4.2.2 github.com/eko/gocache/store/redis/v4 v4.2.2 github.com/patrickmn/go-cache 
v2.1.0+incompatible + github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2 + go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.63.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 + go.opentelemetry.io/otel v1.38.0 + go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 + go.opentelemetry.io/otel/log v0.14.0 + go.opentelemetry.io/otel/sdk v1.38.0 + go.opentelemetry.io/otel/sdk/log v0.14.0 + go.opentelemetry.io/otel/trace v1.38.0 ) require ( github.com/benbjohnson/clock v1.3.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-json-experiment/json v0.0.0-20250725192818-e39067aee2d2 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/mock v1.6.0 // indirect github.com/google/go-tpm v0.9.5 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect github.com/klauspost/cpuid/v2 v2.2.5 // indirect github.com/rs/zerolog v1.26.1 // indirect - go.yaml.in/yaml/v3 v3.0.3 // indirect + github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/proto/otlp v1.7.1 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect modernc.org/libc v1.66.3 // indirect ) diff --git a/go.sum b/go.sum index 2ccfd7d1a8..72a0d3ef22 100644 --- a/go.sum +++ b/go.sum @@ -75,6 +75,8 @@ github.com/cbroglie/mustache v1.4.0 h1:Azg0dVhxTml5me+7PsZ7WPrQq1Gkf3WApcHMjMprY github.com/cbroglie/mustache v1.4.0/go.mod 
h1:SS1FTIghy0sjse4DUVGV1k/40B1qE1XkD9DtDsHo9iM= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -125,6 +127,8 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo= github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fxamacker/cbor v1.5.1 h1:XjQWBgdmQyqimslUh5r4tUGmoqzHmBFQOImkWGi2awg= @@ -139,6 +143,7 @@ github.com/go-jose/go-jose/v4 v4.1.1 h1:JYhSgy4mXXzAdF3nUx3ygx347LRXJRrpgyU3adRm github.com/go-jose/go-jose/v4 v4.1.1/go.mod h1:BdsZGqgdO3b6tTc6LSE56wcDbMMLuPsw5d4ZD5f94kA= github.com/go-json-experiment/json v0.0.0-20250725192818-e39067aee2d2 h1:iizUGZ9pEquQS5jTGkh4AqeeHCMbfbjeb0zMt0aEFzs= github.com/go-json-experiment/json v0.0.0-20250725192818-e39067aee2d2/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 
h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -202,6 +207,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -472,8 +479,8 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/stvp/tempredis v0.0.0-20181119212430-b82af8480203 h1:QVqDTf3h2WHt08YuiTGPZLls0Wq99X9bWd0Q5ZSBesM= github.com/stvp/tempredis v0.0.0-20181119212430-b82af8480203/go.mod 
h1:oqN97ltKNihBbwlX8dLpwxCl3+HnXKV/R0e+sRLd9C8= github.com/templexxx/cpu v0.0.1/go.mod h1:w7Tb+7qgcAlIyX4NhLuDKt78AHA5SzPmq0Wj6HiEnnk= @@ -493,6 +500,10 @@ github.com/timshannon/bolthold v0.0.0-20210913165410-232392fc8a6a h1:oIi7H/bwFUY github.com/timshannon/bolthold v0.0.0-20210913165410-232392fc8a6a/go.mod h1:iSvujNDmpZ6eQX+bg/0X3lF7LEmZ8N77g2a/J/+Zt2U= github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg= github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= +github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2 h1:Jjn3zoRz13f8b1bR6LrXWglx93Sbh4kYfwgmPju3E2k= +github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2/go.mod h1:wocb5pNrj/sjhWB9J5jctnC0K2eisSdz/nJJBNFHo+A= +github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2 h1:ZjUj9BLYf9PEqBn8W/OapxhPjVRdC6CsXTdULHsyk5c= +github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2/go.mod h1:O8bHQfyinKwTXKkiKNGmLQS7vRsqRxIQTFZpYpHK3IQ= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= @@ -512,16 +523,36 @@ go.etcd.io/bbolt v1.4.3 h1:dEadXpI6G79deX5prL3QRNP6JB8UxVkqo4UPnHaNXJo= go.etcd.io/bbolt v1.4.3/go.mod h1:tKQlpPaYCVFctUIgFKFnAlvbmB3tpy1vkTnDWohtc0E= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.37.0 
h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= -go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= -go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= -go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.63.0 h1:6YeICKmGrvgJ5th4+OMNpcuoB6q/Xs8gt0YCO7MUv1k= +go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.63.0/go.mod h1:ZEA7j2B35siNV0T00aapacNzjz4tvOlNoHp0ncCfwNQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= +go.opentelemetry.io/contrib/propagators/b3 v1.38.0 h1:uHsCCOSKl0kLrV2dLkFK+8Ywk9iKa/fptkytc6aFFEo= +go.opentelemetry.io/contrib/propagators/b3 v1.38.0/go.mod h1:wMRSZJZcY8ya9mApLLhwIMjqmApy2o/Ml+62lhvxyHU= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 h1:QQqYw3lkrzwVsoEX0w//EhH/TCnpRdEenKBOOEIMjWc= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0/go.mod h1:gSVQcr17jk2ig4jqJ2DX30IdWH251JcNAecvrqTxH1s= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 h1:aTL7F04bJHUlztTsNGJ2l+6he8c+y/b//eR0jjjemT4= 
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0/go.mod h1:kldtb7jDTeol0l3ewcmd8SDvx3EmIE7lyvqbasU3QC4= +go.opentelemetry.io/otel/log v0.14.0 h1:2rzJ+pOAZ8qmZ3DDHg73NEKzSZkhkGIua9gXtxNGgrM= +go.opentelemetry.io/otel/log v0.14.0/go.mod h1:5jRG92fEAgx0SU/vFPxmJvhIuDU9E1SUnEQrMlJpOno= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/log v0.14.0 h1:JU/U3O7N6fsAXj0+CXz21Czg532dW2V4gG1HE/e8Zrg= +go.opentelemetry.io/otel/sdk/log v0.14.0/go.mod h1:imQvII+0ZylXfKU7/wtOND8Hn4OpT3YUoIgqJVksUkM= +go.opentelemetry.io/otel/sdk/log/logtest v0.14.0 h1:Ijbtz+JKXl8T2MngiwqBlPaHqc4YCaP/i13Qrow6gAM= +go.opentelemetry.io/otel/sdk/log/logtest v0.14.0/go.mod h1:dCU8aEL6q+L9cYTqcVOk8rM9Tp8WdnHOPLiBgp0SGOA= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= +go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= @@ -531,8 +562,8 @@ go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= go.uber.org/mock v0.6.0/go.mod 
h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE= -go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -585,8 +616,8 @@ golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= -golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= -golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -680,12 +711,14 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 h1:pFyd6EwwL2TqFf8emdthzeX+gZE1ElRq3iM8pui4KBY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc= google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/Regis24GmbH/go-diacritics.v2 v2.0.3 h1:rz88vn1OH2B9kKorR+QCrcuw6WbizVwahU2Y9Q09xqU= gopkg.in/Regis24GmbH/go-diacritics.v2 v2.0.3/go.mod h1:vJmfdx2L0+30M90zUd0GCjLV14Ip3ZgWR5+MV1qljOo= gopkg.in/Regis24GmbH/go-phonetics.v2 v2.0.3 h1:pSSZonNnrORBQXIm3kl6P9EQTNqVds9zszK/BXbOItg= diff --git a/http/client/client.go b/http/client/client.go index 60d9c57b87..47ab2dd1f7 100644 --- a/http/client/client.go +++ b/http/client/client.go @@ -23,10 +23,12 @@ import ( "crypto/tls" "errors" "fmt" - 
"github.com/nuts-foundation/nuts-node/core" "io" "net/http" "time" + + "github.com/nuts-foundation/nuts-node/core" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) // SafeHttpTransport is a http.Transport that can be used as a default transport for HTTP clients. @@ -44,6 +46,11 @@ func init() { DefaultCachingTransport = SafeHttpTransport } +// httpSpanName formats span names for outbound HTTP requests. +func httpSpanName(_ string, r *http.Request) string { + return "http-client: " + r.Method + " " + r.URL.Path +} + // StrictMode is a flag that can be set to true to enable strict mode for the HTTP client. var StrictMode bool @@ -63,21 +70,33 @@ func limitedReadAll(reader io.Reader) ([]byte, error) { } // New creates a new HTTP client with the given timeout. +// If tracing is enabled, the transport will be wrapped with OpenTelemetry instrumentation. func New(timeout time.Duration) *StrictHTTPClient { + transport := getTransport(SafeHttpTransport) return &StrictHTTPClient{ client: &http.Client{ - Transport: SafeHttpTransport, + Transport: transport, Timeout: timeout, }, } } +// getTransport wraps the given transport with OpenTelemetry instrumentation if tracing is enabled. +func getTransport(base http.RoundTripper) http.RoundTripper { + if core.TracingEnabled() { + return otelhttp.NewTransport(base, otelhttp.WithSpanNameFormatter(httpSpanName)) + } + return base +} + // NewWithCache creates a new HTTP client with the given timeout. // It uses the DefaultCachingTransport as the underlying transport. +// If tracing is enabled, the transport will be wrapped with OpenTelemetry instrumentation. 
func NewWithCache(timeout time.Duration) *StrictHTTPClient { + transport := getTransport(DefaultCachingTransport) return &StrictHTTPClient{ client: &http.Client{ - Transport: DefaultCachingTransport, + Transport: transport, Timeout: timeout, }, } @@ -86,12 +105,13 @@ func NewWithCache(timeout time.Duration) *StrictHTTPClient { // NewWithTLSConfig creates a new HTTP client with the given timeout and TLS configuration. // It copies the http.DefaultTransport and sets the TLSClientConfig to the given tls.Config. // As such, it can't be used in conjunction with the CachingRoundTripper. +// If tracing is enabled, the transport will be wrapped with OpenTelemetry instrumentation. func NewWithTLSConfig(timeout time.Duration, tlsConfig *tls.Config) *StrictHTTPClient { transport := SafeHttpTransport.Clone() transport.TLSClientConfig = tlsConfig return &StrictHTTPClient{ client: &http.Client{ - Transport: transport, + Transport: getTransport(transport), Timeout: timeout, }, } diff --git a/http/client/client_test.go b/http/client/client_test.go index 76d5c3d401..0db38d0c0c 100644 --- a/http/client/client_test.go +++ b/http/client/client_test.go @@ -21,8 +21,6 @@ package client import ( "crypto/tls" "fmt" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "net/http" "net/http/httptest" "strings" @@ -30,6 +28,10 @@ import ( "sync/atomic" "testing" "time" + + "github.com/nuts-foundation/nuts-node/core" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestStrictHTTPClient(t *testing.T) { @@ -73,6 +75,9 @@ func TestStrictHTTPClient(t *testing.T) { assert.Equal(t, 0, rt.invocations) }) t.Run("sets TLS config", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(false) // ensure we can cast to *http.Transport + t.Cleanup(func() { core.SetTracingEnabled(original) }) client := NewWithTLSConfig(time.Second, &tls.Config{ InsecureSkipVerify: true, }) @@ -197,3 +202,49 @@ func TestCaching(t 
*testing.T) { assert.Equal(t, int32(1), total.Load()) } + +func TestGetTransport(t *testing.T) { + t.Run("wraps transport when tracing enabled", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(true) + t.Cleanup(func() { core.SetTracingEnabled(original) }) + + transport := getTransport(SafeHttpTransport) + + // Should not be the same as SafeHttpTransport (it's wrapped) + assert.NotEqual(t, SafeHttpTransport, transport) + }) + + t.Run("returns base transport when tracing disabled", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(false) + t.Cleanup(func() { core.SetTracingEnabled(original) }) + + transport := getTransport(SafeHttpTransport) + + assert.Equal(t, SafeHttpTransport, transport) + }) +} + +func TestNew(t *testing.T) { + t.Run("wraps transport when tracing enabled", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(true) + t.Cleanup(func() { core.SetTracingEnabled(original) }) + + client := New(time.Second) + + // Transport should be wrapped (not equal to SafeHttpTransport) + assert.NotEqual(t, SafeHttpTransport, client.client.Transport) + }) + + t.Run("uses SafeHttpTransport when tracing disabled", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(false) + t.Cleanup(func() { core.SetTracingEnabled(original) }) + + client := New(time.Second) + + assert.Equal(t, SafeHttpTransport, client.client.Transport) + }) +} diff --git a/http/engine.go b/http/engine.go index 9803b2d5b3..f85cc05a73 100644 --- a/http/engine.go +++ b/http/engine.go @@ -36,6 +36,7 @@ import ( "github.com/nuts-foundation/nuts-node/http/log" "github.com/nuts-foundation/nuts-node/http/tokenV2" "github.com/nuts-foundation/nuts-node/vdr/didnuts" + "go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho" ) const moduleName = "HTTP" @@ -90,6 +91,7 @@ func (h *Engine) Configure(serverConfig core.ServerConfig) error { return err } + 
h.applyTracingMiddleware(h.server) h.applyRateLimiterMiddleware(h.server, serverConfig) h.applyLoggerMiddleware(h.server, []string{MetricsPath, StatusPath, HealthPath}, h.config.Log) return h.applyAuthMiddleware(h.server, InternalPath, h.config.Internal.Auth) @@ -103,6 +105,19 @@ func (h *Engine) configureClient(serverConfig core.ServerConfig) { } } +func (h *Engine) applyTracingMiddleware(echoServer core.EchoRouter) { + // Only apply tracing middleware if tracing is enabled + if !core.TracingEnabled() { + return + } + skipper := func(c echo.Context) bool { + // Skip health/metrics/status endpoints to reduce noise + path := c.Request().URL.Path + return matchesPath(path, HealthPath) || matchesPath(path, MetricsPath) || matchesPath(path, StatusPath) + } + echoServer.Use(otelecho.Middleware(moduleName, otelecho.WithSkipper(skipper))) +} + func (h *Engine) createEchoServer(ipHeader string) (EchoServer, error) { echoServer := echo.New() echoServer.HideBanner = true diff --git a/pki/validator.go b/pki/validator.go index 80db1ed633..207b1f6952 100644 --- a/pki/validator.go +++ b/pki/validator.go @@ -29,6 +29,9 @@ import ( "strings" "sync" "time" + + "github.com/nuts-foundation/nuts-node/core" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) var _ Validator = (*validator)(nil) @@ -89,7 +92,18 @@ func newRevocationList(cert *x509.Certificate) *revocationList { // newValidator returns a new PKI (crl/denylist) validator. 
func newValidator(config Config) (*validator, error) { // we do not use our safe http client here since we're downloading from a trusted resource - return newValidatorWithHTTPClient(config, &http.Client{Timeout: syncTimeout}) + var transport http.RoundTripper = http.DefaultTransport + if core.TracingEnabled() { + transport = otelhttp.NewTransport(http.DefaultTransport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "pki: " + r.Method + " " + r.URL.Path + })) + } + httpClient := &http.Client{ + Transport: transport, + Timeout: syncTimeout, + } + return newValidatorWithHTTPClient(config, httpClient) } // NewValidatorWithHTTPClient returns a new instance with a pre-configured HTTP client diff --git a/storage/engine.go b/storage/engine.go index 8f9790c82e..5af9518531 100644 --- a/storage/engine.go +++ b/storage/engine.go @@ -38,6 +38,7 @@ import ( "github.com/pressly/goose/v3" "github.com/redis/go-redis/v9" "github.com/sirupsen/logrus" + "github.com/uptrace/opentelemetry-go-extra/otelgorm" "gorm.io/driver/mysql" "gorm.io/driver/postgres" "gorm.io/driver/sqlserver" @@ -324,6 +325,14 @@ func (e *engine) initSQLDatabase(strictmode bool) error { default: return errors.New("unsupported SQL database") } + + // Add OpenTelemetry tracing to GORM if tracing is enabled + if core.TracingEnabled() { + if err := e.sqlDB.Use(otelgorm.NewPlugin()); err != nil { + return fmt.Errorf("failed to add GORM tracing plugin: %w", err) + } + } + goose.SetVerbose(log.Logger().Level >= logrus.DebugLevel) goose.SetLogger(e.sqlMigrationLogger) if err != nil { From 8e93287fef5f4c411c8a4870d3863ba3b2f38f53 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Wed, 3 Dec 2025 16:52:07 +0100 Subject: [PATCH 02/16] fix(tracing): add context to log calls for trace correlation Pass request context to log calls so trace_id and span_id are included in log output when tracing is enabled. 
--- auth/api/iam/api.go | 4 ++-- vdr/api/v2/api.go | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/auth/api/iam/api.go b/auth/api/iam/api.go index c3affbcf97..7eb0b5cf10 100644 --- a/auth/api/iam/api.go +++ b/auth/api/iam/api.go @@ -318,7 +318,7 @@ func callbackRequestToError(request CallbackRequestObject, redirectURI *url.URL) return requestErr } -func (r Wrapper) RetrieveAccessToken(_ context.Context, request RetrieveAccessTokenRequestObject) (RetrieveAccessTokenResponseObject, error) { +func (r Wrapper) RetrieveAccessToken(ctx context.Context, request RetrieveAccessTokenRequestObject) (RetrieveAccessTokenResponseObject, error) { // get access token from store var token TokenResponse err := r.accessTokenClientStore().Get(request.SessionID, &token) @@ -336,7 +336,7 @@ func (r Wrapper) RetrieveAccessToken(_ context.Context, request RetrieveAccessTo // change this when tokens can be cached err = r.accessTokenClientStore().Delete(request.SessionID) if err != nil { - log.Logger().WithError(err).Warn("Failed to delete access token") + log.Logger().WithContext(ctx).WithError(err).Warn("Failed to delete access token") } // return access token return RetrieveAccessToken200JSONResponse(token), nil diff --git a/vdr/api/v2/api.go b/vdr/api/v2/api.go index 7b330ce919..8a691b13e9 100644 --- a/vdr/api/v2/api.go +++ b/vdr/api/v2/api.go @@ -88,14 +88,14 @@ func (w *Wrapper) Routes(router core.EchoRouter) { router.Use(cache.MaxAge(5*time.Minute, cacheControlMaxAgeURLs...).Handle) } -func (r Wrapper) GetTenantWebDID(_ context.Context, request GetTenantWebDIDRequestObject) (GetTenantWebDIDResponseObject, error) { +func (r Wrapper) GetTenantWebDID(ctx context.Context, request GetTenantWebDIDRequestObject) (GetTenantWebDIDResponseObject, error) { ownDID := r.requestedWebDID(request.Id) document, err := r.VDR.ResolveManaged(ownDID) if err != nil { if resolver.IsFunctionalResolveError(err) { return GetTenantWebDID404Response{}, nil } - 
log.Logger().WithError(err).Errorf("Could not resolve tenant did:web: %s", ownDID.String()) + log.Logger().WithContext(ctx).WithError(err).Errorf("Could not resolve tenant did:web: %s", ownDID.String()) return nil, errors.New("unable to resolve DID") } return GetTenantWebDID200JSONResponse(*document), nil @@ -108,7 +108,7 @@ func (r Wrapper) GetRootWebDID(ctx context.Context, _ GetRootWebDIDRequestObject if resolver.IsFunctionalResolveError(err) { return GetRootWebDID404Response{}, nil } - log.Logger().WithError(err).Errorf("Could not resolve root did:web: %s", ownDID.String()) + log.Logger().WithContext(ctx).WithError(err).Errorf("Could not resolve root did:web: %s", ownDID.String()) return nil, errors.New("unable to resolve DID") } return GetRootWebDID200JSONResponse(*document), nil From 55bba7ad3c1c7b2d7d29631dd91833f0921befd7 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Thu, 4 Dec 2025 13:34:27 +0100 Subject: [PATCH 03/16] feat(tracing): support embedding with separate service attribution When embedded in another app, nuts-node's spans were attributed to the host's service name because both shared the global TracerProvider. Add GetTracerProvider() that returns nuts-node's own provider, and use it in otelecho middleware. Only set global provider when not embedded. --- core/tracing.go | 25 ++++++++++++++++++++++++- core/tracing_test.go | 10 ++++++++++ http/engine.go | 7 ++++++- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/core/tracing.go b/core/tracing.go index bc000770b3..065fd83d42 100644 --- a/core/tracing.go +++ b/core/tracing.go @@ -44,6 +44,10 @@ const serviceName = "nuts-node" // tracingEnabled is set to true when OpenTelemetry tracing is configured. var tracingEnabled atomic.Bool +// nutsTracerProvider holds nuts-node's own TracerProvider. +// This is used instead of the global when nuts-node is embedded in another application. 
+var nutsTracerProvider *trace.TracerProvider + // TracingEnabled returns true if OpenTelemetry tracing is configured. func TracingEnabled() bool { return tracingEnabled.Load() @@ -55,6 +59,16 @@ func SetTracingEnabled(enabled bool) { tracingEnabled.Store(enabled) } +// GetTracerProvider returns nuts-node's TracerProvider. +// This should be used by nuts-node components instead of otel.GetTracerProvider() +// to ensure spans are attributed to "nuts-node" service. +func GetTracerProvider() oteltrace.TracerProvider { + if nutsTracerProvider != nil { + return nutsTracerProvider + } + return otel.GetTracerProvider() +} + // RegisterAuditLogHook is a function that registers a logrus hook with the audit logger. // It is set by the audit package during initialization to avoid circular imports. var RegisterAuditLogHook func(hook logrus.Hook) = func(logrus.Hook) {} @@ -135,7 +149,16 @@ func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err trace.WithResource(res), ) shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown) - otel.SetTracerProvider(tracerProvider) + + // Store nuts-node's provider for use by GetTracerProvider() + nutsTracerProvider = tracerProvider + + // Only set as global if no other provider exists (i.e., not embedded). + // When embedded, the parent application owns the global provider. 
+ _, hasParentProvider := otel.GetTracerProvider().(*trace.TracerProvider) + if !hasParentProvider { + otel.SetTracerProvider(tracerProvider) + } // Set up OTLP log exporter logOpts := []otlploghttp.Option{ diff --git a/core/tracing_test.go b/core/tracing_test.go index 4e1dd04632..cacea7e24f 100644 --- a/core/tracing_test.go +++ b/core/tracing_test.go @@ -42,6 +42,16 @@ func TestSetupTracing(t *testing.T) { }) } +func TestGetTracerProvider(t *testing.T) { + t.Run("returns global provider when nutsTracerProvider is nil", func(t *testing.T) { + // Reset state + nutsTracerProvider = nil + + provider := GetTracerProvider() + assert.NotNil(t, provider) + }) +} + func TestTracingLogrusHook(t *testing.T) { hook := &tracingLogrusHook{} diff --git a/http/engine.go b/http/engine.go index f85cc05a73..0c6026c848 100644 --- a/http/engine.go +++ b/http/engine.go @@ -115,7 +115,12 @@ func (h *Engine) applyTracingMiddleware(echoServer core.EchoRouter) { path := c.Request().URL.Path return matchesPath(path, HealthPath) || matchesPath(path, MetricsPath) || matchesPath(path, StatusPath) } - echoServer.Use(otelecho.Middleware(moduleName, otelecho.WithSkipper(skipper))) + // Use nuts-node's own TracerProvider to ensure spans are attributed to "nuts-node" service, + // even when embedded in another application that has its own TracerProvider. 
+ echoServer.Use(otelecho.Middleware(moduleName, + otelecho.WithSkipper(skipper), + otelecho.WithTracerProvider(core.GetTracerProvider()), + )) } func (h *Engine) createEchoServer(ipHeader string) (EchoServer, error) { From 707ff188949db921b8bce3682c4864870c291c0c Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Thu, 4 Dec 2025 14:19:44 +0100 Subject: [PATCH 04/16] fix(tracing): propagate context to GORM for proper span hierarchy --- storage/engine.go | 2 +- vdr/didsubject/manager.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/storage/engine.go b/storage/engine.go index 5af9518531..04ba16b5b5 100644 --- a/storage/engine.go +++ b/storage/engine.go @@ -328,7 +328,7 @@ func (e *engine) initSQLDatabase(strictmode bool) error { // Add OpenTelemetry tracing to GORM if tracing is enabled if core.TracingEnabled() { - if err := e.sqlDB.Use(otelgorm.NewPlugin()); err != nil { + if err := e.sqlDB.Use(otelgorm.NewPlugin(otelgorm.WithTracerProvider(core.GetTracerProvider()))); err != nil { return fmt.Errorf("failed to add GORM tracing plugin: %w", err) } } diff --git a/vdr/didsubject/manager.go b/vdr/didsubject/manager.go index 1fe0bbf925..58f87811f7 100644 --- a/vdr/didsubject/manager.go +++ b/vdr/didsubject/manager.go @@ -63,8 +63,8 @@ func New(db *gorm.DB, methodManagers map[string]MethodManager, keyStore nutsCryp } } -func (r *SqlManager) List(_ context.Context) (map[string][]did.DID, error) { - sqlDIDManager := NewDIDManager(r.DB) +func (r *SqlManager) List(ctx context.Context) (map[string][]did.DID, error) { + sqlDIDManager := NewDIDManager(r.DB.WithContext(ctx)) dids, err := sqlDIDManager.All() if err != nil { return nil, err From f4ed8b2633953cdbc84c42f6bd7446ab6f2541e5 Mon Sep 17 00:00:00 2001 From: "qltysh[bot]" <168846912+qltysh[bot]@users.noreply.github.com> Date: Thu, 4 Dec 2025 13:32:41 +0000 Subject: [PATCH 05/16] =?UTF-8?q?=F0=9F=93=9D=20qlty=20fmt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit --- core/tracing_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/core/tracing_test.go b/core/tracing_test.go index cacea7e24f..e66422b1fb 100644 --- a/core/tracing_test.go +++ b/core/tracing_test.go @@ -100,7 +100,6 @@ func TestTracingLogrusHook(t *testing.T) { }) } - func TestFormatValue(t *testing.T) { t.Run("string value", func(t *testing.T) { result := formatValue("test") From d3b7d360f4b1befe23e6f0d69ae0439c6f3fe736 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Tue, 20 Jan 2026 15:24:35 +0100 Subject: [PATCH 06/16] Add e2e test for distributed OpenTelemetry tracing - Test verifies trace propagation across nodes by injecting traceparent - Configure unique service names (nodeA, nodeB) to distinguish nodes in traces - Verify trace contains spans from both services, proving distributed tracing works Extended tracing configuration: - Add tracing.servicename option (defaults to 'nuts-node') --- core/server_config.go | 4 +++ core/tracing.go | 7 +++- docs/pages/deployment/monitoring.rst | 1 + docs/pages/deployment/server_options.rst | 5 +-- e2e-tests/oauth-flow/rfc021/do-test.sh | 15 +++++++-- .../oauth-flow/rfc021/docker-compose.yml | 15 +++++++++ e2e-tests/oauth-flow/rfc021/node-A/nginx.conf | 2 ++ e2e-tests/oauth-flow/rfc021/node-A/nuts.yaml | 3 ++ e2e-tests/oauth-flow/rfc021/node-B/nuts.yaml | 3 ++ e2e-tests/util.sh | 32 ++++++++++++++++++- 10 files changed, 81 insertions(+), 6 deletions(-) diff --git a/core/server_config.go b/core/server_config.go index cd833f3518..5ee9c8c790 100644 --- a/core/server_config.go +++ b/core/server_config.go @@ -95,6 +95,9 @@ type TracingConfig struct { Endpoint string `koanf:"endpoint"` // Insecure disables TLS for the OTLP connection. Insecure bool `koanf:"insecure"` + // ServiceName is the name of the service reported to the tracing backend. + // Defaults to "nuts-node" if not specified. + ServiceName string `koanf:"servicename"` } // TLSConfig specifies how TLS should be configured for connections. 
@@ -286,6 +289,7 @@ func FlagSet() *pflag.FlagSet { flagSet.String("tls.certheader", defaultCfg.TLS.ClientCertHeaderName, "Name of the HTTP header that will contain the client certificate when TLS is offloaded for gRPC.") flagSet.String("tracing.endpoint", defaultCfg.Tracing.Endpoint, "OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). When empty, tracing is disabled.") flagSet.Bool("tracing.insecure", defaultCfg.Tracing.Insecure, "Disable TLS for the OTLP connection.") + flagSet.String("tracing.servicename", defaultCfg.Tracing.ServiceName, "Service name reported to the tracing backend. Defaults to 'nuts-node'.") return flagSet } diff --git a/core/tracing.go b/core/tracing.go index 065fd83d42..cbfd0ddbcd 100644 --- a/core/tracing.go +++ b/core/tracing.go @@ -39,7 +39,7 @@ import ( oteltrace "go.opentelemetry.io/otel/trace" ) -const serviceName = "nuts-node" +const defaultServiceName = "nuts-node" // tracingEnabled is set to true when OpenTelemetry tracing is configured. var tracingEnabled atomic.Bool @@ -119,6 +119,10 @@ func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err )) // Set up resource with service info + serviceName := cfg.ServiceName + if serviceName == "" { + serviceName = defaultServiceName + } version := Version() res, err := resource.New(ctx, resource.WithAttributes( @@ -192,6 +196,7 @@ func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err logrus.WithFields(logrus.Fields{ "endpoint": cfg.Endpoint, + "service": serviceName, "version": version, }).Info("OpenTelemetry tracing initialized") diff --git a/docs/pages/deployment/monitoring.rst b/docs/pages/deployment/monitoring.rst index d670c62d6c..296c293de3 100644 --- a/docs/pages/deployment/monitoring.rst +++ b/docs/pages/deployment/monitoring.rst @@ -204,6 +204,7 @@ Configuration options: * ``tracing.endpoint`` - OTLP HTTP endpoint (e.g., ``localhost:4318``). Tracing is disabled when empty. 
* ``tracing.insecure`` - Disable TLS for the OTLP connection (default: ``false``). Only use in trusted networks or development environments, as trace data may contain sensitive information. +* ``tracing.servicename`` - Service name reported to the tracing backend (default: ``nuts-node``). Useful for distinguishing multiple instances in distributed tracing. What is traced ============== diff --git a/docs/pages/deployment/server_options.rst b/docs/pages/deployment/server_options.rst index 207b399b64..12a40f1e24 100755 --- a/docs/pages/deployment/server_options.rst +++ b/docs/pages/deployment/server_options.rst @@ -15,8 +15,9 @@ url Public facing URL of the server (required). Must be HTTPS when strictmode is set. verbosity info Log level (trace, debug, info, warn, error) httpclient.timeout 30s Request time-out for HTTP clients, such as '10s'. Refer to Golang's 'time.Duration' syntax for a more elaborate description of the syntax. - tracing.endpoint OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). When empty, tracing is disabled. - tracing.insecure false Disable TLS for the OTLP connection. + tracing.endpoint OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). When empty, tracing is disabled. + tracing.insecure false Disable TLS for the OTLP connection. + tracing.servicename Service name reported to the tracing backend. Defaults to 'nuts-node'. **Auth** auth.authorizationendpoint.enabled false enables the v2 API's OAuth2 Authorization Endpoint, used by OpenID4VP and OpenID4VCI. This flag might be removed in a future version (or its default become 'true') as the use cases and implementation of OpenID4VP and OpenID4VCI mature. 
**Crypto** diff --git a/e2e-tests/oauth-flow/rfc021/do-test.sh b/e2e-tests/oauth-flow/rfc021/do-test.sh index 2040a8d5c0..05d5feb9a0 100755 --- a/e2e-tests/oauth-flow/rfc021/do-test.sh +++ b/e2e-tests/oauth-flow/rfc021/do-test.sh @@ -26,7 +26,7 @@ echo "------------------------------------" echo "Starting Docker containers..." echo "------------------------------------" $db_dc up -d -$db_dc up --wait nodeA nodeA-backend nodeB nodeB-backend +$db_dc up --wait nodeA nodeA-backend nodeB nodeB-backend jaeger echo "------------------------------------" echo "Registering vendors..." @@ -140,7 +140,10 @@ cat << EOF EOF ) # Request access token -RESPONSE=$(echo $REQUEST | curl -X POST -s --data-binary @- http://localhost:28081/internal/auth/v2/vendorB/request-service-access-token -H "Content-Type: application/json") +# Include traceparent header to verify OpenTelemetry tracing works across both nodes +TRACE_ID=$(openssl rand -hex 16) +TRACEPARENT="00-${TRACE_ID}-$(openssl rand -hex 8)-01" +RESPONSE=$(echo $REQUEST | curl -X POST -s --data-binary @- http://localhost:28081/internal/auth/v2/vendorB/request-service-access-token -H "Content-Type: application/json" -H "traceparent: $TRACEPARENT") if echo $RESPONSE | grep -q "access_token"; then echo $RESPONSE | sed -E 's/.*"access_token":"([^"]*).*/\1/' > ./node-B/accesstoken.txt echo "access token stored in ./node-B/accesstoken.txt" @@ -231,6 +234,14 @@ else exitWithDockerLogs 1 fi +echo "------------------------------------" +echo "Verifying trace in Jaeger..." +echo "------------------------------------" +# Verify trace contains spans from both nodeA and nodeB, proving distributed tracing works +if ! assertJaegerTraceContainsServices "http://localhost:16686" "$TRACE_ID" "nodeA,nodeB"; then + exitWithDockerLogs 1 +fi + echo "------------------------------------" echo "Stopping Docker containers..." 
echo "------------------------------------" diff --git a/e2e-tests/oauth-flow/rfc021/docker-compose.yml b/e2e-tests/oauth-flow/rfc021/docker-compose.yml index 36543dd3ea..d94c3ef210 100644 --- a/e2e-tests/oauth-flow/rfc021/docker-compose.yml +++ b/e2e-tests/oauth-flow/rfc021/docker-compose.yml @@ -1,10 +1,21 @@ services: + jaeger: + image: jaegertracing/all-in-one:1.76.0 + ports: + - "16686:16686" # Jaeger API (used for trace verification) + healthcheck: + test: ["CMD", "wget", "-q", "--spider", "http://localhost:16686"] + interval: 1s nodeA-backend: image: "${IMAGE_NODE_A:-nutsfoundation/nuts-node:master}" ports: - "18081:8081" environment: NUTS_CONFIGFILE: /opt/nuts/nuts.yaml + NUTS_TRACING_SERVICENAME: nodeA + OTEL_BSP_SCHEDULE_DELAY: "1000" # Flush traces every 1s for faster e2e test verification + depends_on: + - jaeger volumes: - "./node-A/nuts.yaml:/opt/nuts/nuts.yaml:ro" - "../../tls-certs/nodeA-backend-certificate.pem:/opt/nuts/certificate-and-key.pem:ro" @@ -33,6 +44,10 @@ services: - "28081:8081" environment: NUTS_CONFIGFILE: /opt/nuts/nuts.yaml + NUTS_TRACING_SERVICENAME: nodeB + OTEL_BSP_SCHEDULE_DELAY: "1000" # Flush traces every 1s for faster e2e test verification + depends_on: + - jaeger volumes: - "./node-B/nuts.yaml:/opt/nuts/nuts.yaml:ro" - "../../tls-certs/nodeB-certificate.pem:/opt/nuts/certificate-and-key.pem:ro" diff --git a/e2e-tests/oauth-flow/rfc021/node-A/nginx.conf b/e2e-tests/oauth-flow/rfc021/node-A/nginx.conf index f3a5372982..2d848b5a7a 100644 --- a/e2e-tests/oauth-flow/rfc021/node-A/nginx.conf +++ b/e2e-tests/oauth-flow/rfc021/node-A/nginx.conf @@ -45,6 +45,8 @@ http { location / { proxy_set_header X-Ssl-Client-Cert $ssl_client_escaped_cert; + proxy_set_header traceparent $http_traceparent; + proxy_set_header tracestate $http_tracestate; proxy_pass http://nodeA-external; } diff --git a/e2e-tests/oauth-flow/rfc021/node-A/nuts.yaml b/e2e-tests/oauth-flow/rfc021/node-A/nuts.yaml index 32ed855184..d12c6a93cb 100644 --- 
a/e2e-tests/oauth-flow/rfc021/node-A/nuts.yaml +++ b/e2e-tests/oauth-flow/rfc021/node-A/nuts.yaml @@ -2,6 +2,9 @@ url: https://nodeA verbosity: debug strictmode: false internalratelimiter: false +tracing: + endpoint: jaeger:4318 + insecure: true http: log: metadata-and-body internal: diff --git a/e2e-tests/oauth-flow/rfc021/node-B/nuts.yaml b/e2e-tests/oauth-flow/rfc021/node-B/nuts.yaml index fb78e0ee5f..9a2f2b2e3c 100644 --- a/e2e-tests/oauth-flow/rfc021/node-B/nuts.yaml +++ b/e2e-tests/oauth-flow/rfc021/node-B/nuts.yaml @@ -2,6 +2,9 @@ url: https://nodeB verbosity: debug strictmode: false internalratelimiter: false +tracing: + endpoint: jaeger:4318 + insecure: true http: log: metadata-and-body internal: diff --git a/e2e-tests/util.sh b/e2e-tests/util.sh index b140fb94ce..64b0dcd2b5 100644 --- a/e2e-tests/util.sh +++ b/e2e-tests/util.sh @@ -187,4 +187,34 @@ function removeNodeDID() { urlencode() { local raw="$1" jq -nr --arg raw "$raw" '$raw|@uri' -} \ No newline at end of file +} + +# assertJaegerTraceContainsServices verifies that a trace exists in Jaeger with spans from specified services +# Retries up to 3 times to allow for trace batching/export delays +# Args: Jaeger URL, trace ID, comma-separated list of expected service names (e.g., "nodeA,nodeB") +function assertJaegerTraceContainsServices() { + JAEGER_URL=$1 + TRACE_ID=$2 + EXPECTED_SERVICES=$3 + for i in {1..3}; do + RESPONSE=$(curl -s "$JAEGER_URL/api/traces/$TRACE_ID") + TRACE_COUNT=$(echo "$RESPONSE" | jq '.data | length') + if [ "$TRACE_COUNT" -gt 0 ]; then + # Get unique services from the trace + ACTUAL_SERVICES=$(echo "$RESPONSE" | jq -r '[.data[0].processes[].serviceName] | unique | sort | join(",")') + # Sort expected services for comparison + SORTED_EXPECTED=$(echo "$EXPECTED_SERVICES" | tr ',' '\n' | sort | tr '\n' ',' | sed 's/,$//') + if [ "$ACTUAL_SERVICES" == "$SORTED_EXPECTED" ]; then + SPAN_COUNT=$(echo "$RESPONSE" | jq '.data[0].spans | length') + echo "Verified trace '$TRACE_ID' 
contains $SPAN_COUNT spans from services: $ACTUAL_SERVICES" + return 0 + else + echo "FAILED: Trace '$TRACE_ID' found but services are '$ACTUAL_SERVICES', expected '$SORTED_EXPECTED'" 1>&2 + return 1 + fi + fi + sleep 1 + done + echo "FAILED: Trace '$TRACE_ID' not found in Jaeger after 3 attempts" 1>&2 + return 1 +} From d57ff6a05d368f171f81f3766fb28a229070a867 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Wed, 21 Jan 2026 17:57:15 +0100 Subject: [PATCH 07/16] refactor(tracing): extract tracing into dedicated engine package Refactored OpenTelemetry tracing from core/tracing.go into a proper tracing/ engine package following the standard engine pattern. Key improvements: - Use official otellogrus bridge instead of custom implementation - Remove disableableHook pattern (OTEL shutdown makes Emit() a no-op) - Add trace context (trace_id, span_id) to audit logs for correlation - Fix enabled flag not being reset on setup failure - Create hooks once and register to both standard and audit loggers --- audit/audit.go | 12 +- cmd/root.go | 6 + cmd/root_test.go | 2 +- core/engine.go | 28 +-- core/http_client.go | 13 +- core/server_config.go | 27 +-- core/tracing_test.go | 123 ----------- crypto/storage/external/client.go | 3 +- crypto/storage/vault/vault.go | 3 +- go.mod | 11 +- go.sum | 12 ++ http/client/client.go | 3 +- http/client/client_test.go | 32 +-- http/engine.go | 5 +- pki/validator.go | 4 +- storage/engine.go | 5 +- tracing/cmd/cmd.go | 37 ++++ tracing/config.go | 37 ++++ core/tracing.go => tracing/engine.go | 235 +++++++++++++-------- tracing/engine_test.go | 292 +++++++++++++++++++++++++++ 20 files changed, 596 insertions(+), 294 deletions(-) delete mode 100644 core/tracing_test.go create mode 100644 tracing/cmd/cmd.go create mode 100644 tracing/config.go rename core/tracing.go => tracing/engine.go (52%) create mode 100644 tracing/engine_test.go diff --git a/audit/audit.go b/audit/audit.go index 35237f1165..fdfc8b5b41 100644 --- a/audit/audit.go +++ 
b/audit/audit.go @@ -26,7 +26,7 @@ import ( "strings" "sync" - "github.com/nuts-foundation/nuts-node/core" + "github.com/nuts-foundation/nuts-node/tracing" "github.com/sirupsen/logrus" ) @@ -64,12 +64,12 @@ var auditLoggerInstance *logrus.Logger var initAuditLoggerOnce = &sync.Once{} func init() { - // Register callback so core.SetupTracing can add hooks to the audit logger. + // Register callback so tracing can add hooks to the audit logger. // This is needed because the audit logger is a separate logrus instance, - // and we can't import audit from core due to circular dependencies. - core.RegisterAuditLogHook = func(hook logrus.Hook) { + // and we can't import audit from tracing due to circular dependencies. + tracing.RegisterAuditLogHook(func(hook logrus.Hook) { auditLogger().AddHook(hook) - } + }) } // auditLogger returns the initialized logger instance intended for audit logging. @@ -191,7 +191,7 @@ func Log(ctx context.Context, logger *logrus.Entry, eventName string) *logrus.En panic("audit: eventName is empty") } - return auditLogger().WithFields(logger.Data). + return auditLogger().WithContext(ctx).WithFields(logger.Data). WithField("actor", info.Actor). WithField("operation", info.Operation). 
WithField("event", eventName) diff --git a/cmd/root.go b/cmd/root.go index 493e138ac8..811108486d 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -61,6 +61,8 @@ import ( "github.com/nuts-foundation/nuts-node/policy" "github.com/nuts-foundation/nuts-node/storage" storageCmd "github.com/nuts-foundation/nuts-node/storage/cmd" + "github.com/nuts-foundation/nuts-node/tracing" + tracingCmd "github.com/nuts-foundation/nuts-node/tracing/cmd" "github.com/nuts-foundation/nuts-node/vcr" openid4vciAPI "github.com/nuts-foundation/nuts-node/vcr/api/openid4vci/v0" vcrAPI "github.com/nuts-foundation/nuts-node/vcr/api/vcr/v2" @@ -224,6 +226,9 @@ func CreateSystem(shutdownCallback context.CancelFunc) *core.System { system.RegisterRoutes(&discoveryServerAPI.Wrapper{Server: discoveryInstance}) // Register engines + // Tracing engine MUST be registered first to ensure tracing is active before other engines configure/start, + // and shuts down last (due to reverse shutdown order) to capture all logs/spans. + system.RegisterEngine(tracing.New()) // without dependencies system.RegisterEngine(pkiInstance) system.RegisterEngine(storageInstance) @@ -340,6 +345,7 @@ func serverConfigFlags() *pflag.FlagSet { set.AddFlagSet(goldenHammerCmd.FlagSet()) set.AddFlagSet(discoveryCmd.FlagSet()) set.AddFlagSet(policy.FlagSet()) + set.AddFlagSet(tracingCmd.FlagSet()) return set } diff --git a/cmd/root_test.go b/cmd/root_test.go index 90e8374d6d..d676328d8e 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -165,7 +165,7 @@ func Test_CreateSystem(t *testing.T) { system.VisitEngines(func(engine core.Engine) { numEngines++ }) - assert.Equal(t, 17, numEngines) + assert.Equal(t, 18, numEngines) } func Test_ClientCommand_ErrorHandlers(t *testing.T) { diff --git a/core/engine.go b/core/engine.go index 3402c8cf8e..6a721c2b99 100644 --- a/core/engine.go +++ b/core/engine.go @@ -58,8 +58,6 @@ type System struct { Context context.Context // ContextCancel is a function to signal the system should shut down. 
ContextCancel context.CancelFunc - // tracingShutdown is the shutdown function for OpenTelemetry tracing - tracingShutdown func(context.Context) error } var coreLogger = logrus.StandardLogger().WithField(LogFieldModule, "core") @@ -98,6 +96,7 @@ func (system *System) Start() error { } // Shutdown shuts down all engines in the system. +// Engines are shut down in reverse order of registration. func (system *System) Shutdown() error { var engines []Runnable system.VisitEngines(func(engine Engine) { @@ -114,26 +113,14 @@ func (system *System) Shutdown() error { } coreLogger.Infof("Stopped %s", name) } - // Shutdown tracing last to ensure all logs are flushed - if system.tracingShutdown != nil { - if err := system.tracingShutdown(context.Background()); err != nil { - coreLogger.WithError(err).Error("Failed to shutdown tracing") - } - } return nil } // Configure configures all engines in the system. +// Engines are configured in order of registration (tracing engine should be first). func (system *System) Configure() error { - // Set up tracing first, so all logs (including engine configuration) go to the configured destination - tracingShutdown, err := SetupTracing(system.Config.Tracing) - if err != nil { - return fmt.Errorf("failed to setup tracing: %w", err) - } - system.tracingShutdown = tracingShutdown - coreLogger.Debugf("Creating datadir: %s", system.Config.Datadir) - if err = os.MkdirAll(system.Config.Datadir, os.ModePerm); err != nil { + if err := os.MkdirAll(system.Config.Datadir, os.ModePerm); err != nil { return fmt.Errorf("unable to create datadir (dir=%s): %w", system.Config.Datadir, err) } return system.VisitEnginesE(func(engine Engine) error { @@ -141,13 +128,12 @@ func (system *System) Configure() error { name := engineName(engine) if m, ok := engine.(Configurable); ok { coreLogger.Debugf("Configuring %s", name) - err = m.Configure(*system.Config) + if err := m.Configure(*system.Config); err != nil { + return fmt.Errorf("unable to configure %s: %w", 
name, err) + } coreLogger.Debugf("Configured %s", name) } - if err != nil { - err = fmt.Errorf("unable to configure %s: %w", name, err) - } - return err + return nil }) } diff --git a/core/http_client.go b/core/http_client.go index 2960fa327f..f5acec4ed2 100644 --- a/core/http_client.go +++ b/core/http_client.go @@ -26,9 +26,13 @@ import ( "net/http" "github.com/sirupsen/logrus" - "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) +// TracingHTTPTransport wraps an http.RoundTripper with OpenTelemetry tracing instrumentation. +// It is set by the tracing package when tracing is enabled, and nil when disabled. +// This callback pattern avoids circular imports between core and tracing packages. +var TracingHTTPTransport func(http.RoundTripper) http.RoundTripper + // HttpResponseBodyLogClipAt is the maximum length of a response body to log. // If the response body is longer than this, it will be truncated. const HttpResponseBodyLogClipAt = 200 @@ -101,11 +105,8 @@ func (w httpRequestDoerAdapter) Do(req *http.Request) (*http.Response, error) { func CreateHTTPInternalClient(cfg ClientConfig, generator AuthorizationTokenGenerator) (HTTPRequestDoer, error) { var result *httpRequestDoerAdapter var transport http.RoundTripper = http.DefaultTransport - if TracingEnabled() { - transport = otelhttp.NewTransport(http.DefaultTransport, - otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { - return "internal-api: " + r.Method + " " + r.URL.Path - })) + if TracingHTTPTransport != nil { + transport = TracingHTTPTransport(transport) } client := &http.Client{ Transport: transport, diff --git a/core/server_config.go b/core/server_config.go index 5ee9c8c790..434300e72b 100644 --- a/core/server_config.go +++ b/core/server_config.go @@ -25,15 +25,16 @@ import ( "crypto/x509" "errors" "fmt" + "net/url" + "reflect" + "strings" + "time" + "github.com/knadh/koanf/providers/env" "github.com/knadh/koanf/providers/posflag" "github.com/knadh/koanf/v2" 
"github.com/sirupsen/logrus" "github.com/spf13/pflag" - "net/url" - "reflect" - "strings" - "time" ) const defaultConfigFile = "./config/nuts.yaml" @@ -72,8 +73,7 @@ type ServerConfig struct { LegacyTLS TLSConfig `koanf:"network"` // HTTP exists to expose http.clientipheader to the nuts-network layer. // This header should contaisn the client IP address for logging. Can be removed together with the nuts-network - HTTP HTTPConfig `koanf:"http"` - Tracing TracingConfig `koanf:"tracing"` + HTTP HTTPConfig `koanf:"http"` configMap *koanf.Koanf } @@ -88,18 +88,6 @@ type HTTPClientConfig struct { Timeout time.Duration `koanf:"timeout"` } -// TracingConfig contains settings for OpenTelemetry tracing. -type TracingConfig struct { - // Endpoint is the OTLP collector endpoint (e.g., "localhost:4318" for HTTP). - // When empty, tracing is disabled. When set, logs are sent to both stdout and the OTLP endpoint. - Endpoint string `koanf:"endpoint"` - // Insecure disables TLS for the OTLP connection. - Insecure bool `koanf:"insecure"` - // ServiceName is the name of the service reported to the tracing backend. - // Defaults to "nuts-node" if not specified. - ServiceName string `koanf:"servicename"` -} - // TLSConfig specifies how TLS should be configured for connections. type TLSConfig struct { // Offload specifies the TLS offloading mode for incoming/outgoing traffic. @@ -287,9 +275,6 @@ func FlagSet() *pflag.FlagSet { flagSet.String("tls.offload", string(defaultCfg.TLS.Offload), fmt.Sprintf("Whether to enable TLS offloading for incoming gRPC connections. "+ "Enable by setting it to '%s'. If enabled 'tls.certheader' must be configured as well.", OffloadIncomingTLS)) flagSet.String("tls.certheader", defaultCfg.TLS.ClientCertHeaderName, "Name of the HTTP header that will contain the client certificate when TLS is offloaded for gRPC.") - flagSet.String("tracing.endpoint", defaultCfg.Tracing.Endpoint, "OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). 
When empty, tracing is disabled.") - flagSet.Bool("tracing.insecure", defaultCfg.Tracing.Insecure, "Disable TLS for the OTLP connection.") - flagSet.String("tracing.servicename", defaultCfg.Tracing.ServiceName, "Service name reported to the tracing backend. Defaults to 'nuts-node'.") return flagSet } diff --git a/core/tracing_test.go b/core/tracing_test.go deleted file mode 100644 index e66422b1fb..0000000000 --- a/core/tracing_test.go +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Nuts node - * Copyright (C) 2025 Nuts community - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- * - */ - -package core - -import ( - "context" - "testing" - - "github.com/sirupsen/logrus" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.opentelemetry.io/otel/trace" -) - -func TestSetupTracing(t *testing.T) { - t.Run("disabled when endpoint is empty", func(t *testing.T) { - cfg := TracingConfig{Endpoint: ""} - - shutdown, err := SetupTracing(cfg) - - require.NoError(t, err) - assert.NotNil(t, shutdown) - // Shutdown should be a no-op - assert.NoError(t, shutdown(context.Background())) - }) -} - -func TestGetTracerProvider(t *testing.T) { - t.Run("returns global provider when nutsTracerProvider is nil", func(t *testing.T) { - // Reset state - nutsTracerProvider = nil - - provider := GetTracerProvider() - assert.NotNil(t, provider) - }) -} - -func TestTracingLogrusHook(t *testing.T) { - hook := &tracingLogrusHook{} - - t.Run("no-op when context is nil", func(t *testing.T) { - entry := &logrus.Entry{ - Data: make(logrus.Fields), - } - err := hook.Fire(entry) - assert.NoError(t, err) - assert.NotContains(t, entry.Data, "trace_id") - assert.NotContains(t, entry.Data, "span_id") - }) - - t.Run("no-op when span context is invalid", func(t *testing.T) { - entry := &logrus.Entry{ - Context: context.Background(), - Data: make(logrus.Fields), - } - err := hook.Fire(entry) - assert.NoError(t, err) - assert.NotContains(t, entry.Data, "trace_id") - assert.NotContains(t, entry.Data, "span_id") - }) - - t.Run("adds trace context when span is valid", func(t *testing.T) { - // Create a valid span context - traceID, _ := trace.TraceIDFromHex("0102030405060708090a0b0c0d0e0f10") - spanID, _ := trace.SpanIDFromHex("0102030405060708") - spanCtx := trace.NewSpanContext(trace.SpanContextConfig{ - TraceID: traceID, - SpanID: spanID, - TraceFlags: trace.FlagsSampled, - }) - - // Use noop tracer but with our span context - ctx := trace.ContextWithSpanContext(context.Background(), spanCtx) - - entry := &logrus.Entry{ - Context: ctx, - Data: 
make(logrus.Fields), - } - err := hook.Fire(entry) - assert.NoError(t, err) - assert.Equal(t, "0102030405060708090a0b0c0d0e0f10", entry.Data["trace_id"]) - assert.Equal(t, "0102030405060708", entry.Data["span_id"]) - }) -} - -func TestFormatValue(t *testing.T) { - t.Run("string value", func(t *testing.T) { - result := formatValue("test") - assert.Equal(t, "test", result) - }) - - t.Run("error value", func(t *testing.T) { - result := formatValue(assert.AnError) - assert.Equal(t, assert.AnError.Error(), result) - }) - - t.Run("int value", func(t *testing.T) { - result := formatValue(42) - assert.Equal(t, "42", result) - }) - - t.Run("nil value", func(t *testing.T) { - result := formatValue(nil) - assert.Equal(t, "", result) - }) -} diff --git a/crypto/storage/external/client.go b/crypto/storage/external/client.go index 23a1e84599..bd9ea3d025 100644 --- a/crypto/storage/external/client.go +++ b/crypto/storage/external/client.go @@ -30,6 +30,7 @@ import ( "github.com/nuts-foundation/nuts-node/core" "github.com/nuts-foundation/nuts-node/crypto/storage/spi" "github.com/nuts-foundation/nuts-node/crypto/util" + "github.com/nuts-foundation/nuts-node/tracing" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) @@ -84,7 +85,7 @@ func NewAPIClient(config Config) (spi.Storage, error) { return nil, err } var transport http.RoundTripper = http.DefaultTransport - if core.TracingEnabled() { + if tracing.Enabled() { transport = otelhttp.NewTransport(http.DefaultTransport, otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { return "crypto-storage: " + r.Method + " " + r.URL.Path diff --git a/crypto/storage/vault/vault.go b/crypto/storage/vault/vault.go index 117e6e6870..f87e9feb75 100644 --- a/crypto/storage/vault/vault.go +++ b/crypto/storage/vault/vault.go @@ -32,6 +32,7 @@ import ( "github.com/nuts-foundation/nuts-node/crypto/log" "github.com/nuts-foundation/nuts-node/crypto/storage/spi" "github.com/nuts-foundation/nuts-node/crypto/util" + 
"github.com/nuts-foundation/nuts-node/tracing" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) @@ -115,7 +116,7 @@ func configureVaultClient(cfg Config) (*vault.Client, error) { vaultConfig.Timeout = cfg.Timeout // Add tracing if enabled - if core.TracingEnabled() { + if tracing.Enabled() { vaultConfig.HttpClient.Transport = otelhttp.NewTransport( vaultConfig.HttpClient.Transport, otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { diff --git a/go.mod b/go.mod index 2b38cbf799..861769efe4 100644 --- a/go.mod +++ b/go.mod @@ -209,13 +209,13 @@ require ( github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2 go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.63.0 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 - go.opentelemetry.io/otel v1.38.0 + go.opentelemetry.io/otel v1.39.0 go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 - go.opentelemetry.io/otel/log v0.14.0 + go.opentelemetry.io/otel/log v0.15.0 go.opentelemetry.io/otel/sdk v1.38.0 go.opentelemetry.io/otel/sdk/log v0.14.0 - go.opentelemetry.io/otel/trace v1.38.0 + go.opentelemetry.io/otel/trace v1.39.0 ) require ( @@ -231,9 +231,10 @@ require ( github.com/klauspost/cpuid/v2 v2.2.5 // indirect github.com/rs/zerolog v1.26.1 // indirect github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/bridges/otellogrus v0.14.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect - go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.39.0 // indirect go.opentelemetry.io/proto/otlp v1.7.1 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect diff --git a/go.sum b/go.sum index 
02ed5bf082..61b84d1404 100644 --- a/go.sum +++ b/go.sum @@ -522,6 +522,10 @@ go.etcd.io/bbolt v1.4.3 h1:dEadXpI6G79deX5prL3QRNP6JB8UxVkqo4UPnHaNXJo= go.etcd.io/bbolt v1.4.3/go.mod h1:tKQlpPaYCVFctUIgFKFnAlvbmB3tpy1vkTnDWohtc0E= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/bridges/otellogrus v0.14.0 h1:UtI97OoeD9Cjx/s1nQ4W9fCFjJbPfhTsVBorhCM2lQg= +go.opentelemetry.io/contrib/bridges/otellogrus v0.14.0/go.mod h1:L38Uc5BbIN4o6QKrxc252Le7FyE7Ym8IV9GMa9dr3I0= go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.63.0 h1:6YeICKmGrvgJ5th4+OMNpcuoB6q/Xs8gt0YCO7MUv1k= go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.63.0/go.mod h1:ZEA7j2B35siNV0T00aapacNzjz4tvOlNoHp0ncCfwNQ= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= @@ -530,6 +534,8 @@ go.opentelemetry.io/contrib/propagators/b3 v1.38.0 h1:uHsCCOSKl0kLrV2dLkFK+8Ywk9 go.opentelemetry.io/contrib/propagators/b3 v1.38.0/go.mod h1:wMRSZJZcY8ya9mApLLhwIMjqmApy2o/Ml+62lhvxyHU= go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= +go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 h1:QQqYw3lkrzwVsoEX0w//EhH/TCnpRdEenKBOOEIMjWc= go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0/go.mod h1:gSVQcr17jk2ig4jqJ2DX30IdWH251JcNAecvrqTxH1s= 
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= @@ -538,8 +544,12 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 h1:aTL7F go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0/go.mod h1:kldtb7jDTeol0l3ewcmd8SDvx3EmIE7lyvqbasU3QC4= go.opentelemetry.io/otel/log v0.14.0 h1:2rzJ+pOAZ8qmZ3DDHg73NEKzSZkhkGIua9gXtxNGgrM= go.opentelemetry.io/otel/log v0.14.0/go.mod h1:5jRG92fEAgx0SU/vFPxmJvhIuDU9E1SUnEQrMlJpOno= +go.opentelemetry.io/otel/log v0.15.0 h1:0VqVnc3MgyYd7QqNVIldC3dsLFKgazR6P3P3+ypkyDY= +go.opentelemetry.io/otel/log v0.15.0/go.mod h1:9c/G1zbyZfgu1HmQD7Qj84QMmwTp2QCQsZH1aeoWDE4= go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= +go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= go.opentelemetry.io/otel/sdk/log v0.14.0 h1:JU/U3O7N6fsAXj0+CXz21Czg532dW2V4gG1HE/e8Zrg= @@ -550,6 +560,8 @@ go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6 go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= +go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= 
go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= diff --git a/http/client/client.go b/http/client/client.go index 47ab2dd1f7..b31bfe0cf1 100644 --- a/http/client/client.go +++ b/http/client/client.go @@ -28,6 +28,7 @@ import ( "time" "github.com/nuts-foundation/nuts-node/core" + "github.com/nuts-foundation/nuts-node/tracing" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) @@ -83,7 +84,7 @@ func New(timeout time.Duration) *StrictHTTPClient { // getTransport wraps the given transport with OpenTelemetry instrumentation if tracing is enabled. func getTransport(base http.RoundTripper) http.RoundTripper { - if core.TracingEnabled() { + if tracing.Enabled() { return otelhttp.NewTransport(base, otelhttp.WithSpanNameFormatter(httpSpanName)) } return base diff --git a/http/client/client_test.go b/http/client/client_test.go index 0db38d0c0c..1a1b01366c 100644 --- a/http/client/client_test.go +++ b/http/client/client_test.go @@ -29,7 +29,7 @@ import ( "testing" "time" - "github.com/nuts-foundation/nuts-node/core" + "github.com/nuts-foundation/nuts-node/tracing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -75,9 +75,9 @@ func TestStrictHTTPClient(t *testing.T) { assert.Equal(t, 0, rt.invocations) }) t.Run("sets TLS config", func(t *testing.T) { - original := core.TracingEnabled() - core.SetTracingEnabled(false) // ensure we can cast to *http.Transport - t.Cleanup(func() { core.SetTracingEnabled(original) }) + original := tracing.Enabled() + tracing.SetEnabled(false) // ensure we can cast to *http.Transport + t.Cleanup(func() { tracing.SetEnabled(original) }) client := NewWithTLSConfig(time.Second, &tls.Config{ InsecureSkipVerify: true, }) @@ -205,9 +205,9 @@ func TestCaching(t *testing.T) { func TestGetTransport(t *testing.T) { t.Run("wraps transport when tracing enabled", func(t *testing.T) { - 
original := core.TracingEnabled() - core.SetTracingEnabled(true) - t.Cleanup(func() { core.SetTracingEnabled(original) }) + original := tracing.Enabled() + tracing.SetEnabled(true) + t.Cleanup(func() { tracing.SetEnabled(original) }) transport := getTransport(SafeHttpTransport) @@ -216,9 +216,9 @@ func TestGetTransport(t *testing.T) { }) t.Run("returns base transport when tracing disabled", func(t *testing.T) { - original := core.TracingEnabled() - core.SetTracingEnabled(false) - t.Cleanup(func() { core.SetTracingEnabled(original) }) + original := tracing.Enabled() + tracing.SetEnabled(false) + t.Cleanup(func() { tracing.SetEnabled(original) }) transport := getTransport(SafeHttpTransport) @@ -228,9 +228,9 @@ func TestGetTransport(t *testing.T) { func TestNew(t *testing.T) { t.Run("wraps transport when tracing enabled", func(t *testing.T) { - original := core.TracingEnabled() - core.SetTracingEnabled(true) - t.Cleanup(func() { core.SetTracingEnabled(original) }) + original := tracing.Enabled() + tracing.SetEnabled(true) + t.Cleanup(func() { tracing.SetEnabled(original) }) client := New(time.Second) @@ -239,9 +239,9 @@ func TestNew(t *testing.T) { }) t.Run("uses SafeHttpTransport when tracing disabled", func(t *testing.T) { - original := core.TracingEnabled() - core.SetTracingEnabled(false) - t.Cleanup(func() { core.SetTracingEnabled(original) }) + original := tracing.Enabled() + tracing.SetEnabled(false) + t.Cleanup(func() { tracing.SetEnabled(original) }) client := New(time.Second) diff --git a/http/engine.go b/http/engine.go index 0c6026c848..e58b5a6a0f 100644 --- a/http/engine.go +++ b/http/engine.go @@ -35,6 +35,7 @@ import ( "github.com/nuts-foundation/nuts-node/http/client" "github.com/nuts-foundation/nuts-node/http/log" "github.com/nuts-foundation/nuts-node/http/tokenV2" + "github.com/nuts-foundation/nuts-node/tracing" "github.com/nuts-foundation/nuts-node/vdr/didnuts" "go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho" ) @@ -107,7 
+108,7 @@ func (h *Engine) configureClient(serverConfig core.ServerConfig) { func (h *Engine) applyTracingMiddleware(echoServer core.EchoRouter) { // Only apply tracing middleware if tracing is enabled - if !core.TracingEnabled() { + if !tracing.Enabled() { return } skipper := func(c echo.Context) bool { @@ -119,7 +120,7 @@ func (h *Engine) applyTracingMiddleware(echoServer core.EchoRouter) { // even when embedded in another application that has its own TracerProvider. echoServer.Use(otelecho.Middleware(moduleName, otelecho.WithSkipper(skipper), - otelecho.WithTracerProvider(core.GetTracerProvider()), + otelecho.WithTracerProvider(tracing.GetTracerProvider()), )) } diff --git a/pki/validator.go b/pki/validator.go index 207b1f6952..e5469bcced 100644 --- a/pki/validator.go +++ b/pki/validator.go @@ -30,7 +30,7 @@ import ( "sync" "time" - "github.com/nuts-foundation/nuts-node/core" + "github.com/nuts-foundation/nuts-node/tracing" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) @@ -93,7 +93,7 @@ func newRevocationList(cert *x509.Certificate) *revocationList { func newValidator(config Config) (*validator, error) { // we do not use our safe http client here since we're downloading from a trusted resource var transport http.RoundTripper = http.DefaultTransport - if core.TracingEnabled() { + if tracing.Enabled() { transport = otelhttp.NewTransport(http.DefaultTransport, otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { return "pki: " + r.Method + " " + r.URL.Path diff --git a/storage/engine.go b/storage/engine.go index 04ba16b5b5..50c8da06ee 100644 --- a/storage/engine.go +++ b/storage/engine.go @@ -33,6 +33,7 @@ import ( "github.com/nuts-foundation/go-stoabs" "github.com/nuts-foundation/nuts-node/core" "github.com/nuts-foundation/nuts-node/storage/log" + "github.com/nuts-foundation/nuts-node/tracing" "github.com/nuts-foundation/nuts-node/storage/sql_migrations" "github.com/nuts-foundation/sqlite" "github.com/pressly/goose/v3" @@ 
-327,8 +328,8 @@ func (e *engine) initSQLDatabase(strictmode bool) error { } // Add OpenTelemetry tracing to GORM if tracing is enabled - if core.TracingEnabled() { - if err := e.sqlDB.Use(otelgorm.NewPlugin(otelgorm.WithTracerProvider(core.GetTracerProvider()))); err != nil { + if tracing.Enabled() { + if err := e.sqlDB.Use(otelgorm.NewPlugin(otelgorm.WithTracerProvider(tracing.GetTracerProvider()))); err != nil { return fmt.Errorf("failed to add GORM tracing plugin: %w", err) } } diff --git a/tracing/cmd/cmd.go b/tracing/cmd/cmd.go new file mode 100644 index 0000000000..df72748818 --- /dev/null +++ b/tracing/cmd/cmd.go @@ -0,0 +1,37 @@ +/* + * Nuts node + * Copyright (C) 2026 Nuts community + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + */ + +package cmd + +import ( + "github.com/nuts-foundation/nuts-node/tracing" + "github.com/spf13/pflag" +) + +// FlagSet defines the set of flags that sets the tracing configuration. +func FlagSet() *pflag.FlagSet { + flags := pflag.NewFlagSet("tracing", pflag.ContinueOnError) + defs := tracing.DefaultConfig() + + flags.String("tracing.endpoint", defs.Endpoint, "OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318').
When empty, tracing is disabled.") + flags.Bool("tracing.insecure", defs.Insecure, "Disable TLS for the OTLP connection.") + flags.String("tracing.servicename", defs.ServiceName, "Service name reported to the tracing backend. Defaults to 'nuts-node'.") + + return flags +} diff --git a/tracing/config.go b/tracing/config.go new file mode 100644 index 0000000000..0f472c1013 --- /dev/null +++ b/tracing/config.go @@ -0,0 +1,37 @@ +/* + * Nuts node + * Copyright (C) 2026 Nuts community + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + */ + +package tracing + +// DefaultConfig returns the default configuration for the tracing engine. +func DefaultConfig() Config { + return Config{} +} + +// Config contains settings for OpenTelemetry tracing. +type Config struct { + // Endpoint is the OTLP collector endpoint for tracing (e.g., "localhost:4318"). + // When empty, tracing is disabled. + Endpoint string `koanf:"endpoint"` + // Insecure disables TLS for the OTLP connection. + Insecure bool `koanf:"insecure"` + // ServiceName is the service name reported to the tracing backend. + // Defaults to "nuts-node".
+ ServiceName string `koanf:"servicename"` +} diff --git a/core/tracing.go b/tracing/engine.go similarity index 52% rename from core/tracing.go rename to tracing/engine.go index cbfd0ddbcd..2998041ae5 100644 --- a/core/tracing.go +++ b/tracing/engine.go @@ -1,6 +1,6 @@ /* * Nuts node - * Copyright (C) 2025 Nuts community + * Copyright (C) 2026 Nuts community * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,20 +17,23 @@ * */ -package core +package tracing import ( "context" "errors" "fmt" + "net/http" "sync/atomic" "time" + "github.com/nuts-foundation/nuts-node/core" "github.com/sirupsen/logrus" + "go.opentelemetry.io/contrib/bridges/otellogrus" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" - otellog "go.opentelemetry.io/otel/log" "go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/sdk/log" "go.opentelemetry.io/otel/sdk/resource" @@ -39,24 +42,134 @@ import ( oteltrace "go.opentelemetry.io/otel/trace" ) -const defaultServiceName = "nuts-node" +const ( + moduleName = "Tracing" + defaultServiceName = "nuts-node" +) -// tracingEnabled is set to true when OpenTelemetry tracing is configured. -var tracingEnabled atomic.Bool +// enabled is set to true when OpenTelemetry tracing is configured. +var enabled atomic.Bool // nutsTracerProvider holds nuts-node's own TracerProvider. // This is used instead of the global when nuts-node is embedded in another application. var nutsTracerProvider *trace.TracerProvider -// TracingEnabled returns true if OpenTelemetry tracing is configured. -func TracingEnabled() bool { - return tracingEnabled.Load() +// registerAuditLogHook is a function that registers a logrus hook with the audit logger. 
+// It is set by the audit package during init() to avoid circular imports. +// Go guarantees imported packages initialize before the importing package, so audit's init() +// runs after tracing's package-level vars are initialized but before Configure(). +var registerAuditLogHook func(hook logrus.Hook) = func(logrus.Hook) {} + +// RegisterAuditLogHook sets the function that registers a logrus hook with the audit logger. +// This is called by the audit package during initialization to avoid circular imports. +func RegisterAuditLogHook(fn func(hook logrus.Hook)) { + registerAuditLogHook = fn +} + +// New creates a new tracing engine instance. +func New() *Engine { + return &Engine{} +} + +// Engine is the engine that manages OpenTelemetry tracing. +// It must be registered first to ensure tracing is active before other engines start, +// and shut down last (due to reverse shutdown order) to capture all logs/spans. +type Engine struct { + config Config + shutdown func(context.Context) error +} + +// Name returns the engine name. +func (e *Engine) Name() string { + return moduleName +} + +// Config returns the engine configuration. +func (e *Engine) Config() any { + return &e.config +} + +// Configure sets up OpenTelemetry tracing with the configured endpoint. +func (e *Engine) Configure(_ core.ServerConfig) error { + shutdown, err := setupTracing(e.config) + if err != nil { + return fmt.Errorf("failed to setup tracing: %w", err) + } + e.shutdown = shutdown + return nil +} + +// Start is a no-op since tracing is already active after Configure. +func (e *Engine) Start() error { + return nil +} + +// Shutdown stops the tracing exporters and flushes any remaining spans/logs. +// Hooks remain registered but become no-ops after the OTEL providers are shut down. 
+func (e *Engine) Shutdown() error { + // Reset global state + enabled.Store(false) + nutsTracerProvider = nil + core.TracingHTTPTransport = nil + + // Call the shutdown function to flush and close exporters + // After this, any hook calls to logger.Emit() become no-ops per OTEL spec. + if e.shutdown != nil { + return e.shutdown(context.Background()) + } + return nil +} + +// CheckHealth returns the health status of the tracing subsystem. +// When tracing is not configured or not running, no health entry is returned (not applicable). +// When enabled and running, we return UP since we successfully initialized - we can't easily +// verify OTLP endpoint connectivity as spans are exported asynchronously. +func (e *Engine) CheckHealth() map[string]core.Health { + if e.config.Endpoint == "" || !enabled.Load() { + // Tracing is not configured or not running, don't report health + return nil + } + // Tracing is configured and running + return map[string]core.Health{ + "otlp": { + Status: core.HealthStatusUp, + Details: e.config.Endpoint, + }, + } +} + +// Diagnostics returns diagnostic information about the tracing configuration. +func (e *Engine) Diagnostics() []core.DiagnosticResult { + isEnabled := e.config.Endpoint != "" + return []core.DiagnosticResult{ + core.DiagnosticResultMap{ + Title: "tracing", + Items: []core.DiagnosticResult{ + core.GenericDiagnosticResult{Title: "enabled", Outcome: isEnabled}, + core.GenericDiagnosticResult{Title: "endpoint", Outcome: e.config.Endpoint}, + core.GenericDiagnosticResult{Title: "service_name", Outcome: e.resolvedServiceName()}, + core.GenericDiagnosticResult{Title: "insecure", Outcome: e.config.Insecure}, + }, + }, + } +} + +func (e *Engine) resolvedServiceName() string { + if e.config.ServiceName != "" { + return e.config.ServiceName + } + return defaultServiceName } -// SetTracingEnabled sets the tracing enabled flag. +// Enabled returns true if OpenTelemetry tracing is configured. 
+func Enabled() bool { + return enabled.Load() +} + +// SetEnabled sets the tracing enabled flag. // Exported for testing only; do not call from production code. -func SetTracingEnabled(enabled bool) { - tracingEnabled.Store(enabled) +func SetEnabled(value bool) { + enabled.Store(value) } // GetTracerProvider returns nuts-node's TracerProvider. @@ -69,22 +182,18 @@ func GetTracerProvider() oteltrace.TracerProvider { return otel.GetTracerProvider() } -// RegisterAuditLogHook is a function that registers a logrus hook with the audit logger. -// It is set by the audit package during initialization to avoid circular imports. -var RegisterAuditLogHook func(hook logrus.Hook) = func(logrus.Hook) {} - -// SetupTracing initializes OpenTelemetry tracing with the given configuration. +// setupTracing initializes OpenTelemetry tracing with the given configuration. // Returns a shutdown function that should be called on application exit. // If cfg.Endpoint is empty, tracing is disabled and a no-op shutdown function is returned. // When tracing is enabled, logs are sent to both stdout and the OTLP endpoint. 
-func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err error) { +func setupTracing(cfg Config) (shutdown func(context.Context) error, err error) { if cfg.Endpoint == "" { logrus.Info("Tracing disabled (no endpoint configured)") return func(context.Context) error { return nil }, nil } // Enable tracing flag for HTTP clients and other components - tracingEnabled.Store(true) + enabled.Store(true) ctx := context.Background() var shutdownFuncs []func(context.Context) error @@ -101,6 +210,7 @@ func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err // Handle errors by cleaning up already-created resources handleErr := func(err error) (func(context.Context) error, error) { + enabled.Store(false) shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() _ = shutdown(shutdownCtx) @@ -123,7 +233,7 @@ func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err if serviceName == "" { serviceName = defaultServiceName } - version := Version() + version := core.Version() res, err := resource.New(ctx, resource.WithAttributes( semconv.ServiceNameKey.String(serviceName), @@ -164,6 +274,15 @@ func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err otel.SetTracerProvider(tracerProvider) } + // Set up HTTP transport wrapper for core package (avoids circular import) + core.TracingHTTPTransport = func(transport http.RoundTripper) http.RoundTripper { + return otelhttp.NewTransport(transport, + otelhttp.WithTracerProvider(tracerProvider), + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "internal-api: " + r.Method + " " + r.URL.Path + })) + } + // Set up OTLP log exporter logOpts := []otlploghttp.Option{ otlploghttp.WithEndpoint(cfg.Endpoint), @@ -184,15 +303,18 @@ func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err ) shutdownFuncs = append(shutdownFuncs, loggerProvider.Shutdown) - // Create OTEL hook for sending 
logs via OTLP (logs go to both stdout and OTLP) - otelHook := &OtelLogrusHook{logger: loggerProvider.Logger(serviceName)} - logrus.AddHook(otelHook) + // Create hooks for log correlation and OTLP export + // Uses official otellogrus bridge: https://pkg.go.dev/go.opentelemetry.io/contrib/bridges/otellogrus + traceContextHook := &tracingLogrusHook{} + otelHook := otellogrus.NewHook(serviceName, otellogrus.WithLoggerProvider(loggerProvider)) - // Also add trace context to stdout logs - logrus.AddHook(&tracingLogrusHook{}) + // Add hooks to standard logger (logs go to both stdout and OTLP) + logrus.AddHook(traceContextHook) + logrus.AddHook(otelHook) - // Register hook with audit logger (which uses its own logger instance) - RegisterAuditLogHook(otelHook) + // Register same hooks with audit logger (which uses its own logger instance) + registerAuditLogHook(traceContextHook) + registerAuditLogHook(otelHook) logrus.WithFields(logrus.Fields{ "endpoint": cfg.Endpoint, @@ -223,62 +345,3 @@ func (h *tracingLogrusHook) Fire(entry *logrus.Entry) error { entry.Data["span_id"] = spanCtx.SpanID().String() return nil } - -// OtelLogrusHook is a logrus hook that sends logs to an OTLP endpoint. -// It is exported so other loggers (like the audit logger) can use it. 
-type OtelLogrusHook struct { - logger otellog.Logger -} - -func (h *OtelLogrusHook) Levels() []logrus.Level { - return logrus.AllLevels -} - -func (h *OtelLogrusHook) Fire(entry *logrus.Entry) error { - ctx := entry.Context - if ctx == nil { - ctx = context.Background() - } - - // Convert logrus level to otel severity - var severity otellog.Severity - switch entry.Level { - case logrus.TraceLevel: - severity = otellog.SeverityTrace - case logrus.DebugLevel: - severity = otellog.SeverityDebug - case logrus.InfoLevel: - severity = otellog.SeverityInfo - case logrus.WarnLevel: - severity = otellog.SeverityWarn - case logrus.ErrorLevel: - severity = otellog.SeverityError - case logrus.FatalLevel, logrus.PanicLevel: - severity = otellog.SeverityFatal - default: - severity = otellog.SeverityInfo - } - - // Build log record - record := otellog.Record{} - record.SetTimestamp(entry.Time) - record.SetSeverity(severity) - record.SetBody(otellog.StringValue(entry.Message)) - - // Add logrus fields as attributes - attrs := make([]otellog.KeyValue, 0, len(entry.Data)) - for k, v := range entry.Data { - attrs = append(attrs, otellog.String(k, formatValue(v))) - } - record.AddAttributes(attrs...) - - h.logger.Emit(ctx, record) - return nil -} - -func formatValue(v any) string { - if err, ok := v.(error); ok { - return err.Error() - } - return fmt.Sprintf("%v", v) -} diff --git a/tracing/engine_test.go b/tracing/engine_test.go new file mode 100644 index 0000000000..c692606860 --- /dev/null +++ b/tracing/engine_test.go @@ -0,0 +1,292 @@ +/* + * Nuts node + * Copyright (C) 2026 Nuts community + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + */ + +package tracing + +import ( + "context" + "net/http" + "testing" + + "github.com/nuts-foundation/nuts-node/core" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace" +) + +// resetGlobalState resets global state for test isolation. +func resetGlobalState() { + enabled.Store(false) + nutsTracerProvider = nil + core.TracingHTTPTransport = nil +} + +func TestSetupTracing(t *testing.T) { + t.Run("disabled when endpoint is empty", func(t *testing.T) { + resetGlobalState() + t.Cleanup(resetGlobalState) + cfg := Config{Endpoint: ""} + + shutdown, err := setupTracing(cfg) + + require.NoError(t, err) + assert.NotNil(t, shutdown) + assert.False(t, Enabled(), "tracing should not be enabled when endpoint is empty") + assert.Nil(t, nutsTracerProvider, "provider should not be set when disabled") + assert.Nil(t, core.TracingHTTPTransport, "HTTP transport should not be set when disabled") + // Shutdown should be a no-op + assert.NoError(t, shutdown(context.Background())) + }) + + t.Run("enabled when endpoint is configured", func(t *testing.T) { + resetGlobalState() + t.Cleanup(resetGlobalState) + cfg := Config{ + Endpoint: "localhost:4318", + Insecure: true, + } + + shutdown, err := setupTracing(cfg) + + require.NoError(t, err) + require.NotNil(t, shutdown) + t.Cleanup(func() { _ = shutdown(context.Background()) }) + + // Verify global state is set up correctly + assert.True(t, Enabled(), "tracing should be enabled") + assert.NotNil(t, nutsTracerProvider, "provider
should be set") + assert.NotNil(t, core.TracingHTTPTransport, "HTTP transport should be set") + + // Verify HTTP transport wrapper works + wrappedTransport := core.TracingHTTPTransport(http.DefaultTransport) + assert.NotNil(t, wrappedTransport) + assert.NotEqual(t, http.DefaultTransport, wrappedTransport, "transport should be wrapped") + }) + +} + +func TestGetTracerProvider(t *testing.T) { + t.Run("returns global provider when nutsTracerProvider is nil", func(t *testing.T) { + originalProvider := nutsTracerProvider + t.Cleanup(func() { nutsTracerProvider = originalProvider }) + + nutsTracerProvider = nil + + provider := GetTracerProvider() + assert.NotNil(t, provider) + }) + + t.Run("returns nuts provider when set", func(t *testing.T) { + originalProvider := nutsTracerProvider + t.Cleanup(func() { nutsTracerProvider = originalProvider }) + + customProvider := sdktrace.NewTracerProvider() + nutsTracerProvider = customProvider + + provider := GetTracerProvider() + assert.Equal(t, customProvider, provider, "should return nuts-node's provider when set") + }) +} + +func TestTracingLogrusHook(t *testing.T) { + t.Run("no-op when context is nil", func(t *testing.T) { + hook := &tracingLogrusHook{} + entry := &logrus.Entry{ + Data: make(logrus.Fields), + } + err := hook.Fire(entry) + assert.NoError(t, err) + assert.NotContains(t, entry.Data, "trace_id") + assert.NotContains(t, entry.Data, "span_id") + }) + + t.Run("no-op when span context is invalid", func(t *testing.T) { + hook := &tracingLogrusHook{} + entry := &logrus.Entry{ + Context: context.Background(), + Data: make(logrus.Fields), + } + err := hook.Fire(entry) + assert.NoError(t, err) + assert.NotContains(t, entry.Data, "trace_id") + assert.NotContains(t, entry.Data, "span_id") + }) + + t.Run("adds trace context when span is valid", func(t *testing.T) { + hook := &tracingLogrusHook{} + // Create a valid span context + traceID, _ := trace.TraceIDFromHex("0102030405060708090a0b0c0d0e0f10") + spanID, _ := 
trace.SpanIDFromHex("0102030405060708") + spanCtx := trace.NewSpanContext(trace.SpanContextConfig{ + TraceID: traceID, + SpanID: spanID, + TraceFlags: trace.FlagsSampled, + }) + + // Use noop tracer but with our span context + ctx := trace.ContextWithSpanContext(context.Background(), spanCtx) + + entry := &logrus.Entry{ + Context: ctx, + Data: make(logrus.Fields), + } + err := hook.Fire(entry) + assert.NoError(t, err) + assert.Equal(t, "0102030405060708090a0b0c0d0e0f10", entry.Data["trace_id"]) + assert.Equal(t, "0102030405060708", entry.Data["span_id"]) + }) +} + +func TestEngine(t *testing.T) { + t.Run("Name", func(t *testing.T) { + engine := New() + assert.Equal(t, "Tracing", engine.Name()) + }) + + t.Run("Config returns pointer to config", func(t *testing.T) { + engine := New() + cfg := engine.Config() + assert.IsType(t, &Config{}, cfg) + }) + + t.Run("Configure with empty endpoint", func(t *testing.T) { + engine := New() + engine.config = Config{Endpoint: ""} + + err := engine.Configure(core.ServerConfig{}) + assert.NoError(t, err) + }) + + t.Run("Start is a no-op", func(t *testing.T) { + engine := New() + err := engine.Start() + assert.NoError(t, err) + }) + + t.Run("Shutdown when not configured", func(t *testing.T) { + engine := New() + err := engine.Shutdown() + assert.NoError(t, err) + }) + + t.Run("CheckHealth when not configured returns nil", func(t *testing.T) { + resetGlobalState() + t.Cleanup(resetGlobalState) + engine := New() + engine.config = Config{Endpoint: ""} + + health := engine.CheckHealth() + assert.Nil(t, health, "should not report health when tracing is not configured") + }) + + t.Run("CheckHealth when configured but not running returns nil", func(t *testing.T) { + resetGlobalState() + t.Cleanup(resetGlobalState) + // Endpoint is configured but enabled flag is false (e.g., after shutdown) + engine := New() + engine.config = Config{Endpoint: "localhost:4318"} + + health := engine.CheckHealth() + assert.Nil(t, health, "should not report 
health when tracing is not running") + }) + + t.Run("CheckHealth when configured and running", func(t *testing.T) { + resetGlobalState() + t.Cleanup(resetGlobalState) + enabled.Store(true) // Simulate running state + engine := New() + engine.config = Config{Endpoint: "localhost:4318"} + + health := engine.CheckHealth() + require.NotNil(t, health) + assert.Equal(t, core.HealthStatusUp, health["otlp"].Status) + assert.Equal(t, "localhost:4318", health["otlp"].Details) + }) + + t.Run("Diagnostics", func(t *testing.T) { + engine := New() + engine.config = Config{ + Endpoint: "localhost:4318", + ServiceName: "test-service", + Insecure: true, + } + + results := engine.Diagnostics() + require.Len(t, results, 1) + assert.Equal(t, "tracing", results[0].Name()) + + resultMap := results[0].Result().(map[string]interface{}) + assert.Equal(t, true, resultMap["enabled"]) + assert.Equal(t, "localhost:4318", resultMap["endpoint"]) + assert.Equal(t, "test-service", resultMap["service_name"]) + assert.Equal(t, true, resultMap["insecure"]) + }) + + t.Run("Diagnostics with default service name", func(t *testing.T) { + engine := New() + engine.config = Config{ + Endpoint: "localhost:4318", + } + + results := engine.Diagnostics() + resultMap := results[0].Result().(map[string]interface{}) + assert.Equal(t, "nuts-node", resultMap["service_name"]) + }) + + t.Run("Shutdown resets global state", func(t *testing.T) { + resetGlobalState() + t.Cleanup(resetGlobalState) + + // Simulate enabled state with all global variables set + enabled.Store(true) + nutsTracerProvider = sdktrace.NewTracerProvider() + core.TracingHTTPTransport = func(rt http.RoundTripper) http.RoundTripper { return rt } + + engine := New() + err := engine.Shutdown() + + assert.NoError(t, err) + assert.False(t, Enabled(), "enabled should be false after shutdown") + assert.Nil(t, nutsTracerProvider, "nutsTracerProvider should be nil after shutdown") + assert.Nil(t, core.TracingHTTPTransport, "TracingHTTPTransport should be nil 
after shutdown") + }) +} + +func TestRegisterAuditLogHook(t *testing.T) { + t.Run("callback is invoked when hook is registered", func(t *testing.T) { + // Save original and restore after test + originalCallback := registerAuditLogHook + t.Cleanup(func() { registerAuditLogHook = originalCallback }) + + var registeredHook logrus.Hook + + // Simulate what audit.init() does + RegisterAuditLogHook(func(hook logrus.Hook) { + registeredHook = hook + }) + + // Simulate registering a hook (what setupTracing does) + testHook := &tracingLogrusHook{} + registerAuditLogHook(testHook) + + assert.Equal(t, testHook, registeredHook) + }) +} From c83d4a26cb60dcaec2b162b3b2b443b22ca67d9a Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Wed, 21 Jan 2026 17:57:30 +0100 Subject: [PATCH 08/16] test(e2e): extend trace verification to check component spans Extend assertJaegerTrace to verify both services and span patterns, proving that gorm and http-client instrumentation work end-to-end. - Add span pattern verification (gorm.Query, http-client) - Add curl timeout to prevent hangs - Change service check from exact match to contains - Simplify argument parsing (space-separated) --- e2e-tests/oauth-flow/rfc021/do-test.sh | 4 +- e2e-tests/util.sh | 67 +++++++++++++++++--------- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/e2e-tests/oauth-flow/rfc021/do-test.sh b/e2e-tests/oauth-flow/rfc021/do-test.sh index 05d5feb9a0..2eb90fa14e 100755 --- a/e2e-tests/oauth-flow/rfc021/do-test.sh +++ b/e2e-tests/oauth-flow/rfc021/do-test.sh @@ -237,8 +237,8 @@ fi echo "------------------------------------" echo "Verifying trace in Jaeger..." echo "------------------------------------" -# Verify trace contains spans from both nodeA and nodeB, proving distributed tracing works -if ! assertJaegerTraceContainsServices "http://localhost:16686" "$TRACE_ID" "nodeA,nodeB"; then +# Verify distributed tracing: trace spans from both nodes with expected components (gorm, http-client) +if ! 
assertJaegerTrace "http://localhost:16686" "$TRACE_ID" "nodeA nodeB" "gorm.Query http-client"; then exitWithDockerLogs 1 fi diff --git a/e2e-tests/util.sh b/e2e-tests/util.sh index 64b0dcd2b5..e47e37ee74 100644 --- a/e2e-tests/util.sh +++ b/e2e-tests/util.sh @@ -189,32 +189,53 @@ urlencode() { jq -nr --arg raw "$raw" '$raw|@uri' } -# assertJaegerTraceContainsServices verifies that a trace exists in Jaeger with spans from specified services -# Retries up to 3 times to allow for trace batching/export delays -# Args: Jaeger URL, trace ID, comma-separated list of expected service names (e.g., "nodeA,nodeB") -function assertJaegerTraceContainsServices() { - JAEGER_URL=$1 - TRACE_ID=$2 - EXPECTED_SERVICES=$3 - for i in {1..3}; do - RESPONSE=$(curl -s "$JAEGER_URL/api/traces/$TRACE_ID") - TRACE_COUNT=$(echo "$RESPONSE" | jq '.data | length') - if [ "$TRACE_COUNT" -gt 0 ]; then - # Get unique services from the trace - ACTUAL_SERVICES=$(echo "$RESPONSE" | jq -r '[.data[0].processes[].serviceName] | unique | sort | join(",")') - # Sort expected services for comparison - SORTED_EXPECTED=$(echo "$EXPECTED_SERVICES" | tr ',' '\n' | sort | tr '\n' ',' | sed 's/,$//') - if [ "$ACTUAL_SERVICES" == "$SORTED_EXPECTED" ]; then - SPAN_COUNT=$(echo "$RESPONSE" | jq '.data[0].spans | length') - echo "Verified trace '$TRACE_ID' contains $SPAN_COUNT spans from services: $ACTUAL_SERVICES" - return 0 - else - echo "FAILED: Trace '$TRACE_ID' found but services are '$ACTUAL_SERVICES', expected '$SORTED_EXPECTED'" 1>&2 +# assertJaegerTrace verifies a trace exists with expected services and span patterns +# Fetches trace once and validates all expectations +# Args: Jaeger URL, trace ID, expected services (space-separated), span patterns (space-separated) +function assertJaegerTrace() { + local jaeger_url=$1 + local trace_id=$2 + local expected_services=$3 + local expected_patterns=$4 + + for attempt in {1..5}; do + local response=$(curl -s -m 10 "$jaeger_url/api/traces/$trace_id") + local 
trace_count=$(echo "$response" | jq '.data | length') + + if [ "$trace_count" -eq 0 ]; then + sleep 1 + continue + fi + + local actual_services=$(echo "$response" | jq -r '[.data[0].processes[].serviceName] | unique | sort | join(",")') + local span_names=$(echo "$response" | jq -r '.data[0].spans[].operationName') + + # Check each expected service is present + for svc in $expected_services; do + if ! echo "$actual_services" | grep -q "$svc"; then + echo "FAILED: Trace '$trace_id' missing service '$svc' (found: $actual_services)" 1>&2 return 1 fi + done + + # Check each expected span pattern is present + local missing="" + for pattern in $expected_patterns; do + if ! echo "$span_names" | grep -q "$pattern"; then + missing="$missing $pattern" + fi + done + if [ -n "$missing" ]; then + echo "FAILED: Trace '$trace_id' missing spans:$missing" 1>&2 + echo "Available: $(echo "$span_names" | sort -u | tr '\n' ', ')" 1>&2 + return 1 fi - sleep 1 + + local span_count=$(echo "$response" | jq '.data[0].spans | length') + echo "Verified trace '$trace_id': $span_count spans from $actual_services" + return 0 done - echo "FAILED: Trace '$TRACE_ID' not found in Jaeger after 3 attempts" 1>&2 + + echo "FAILED: Trace '$trace_id' not found after 5 attempts" 1>&2 return 1 } From e820c6a87b35689e152e8139308d36ec22fbf547 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Wed, 21 Jan 2026 19:24:08 +0100 Subject: [PATCH 09/16] fix(tracing): use parent TracerProvider when embedded When nuts-node is embedded in another application (e.g., nuts-knooppunt) that has already configured OpenTelemetry, nuts-node now correctly uses the parent's infrastructure instead of creating its own. 
Changes: - Detect embedded mode by checking for existing SDK TracerProvider - In embedded mode: reuse parent's TracerProvider, propagator, error handler, and logging - only set up HTTP transport wrapper and logrus hook for trace context - In standalone mode: full OTEL setup as before - Fix race condition on nutsTracerProvider using atomic.Pointer - Add timeout to shutdown (30s) and error recovery (5s) - Include shutdown errors via errors.Join instead of discarding - Check enabled flag in logrus hook to skip processing after shutdown - Extract setupHTTPTransport helper to reduce duplication --- tracing/engine.go | 112 ++++++++++++++++++++++++++--------------- tracing/engine_test.go | 70 ++++++++++++++++++++++---- 2 files changed, 131 insertions(+), 51 deletions(-) diff --git a/tracing/engine.go b/tracing/engine.go index 2998041ae5..510b692e7f 100644 --- a/tracing/engine.go +++ b/tracing/engine.go @@ -52,7 +52,8 @@ var enabled atomic.Bool // nutsTracerProvider holds nuts-node's own TracerProvider. // This is used instead of the global when nuts-node is embedded in another application. -var nutsTracerProvider *trace.TracerProvider +// Uses atomic.Pointer for thread-safe access during shutdown. +var nutsTracerProvider atomic.Pointer[trace.TracerProvider] // registerAuditLogHook is a function that registers a logrus hook with the audit logger. // It is set by the audit package during init() to avoid circular imports. @@ -109,13 +110,15 @@ func (e *Engine) Start() error { func (e *Engine) Shutdown() error { // Reset global state enabled.Store(false) - nutsTracerProvider = nil + nutsTracerProvider.Store(nil) core.TracingHTTPTransport = nil - // Call the shutdown function to flush and close exporters + // Call the shutdown function to flush and close exporters with timeout. // After this, any hook calls to logger.Emit() become no-ops per OTEL spec. 
if e.shutdown != nil { - return e.shutdown(context.Background()) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + return e.shutdown(ctx) } return nil } @@ -176,8 +179,8 @@ func SetEnabled(value bool) { // This should be used by nuts-node components instead of otel.GetTracerProvider() // to ensure spans are attributed to "nuts-node" service. func GetTracerProvider() oteltrace.TracerProvider { - if nutsTracerProvider != nil { - return nutsTracerProvider + if provider := nutsTracerProvider.Load(); provider != nil { + return provider } return otel.GetTracerProvider() } @@ -185,14 +188,63 @@ func GetTracerProvider() oteltrace.TracerProvider { // setupTracing initializes OpenTelemetry tracing with the given configuration. // Returns a shutdown function that should be called on application exit. // If cfg.Endpoint is empty, tracing is disabled and a no-op shutdown function is returned. -// When tracing is enabled, logs are sent to both stdout and the OTLP endpoint. +// When a parent TracerProvider exists (embedded mode), nuts-node uses the parent's OTEL +// infrastructure and only sets up the HTTP transport wrapper for internal API calls. +// When standalone, tracing is fully configured with OTLP exporters for traces and logs. func setupTracing(cfg Config) (shutdown func(context.Context) error, err error) { if cfg.Endpoint == "" { logrus.Info("Tracing disabled (no endpoint configured)") return func(context.Context) error { return nil }, nil } - // Enable tracing flag for HTTP clients and other components + // Check for parent TracerProvider first, before modifying any global state. + // Per OpenTelemetry best practices, libraries should use the application's infrastructure, + // not create their own. We detect embedding by checking if a SDK TracerProvider is set. + // Note: Custom TracerProvider implementations won't be detected, but we won't overwrite + // the global provider, so they'll continue to work. 
+ _, isEmbedded := otel.GetTracerProvider().(*trace.TracerProvider) + + if isEmbedded { + return setupEmbeddedTracing() + } + return setupStandaloneTracing(cfg) +} + +// setupEmbeddedTracing configures tracing when nuts-node is embedded in another application. +// In this mode, nuts-node reuses the parent's TracerProvider, propagator, and error handler. +// All component tracing (GORM, HTTP server/client, etc.) still works because they call +// GetTracerProvider(), which returns the parent's provider when nutsTracerProvider is nil. +// This function only needs to set up the HTTP transport wrapper and logrus hook. +func setupEmbeddedTracing() (func(context.Context) error, error) { + enabled.Store(true) + setupHTTPTransport() + + // Add trace context hook to inject trace_id/span_id into log entries. + // This works with any TracerProvider and doesn't require OTLP export. + traceContextHook := &tracingLogrusHook{} + logrus.AddHook(traceContextHook) + registerAuditLogHook(traceContextHook) + + logrus.Info("Tracing enabled (embedded mode, using parent's TracerProvider)") + + return func(context.Context) error { return nil }, nil +} + +// setupHTTPTransport configures the HTTP transport wrapper for internal API calls. +// Uses GetTracerProvider() so it works in both embedded and standalone modes. +func setupHTTPTransport() { + core.TracingHTTPTransport = func(transport http.RoundTripper) http.RoundTripper { + return otelhttp.NewTransport(transport, + otelhttp.WithTracerProvider(GetTracerProvider()), + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "internal-api: " + r.Method + " " + r.URL.Path + })) + } +} + +// setupStandaloneTracing configures full OTEL infrastructure when running standalone. +// Sets up trace exporter, log exporter, propagator, error handler, and logrus hooks. 
+func setupStandaloneTracing(cfg Config) (shutdown func(context.Context) error, err error) { enabled.Store(true) ctx := context.Background() @@ -208,16 +260,15 @@ func setupTracing(cfg Config) (shutdown func(context.Context) error, err error) return errs } - // Handle errors by cleaning up already-created resources handleErr := func(err error) (func(context.Context) error, error) { enabled.Store(false) shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() - _ = shutdown(shutdownCtx) - return nil, err + shutdownErr := shutdown(shutdownCtx) + return nil, errors.Join(err, shutdownErr) } - // Set up OpenTelemetry error handler to integrate with logrus + // Set up OpenTelemetry error handler otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) { logrus.WithError(err).Error("OpenTelemetry SDK error") })) @@ -244,44 +295,29 @@ func setupTracing(cfg Config) (shutdown func(context.Context) error, err error) return handleErr(err) } - // Set up OTLP HTTP exporter - opts := []otlptracehttp.Option{ + // Set up OTLP trace exporter + traceOpts := []otlptracehttp.Option{ otlptracehttp.WithEndpoint(cfg.Endpoint), } if cfg.Insecure { - opts = append(opts, otlptracehttp.WithInsecure()) + traceOpts = append(traceOpts, otlptracehttp.WithInsecure()) } - traceExporter, err := otlptracehttp.New(ctx, opts...) + traceExporter, err := otlptracehttp.New(ctx, traceOpts...) if err != nil { return handleErr(err) } shutdownFuncs = append(shutdownFuncs, traceExporter.Shutdown) - // Set up trace provider with batch exporter + // Set up trace provider tracerProvider := trace.NewTracerProvider( trace.WithBatcher(traceExporter), trace.WithResource(res), ) shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown) - // Store nuts-node's provider for use by GetTracerProvider() - nutsTracerProvider = tracerProvider - - // Only set as global if no other provider exists (i.e., not embedded). - // When embedded, the parent application owns the global provider. 
- _, hasParentProvider := otel.GetTracerProvider().(*trace.TracerProvider) - if !hasParentProvider { - otel.SetTracerProvider(tracerProvider) - } - - // Set up HTTP transport wrapper for core package (avoids circular import) - core.TracingHTTPTransport = func(transport http.RoundTripper) http.RoundTripper { - return otelhttp.NewTransport(transport, - otelhttp.WithTracerProvider(tracerProvider), - otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { - return "internal-api: " + r.Method + " " + r.URL.Path - })) - } + nutsTracerProvider.Store(tracerProvider) + otel.SetTracerProvider(tracerProvider) + setupHTTPTransport() // Set up OTLP log exporter logOpts := []otlploghttp.Option{ @@ -304,15 +340,11 @@ func setupTracing(cfg Config) (shutdown func(context.Context) error, err error) shutdownFuncs = append(shutdownFuncs, loggerProvider.Shutdown) // Create hooks for log correlation and OTLP export - // Uses official otellogrus bridge: https://pkg.go.dev/go.opentelemetry.io/contrib/bridges/otellogrus traceContextHook := &tracingLogrusHook{} otelHook := otellogrus.NewHook(serviceName, otellogrus.WithLoggerProvider(loggerProvider)) - // Add hooks to standard logger (logs go to both stdout and OTLP) logrus.AddHook(traceContextHook) logrus.AddHook(otelHook) - - // Register same hooks with audit logger (which uses its own logger instance) registerAuditLogHook(traceContextHook) registerAuditLogHook(otelHook) @@ -333,7 +365,7 @@ func (h *tracingLogrusHook) Levels() []logrus.Level { } func (h *tracingLogrusHook) Fire(entry *logrus.Entry) error { - if entry.Context == nil { + if !enabled.Load() || entry.Context == nil { return nil } span := oteltrace.SpanFromContext(entry.Context) diff --git a/tracing/engine_test.go b/tracing/engine_test.go index c692606860..87be42d331 100644 --- a/tracing/engine_test.go +++ b/tracing/engine_test.go @@ -28,15 +28,20 @@ import ( "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" 
+ "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/propagation" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/trace" + "go.opentelemetry.io/otel/trace/noop" ) // resetGlobalState resets global state for test isolation. func resetGlobalState() { enabled.Store(false) - nutsTracerProvider = nil + nutsTracerProvider.Store(nil) core.TracingHTTPTransport = nil + // Reset global TracerProvider to noop provider to avoid test interference + otel.SetTracerProvider(noop.NewTracerProvider()) } func TestSetupTracing(t *testing.T) { @@ -50,7 +55,7 @@ func TestSetupTracing(t *testing.T) { require.NoError(t, err) assert.NotNil(t, shutdown) assert.False(t, Enabled(), "tracing should not be enabled when endpoint is empty") - assert.Nil(t, nutsTracerProvider, "provider should not be set when disabled") + assert.Nil(t, nutsTracerProvider.Load(), "provider should not be set when disabled") assert.Nil(t, core.TracingHTTPTransport, "HTTP transport should not be set when disabled") // Shutdown should be a no-op assert.NoError(t, shutdown(context.Background())) @@ -72,7 +77,7 @@ func TestSetupTracing(t *testing.T) { // Verify global state is set up correctly assert.True(t, Enabled(), "tracing should be enabled") - assert.NotNil(t, nutsTracerProvider, "provider should be set") + assert.NotNil(t, nutsTracerProvider.Load(), "provider should be set") assert.NotNil(t, core.TracingHTTPTransport, "HTTP transport should be set") // Verify HTTP transport wrapper works @@ -81,25 +86,64 @@ func TestSetupTracing(t *testing.T) { assert.NotEqual(t, http.DefaultTransport, wrappedTransport, "transport should be wrapped") }) + t.Run("uses parent TracerProvider when embedded", func(t *testing.T) { + resetGlobalState() + + // Simulate parent application setting up its own TracerProvider and propagator + parentProvider := sdktrace.NewTracerProvider() + parentPropagator := propagation.NewCompositeTextMapPropagator(propagation.Baggage{}) + t.Cleanup(func() { + _ = 
parentProvider.Shutdown(context.Background()) + resetGlobalState() + }) + otel.SetTracerProvider(parentProvider) + otel.SetTextMapPropagator(parentPropagator) + + cfg := Config{ + Endpoint: "localhost:4318", + Insecure: true, + } + + shutdown, err := setupTracing(cfg) + + require.NoError(t, err) + require.NotNil(t, shutdown) + t.Cleanup(func() { _ = shutdown(context.Background()) }) + + // Tracing should be enabled + assert.True(t, Enabled(), "tracing should be enabled in embedded mode") + + // In embedded mode, nutsTracerProvider should stay nil + assert.Nil(t, nutsTracerProvider.Load(), "should not create own provider when embedded") + + // GetTracerProvider should return the parent's provider + assert.Equal(t, parentProvider, GetTracerProvider(), "should use parent's TracerProvider") + + // Parent's propagator should not be overwritten + assert.Equal(t, parentPropagator, otel.GetTextMapPropagator(), "should not overwrite parent's propagator") + + // HTTP transport should still be set up (using parent's provider via GetTracerProvider) + assert.NotNil(t, core.TracingHTTPTransport, "HTTP transport should be set") + }) } func TestGetTracerProvider(t *testing.T) { t.Run("returns global provider when nutsTracerProvider is nil", func(t *testing.T) { - originalProvider := nutsTracerProvider - t.Cleanup(func() { nutsTracerProvider = originalProvider }) + originalProvider := nutsTracerProvider.Load() + t.Cleanup(func() { nutsTracerProvider.Store(originalProvider) }) - nutsTracerProvider = nil + nutsTracerProvider.Store(nil) provider := GetTracerProvider() assert.NotNil(t, provider) }) t.Run("returns nuts provider when set", func(t *testing.T) { - originalProvider := nutsTracerProvider - t.Cleanup(func() { nutsTracerProvider = originalProvider }) + originalProvider := nutsTracerProvider.Load() + t.Cleanup(func() { nutsTracerProvider.Store(originalProvider) }) customProvider := sdktrace.NewTracerProvider() - nutsTracerProvider = customProvider + 
nutsTracerProvider.Store(customProvider) provider := GetTracerProvider() assert.Equal(t, customProvider, provider, "should return nuts-node's provider when set") @@ -131,6 +175,10 @@ func TestTracingLogrusHook(t *testing.T) { }) t.Run("adds trace context when span is valid", func(t *testing.T) { + // Enable tracing for this test (hook checks enabled flag) + enabled.Store(true) + t.Cleanup(func() { enabled.Store(false) }) + hook := &tracingLogrusHook{} // Create a valid span context traceID, _ := trace.TraceIDFromHex("0102030405060708090a0b0c0d0e0f10") @@ -257,7 +305,7 @@ func TestEngine(t *testing.T) { // Simulate enabled state with all global variables set enabled.Store(true) - nutsTracerProvider = sdktrace.NewTracerProvider() + nutsTracerProvider.Store(sdktrace.NewTracerProvider()) core.TracingHTTPTransport = func(rt http.RoundTripper) http.RoundTripper { return rt } engine := New() @@ -265,7 +313,7 @@ func TestEngine(t *testing.T) { assert.NoError(t, err) assert.False(t, Enabled(), "enabled should be false after shutdown") - assert.Nil(t, nutsTracerProvider, "nutsTracerProvider should be nil after shutdown") + assert.Nil(t, nutsTracerProvider.Load(), "nutsTracerProvider should be nil after shutdown") assert.Nil(t, core.TracingHTTPTransport, "TracingHTTPTransport should be nil after shutdown") }) } From c69d0c9533a731b9807833cd6537d22fbc9811f4 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Wed, 21 Jan 2026 23:41:19 +0100 Subject: [PATCH 10/16] fix(tracing): add explicit TracerProvider to all otelhttp.NewTransport calls This ensures consistent tracing behavior in embedded mode where GetTracerProvider() returns the parent's provider instead of the global. 
--- crypto/storage/external/client.go | 4 +++- crypto/storage/vault/vault.go | 1 + http/client/client.go | 5 ++++- pki/validator.go | 4 +++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/crypto/storage/external/client.go b/crypto/storage/external/client.go index bd9ea3d025..94db104788 100644 --- a/crypto/storage/external/client.go +++ b/crypto/storage/external/client.go @@ -89,7 +89,9 @@ func NewAPIClient(config Config) (spi.Storage, error) { transport = otelhttp.NewTransport(http.DefaultTransport, otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { return "crypto-storage: " + r.Method + " " + r.URL.Path - })) + }), + otelhttp.WithTracerProvider(tracing.GetTracerProvider()), + ) } httpClient := &http.Client{ Transport: transport, diff --git a/crypto/storage/vault/vault.go b/crypto/storage/vault/vault.go index f87e9feb75..8bb716c8cc 100644 --- a/crypto/storage/vault/vault.go +++ b/crypto/storage/vault/vault.go @@ -122,6 +122,7 @@ func configureVaultClient(cfg Config) (*vault.Client, error) { otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { return "vault: " + r.Method + " " + r.URL.Path }), + otelhttp.WithTracerProvider(tracing.GetTracerProvider()), ) } diff --git a/http/client/client.go b/http/client/client.go index b31bfe0cf1..6a6f73b802 100644 --- a/http/client/client.go +++ b/http/client/client.go @@ -85,7 +85,10 @@ func New(timeout time.Duration) *StrictHTTPClient { // getTransport wraps the given transport with OpenTelemetry instrumentation if tracing is enabled. 
func getTransport(base http.RoundTripper) http.RoundTripper { if tracing.Enabled() { - return otelhttp.NewTransport(base, otelhttp.WithSpanNameFormatter(httpSpanName)) + return otelhttp.NewTransport(base, + otelhttp.WithSpanNameFormatter(httpSpanName), + otelhttp.WithTracerProvider(tracing.GetTracerProvider()), + ) } return base } diff --git a/pki/validator.go b/pki/validator.go index e5469bcced..1fef65b4de 100644 --- a/pki/validator.go +++ b/pki/validator.go @@ -97,7 +97,9 @@ func newValidator(config Config) (*validator, error) { transport = otelhttp.NewTransport(http.DefaultTransport, otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { return "pki: " + r.Method + " " + r.URL.Path - })) + }), + otelhttp.WithTracerProvider(tracing.GetTracerProvider()), + ) } httpClient := &http.Client{ Transport: transport, From 84f2c1ebbba71c4fb939d8837b7c33c5a81441c1 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Thu, 22 Jan 2026 11:48:13 +0100 Subject: [PATCH 11/16] chore: remove redundant comment --- cmd/root.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/root.go b/cmd/root.go index 811108486d..b2737c9b3a 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -124,7 +124,7 @@ func startServer(ctx context.Context, system *core.System) error { logrus.Info(fmt.Sprintf("Build info: \n%s", core.BuildInfo())) logrus.Info(fmt.Sprintf("Config: \n%s", system.Config.PrintConfig())) - // check config on all engines (also initializes tracing) + // check config on all engines if err := system.Configure(); err != nil { return err } From 9bbead10bc2f443d227c98dc68e58b66ccb16cf0 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Thu, 22 Jan 2026 12:21:35 +0100 Subject: [PATCH 12/16] docs: move tracing config to alphabetical position with own section Move tracing.* options to their own **Tracing** section placed alphabetically between Storage and policy. Also fix tracing.servicename to have default value in defaults column rather than description. 
--- docs/pages/deployment/server_options.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/pages/deployment/server_options.rst b/docs/pages/deployment/server_options.rst index 12a40f1e24..27fcb7e04f 100755 --- a/docs/pages/deployment/server_options.rst +++ b/docs/pages/deployment/server_options.rst @@ -15,9 +15,6 @@ url Public facing URL of the server (required). Must be HTTPS when strictmode is set. verbosity info Log level (trace, debug, info, warn, error) httpclient.timeout 30s Request time-out for HTTP clients, such as '10s'. Refer to Golang's 'time.Duration' syntax for a more elaborate description of the syntax. - tracing.endpoint OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). When empty, tracing is disabled. - tracing.insecure false Disable TLS for the OTLP connection. - tracing.servicename Service name reported to the tracing backend. Defaults to 'nuts-node'. **Auth** auth.authorizationendpoint.enabled false enables the v2 API's OAuth2 Authorization Endpoint, used by OpenID4VP and OpenID4VCI. This flag might be removed in a future version (or its default become 'true') as the use cases and implementation of OpenID4VP and OpenID4VCI mature. **Crypto** @@ -60,7 +57,11 @@ storage.session.redis.sentinel.password Password for authenticating to Redis Sentinels. storage.session.redis.sentinel.username Username for authenticating to Redis Sentinels. storage.session.redis.tls.truststorefile PEM file containing the trusted CA certificate(s) for authenticating remote Redis session servers. Can only be used when connecting over TLS (use 'rediss://' as scheme in address). - storage.sql.connection Connection string for the SQL database. If not set it, defaults to a SQLite database stored inside the configured data directory. Note: using SQLite is not recommended in production environments. 
If using SQLite anyways, remember to enable foreign keys ('_foreign_keys=on') and the write-ahead-log ('_journal_mode=WAL'). + storage.sql.connection Connection string for the SQL database. If not set it, defaults to a SQLite database stored inside the configured data directory. Note: using SQLite is not recommended in production environments. If using SQLite anyways, remember to enable foreign keys ('_foreign_keys=on') and the write-ahead-log ('_journal_mode=WAL'). + **Tracing** + tracing.endpoint OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). When empty, tracing is disabled. + tracing.insecure false Disable TLS for the OTLP connection. + tracing.servicename nuts-node Service name reported to the tracing backend. **policy** policy.directory ./config/policy Directory to read policy files from. Policy files are JSON files that contain a scope to PresentationDefinition mapping. ======================================== =================================================================================================================================================================================================================================================================================================================================================================================================================================================================== ============================================================================================================================================================================================================================================================================================================================================ From fc0f0fa0c0adfe804902f342934f15afd5518d95 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Thu, 22 Jan 2026 12:25:57 +0100 Subject: [PATCH 13/16] chore: remove redundant tracing comments from HTTP client functions The getTransport function already 
documents the tracing behavior. --- http/client/client.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/http/client/client.go b/http/client/client.go index 6a6f73b802..2c9e4308df 100644 --- a/http/client/client.go +++ b/http/client/client.go @@ -71,7 +71,6 @@ func limitedReadAll(reader io.Reader) ([]byte, error) { } // New creates a new HTTP client with the given timeout. -// If tracing is enabled, the transport will be wrapped with OpenTelemetry instrumentation. func New(timeout time.Duration) *StrictHTTPClient { transport := getTransport(SafeHttpTransport) return &StrictHTTPClient{ @@ -95,7 +94,6 @@ func getTransport(base http.RoundTripper) http.RoundTripper { // NewWithCache creates a new HTTP client with the given timeout. // It uses the DefaultCachingTransport as the underlying transport. -// If tracing is enabled, the transport will be wrapped with OpenTelemetry instrumentation. func NewWithCache(timeout time.Duration) *StrictHTTPClient { transport := getTransport(DefaultCachingTransport) return &StrictHTTPClient{ @@ -109,7 +107,6 @@ func NewWithCache(timeout time.Duration) *StrictHTTPClient { // NewWithTLSConfig creates a new HTTP client with the given timeout and TLS configuration. // It copies the http.DefaultTransport and sets the TLSClientConfig to the given tls.Config. // As such, it can't be used in conjunction with the CachingRoundTripper. -// If tracing is enabled, the transport will be wrapped with OpenTelemetry instrumentation. 
func NewWithTLSConfig(timeout time.Duration, tlsConfig *tls.Config) *StrictHTTPClient { transport := SafeHttpTransport.Clone() transport.TLSClientConfig = tlsConfig From ad81e6fa56bf333d7a5ddff789c420b23152694a Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Thu, 22 Jan 2026 12:29:18 +0100 Subject: [PATCH 14/16] chore: remove misleading TracerProvider comment --- http/engine.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/http/engine.go b/http/engine.go index e58b5a6a0f..d2c1533056 100644 --- a/http/engine.go +++ b/http/engine.go @@ -116,8 +116,6 @@ func (h *Engine) applyTracingMiddleware(echoServer core.EchoRouter) { path := c.Request().URL.Path return matchesPath(path, HealthPath) || matchesPath(path, MetricsPath) || matchesPath(path, StatusPath) } - // Use nuts-node's own TracerProvider to ensure spans are attributed to "nuts-node" service, - // even when embedded in another application that has its own TracerProvider. echoServer.Use(otelecho.Middleware(moduleName, otelecho.WithSkipper(skipper), otelecho.WithTracerProvider(tracing.GetTracerProvider()), From a48bc75716eb75f61c7564d709141bc22ba2785a Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Thu, 22 Jan 2026 12:42:51 +0100 Subject: [PATCH 15/16] chore: add TODO for potential dependency injection refactor --- tracing/engine.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tracing/engine.go b/tracing/engine.go index 510b692e7f..9b79af8f2b 100644 --- a/tracing/engine.go +++ b/tracing/engine.go @@ -47,6 +47,9 @@ const ( defaultServiceName = "nuts-node" ) +// TODO: Global static vars have caused testing issues before, requiring moves to instantiated types. +// These may need to change to dependency-injected style later if similar issues arise. + // enabled is set to true when OpenTelemetry tracing is configured. 
var enabled atomic.Bool From 4573c7e455cac2cd69b511ee1b0090643366b6a6 Mon Sep 17 00:00:00 2001 From: Joris Scharp Date: Thu, 22 Jan 2026 14:16:25 +0100 Subject: [PATCH 16/16] style: fix import ordering --- mock/mock_echo.go | 2 +- storage/engine.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mock/mock_echo.go b/mock/mock_echo.go index 4626c80d22..6801d9c625 100644 --- a/mock/mock_echo.go +++ b/mock/mock_echo.go @@ -11,8 +11,8 @@ import ( url "net/url" reflect "reflect" - gomock "go.uber.org/mock/gomock" v4 "github.com/labstack/echo/v4" + gomock "go.uber.org/mock/gomock" ) // MockContext is a mock of Context interface diff --git a/storage/engine.go b/storage/engine.go index 50c8da06ee..0636d493b1 100644 --- a/storage/engine.go +++ b/storage/engine.go @@ -33,8 +33,8 @@ import ( "github.com/nuts-foundation/go-stoabs" "github.com/nuts-foundation/nuts-node/core" "github.com/nuts-foundation/nuts-node/storage/log" - "github.com/nuts-foundation/nuts-node/tracing" "github.com/nuts-foundation/nuts-node/storage/sql_migrations" + "github.com/nuts-foundation/nuts-node/tracing" "github.com/nuts-foundation/sqlite" "github.com/pressly/goose/v3" "github.com/redis/go-redis/v9"