Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
471cd55
feat(tracing): add OpenTelemetry tracing support
JorisHeadease Dec 3, 2025
8e93287
fix(tracing): add context to log calls for trace correlation
JorisHeadease Dec 3, 2025
55bba7a
feat(tracing): support embedding with separate service attribution
JorisHeadease Dec 4, 2025
707ff18
fix(tracing): propagate context to GORM for proper span hierarchy
JorisHeadease Dec 4, 2025
d015bd7
Merge origin/master into feat/otel-tracing
JorisHeadease Dec 4, 2025
f4ed8b2
📝 qlty fmt
qltysh[bot] Dec 4, 2025
2b045ab
Merge master into feat/otel-tracing
JorisHeadease Jan 20, 2026
d3b7d36
Add e2e test for distributed OpenTelemetry tracing
JorisHeadease Jan 20, 2026
d57ff6a
refactor(tracing): extract tracing into dedicated engine package
JorisHeadease Jan 21, 2026
c83d4a2
test(e2e): extend trace verification to check component spans
JorisHeadease Jan 21, 2026
e820c6a
fix(tracing): use parent TracerProvider when embedded
JorisHeadease Jan 21, 2026
c69d0c9
fix(tracing): add explicit TracerProvider to all otelhttp.NewTranspor…
JorisHeadease Jan 21, 2026
84f2c1e
chore: remove redundant comment
JorisHeadease Jan 22, 2026
9bbead1
docs: move tracing config to alphabetical position with own section
JorisHeadease Jan 22, 2026
fc0f0fa
chore: remove redundant tracing comments from HTTP client functions
JorisHeadease Jan 22, 2026
ad81e6f
chore: remove misleading TracerProvider comment
JorisHeadease Jan 22, 2026
a48bc75
chore: add TODO for potential dependency injection refactor
JorisHeadease Jan 22, 2026
4573c7e
style: fix import ordering
JorisHeadease Jan 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions audit/audit.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/sirupsen/logrus"
"strings"
"sync"

"github.com/nuts-foundation/nuts-node/tracing"
"github.com/sirupsen/logrus"
)

const (
Expand Down Expand Up @@ -61,6 +63,15 @@ const auditLogLevel = "audit"
var auditLoggerInstance *logrus.Logger
var initAuditLoggerOnce = &sync.Once{}

func init() {
// Register callback so tracing can add hooks to the audit logger.
// This is needed because the audit logger is a separate logrus instance,
// and we can't import audit from tracing due to circular dependencies.
tracing.RegisterAuditLogHook(func(hook logrus.Hook) {
auditLogger().AddHook(hook)
})
}

// auditLogger returns the initialized logger instance intended for audit logging.
func auditLogger() *logrus.Logger {
initAuditLoggerOnce.Do(func() {
Expand Down Expand Up @@ -180,7 +191,7 @@ func Log(ctx context.Context, logger *logrus.Entry, eventName string) *logrus.En
panic("audit: eventName is empty")
}

return auditLogger().WithFields(logger.Data).
return auditLogger().WithContext(ctx).WithFields(logger.Data).
WithField("actor", info.Actor).
WithField("operation", info.Operation).
WithField("event", eventName)
Expand Down
4 changes: 2 additions & 2 deletions auth/api/iam/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ func callbackRequestToError(request CallbackRequestObject, redirectURI *url.URL)
return requestErr
}

func (r Wrapper) RetrieveAccessToken(_ context.Context, request RetrieveAccessTokenRequestObject) (RetrieveAccessTokenResponseObject, error) {
func (r Wrapper) RetrieveAccessToken(ctx context.Context, request RetrieveAccessTokenRequestObject) (RetrieveAccessTokenResponseObject, error) {
// get access token from store
var token TokenResponse
err := r.accessTokenClientStore().Get(request.SessionID, &token)
Expand All @@ -336,7 +336,7 @@ func (r Wrapper) RetrieveAccessToken(_ context.Context, request RetrieveAccessTo
// change this when tokens can be cached
err = r.accessTokenClientStore().Delete(request.SessionID)
if err != nil {
log.Logger().WithError(err).Warn("Failed to delete access token")
log.Logger().WithContext(ctx).WithError(err).Warn("Failed to delete access token")
}
// return access token
return RetrieveAccessToken200JSONResponse(token), nil
Expand Down
6 changes: 6 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ import (
"github.com/nuts-foundation/nuts-node/policy"
"github.com/nuts-foundation/nuts-node/storage"
storageCmd "github.com/nuts-foundation/nuts-node/storage/cmd"
"github.com/nuts-foundation/nuts-node/tracing"
tracingCmd "github.com/nuts-foundation/nuts-node/tracing/cmd"
"github.com/nuts-foundation/nuts-node/vcr"
openid4vciAPI "github.com/nuts-foundation/nuts-node/vcr/api/openid4vci/v0"
vcrAPI "github.com/nuts-foundation/nuts-node/vcr/api/vcr/v2"
Expand Down Expand Up @@ -224,6 +226,9 @@ func CreateSystem(shutdownCallback context.CancelFunc) *core.System {
system.RegisterRoutes(&discoveryServerAPI.Wrapper{Server: discoveryInstance})

// Register engines
// Tracing engine MUST be registered first to ensure tracing is active before other engines configure/start,
// and shuts down last (due to reverse shutdown order) to capture all logs/spans.
system.RegisterEngine(tracing.New())
// without dependencies
system.RegisterEngine(pkiInstance)
system.RegisterEngine(storageInstance)
Expand Down Expand Up @@ -340,6 +345,7 @@ func serverConfigFlags() *pflag.FlagSet {
set.AddFlagSet(goldenHammerCmd.FlagSet())
set.AddFlagSet(discoveryCmd.FlagSet())
set.AddFlagSet(policy.FlagSet())
set.AddFlagSet(tracingCmd.FlagSet())

return set
}
2 changes: 1 addition & 1 deletion cmd/root_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ func Test_CreateSystem(t *testing.T) {
system.VisitEngines(func(engine core.Engine) {
numEngines++
})
assert.Equal(t, 17, numEngines)
assert.Equal(t, 18, numEngines)
}

func Test_ClientCommand_ErrorHandlers(t *testing.T) {
Expand Down
19 changes: 10 additions & 9 deletions core/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ package core
import (
"context"
"fmt"
"github.com/sirupsen/logrus"
"github.com/spf13/pflag"
"os"
"strings"

"github.com/sirupsen/logrus"
"github.com/spf13/pflag"
)

// Routable enables connecting a REST API to the echo server. The API wrappers should implement this interface
Expand Down Expand Up @@ -95,6 +96,7 @@ func (system *System) Start() error {
}

// Shutdown shuts down all engines in the system.
// Engines are shut down in reverse order of registration.
func (system *System) Shutdown() error {
var engines []Runnable
system.VisitEngines(func(engine Engine) {
Expand All @@ -115,24 +117,23 @@ func (system *System) Shutdown() error {
}

// Configure configures all engines in the system.
// Engines are configured in order of registration (tracing engine should be first).
func (system *System) Configure() error {
coreLogger.Debugf("Creating datadir: %s", system.Config.Datadir)
var err error
if err = os.MkdirAll(system.Config.Datadir, os.ModePerm); err != nil {
if err := os.MkdirAll(system.Config.Datadir, os.ModePerm); err != nil {
return fmt.Errorf("unable to create datadir (dir=%s): %w", system.Config.Datadir, err)
}
return system.VisitEnginesE(func(engine Engine) error {
// only if Engine is dynamically configurable
name := engineName(engine)
if m, ok := engine.(Configurable); ok {
coreLogger.Debugf("Configuring %s", name)
err = m.Configure(*system.Config)
if err := m.Configure(*system.Config); err != nil {
return fmt.Errorf("unable to configure %s: %w", name, err)
}
coreLogger.Debugf("Configured %s", name)
}
if err != nil {
err = fmt.Errorf("unable to configure %s: %w", name, err)
}
return err
return nil
})
}

Expand Down
18 changes: 15 additions & 3 deletions core/http_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,17 @@ package core
import (
"context"
"fmt"
"github.com/sirupsen/logrus"
"io"
"net/http"

"github.com/sirupsen/logrus"
)

// TracingHTTPTransport wraps an http.RoundTripper with OpenTelemetry tracing instrumentation.
// It is set by the tracing package when tracing is enabled, and nil when disabled.
// This callback pattern avoids circular imports between core and tracing packages.
var TracingHTTPTransport func(http.RoundTripper) http.RoundTripper

// HttpResponseBodyLogClipAt is the maximum length of a response body to log.
// If the response body is longer than this, it will be truncated.
const HttpResponseBodyLogClipAt = 200
Expand Down Expand Up @@ -98,8 +104,14 @@ func (w httpRequestDoerAdapter) Do(req *http.Request) (*http.Response, error) {
// If the given authorization token builder is non-nil, it calls it and passes the resulting token as bearer token with requests.
func CreateHTTPInternalClient(cfg ClientConfig, generator AuthorizationTokenGenerator) (HTTPRequestDoer, error) {
var result *httpRequestDoerAdapter
client := &http.Client{}
client.Timeout = cfg.Timeout
var transport http.RoundTripper = http.DefaultTransport
if TracingHTTPTransport != nil {
transport = TracingHTTPTransport(transport)
}
client := &http.Client{
Transport: transport,
Timeout: cfg.Timeout,
}

result = &httpRequestDoerAdapter{
fn: client.Do,
Expand Down
9 changes: 5 additions & 4 deletions core/server_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,16 @@ import (
"crypto/x509"
"errors"
"fmt"
"net/url"
"reflect"
"strings"
"time"

"github.com/knadh/koanf/providers/env"
"github.com/knadh/koanf/providers/posflag"
"github.com/knadh/koanf/v2"
"github.com/sirupsen/logrus"
"github.com/spf13/pflag"
"net/url"
"reflect"
"strings"
"time"
)

const defaultConfigFile = "./config/nuts.yaml"
Expand Down
19 changes: 17 additions & 2 deletions crypto/storage/external/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ import (
"github.com/nuts-foundation/nuts-node/core"
"github.com/nuts-foundation/nuts-node/crypto/storage/spi"
"github.com/nuts-foundation/nuts-node/crypto/util"
"github.com/nuts-foundation/nuts-node/tracing"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
)

// StorageType is the name of this storage type, used in health check reports and configuration.
Expand Down Expand Up @@ -82,8 +84,21 @@ func NewAPIClient(config Config) (spi.Storage, error) {
if _, err := url.ParseRequestURI(config.Address); err != nil {
return nil, err
}
client, _ := NewClientWithResponses(config.Address, WithHTTPClient(&http.Client{Timeout: config.Timeout}))
return &APIClient{httpClient: client}, nil
var transport http.RoundTripper = http.DefaultTransport
if tracing.Enabled() {
transport = otelhttp.NewTransport(http.DefaultTransport,
otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string {
return "crypto-storage: " + r.Method + " " + r.URL.Path
}),
otelhttp.WithTracerProvider(tracing.GetTracerProvider()),
)
}
httpClient := &http.Client{
Transport: transport,
Timeout: config.Timeout,
}
apiClient, _ := NewClientWithResponses(config.Address, WithHTTPClient(httpClient))
return &APIClient{httpClient: apiClient}, nil
}

func (c APIClient) GetPrivateKey(ctx context.Context, keyName string, _ string) (crypto.Signer, error) {
Expand Down
20 changes: 18 additions & 2 deletions crypto/storage/vault/vault.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ import (
"crypto"
"errors"
"fmt"
"net/http"
"path/filepath"
"time"

vault "github.com/hashicorp/vault/api"
"github.com/nuts-foundation/nuts-node/core"
"github.com/nuts-foundation/nuts-node/crypto/log"
"github.com/nuts-foundation/nuts-node/crypto/storage/spi"
"github.com/nuts-foundation/nuts-node/crypto/util"
"path/filepath"
"time"
"github.com/nuts-foundation/nuts-node/tracing"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
)

const privateKeyPathName = "nuts-private-keys"
Expand Down Expand Up @@ -110,6 +114,18 @@ func (v vaultKVStorage) NewPrivateKey(ctx context.Context, keyPath string) (cryp
func configureVaultClient(cfg Config) (*vault.Client, error) {
vaultConfig := vault.DefaultConfig()
vaultConfig.Timeout = cfg.Timeout

// Add tracing if enabled
if tracing.Enabled() {
vaultConfig.HttpClient.Transport = otelhttp.NewTransport(
vaultConfig.HttpClient.Transport,
otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string {
return "vault: " + r.Method + " " + r.URL.Path
}),
otelhttp.WithTracerProvider(tracing.GetTracerProvider()),
)
}

client, err := vault.NewClient(vaultConfig)
if err != nil {
return nil, fmt.Errorf("unable to initialize Vault client: %w", err)
Expand Down
56 changes: 56 additions & 0 deletions docs/pages/deployment/monitoring.rst
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,62 @@ The Nuts service executable exports the following metric namespaces:
* ``go_`` contains Go metrics related to the process
* ``promhttp_`` contains metrics related to HTTP calls to the Nuts node's ``/metrics`` endpoint

Tracing
*******

The Nuts node supports distributed tracing via OpenTelemetry. When enabled, it exports traces to an OTLP-compatible backend
(e.g., Jaeger, Zipkin, .NET Aspire Dashboard, Grafana Tempo).

Configuration
=============

Enable tracing by configuring the OTLP endpoint:

.. code-block:: yaml

tracing:
endpoint: localhost:4318

Or via the equivalent environment variable:

.. code-block:: shell

NUTS_TRACING_ENDPOINT=localhost:4318

Configuration options:

* ``tracing.endpoint`` - OTLP HTTP endpoint (e.g., ``localhost:4318``). Tracing is disabled when empty.
* ``tracing.insecure`` - Disable TLS for the OTLP connection (default: ``false``). Only use in trusted networks or development environments, as trace data may contain sensitive information.
* ``tracing.servicename`` - Service name reported to the tracing backend (default: ``nuts-node``). Useful for distinguishing multiple instances in distributed tracing.

What is traced
==============

The following are automatically instrumented:

* **Inbound HTTP requests** - All API calls to the Nuts node create spans (except ``/health``, ``/metrics``, ``/status``)
* **Outbound HTTP requests** - HTTP calls to external services (e.g., fetching DID documents, OAuth flows)
* **SQL database** - Database queries via GORM
* **Hashicorp Vault** - Key storage operations when using Vault backend
* **Log correlation** - Log entries include ``trace_id`` and ``span_id`` fields when tracing is enabled
* **OTLP log export** - Logs are also exported to the OTLP backend for unified observability

Trace context propagation
=========================

The Nuts node uses W3C Trace Context (``traceparent`` header) for propagating trace context across service boundaries.
When calling the Nuts node from another traced service, include the ``traceparent`` header to link spans.

Known limitations
=================

The following components are not yet instrumented:

* **Azure Key Vault** - Azure managed keys backend is not instrumented. The Azure SDK supports OpenTelemetry via the ``azotel`` package (see `Azure SDK tracing <https://github.com/Azure/azure-sdk-for-go/tree/main/sdk/tracing/azotel>`_).
* **gRPC network layer** - P2P communication between nodes (``did:nuts``) is not traced, because it belongs to the v5 functionality, which is deprecated.

These limitations may be addressed in future releases.

CPU profiling
*************

Expand Down
Loading
Loading