Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

* **BREAKING CHANGE** tempo-query is no longer a jaeger instance with grpcPlugin. It's now a standalone server, serving a gRPC API for Jaeger on `0.0.0.0:7777` by default. [#3840](https://github.com/grafana/tempo/issues/3840) (@frzifus)
* [CHANGE] **BREAKING CHANGE** The dynamic injection of X-Scope-OrgID header for metrics generator remote-writes is changed. If the header is already set in per-tenant overrides or global tempo configuration, then it is honored and not overwritten. [#4021](https://github.com/grafana/tempo/pull/4021) (@mdisibio)
* [CHANGE] **BREAKING CHANGE** Migrate from OpenTracing to OpenTelemetry instrumentation. Removed the `use_otel_tracer` configuration option. Use the OpenTelemetry environment variables to configure the span exporter [#3646](https://github.com/grafana/tempo/pull/3646) (@andreasgerstmayr)
To continue using the Jaeger exporter, use the following environment variable: `OTEL_TRACES_EXPORTER=jaeger`.
* [FEATURE] Discarded span logging `log_discarded_spans` [#3957](https://github.com/grafana/tempo/issues/3957) (@dastrobu)
* [FEATURE] TraceQL support for instrumentation scope [#3967](https://github.com/grafana/tempo/pull/3967) (@ie-pham)
* [ENHANCEMENT] TraceQL: Attribute iterators collect matched array values [#3867](https://github.com/grafana/tempo/pull/3867) (@electron0zero, @stoewer)
Expand Down
58 changes: 27 additions & 31 deletions cmd/tempo-query/tempo/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ import (
"github.com/grafana/dskit/user"
"github.com/grafana/tempo/pkg/tempopb"
"github.com/jaegertracing/jaeger/proto-gen/storage_v1"
"github.com/opentracing/opentracing-go"
ot_log "github.com/opentracing/opentracing-go/log"
"go.opentelemetry.io/collector/pdata/ptrace"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/propagation"
"google.golang.org/grpc/metadata"

jaeger "github.com/jaegertracing/jaeger/model"
Expand Down Expand Up @@ -59,6 +59,8 @@ var (
_ storage_v1.SpanWriterPluginServer = (*Backend)(nil)
)

var tracer = otel.Tracer("cmd/tempo-query/tempo")

type Backend struct {
tempoBackend string
tlsEnabled bool
Expand Down Expand Up @@ -202,10 +204,10 @@ func (b *Backend) GetTrace(req *storage_v1.GetTraceRequest, stream storage_v1.Sp
func (b *Backend) getTrace(ctx context.Context, traceID jaeger.TraceID) (*jaeger.Trace, error) {
url := fmt.Sprintf("%s://%s/api/traces/%s", b.apiSchema(), b.tempoBackend, traceID)

span, ctx := opentracing.StartSpanFromContext(ctx, "tempo-query.GetTrace")
defer span.Finish()
ctx, span := tracer.Start(ctx, "tempo-query.GetTrace")
defer span.End()

req, err := b.newGetRequest(ctx, url, span)
req, err := b.newGetRequest(ctx, url)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -247,7 +249,7 @@ func (b *Backend) getTrace(ctx context.Context, traceID jaeger.TraceID) (*jaeger
ProcessMap: []jaeger.Trace_ProcessMapping{},
}

span.LogFields(ot_log.String("msg", "build process map"))
span.AddEvent("build process map")
// otel proto conversion doesn't set jaeger processes
for _, batch := range jaegerBatches {
for _, s := range batch.Spans {
Expand All @@ -271,21 +273,21 @@ func (b *Backend) calculateTimeRange() (int64, int64) {
}

// GetServices implements storage_v1.SpanReaderPluginServer. It returns the
// service names known to Tempo by looking up the values of the service-name
// search tag via the Tempo search API.
func (b *Backend) GetServices(ctx context.Context, _ *storage_v1.GetServicesRequest) (*storage_v1.GetServicesResponse, error) {
	// Span renamed from "tempo-query.GetOperations": the original name was a
	// copy-paste from GetOperations and mislabeled traces for this endpoint.
	ctx, span := tracer.Start(ctx, "tempo-query.GetServices")
	defer span.End()

	services, err := b.lookupTagValues(ctx, serviceSearchTag)
	if err != nil {
		return nil, err
	}
	return &storage_v1.GetServicesResponse{Services: services}, nil
}

func (b *Backend) GetOperations(ctx context.Context, _ *storage_v1.GetOperationsRequest) (*storage_v1.GetOperationsResponse, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "tempo-query.GetOperations")
defer span.Finish()
ctx, span := tracer.Start(ctx, "tempo-query.GetOperations")
defer span.End()

tagValues, err := b.lookupTagValues(ctx, span, operationSearchTag)
tagValues, err := b.lookupTagValues(ctx, operationSearchTag)
if err != nil {
return nil, err
}
Expand All @@ -305,30 +307,31 @@ func (b *Backend) GetOperations(ctx context.Context, _ *storage_v1.GetOperations
}

func (b *Backend) FindTraces(req *storage_v1.FindTracesRequest, stream storage_v1.SpanReaderPlugin_FindTracesServer) error {
span, ctx := opentracing.StartSpanFromContext(stream.Context(), "tempo-query.FindTraces")
defer span.Finish()
ctx, span := tracer.Start(stream.Context(), "tempo-query.FindTraces")
defer span.End()

resp, err := b.FindTraceIDs(ctx, &storage_v1.FindTraceIDsRequest{Query: req.Query})
if err != nil {
return err
}

span.LogFields(ot_log.String("msg", fmt.Sprintf("Found %d trace IDs", len(resp.TraceIDs))))
span.AddEvent(fmt.Sprintf("Found %d trace IDs", len(resp.TraceIDs)))

// for every traceID, get the full trace
var jaegerTraces []*jaeger.Trace
for _, traceID := range resp.TraceIDs {
trace, err := b.getTrace(ctx, traceID)
if err != nil {
// TODO this seems to be an internal inconsistency error, ignore so we can still show the rest
span.LogFields(ot_log.Error(fmt.Errorf("could not get trace for traceID %v: %w", traceID, err)))
span.AddEvent(fmt.Sprintf("could not get trace for traceID %v", traceID))
span.RecordError(err)
continue
}

jaegerTraces = append(jaegerTraces, trace)
}

span.LogFields(ot_log.String("msg", fmt.Sprintf("Returning %d traces", len(jaegerTraces))))
span.AddEvent(fmt.Sprintf("Returning %d traces", len(jaegerTraces)))

for _, jt := range jaegerTraces {
spans := make([]jaeger.Span, len(jt.Spans))
Expand All @@ -343,8 +346,8 @@ func (b *Backend) FindTraces(req *storage_v1.FindTracesRequest, stream storage_v
}

func (b *Backend) FindTraceIDs(ctx context.Context, r *storage_v1.FindTraceIDsRequest) (*storage_v1.FindTraceIDsResponse, error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "tempo-query.FindTraceIDs")
defer span.Finish()
ctx, span := tracer.Start(ctx, "tempo-query.FindTraceIDs")
defer span.End()

url := url.URL{
Scheme: b.apiSchema(),
Expand All @@ -370,7 +373,7 @@ func (b *Backend) FindTraceIDs(ctx context.Context, r *storage_v1.FindTraceIDsRe

url.RawQuery = urlQuery.Encode()

req, err := b.newGetRequest(ctx, url.String(), span)
req, err := b.newGetRequest(ctx, url.String())
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -435,7 +438,7 @@ func createTagsQueryParam(service string, operation string, tags map[string]stri
return tagsBuilder.String(), nil
}

func (b *Backend) lookupTagValues(ctx context.Context, span opentracing.Span, tagName string) ([]string, error) {
func (b *Backend) lookupTagValues(ctx context.Context, tagName string) ([]string, error) {
var url string

if b.QueryServicesDuration == nil {
Expand All @@ -445,7 +448,7 @@ func (b *Backend) lookupTagValues(ctx context.Context, span opentracing.Span, ta
url = fmt.Sprintf("%s://%s/api/search/tag/%s/values?start=%d&end=%d", b.apiSchema(), b.tempoBackend, tagName, startTime, endTime)
}

req, err := b.newGetRequest(ctx, url, span)
req, err := b.newGetRequest(ctx, url)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -486,20 +489,13 @@ func (b *Backend) Close(context.Context, *storage_v1.CloseWriterRequest) (*stora
return nil, nil
}

func (b *Backend) newGetRequest(ctx context.Context, url string, span opentracing.Span) (*http.Request, error) {
func (b *Backend) newGetRequest(ctx context.Context, url string) (*http.Request, error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}

if tracer := opentracing.GlobalTracer(); tracer != nil {
carrier := make(opentracing.TextMapCarrier, len(req.Header))
for k, v := range req.Header {
carrier.Set(k, v[0])
}
// this is not really loggable or anything we can react to. just ignoring this error
_ = tracer.Inject(span.Context(), opentracing.TextMap, carrier)
}
otel.GetTextMapPropagator().Inject(ctx, propagation.HeaderCarrier(req.Header))

// currently Jaeger Query will only propagate bearer token to the grpc backend and no other headers
// so we are going to extract the tenant id from the header, if it exists and use it
Expand Down
2 changes: 0 additions & 2 deletions cmd/tempo/app/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ type Config struct {
ShutdownDelay time.Duration `yaml:"shutdown_delay,omitempty"`
StreamOverHTTPEnabled bool `yaml:"stream_over_http_enabled,omitempty"`
HTTPAPIPrefix string `yaml:"http_api_prefix"`
UseOTelTracer bool `yaml:"use_otel_tracer,omitempty"`
EnableGoRuntimeMetrics bool `yaml:"enable_go_runtime_metrics,omitempty"`

Server server.Config `yaml:"server,omitempty"`
Expand Down Expand Up @@ -73,7 +72,6 @@ func (c *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet) {
f.BoolVar(&c.AuthEnabled, "auth.enabled", false, "Set to true to enable auth (deprecated: use multitenancy.enabled)")
f.BoolVar(&c.MultitenancyEnabled, "multitenancy.enabled", false, "Set to true to enable multitenancy.")
f.StringVar(&c.HTTPAPIPrefix, "http-api-prefix", "", "String prefix for all http api endpoints.")
f.BoolVar(&c.UseOTelTracer, "use-otel-tracer", false, "Set to true to replace the OpenTracing tracer with the OpenTelemetry tracer")
f.BoolVar(&c.EnableGoRuntimeMetrics, "enable-go-runtime-metrics", false, "Set to true to enable all Go runtime metrics")
f.DurationVar(&c.ShutdownDelay, "shutdown-delay", 0, "How long to wait between SIGTERM and shutdown. After receiving SIGTERM, Tempo will report not-ready status via /ready endpoint.")

Expand Down
71 changes: 8 additions & 63 deletions cmd/tempo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"github.com/grafana/dskit/flagext"
dslog "github.com/grafana/dskit/log"
"github.com/grafana/dskit/spanprofiler"
"github.com/grafana/dskit/tracing"
ot "github.com/opentracing/opentracing-go"
"github.com/prometheus/client_golang/prometheus"
ver "github.com/prometheus/client_golang/prometheus/collectors/version"
Expand Down Expand Up @@ -95,18 +94,15 @@ func main() {
os.Exit(0)
}

// Init tracer
var shutdownTracer func()
if config.UseOTelTracer {
shutdownTracer, err = installOpenTelemetryTracer(config)
} else {
shutdownTracer, err = installOpenTracingTracer(config)
}
if err != nil {
level.Error(log.Logger).Log("msg", "error initialising tracer", "err", err)
os.Exit(1)
// Init tracer if OTEL_TRACES_EXPORTER, OTEL_EXPORTER_OTLP_ENDPOINT or OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is set
if os.Getenv("OTEL_TRACES_EXPORTER") != "" || os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT") != "" || os.Getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") != "" {
shutdownTracer, err := installOpenTelemetryTracer(config)
if err != nil {
level.Error(log.Logger).Log("msg", "error initialising tracer", "err", err)
os.Exit(1)
}
defer shutdownTracer()
}
defer shutdownTracer()

if *mutexProfileFraction > 0 {
runtime.SetMutexProfileFraction(*mutexProfileFraction)
Expand Down Expand Up @@ -228,30 +224,9 @@ func loadConfig() (*app.Config, bool, error) {
return config, configVerify, nil
}

func installOpenTracingTracer(config *app.Config) (func(), error) {
level.Info(log.Logger).Log("msg", "initialising OpenTracing tracer")

// Setting the environment variable JAEGER_AGENT_HOST enables tracing
trace, err := tracing.NewFromEnv(fmt.Sprintf("%s-%s", appName, config.Target))
if err != nil {
return nil, fmt.Errorf("error initialising tracer: %w", err)
}
ot.SetGlobalTracer(spanprofiler.NewTracer(ot.GlobalTracer()))

return func() {
if err := trace.Close(); err != nil {
level.Error(log.Logger).Log("msg", "error closing tracing", "err", err)
os.Exit(1)
}
}, nil
}

func installOpenTelemetryTracer(config *app.Config) (func(), error) {
level.Info(log.Logger).Log("msg", "initialising OpenTelemetry tracer")

// for now, migrate OpenTracing Jaeger environment variables
migrateJaegerEnvironmentVariables()

exp, err := autoexport.NewSpanExporter(context.Background())
if err != nil {
return nil, fmt.Errorf("failed to create OTEL exporter: %w", err)
Expand Down Expand Up @@ -304,36 +279,6 @@ func installOpenTelemetryTracer(config *app.Config) (func(), error) {
return shutdown, nil
}

// migrateJaegerEnvironmentVariables bridges legacy Jaeger client environment
// variables to their OpenTelemetry equivalents so existing deployments keep
// emitting traces after the tracer migration. OTel variables that are already
// set always win; legacy values never overwrite them.
func migrateJaegerEnvironmentVariables() {
	// Default the exporter to Jaeger when none was chosen explicitly,
	// matching the behavior of the previous OpenTracing-based setup.
	if _, exporterSet := os.LookupEnv("OTEL_TRACES_EXPORTER"); !exporterSet {
		os.Setenv("OTEL_TRACES_EXPORTER", "jaeger")
	}

	// Legacy → OTel variable mapping. References:
	// jaeger-tracing-go: https://github.com/jaegertracing/jaeger-client-go#environment-variables
	// opentelemetry-go: https://github.com/open-telemetry/opentelemetry-go/tree/main/exporters/jaeger#environment-variables
	migrations := map[string]string{
		"JAEGER_AGENT_HOST": "OTEL_EXPORTER_JAEGER_AGENT_HOST",
		"JAEGER_AGENT_PORT": "OTEL_EXPORTER_JAEGER_AGENT_PORT",
		"JAEGER_ENDPOINT":   "OTEL_EXPORTER_JAEGER_ENDPOINT",
		"JAEGER_USER":       "OTEL_EXPORTER_JAEGER_USER",
		"JAEGER_PASSWORD":   "OTEL_EXPORTER_JAEGER_PASSWORD",
		"JAEGER_TAGS":       "OTEL_RESOURCE_ATTRIBUTES",
	}
	for legacyKey, otelKey := range migrations {
		legacyValue, haveLegacy := os.LookupEnv(legacyKey)
		if !haveLegacy {
			continue
		}
		if _, haveOtel := os.LookupEnv(otelKey); haveOtel {
			// Explicit OTel configuration takes precedence over legacy vars.
			continue
		}
		level.Warn(log.Logger).Log("msg", "migrating Jaeger environment variable, consider using native OpenTelemetry variables", "jaeger", legacyKey, "otel", otelKey)
		_ = os.Setenv(otelKey, legacyValue)
	}

	// Remote/adaptive sampling has no equivalent here; warn so operators
	// know every span will be exported.
	if _, samplerSet := os.LookupEnv("JAEGER_SAMPLER_TYPE"); samplerSet {
		level.Warn(log.Logger).Log("msg", "JAEGER_SAMPLER_TYPE is not supported with the OpenTelemetry tracer, no sampling will be performed")
	}
}

type otelErrorHandlerFunc func(error)

// Handle implements otel.ErrorHandler
Expand Down
16 changes: 5 additions & 11 deletions docs/sources/tempo/operations/monitor/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,13 @@ Tempo emits logs in the `key=value` ([logfmt](https://brandur.org/logfmt)) forma

### Traces

Tempo uses the [Jaeger Golang SDK](https://github.com/jaegertracing/jaeger-client-go) for tracing instrumentation.
Tempo uses the [OpenTelemetry SDK](https://github.com/open-telemetry/opentelemetry-go) for tracing instrumentation.
The complete read path and some parts of the write path of Tempo are instrumented for tracing.

You can configure the tracer [using environment variables](https://github.com/jaegertracing/jaeger-client-go#environment-variables).
To enable tracing, set one of the following: `JAEGER_AGENT_HOST` and `JAEGER_AGENT_PORT`, or `JAEGER_ENDPOINT`.
You can configure the tracer [using environment variables](https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/).
To enable tracing, set one of the following: `OTEL_EXPORTER_OTLP_ENDPOINT` or `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`.

The Jaeger client uses remote sampling by default, if the management server is not available no traces are sent.
To always send traces (no sampling), set the following environment variables:

```
JAEGER_SAMPLER_TYPE=const
JAEGER_SAMPLER_PARAM=1
```
The OpenTelemetry SDK uses OTLP/HTTP by default, which can be configured with `OTEL_EXPORTER_OTLP_PROTOCOL`.

## Polling

Expand Down Expand Up @@ -109,4 +103,4 @@ The Rules and Alerts are available as [YAML files in the compiled mixin](https:/
To set up alerting, download the provided JSON files and configure them for use on your Prometheus monitoring server.

Check the [runbook](https://github.com/grafana/tempo/blob/main/operations/tempo-mixin/runbook.md) to understand the
various steps that can be taken to fix firing alerts.
various steps that can be taken to fix firing alerts.
Loading