Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a524eb8
Add new concurrency config options and validate
mdisibio Feb 13, 2025
96e7b1e
Reduce allocations of IDmap when replaying wal blocks since we know t…
mdisibio Feb 13, 2025
88c7273
Generator read from kafka concurrency, add shared ingest lag metric, …
mdisibio Feb 13, 2025
0dfcd36
Memoize spanmetrics sanitizelabelname, move to better location
mdisibio Feb 13, 2025
7db90df
Moved to shared queue for localblocks wal completion, allow concurren…
mdisibio Feb 13, 2025
b53488c
Honor max live traces in non-flushing local blocks processor
mdisibio Feb 14, 2025
15f2bd0
Add metric for enqueue time
mdisibio Feb 14, 2025
8169501
Add missing mutex lock for enqueuing on replay
mdisibio Feb 14, 2025
921d471
Fix mutex lock while reloading blocks
mdisibio Feb 14, 2025
531ad75
Increase default concurrency
mdisibio Feb 14, 2025
3c8d3da
Simplify local blocks complete queue
mdisibio Feb 14, 2025
e3f886c
Remove uniqify, fix test
mdisibio Feb 14, 2025
820fcbc
Switch completequeue to reference counting and shut it down when last…
mdisibio Feb 14, 2025
e16565d
cache rename/cleanup
mdisibio Feb 18, 2025
67b97ba
Cleanup/denoising PR
mdisibio Feb 18, 2025
d00df8a
Lint/cleanup
mdisibio Feb 18, 2025
62a180f
lint
mdisibio Feb 18, 2025
cae66d1
Update config manifest
mdisibio Feb 18, 2025
e67f59b
fix race condition
mdisibio Feb 18, 2025
0cc26b9
cleanup
mdisibio Feb 19, 2025
0f45e6c
review feedback
mdisibio Feb 20, 2025
cabf9cb
Update config manifest
mdisibio Feb 20, 2025
bc4be1b
changelog
mdisibio Feb 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
* [ENHANCEMENT] Update minio to version [#4341](https://github.com/grafana/tempo/pull/4568) (@javiermolinar)
* [ENHANCEMENT] Prevent queries in the ingester from blocking flushing traces to disk and memory spikes. [#4483](https://github.com/grafana/tempo/pull/4483) (@joe-elliott)
* [ENHANCEMENT] Update tempo operational dashboard for new block-builder and v2 traces api [#4559](https://github.com/grafana/tempo/pull/4559) (@mdisibio)
* [ENHANCEMENT] Improve metrics-generator performance and stability by applying queue back pressure and concurrency [#4721](https://github.com/grafana/tempo/pull/4721) (@mdisibio)
* [ENHANCEMENT] Improve block-builder performance by flushing blocks concurrently [#4565](https://github.com/grafana/tempo/pull/4565) (@mdisibio)
* [ENHANCEMENT] Improve block-builder performance [#4596](https://github.com/grafana/tempo/pull/4596) (@mdisibio)
* [ENHANCEMENT] Improve block-builder performance by not using WAL stage [#4647](https://github.com/grafana/tempo/pull/4647) [#4671](https://github.com/grafana/tempo/pull/4671) (@mdisibio)
Expand Down
3 changes: 3 additions & 0 deletions docs/sources/tempo/configuration/manifest.md
Original file line number Diff line number Diff line change
Expand Up @@ -615,8 +615,10 @@ metrics_generator:
trace_idle_period: 10s
max_block_duration: 1m0s
max_block_bytes: 500000000
concurrency: 4
complete_block_timeout: 1h0m0s
max_live_traces: 0
max_live_traces_bytes: 250000000
filter_server_spans: true
flush_to_storage: false
concurrent_blocks: 10
Expand Down Expand Up @@ -653,6 +655,7 @@ metrics_generator:
metrics_ingestion_time_range_slack: 30s
query_timeout: 30s
override_ring_key: metrics-generator
ingest_concurrency: 16
instance_id: hostname
ingest:
enabled: false
Expand Down
26 changes: 20 additions & 6 deletions modules/generator/config.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package generator

import (
"errors"
"flag"
"fmt"
"os"
Expand Down Expand Up @@ -41,8 +42,9 @@ type Config struct {
OverrideRingKey string `yaml:"override_ring_key"`

// This config is dynamically injected because defined outside the generator config.
Ingest ingest.Config `yaml:"-"`
InstanceID string `yaml:"instance_id" doc:"default=<hostname>" category:"advanced"`
Ingest ingest.Config `yaml:"-"`
IngestConcurrency uint `yaml:"ingest_concurrency"`
InstanceID string `yaml:"instance_id" doc:"default=<hostname>" category:"advanced"`
}

// RegisterFlagsAndApplyDefaults registers the flags.
Expand All @@ -55,6 +57,8 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet)
cfg.TracesWAL.Version = encoding.DefaultEncoding().Version()
cfg.TracesQueryWAL.RegisterFlags(f)
cfg.TracesQueryWAL.Version = encoding.DefaultEncoding().Version()
cfg.Ingest.RegisterFlagsAndApplyDefaults(prefix, f)
cfg.IngestConcurrency = 16

// setting default for max span age before discarding to 30s
cfg.MetricsIngestionSlack = 30 * time.Second
Expand All @@ -70,10 +74,16 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet)
}

func (cfg *Config) Validate() error {
if cfg.Ingest.Enabled {
if err := cfg.Ingest.Kafka.Validate(); err != nil {
return err
}
if err := cfg.Ingest.Validate(); err != nil {
return err
}

if cfg.IngestConcurrency == 0 {
return errors.New("ingest concurrency must be greater than zero")
}

if err := cfg.Processor.Validate(); err != nil {
return err
}

// Only validate if being used
Expand Down Expand Up @@ -103,6 +113,10 @@ func (cfg *ProcessorConfig) RegisterFlagsAndApplyDefaults(prefix string, f *flag
cfg.LocalBlocks.RegisterFlagsAndApplyDefaults(prefix, f)
}

// Validate checks the processor configuration and returns the first error
// encountered. It currently delegates to the local-blocks processor config,
// the only sub-config with validation rules at this level.
func (cfg *ProcessorConfig) Validate() error {
return cfg.LocalBlocks.Validate()
}

// copyWithOverrides creates a copy of the config using values set in the overrides.
func (cfg *ProcessorConfig) copyWithOverrides(o metricsGeneratorOverrides, userID string) (ProcessorConfig, error) {
copyCfg := *cfg
Expand Down
1 change: 1 addition & 0 deletions modules/generator/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ type Generator struct {
reg prometheus.Registerer
logger log.Logger

kafkaCh chan *kgo.Record
kafkaWG sync.WaitGroup
kafkaStop func()
kafkaClient *ingest.Client
Expand Down
77 changes: 68 additions & 9 deletions modules/generator/generator_kafka.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,35 @@ import (
"errors"
"sort"
"strconv"
"time"

"github.com/go-kit/log/level"
"github.com/grafana/tempo/pkg/ingest"
"github.com/grafana/tempo/pkg/tempopb"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/twmb/franz-go/pkg/kgo"
)

// metricEnqueueTime accumulates the time spent blocked while handing fetched
// kafka records to the processing channel. A rising rate suggests the
// concurrent readers cannot keep up with the fetch rate (back pressure).
var metricEnqueueTime = promauto.NewCounter(prometheus.CounterOpts{
Namespace: "tempo",
Subsystem: "metrics_generator",
Name: "enqueue_time_seconds_total",
Help: "The total amount of time spent waiting to enqueue for processing",
})

func (g *Generator) startKafka() {
g.kafkaCh = make(chan *kgo.Record, g.cfg.IngestConcurrency)

// Create context that will be used to stop the goroutines.
var ctx context.Context
ctx, g.kafkaStop = context.WithCancel(context.Background())

for i := uint(0); i < g.cfg.IngestConcurrency; i++ {
g.kafkaWG.Add(1)
go g.readCh(ctx)
}

g.kafkaWG.Add(1)
go g.listenKafka(ctx)
ingest.ExportPartitionLagMetrics(ctx, g.kafkaAdm, g.logger, g.cfg.Ingest, g.getAssignedActivePartitions)
Expand All @@ -24,6 +42,7 @@ func (g *Generator) startKafka() {
// stopKafka shuts down kafka consumption. The order matters: first cancel the
// shared context so the listener and reader goroutines stop, then wait for
// them all to exit via kafkaWG, and only then close kafkaCh. Closing last is
// what makes it safe — presumably every sender on kafkaCh runs under kafkaWG
// (see startKafka), so no send can race the close. TODO confirm no other
// goroutine writes to kafkaCh outside kafkaWG.
func (g *Generator) stopKafka() {
g.kafkaStop()
g.kafkaWG.Wait()
close(g.kafkaCh)
}

func (g *Generator) listenKafka(ctx context.Context) {
Expand All @@ -49,45 +68,85 @@ func (g *Generator) listenKafka(ctx context.Context) {
}

func (g *Generator) readKafka(ctx context.Context) error {
d := ingest.NewDecoder()

fetches := g.kafkaClient.PollFetches(ctx)
fetches.EachError(func(_ string, _ int32, err error) {
if !errors.Is(err, context.Canceled) {
level.Error(g.logger).Log("msg", "failed to fetch records", "err", err)
}
})
if err := fetches.Err(); err != nil && !errors.Is(err, context.Canceled) {
return err
}

// Metric lag based on first message in each partition.
// This balances overhead with granularity.
fetches.EachPartition(func(p kgo.FetchTopicPartition) {
if len(p.Records) > 0 {
lag := time.Since(p.Records[0].Timestamp)
ingest.SetPartitionLagSeconds(g.cfg.Ingest.Kafka.ConsumerGroup, int(p.Partition), lag)
}
})

start := time.Now()

for iter := fetches.RecordIter(); !iter.Done(); {
r := iter.Next()
select {
case g.kafkaCh <- iter.Next():
case <-ctx.Done():
return ctx.Err()
}
}

metricEnqueueTime.Add(time.Since(start).Seconds())
Comment thread
javiermolinar marked this conversation as resolved.

return nil
}

// readCh reads records from the internal channel.
// This allows for offloading the expensive proto unmarshal
// to multiple goroutines.
func (g *Generator) readCh(ctx context.Context) {
defer g.kafkaWG.Done()
d := ingest.NewDecoder()

for {
var r *kgo.Record
select {
case r = <-g.kafkaCh:
case <-ctx.Done():
return
}

tenant := string(r.Key)

i, err := g.getOrCreateInstance(tenant)
if err != nil {
return err
level.Error(g.logger).Log("msg", "consumeKafkaChannel getOrCreateInstance", "err", err)
continue
}

d.Reset()
req, err := d.Decode(r.Value)
if err != nil {
return err
level.Error(g.logger).Log("msg", "consumeKafkaChannel decode", "err", err)
continue
}

for _, tr := range req.Traces {
trace := &tempopb.Trace{}
err = trace.Unmarshal(tr.Slice)
if err != nil {
return err
level.Error(g.logger).Log("msg", "consumeKafkaChannel unmarshal", "err", err)
continue
}

i.pushSpansFromQueue(ctx, &tempopb.PushSpansRequest{
i.pushSpansFromQueue(ctx, r.Timestamp, &tempopb.PushSpansRequest{
Comment thread
mapno marked this conversation as resolved.
Batches: trace.ResourceSpans,
})

tempopb.ReuseByteSlices([][]byte{tr.Slice})
}
}

return nil
}

func (g *Generator) getAssignedActivePartitions() []int32 {
Expand Down
5 changes: 3 additions & 2 deletions modules/generator/generator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ overrides:
require.NoError(t, services.StartAndAwaitRunning(context.Background(), o))

generatorConfig := &Config{}
generatorConfig.RegisterFlagsAndApplyDefaults("", &flag.FlagSet{})
generatorConfig.Storage.Path = t.TempDir()
generatorConfig.Ring.KVStore.Store = "inmemory"
generatorConfig.Processor.SpanMetrics.RegisterFlagsAndApplyDefaults("", nil)
g, err := New(generatorConfig, o, prometheus.NewRegistry(), nil, nil, newTestLogger(t))
require.NoError(t, err)
require.NoError(t, services.StartAndAwaitRunning(context.Background(), g))
Expand Down Expand Up @@ -155,7 +155,7 @@ func BenchmarkPushSpans(b *testing.B) {
"span-metrics": {},
"service-graphs": {},
},
spanMetricsEnableTargetInfo: false,
spanMetricsEnableTargetInfo: true,
spanMetricsTargetInfoExcludedDimensions: []string{"excluded}"},
}
)
Expand Down Expand Up @@ -211,6 +211,7 @@ func BenchmarkPushSpans(b *testing.B) {
mem := runtime.MemStats{}
runtime.ReadMemStats(&mem)
b.ReportMetric(float64(mem.HeapInuse), "heap_in_use")
b.ReportMetric(float64(mem.HeapAlloc), "heap_alloc")
}

func BenchmarkCollect(b *testing.B) {
Expand Down
6 changes: 4 additions & 2 deletions modules/generator/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,8 @@ func (i *instance) addProcessor(processorName string, cfg ProcessorConfig) error
if i.traceQueryWAL != nil {
nonFlushingConfig := cfg.LocalBlocks
nonFlushingConfig.FlushToStorage = false
nonFlushingConfig.AssertMaxLiveTraces = true
nonFlushingConfig.AdjustTimeRangeForSlack = false
i.queuebasedLocalBlocks, err = localblocks.New(nonFlushingConfig, i.instanceID, i.traceQueryWAL, i.writer, i.overrides)
if err != nil {
return err
Expand Down Expand Up @@ -377,7 +379,7 @@ func (i *instance) pushSpans(ctx context.Context, req *tempopb.PushSpansRequest)
}
}

func (i *instance) pushSpansFromQueue(ctx context.Context, req *tempopb.PushSpansRequest) {
func (i *instance) pushSpansFromQueue(ctx context.Context, ts time.Time, req *tempopb.PushSpansRequest) {
i.preprocessSpans(req)
i.processorsMtx.RLock()
defer i.processorsMtx.RUnlock()
Expand All @@ -392,7 +394,7 @@ func (i *instance) pushSpansFromQueue(ctx context.Context, req *tempopb.PushSpan

// Now we push to the non-flushing local blocks if present
if i.queuebasedLocalBlocks != nil {
i.queuebasedLocalBlocks.PushSpans(ctx, req)
i.queuebasedLocalBlocks.DeterministicPush(ts, req)
}
}

Expand Down
1 change: 1 addition & 0 deletions modules/generator/overrides.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type metricsGeneratorOverrides interface {
MetricsGeneratorProcessorServiceGraphsEnableVirtualNodeLabel(userID string) bool
MetricsGeneratorProcessorSpanMetricsTargetInfoExcludedDimensions(userID string) []string
DedicatedColumns(userID string) backend.DedicatedColumns
MaxLocalTracesPerUser(userID string) int
MaxBytesPerTrace(userID string) int
UnsafeQueryHints(userID string) bool
}
Expand Down
5 changes: 5 additions & 0 deletions modules/generator/overrides_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type mockOverrides struct {
localBlocksTraceIdlePeriod time.Duration
localBlocksCompleteBlockTimeout time.Duration
dedicatedColumns backend.DedicatedColumns
maxLocalTraces int
maxBytesPerTrace int
unsafeQueryHints bool
nativeHistograms overrides.HistogramMethod
Expand Down Expand Up @@ -152,6 +153,10 @@ func (m *mockOverrides) DedicatedColumns(string) backend.DedicatedColumns {
return m.dedicatedColumns
}

// MaxLocalTracesPerUser returns the mock's configured per-tenant live-trace
// limit, satisfying the metricsGeneratorOverrides interface for tests.
// The tenant ID argument is ignored; the same value is returned for all users.
func (m *mockOverrides) MaxLocalTracesPerUser(string) int {
return m.maxLocalTraces
}

func (m *mockOverrides) MaxBytesPerTrace(string) int {
return m.maxBytesPerTrace
}
Expand Down
Loading