Merged
Changes from 2 commits
1 change: 1 addition & 0 deletions cmd/tempo/app/modules.go
@@ -93,6 +93,7 @@ func IsSingleBinary(target string) bool {
func (t *App) initServer() (services.Service, error) {
t.cfg.Server.MetricsNamespace = metricsNamespace
t.cfg.Server.ExcludeRequestInLog = true
+	t.cfg.Server.MetricsNativeHistogramFactor = 1.1
Copilot AI Apr 9, 2026

Setting t.cfg.Server.MetricsNativeHistogramFactor = 1.1 enables native histograms for server request/throughput metrics by default, which adds per-request CPU/memory overhead and (because the upstream field is yaml:"-") can’t be tuned/disabled via Tempo config.

Could we consider plumbing a Tempo config/flag for this (even if defaulting to 1.1), so operators can turn it off or adjust the factor if they hit performance or cardinality/memory issues?

Suggested change
-	t.cfg.Server.MetricsNativeHistogramFactor = 1.1
+	// Default to native histograms for server metrics, but do not overwrite
+	// an explicit value that may have been provided via flags or code.
+	if t.cfg.Server.MetricsNativeHistogramFactor <= 0 {
+		t.cfg.Server.MetricsNativeHistogramFactor = 1.1
+	}

Contributor Author

I don't believe this is necessary.
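For illustration, this is roughly the shape of the plumbing the suggestion above asks about: a Tempo-level setting copied into the dskit server config inside initServer, since the upstream field is not YAML-mapped. The field, flag, and method names below are hypothetical and not part of Tempo's configuration.

```go
package app

import "flag"

// Hypothetical sketch — the option name and flag below are illustrative only.
type Config struct {
	// ServerNativeHistogramFactor would control native histograms on server
	// request/throughput metrics; values <= 1 leave them disabled.
	ServerNativeHistogramFactor float64 `yaml:"server_native_histogram_factor"`
	// ... existing Tempo config fields ...
}

func (c *Config) RegisterFlagsAndApplyDefaults(f *flag.FlagSet) {
	f.Float64Var(&c.ServerNativeHistogramFactor, "server.native-histogram-factor", 1.1,
		"Bucket growth factor for native histograms on server metrics. Values <= 1 disable them.")
}

// Then, in (*App).initServer:
//
//	t.cfg.Server.MetricsNativeHistogramFactor = t.cfg.ServerNativeHistogramFactor
```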


Comment on lines 93 to 97
Copilot AI Apr 9, 2026

The PR description/checklist says CHANGELOG.md was updated, but this PR’s diff doesn’t include a changelog entry. Since enabling native histograms for core metrics is a user-visible operational change (additional native histogram emission for OpenMetrics/protobuf scrapes), could you add a CHANGELOG.md entry in the project’s required format (with PR number + link)?

if t.cfg.EnableGoRuntimeMetrics {
// unregister default Go collector
24 changes: 16 additions & 8 deletions modules/distributor/distributor.go
Original file line number Diff line number Diff line change
@@ -96,16 +96,24 @@ var (
Help: "The total number of attribute keys or values truncated per tenant and scope",
}, []string{"tenant", "scope"})
metricKafkaRecordsPerRequest = promauto.NewHistogram(prometheus.HistogramOpts{
-	Namespace: "tempo",
-	Subsystem: "distributor",
-	Name:      "kafka_records_per_request",
-	Help:      "The number of records in each kafka request",
+	Namespace:                       "tempo",
+	Subsystem:                       "distributor",
+	Name:                            "kafka_records_per_request",
+	Help:                            "The number of records in each kafka request",
+	Buckets:                         prometheus.DefBuckets,
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
})
metricKafkaWriteLatency = promauto.NewHistogram(prometheus.HistogramOpts{
-	Namespace: "tempo",
-	Subsystem: "distributor",
-	Name:      "kafka_write_latency_seconds",
-	Help:      "The latency of writing to kafka",
+	Namespace:                       "tempo",
+	Subsystem:                       "distributor",
+	Name:                            "kafka_write_latency_seconds",
+	Help:                            "The latency of writing to kafka",
+	Buckets:                         prometheus.DefBuckets,
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
})
metricKafkaWriteBytesTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "tempo",
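For readers skimming the diff: the three NativeHistogram* fields added throughout this PR are prometheus/client_golang HistogramOpts options. A bucket factor above 1 turns on native (sparse) histograms whose bucket boundaries grow by at most roughly that factor (1.1 means each bucket is about 10% wider than the previous one), the max bucket number caps how many native buckets a histogram may accumulate before it has to widen or reset them, and the min reset duration bounds how often such a reset may occur. The classic Buckets are still exposed as before; native histograms only reach a scraper that negotiates the protobuf exposition format with native histograms enabled. A minimal sketch of the pattern, with an illustrative metric name that is not part of Tempo:

```go
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// exampleLatency keeps its classic buckets and additionally emits a native
// histogram: buckets grow by ~10% (factor 1.1), at most 100 native buckets,
// and at most one bucket reset per hour once that cap is reached.
var exampleLatency = promauto.NewHistogram(prometheus.HistogramOpts{
	Namespace:                       "tempo",
	Name:                            "example_operation_duration_seconds",
	Help:                            "Example latency histogram (illustrative only).",
	Buckets:                         prometheus.DefBuckets,
	NativeHistogramBucketFactor:     1.1,
	NativeHistogramMaxBucketNumber:  100,
	NativeHistogramMinResetDuration: 1 * time.Hour,
})

func main() {
	exampleLatency.Observe(0.42)
}
```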
9 changes: 6 additions & 3 deletions modules/frontend/frontend.go
Original file line number Diff line number Diff line change
@@ -120,9 +120,12 @@ func New(cfg Config, next pipeline.RoundTripper, o overrides.Interface, reader t
}

jobsPerQuery := promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{
-	Name:    "tempo_query_frontend_jobs_per_query",
-	Help:    "Number of planned jobs per query in the query frontend.",
-	Buckets: prometheus.ExponentialBuckets(1, 10, 7),
+	Name:                            "tempo_query_frontend_jobs_per_query",
+	Help:                            "Number of planned jobs per query in the query frontend.",
+	Buckets:                         prometheus.ExponentialBuckets(1, 10, 7),
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
}, []string{"op"})

// Propagate RF1After to search and traceByID sharders
18 changes: 12 additions & 6 deletions modules/frontend/v1/frontend.go
Original file line number Diff line number Diff line change
@@ -101,9 +101,12 @@ func New(cfg Config, log log.Logger, registerer prometheus.Registerer) (*Fronten
Help: "Number of queries in the queue.",
}, []string{"user"}),
batchWeight: promauto.With(registerer).NewHistogramVec(prometheus.HistogramOpts{
-	Name:    "tempo_query_frontend_batch_weight",
-	Help:    "Weight of the batch.",
-	Buckets: prometheus.LinearBuckets(1, 1, cfg.MaxBatchSize),
+	Name:                            "tempo_query_frontend_batch_weight",
+	Help:                            "Weight of the batch.",
+	Buckets:                         prometheus.LinearBuckets(1, 1, cfg.MaxBatchSize),
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
}, []string{"user"}),
discardedRequests: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{
Name: "tempo_query_frontend_discarded_requests_total",
@@ -118,9 +121,12 @@ func New(cfg Config, log log.Logger, registerer prometheus.Registerer) (*Fronten
NativeHistogramMinResetDuration: 1 * time.Hour,
}),
actualBatchSize: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
-	Name:    "tempo_query_frontend_actual_batch_size",
-	Help:    "Batch size.",
-	Buckets: prometheus.LinearBuckets(1, batchBucketSize, batchBucketCount),
+	Name:                            "tempo_query_frontend_actual_batch_size",
+	Help:                            "Batch size.",
+	Buckets:                         prometheus.LinearBuckets(1, batchBucketSize, batchBucketCount),
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
}),
connectedQuerierWorkers: &atomic.Int32{},
}
11 changes: 7 additions & 4 deletions modules/ingester/flush.go
Original file line number Diff line number Diff line change
@@ -53,10 +53,13 @@ var (
NativeHistogramMinResetDuration: 1 * time.Hour,
})
metricFlushSize = promauto.NewHistogram(prometheus.HistogramOpts{
-	Namespace: "tempo",
-	Name:      "ingester_flush_size_bytes",
-	Help:      "Size in bytes of blocks flushed.",
-	Buckets:   prometheus.ExponentialBuckets(1024*1024, 2, 10), // from 1MB up to 1GB
+	Namespace:                       "tempo",
+	Name:                            "ingester_flush_size_bytes",
+	Help:                            "Size in bytes of blocks flushed.",
+	Buckets:                         prometheus.ExponentialBuckets(1024*1024, 2, 10), // from 1MB up to 1GB
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
})
)

23 changes: 15 additions & 8 deletions modules/livestore/instance.go
Original file line number Diff line number Diff line change
@@ -66,10 +66,13 @@ var (
Help: "The total number of blocks cleared.",
}, []string{"block_type"})
metricCompletionSize = promauto.NewHistogram(prometheus.HistogramOpts{
-	Namespace: "tempo_live_store",
-	Name:      "completion_size_bytes",
-	Help:      "Size in bytes of blocks completed.",
-	Buckets:   prometheus.ExponentialBuckets(1024*1024, 2, 10), // from 1MB up to 1GB
+	Namespace:                       "tempo_live_store",
+	Name:                            "completion_size_bytes",
+	Help:                            "Size in bytes of blocks completed.",
+	Buckets:                         prometheus.ExponentialBuckets(1024*1024, 2, 10), // from 1MB up to 1GB
Contributor

Is this true, i.e., from 1MB to 1GB? Or is this more like 500MB?

Contributor

I.e., the first bucket is 1 MiB, so the last one would be 512 MiB? Other than that, looks good.

Contributor Author

Good catch. This is pre-existing, but I've pushed a commit to update the comment.
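For reference (not part of the PR), a quick check with client_golang confirms the range discussed above: ExponentialBuckets(start, factor, count) returns count upper bounds starting at start and multiplying by factor, so the tenth and last bucket here is 1 MiB × 2⁹ = 512 MiB.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Ten buckets starting at 1 MiB and doubling: 1 MiB, 2 MiB, ..., 512 MiB.
	buckets := prometheus.ExponentialBuckets(1024*1024, 2, 10)
	fmt.Println(buckets[0], buckets[len(buckets)-1]) // 1.048576e+06 5.36870912e+08
}
```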

+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
})
metricBackPressure = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "tempo",
@@ -78,10 +81,14 @@ var (
Help: "The total amount of time spent waiting to process data from queue",
}, []string{"reason"})
metricTotalBackPressure = promauto.NewHistogram(prometheus.HistogramOpts{
-	Namespace: "tempo",
-	Subsystem: "live_store",
-	Name:      "back_pressure_duration_seconds",
-	Help:      "Duration of backpressure wait per push",
+	Namespace:                       "tempo",
+	Subsystem:                       "live_store",
+	Name:                            "back_pressure_duration_seconds",
+	Help:                            "Duration of backpressure wait per push",
+	Buckets:                         prometheus.DefBuckets,
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
})
)

24 changes: 16 additions & 8 deletions pkg/dataquality/warnings.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package dataquality

import (
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
@@ -39,15 +41,21 @@ func WarnRootlessTrace(tenant string, phase string) {
}

var MetricSpanInFuture = promauto.NewHistogramVec(prometheus.HistogramOpts{
-	Namespace: "tempo",
-	Name:      "spans_distance_in_future_seconds",
-	Help:      "The number of seconds in the future of the span end time in relation to the ingestion time.",
-	Buckets:   []float64{300, 1800, 3600}, // 5m, 30m, 1h
+	Namespace:                       "tempo",
+	Name:                            "spans_distance_in_future_seconds",
+	Help:                            "The number of seconds in the future of the span end time in relation to the ingestion time.",
+	Buckets:                         []float64{300, 1800, 3600}, // 5m, 30m, 1h
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
}, []string{"tenant"})

var MetricSpanInPast = promauto.NewHistogramVec(prometheus.HistogramOpts{
-	Namespace: "tempo",
-	Name:      "spans_distance_in_past_seconds",
-	Help:      "The number of seconds in the past of the span end time in relation to the ingestion time.",
-	Buckets:   []float64{300, 1800, 3600}, // 5m, 30m, 1h
+	Namespace:                       "tempo",
+	Name:                            "spans_distance_in_past_seconds",
+	Help:                            "The number of seconds in the past of the span end time in relation to the ingestion time.",
+	Buckets:                         []float64{300, 1800, 3600}, // 5m, 30m, 1h
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
}, []string{"tenant"})
12 changes: 9 additions & 3 deletions pkg/drain/metrics.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package drain

import (
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
@@ -41,9 +43,13 @@ func newMetrics(reg prometheus.Registerer) *metrics {
Help: "The total amount of lines skipped per tenant",
}, []string{"reason", "tenant"}),
TokensPerLine: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
-	Namespace: "tempo",
-	Name:      "metrics_generator_registry_drain_tokens_per_line",
-	Help:      "The number of tokens per line",
+	Namespace:                       "tempo",
+	Name:                            "metrics_generator_registry_drain_tokens_per_line",
+	Help:                            "The number of tokens per line",
+	Buckets:                         prometheus.DefBuckets,
+	NativeHistogramBucketFactor:     1.1,
+	NativeHistogramMaxBucketNumber:  100,
+	NativeHistogramMinResetDuration: 1 * time.Hour,
}, []string{"tenant"}),
}
}
Expand Down