Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
* [CHANGE] **BREAKING CHANGE** tempo-cli: Support relative time (now, now-1h) for start/end args and standardize on RFC3339 in all commands. [#6458](https://github.com/grafana/tempo/pull/6458) (@electron0zero)
`query search` command no longer accepts timestamps without timezone (e.g. `2024-01-01T00:00:00`), use RFC3339 (e.g. `2024-01-01T00:00:00Z`) or relative time instead.
* [CHANGE] **BREAKING CHANGE** Consolidate read configuration for recent data cutoff. `query_frontend.search.query_ingesters_until` is removed in favor of only `query_frontend.search.query_backend_after`. [#6507](https://github.com/grafana/tempo/pull/6507) (@mapno)
* [CHANGE] jsonnet: Add emptyDir data volume to block-builder StatefulSet [#6648](https://github.com/grafana/tempo/pull/6648) (@mapno)
* [FEATURE] Add new include_any filter policy for spanmetrics filter [#6392](https://github.com/grafana/tempo/pull/6392) (@javiermolinar)
* [FEATURE] Add span_multiplier_key to overrides. This allows tenants to specify the attribute key used for span multiplier values to compensate for head-based sampling. [#6260](https://github.com/grafana/tempo/pull/6260) (@carles-grafana)
* [FEATURE] **BREAKING CHANGE** Optimize TraceQL AST by rewriting conditions on the same attribute to their array equivalent [#6353](https://github.com/grafana/tempo/pull/6353) (@stoewer)
Expand All @@ -37,16 +38,16 @@
* [ENHANCEMENT] Add support for per-tenant left-padding of trace IDs [#6439](https://github.com/grafana/tempo/pull/6489) (@mapno)
* [BUGFIX] Force live-store to rehydrate from Kafka lookback period when local data is missing (e.g. PVC wipe, new node) instead of resuming from the committed consumer group offset [#6428](https://github.com/grafana/tempo/pull/6428) (@oleg-kozlyuk-grafana)
* [ENHANCEMENT] Add new metric for generator ring size: `tempo_distributor_metrics_generator_tenant_ring_size` [#5686](https://github.com/grafana/tempo/pull/5686) (@zalegrala)
* [ENHANCEMENT] Remove explicit `runtime.GC()` calls in vParquet5 compactor/block creation and CLI [#6603](https://github.com/grafana/tempo/pull/6603) (@oleg-kozlyuk-grafana)
* [BUGFIX] fix: reload span_name_sanitization overrides during runtime [#6435](https://github.com/grafana/tempo/pull/6435) (@electron0zero)
* [BUGFIX] fix: live store honor the config options for block and WAL versions [#6509](https://github.com/grafana/tempo/pull/6509) (@mdisibio)
* [BUGFIX] fix: block builder honor the global storage block config for block and WAL versions [#6451](https://github.com/grafana/tempo/issues/6451) (@Harry-kp)
* [BUGFIX] fix: normalize allowlist headers when building the allowlist map [#6481](https://github.com/grafana/tempo/pull/6481) (@javiermolinar)
* [BUGFIX] fix: bug related to dedicated column filtering [#6586](https://github.com/grafana/tempo/pull/6586) (@stoewer)
* [BUGFIX] fix: compactor deduped spans metric uses wrong type (gauge instead of counter) [#6558](https://github.com/grafana/tempo/issues/6558) (@bejaratommy)
* [BUGFIX] metrics-generator: Fix active-series counter underflow in local series limiter when overflow series are deleted [#6568](https://github.com/grafana/tempo/pull/6568) (@carles-grafana)
* [ENHANCEMENT] Remove explicit `runtime.GC()` calls in vParquet5 compactor/block creation and CLI [#6603](https://github.com/grafana/tempo/pull/6603) (@oleg-kozlyuk-grafana)
* [BUGFIX] fix: skip per-label limiter and sanitizer for target_info and host_info metrics in metrics-generator [#6660](https://github.com/grafana/tempo/pull/6660) (@electron0zero)
* [BUGFIX] fix(traceql): err on division by zero [#6580](https://github.com/grafana/tempo/pull/6580) (@Proximyst)
* [CHANGE] jsonnet: Add emptyDir data volume to block-builder StatefulSet [#6648](https://github.com/grafana/tempo/pull/6648) (@mapno)

### 3.0 Cleanup

Expand Down
2 changes: 1 addition & 1 deletion modules/generator/processor/hostinfo/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (p *Processor) PushSpans(_ context.Context, req *tempopb.PushSpansRequest)
for i := range req.Batches {
resourceSpans := req.Batches[i]
if hostID, hostSource := p.findHostIdentifier(resourceSpans); hostID != "" && hostSource != "" {
builder := p.registry.NewLabelBuilder()
builder := p.registry.NewInfoMetricLabelBuilder()
builder.Add(hostIdentifierAttr, hostID)
builder.Add(hostSourceAttr, hostSource)
labels, validUTF8 := builder.CloseAndBuildLabels()
Expand Down
2 changes: 1 addition & 1 deletion modules/generator/processor/spanmetrics/spanmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func (p *Processor) aggregateMetricsForSpan(svcName string, jobName string, inst
}

builder := p.registry.NewLabelBuilder()
targetInfoBuilder := p.registry.NewLabelBuilder()
targetInfoBuilder := p.registry.NewInfoMetricLabelBuilder()
for i := range resourceLabels {
targetInfoBuilder.Add(resourceLabels[i], resourceValues[i])
}
Expand Down
4 changes: 4 additions & 0 deletions modules/generator/registry/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ import (
// Registry is a metrics store.
type Registry interface {
NewLabelBuilder() LabelBuilder
// NewInfoMetricLabelBuilder returns a LabelBuilder that skips the per-label
// cardinality limiter and drain sanitizer.
// Use this builder for info metrics (target_info, host_info) whose labels are high cardinality by design.
NewInfoMetricLabelBuilder() LabelBuilder
NewCounter(name string) Counter
NewHistogram(name string, buckets []float64, histogramOverride HistogramMode) Histogram
NewGauge(name string) Gauge
Expand Down
14 changes: 14 additions & 0 deletions modules/generator/registry/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ var _ Registry = (*ManagedRegistry)(nil)

var OverflowEntity = labels.FromStrings("metric_overflow", "true")

// noopLabelLimiter is a LabelLimiter that acts as an identity function: the
// label set it receives is handed back untouched.
type noopLabelLimiter struct{}

// Limit returns lbls exactly as received.
func (noopLabelLimiter) Limit(lbls labels.Labels) labels.Labels {
	return lbls
}

// noopSanitizer is a Sanitizer that acts as an identity function: the label
// set it receives is handed back untouched.
type noopSanitizer struct{}

// Sanitize returns lbls exactly as received.
func (noopSanitizer) Sanitize(lbls labels.Labels) labels.Labels {
	return lbls
}

// Limiter is used to limit the memory consumption of the registry.
type Limiter interface {
// OnAdd is called when a new entity is created. It accepts the labels and returns
Expand Down Expand Up @@ -162,6 +172,10 @@ func (r *ManagedRegistry) NewLabelBuilder() LabelBuilder {
return NewLabelBuilder(r.cfg.MaxLabelNameLength, r.cfg.MaxLabelValueLength, r.sanitizer, r.perLabelLimiter)
}

// NewInfoMetricLabelBuilder returns a LabelBuilder that is given the registry's
// configured MaxLabelNameLength and MaxLabelValueLength, together with a no-op
// sanitizer and a no-op label limiter (both return their input unchanged).
func (r *ManagedRegistry) NewInfoMetricLabelBuilder() LabelBuilder {
	var (
		sanitizer noopSanitizer
		limiter   noopLabelLimiter
	)
	maxNameLen := r.cfg.MaxLabelNameLength
	maxValueLen := r.cfg.MaxLabelValueLength
	return NewLabelBuilder(maxNameLen, maxValueLen, sanitizer, limiter)
}

func (r *ManagedRegistry) OnAdd(labelHash uint64, seriesCount uint32, lbls labels.Labels) (labels.Labels, uint64) {
r.entityDemand.Insert(labelHash)
return r.limiter.OnAdd(labelHash, seriesCount, lbls)
Expand Down
88 changes: 88 additions & 0 deletions modules/generator/registry/registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (

"github.com/go-kit/log"
"github.com/grafana/tempo/modules/overrides/histograms"
"github.com/prometheus/client_golang/prometheus"
io_prometheus_client "github.com/prometheus/client_model/go"
"github.com/prometheus/prometheus/model/labels"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -485,6 +487,7 @@ type mockOverrides struct {
nativeHistogramBucketFactor float64
nativeHistogramMinResetDuration time.Duration
maxCardinalityPerLabel uint64
spanNameSanitization string
}

var _ Overrides = (*mockOverrides)(nil)
Expand Down Expand Up @@ -526,6 +529,9 @@ func (m *mockOverrides) MetricsGeneratorNativeHistogramMinResetDuration(string)
}

// MetricsGeneratorSpanNameSanitization returns the mock's spanNameSanitization
// value when one has been set, and SpanNameSanitizationDisabled when the field
// is empty. The tenant argument is ignored.
func (m *mockOverrides) MetricsGeneratorSpanNameSanitization(string) string {
	if m.spanNameSanitization == "" {
		return SpanNameSanitizationDisabled
	}
	return m.spanNameSanitization
}

Expand Down Expand Up @@ -873,3 +879,85 @@ func TestManagedRegistry_cardinalitySanitizer(t *testing.T) {
activeSeries := reg.activeSeries()
require.Equal(t, uint32(11), activeSeries, "10 pre-overflow + 1 overflow series")
}

// TestManagedRegistry_InfoMetricLabelsNotInDemandEstimate verifies that labels from
// info metrics (target_info, host_info) do not appear in the label_cardinality_demand_estimate metric.
// Only labels from span-metrics and service-graphs (via NewLabelBuilder) should be tracked.
func TestManagedRegistry_InfoMetricLabelsNotInDemandEstimate(t *testing.T) {
tenant := "test-demand"
cfg := &Config{
StaleDuration: 15 * time.Minute,
}
reg := New(cfg, &mockOverrides{
maxCardinalityPerLabel: 100,
}, tenant, &noopAppender{}, log.NewNopLogger(), noopLimiter)
defer reg.Close()

spanMetricsCounter := reg.NewCounter("span_metrics_calls_total")
targetInfoGauge := reg.NewGauge("target_info")
hostInfoGauge := reg.NewGauge("host_info")

// Simulate span-metrics data through NewLabelBuilder (per-label limiter active)
for i := 0; i < 10; i++ {
b := reg.NewLabelBuilder()
b.Add("service", fmt.Sprintf("svc-%d", i))
b.Add("span_name", fmt.Sprintf("GET /api/%d", i))
lbls, _ := b.CloseAndBuildLabels()
spanMetricsCounter.Inc(lbls, 1.0)
}

// Simulate target_info data through NewInfoMetricLabelBuilder (limiter bypassed)
for i := 0; i < 10; i++ {
b := reg.NewInfoMetricLabelBuilder()
b.Add("grafana_host_id", fmt.Sprintf("host-%d", i))
b.Add("k8s_pod_name", fmt.Sprintf("pod-%d", i))
b.Add("ci_github_workflow_job_id", fmt.Sprintf("job-%d", i))
lbls, _ := b.CloseAndBuildLabels()
targetInfoGauge.SetForTargetInfo(lbls, 1)
}

// Simulate host_info data through NewInfoMetricLabelBuilder (limiter bypassed)
for i := 0; i < 10; i++ {
b := reg.NewInfoMetricLabelBuilder()
b.Add("grafana_host_id", fmt.Sprintf("host-%d", i))
b.Add("host_source", "k8s.node.name")
lbls, _ := b.CloseAndBuildLabels()
hostInfoGauge.Set(lbls, 1)
}

// Trigger demand update so the limiter publishes the demand estimate metric
triggerDemandUpdate(reg.perLabelLimiter.(*PerLabelLimiter))

trackedLabels := collectDemandEstimateLabels(t, tenant)

// Only span-metrics labels should be present - no info metric labels at all
require.Len(t, trackedLabels, 2, "demand estimate should only contain span-metrics labels")
require.Equal(t, float64(10), trackedLabels["service"], "service should have demand of 10")
require.Equal(t, float64(10), trackedLabels["span_name"], "span_name should have demand of 10")
}

// collectDemandEstimateLabels returns all label_name -> demand values from the
// tempo_metrics_generator_registry_label_cardinality_demand_estimate metric for the given tenant.
//
// Series belonging to other tenants are skipped. Collection runs in its own
// goroutine on an unbuffered channel, so the helper cannot block no matter how
// many series the metric currently holds (a fixed-size buffer filled by a
// synchronous Collect call would hang once the series count exceeded it).
func collectDemandEstimateLabels(t *testing.T, tenant string) map[string]float64 {
	t.Helper()

	ch := make(chan prometheus.Metric)
	go func() {
		defer close(ch)
		metricLabelCardinalityDemand.Collect(ch)
	}()

	// Drain the channel completely before asserting anything: a failing
	// require call aborts the test goroutine, and the collector goroutine must
	// not be left blocked on a send when that happens.
	var collected []prometheus.Metric
	for m := range ch {
		collected = append(collected, m)
	}

	result := make(map[string]float64, len(collected))
	for _, m := range collected {
		var g io_prometheus_client.Metric
		require.NoError(t, m.Write(&g))

		var metricTenant, labelName string
		for _, lbl := range g.GetLabel() {
			switch lbl.GetName() {
			case "tenant":
				metricTenant = lbl.GetValue()
			case "label_name":
				labelName = lbl.GetValue()
			}
		}

		if metricTenant != tenant {
			continue
		}
		result[labelName] = g.GetGauge().GetValue()
	}
	return result
}
4 changes: 4 additions & 0 deletions modules/generator/registry/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@
return NewLabelBuilder(0, 0, nds, newTestLabelLimiter())
}

// NewInfoMetricLabelBuilder returns a LabelBuilder constructed with 0 for both
// length arguments and with a no-op sanitizer and a no-op label limiter (both
// return their input unchanged).
func (t *TestRegistry) NewInfoMetricLabelBuilder() LabelBuilder {
	var (
		sanitizer noopSanitizer
		limiter   noopLabelLimiter
	)
	return NewLabelBuilder(0, 0, sanitizer, limiter)
}

Check notice on line 54 in modules/generator/registry/test.go

View workflow job for this annotation

GitHub Actions / Coverage Annotations

Uncovered lines

Lines 52-54 are not covered by tests

func (t *TestRegistry) NewHistogram(name string, buckets []float64, histogramOverrides HistogramMode) Histogram {
return &testHistogram{
nameSum: name + "_sum",
Expand Down
Loading