Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
## main / unreleased

* [BUGFIX] Fix some instances where spanmetrics histograms could be inconsistent [#3412](https://github.com/grafana/tempo/pull/3412) (@mdisibio)

## v2.4.0-rc.0

* [CHANGE] Merge the processors overrides set through runtime overrides and user-configurable overrides [#3125](https://github.com/grafana/tempo/pull/3125) (@kvrhdn)
Expand Down
6 changes: 5 additions & 1 deletion modules/generator/processor/spanmetrics/spanmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,11 @@ func (p *Processor) aggregateMetrics(resourceSpans []*v1_trace.ResourceSpans) {
}

func (p *Processor) aggregateMetricsForSpan(svcName string, jobName string, instanceID string, rs *v1.Resource, span *v1_trace.Span, resourceLabels []string, resourceValues []string) {
latencySeconds := float64(span.GetEndTimeUnixNano()-span.GetStartTimeUnixNano()) / float64(time.Second.Nanoseconds())
// Spans with negative latency are treated as zero.
latencySeconds := 0.0
if start, end := span.GetStartTimeUnixNano(), span.GetEndTimeUnixNano(); start < end {
latencySeconds = float64(end-start) / float64(time.Second.Nanoseconds())
}

labelValues := make([]string, 0, 4+len(p.Cfg.Dimensions))
targetInfoLabelValues := make([]string, len(resourceLabels))
Expand Down
42 changes: 42 additions & 0 deletions modules/generator/processor/spanmetrics/spanmetrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
filterconfig "github.com/grafana/tempo/pkg/spanfilter/config"
"github.com/grafana/tempo/pkg/tempopb"
common_v1 "github.com/grafana/tempo/pkg/tempopb/common/v1"
resource_v1 "github.com/grafana/tempo/pkg/tempopb/resource/v1"
trace_v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1"
"github.com/grafana/tempo/pkg/util/test"
)
Expand Down Expand Up @@ -1023,6 +1024,47 @@ func TestSpanMetricsDimensionMappingMissingLabels(t *testing.T) {
assert.Equal(t, 10.0, testRegistry.Query("traces_spanmetrics_latency_sum", lbls))
}

func TestSpanMetricsNegativeLatency(t *testing.T) {
testRegistry := registry.NewTestRegistry()
filteredSpansCounter := metricSpansDiscarded.WithLabelValues("test-tenant", "filtered")

cfg := Config{}
cfg.RegisterFlagsAndApplyDefaults("", nil)
cfg.HistogramBuckets = []float64{0.5, 1}

p, err := New(cfg, testRegistry, filteredSpansCounter)
require.NoError(t, err)
defer p.Shutdown(context.Background())

p.PushSpans(context.Background(), &tempopb.PushSpansRequest{
Batches: []*trace_v1.ResourceSpans{{
Resource: &resource_v1.Resource{},
ScopeSpans: []*trace_v1.ScopeSpans{{
Spans: []*trace_v1.Span{
{
StartTimeUnixNano: uint64(1),
EndTimeUnixNano: uint64(0),
},
},
}},
}},
})

lbls := labels.FromMap(map[string]string{
"service": "",
"span_name": "",
"span_kind": "SPAN_KIND_UNSPECIFIED",
"status_code": "STATUS_CODE_UNSET",
})

require.Equal(t, 1.0, testRegistry.Query("traces_spanmetrics_calls_total", lbls), "calls_total")
require.Equal(t, 1.0, testRegistry.Query("traces_spanmetrics_latency_bucket", withLe(lbls, 0.5)), "bucket_0.5")
require.Equal(t, 1.0, testRegistry.Query("traces_spanmetrics_latency_bucket", withLe(lbls, 1)), "bucket_1")
require.Equal(t, 1.0, testRegistry.Query("traces_spanmetrics_latency_bucket", withLe(lbls, math.Inf(1))), "bucket_Inf")
require.Equal(t, 1.0, testRegistry.Query("traces_spanmetrics_latency_count", lbls), "count")
require.Equal(t, 0.0, testRegistry.Query("traces_spanmetrics_latency_sum", lbls), "sum")
}

func withLe(lbls labels.Labels, le float64) labels.Labels {
lb := labels.NewBuilder(lbls)
lb = lb.Set(labels.BucketLabel, strconv.FormatFloat(le, 'f', -1, 64))
Expand Down
17 changes: 6 additions & 11 deletions modules/generator/registry/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ type histogram struct {
buckets []float64
bucketLabels []string

// seriesMtx is used to sync modifications to the map, not to the data in series
seriesMtx sync.RWMutex
seriesMtx sync.Mutex
series map[uint64]*histogramSeries

onAddSerie func(count uint32) bool
Expand Down Expand Up @@ -89,10 +88,10 @@ func newHistogram(name string, buckets []float64, onAddSeries func(uint32) bool,
func (h *histogram) ObserveWithExemplar(labelValueCombo *LabelValueCombo, value float64, traceID string, multiplier float64) {
hash := labelValueCombo.getHash()

h.seriesMtx.RLock()
s, ok := h.series[hash]
h.seriesMtx.RUnlock()
h.seriesMtx.Lock()
defer h.seriesMtx.Unlock()

s, ok := h.series[hash]
if ok {
h.updateSeries(s, value, traceID, multiplier)
return
Expand All @@ -103,10 +102,6 @@ func (h *histogram) ObserveWithExemplar(labelValueCombo *LabelValueCombo, value
}

newSeries := h.newSeries(labelValueCombo, value, traceID, multiplier)

h.seriesMtx.Lock()
defer h.seriesMtx.Unlock()

s, ok = h.series[hash]
if ok {
h.updateSeries(s, value, traceID, multiplier)
Expand Down Expand Up @@ -157,8 +152,8 @@ func (h *histogram) name() string {
}

func (h *histogram) collectMetrics(appender storage.Appender, timeMs int64, externalLabels map[string]string) (activeSeries int, err error) {
h.seriesMtx.RLock()
defer h.seriesMtx.RUnlock()
h.seriesMtx.Lock()
defer h.seriesMtx.Unlock()

activeSeries = len(h.series) * int(h.activeSeriesPerHistogramSerie())

Expand Down