Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
c17aa36
First working draft of cost attribution usage tracker
mdisibio Sep 18, 2024
dd1c660
Add missing tracker name label, more efficient batch proportioning, c…
mdisibio Sep 20, 2024
f631f66
Reduce series hashing
mdisibio Oct 7, 2024
9841b90
Fix user-configurable overrides tests for new json element
mdisibio Oct 7, 2024
86a64d6
lint
mdisibio Oct 7, 2024
bbae991
Add per-tenant override for max cardinality
mdisibio Oct 8, 2024
b972253
lint, review feedback
mdisibio Oct 9, 2024
4488a7b
Default to not enabled, cleanup test config
mdisibio Oct 9, 2024
40a3845
Explicitly check for usage_metrics handler
mdisibio Oct 9, 2024
f35267e
review feedback
mdisibio Oct 9, 2024
83856bd
Merge branch 'main' into usage-tracker
mdisibio Oct 10, 2024
73f55a0
Update tracker to support many-to-one mapping with relabel
mdisibio Oct 10, 2024
5ed9bb8
lint
mdisibio Oct 10, 2024
33fa586
New behavior for missing and overflow
mdisibio Oct 21, 2024
40a0964
Fix issue where subsequent spans would incorrectly reuse the series o…
mdisibio Oct 21, 2024
7e7e949
Revert maps back to slices now that we can depend on a dimension alwa…
mdisibio Oct 21, 2024
9c670ec
Please ignore benchmark profiles
mdisibio Oct 21, 2024
c7c2001
Tweak config to have specific cost attribution tracker section. Updat…
mdisibio Oct 22, 2024
84225bf
lint
mdisibio Oct 22, 2024
61608f8
changelog
mdisibio Oct 22, 2024
4f53dde
Merge branch 'main' into usage-tracker
mdisibio Oct 22, 2024
ba69c98
Update api docs for new endpoint
mdisibio Oct 22, 2024
300f2db
Merge branch 'main' into usage-tracker
mdisibio Oct 22, 2024
95660e4
Review feedback
mdisibio Oct 23, 2024
0941a04
review feedback
mdisibio Oct 25, 2024
4042829
Swap loop order for a tad more performance
mdisibio Oct 25, 2024
eed21b5
Merge branch 'main' into usage-tracker
mdisibio Oct 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/tempo/app/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ func (t *App) initDistributor() (services.Service, error) {
t.Server.HTTPRouter().Handle("/distributor/ring", distributor.DistributorRing)
}

t.Server.HTTPRouter().Handle("/usage_metrics", distributor.UsageTrackerHandler())
Comment thread
mdisibio marked this conversation as resolved.
Outdated

return t.distributor, nil
}

Expand Down
7 changes: 5 additions & 2 deletions modules/distributor/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
ring_client "github.com/grafana/dskit/ring/client"

"github.com/grafana/tempo/modules/distributor/forwarder"
"github.com/grafana/tempo/modules/distributor/usage"
"github.com/grafana/tempo/pkg/util"
)

Expand Down Expand Up @@ -37,8 +38,8 @@ type Config struct {
LogReceivedSpans LogSpansConfig `yaml:"log_received_spans,omitempty"`
LogDiscardedSpans LogSpansConfig `yaml:"log_discarded_spans,omitempty"`
MetricReceivedSpans MetricReceivedSpansConfig `yaml:"metric_received_spans,omitempty"`

Forwarders forwarder.ConfigList `yaml:"forwarders"`
Forwarders forwarder.ConfigList `yaml:"forwarders"`
Usage usage.Config `yaml:"usage,omitempty"`

// disables write extension with inactive ingesters. Use this along with ingester.lifecycler.unregister_on_shutdown = true
// note that setting these two config values reduces tolerance to failures on rollout b/c there is always one guaranteed to be failing replica
Expand Down Expand Up @@ -80,4 +81,6 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet)
f.BoolVar(&cfg.LogDiscardedSpans.Enabled, util.PrefixConfig(prefix, "log-discarded-spans.enabled"), false, "Enable to log every discarded span to help debug ingestion or calculate span error distributions using the logs.")
f.BoolVar(&cfg.LogDiscardedSpans.IncludeAllAttributes, util.PrefixConfig(prefix, "log-discarded-spans.include-attributes"), false, "Enable to include span attributes in the logs.")
f.BoolVar(&cfg.LogDiscardedSpans.FilterByStatusError, util.PrefixConfig(prefix, "log-discarded-spans.filter-by-status-error"), false, "Enable to filter out spans without status error.")

cfg.Usage.RegisterFlagsAndApplyDefaults(prefix, f)
}
26 changes: 26 additions & 0 deletions modules/distributor/distributor.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/hex"
"fmt"
"math"
"net/http"
"sync"
"time"

Expand All @@ -28,6 +29,7 @@ import (

"github.com/grafana/tempo/modules/distributor/forwarder"
"github.com/grafana/tempo/modules/distributor/receiver"
"github.com/grafana/tempo/modules/distributor/usage"
generator_client "github.com/grafana/tempo/modules/generator/client"
ingester_client "github.com/grafana/tempo/modules/ingester/client"
"github.com/grafana/tempo/modules/overrides"
Expand Down Expand Up @@ -154,6 +156,8 @@ type Distributor struct {
subservices *services.Manager
subservicesWatcher *services.FailureWatcher

usage *usage.Tracker

logger log.Logger
}

Expand Down Expand Up @@ -214,6 +218,14 @@ func New(cfg Config, clientCfg ingester_client.Config, ingestersRing ring.ReadRi
logger: logger,
}

if cfg.Usage.Enabled {
usage, err := usage.NewTracker(cfg.Usage, "cost-attribution", o.CostAttributionDimensions)
Comment thread
joe-elliott marked this conversation as resolved.
Outdated
if err != nil {
return nil, fmt.Errorf("creating usage tracker: %w", err)
}
d.usage = usage
}

var generatorsPoolFactory ring_client.PoolAddrFunc = func(addr string) (ring_client.PoolClient, error) {
return generator_client.New(addr, generatorClientCfg)
}
Expand Down Expand Up @@ -328,6 +340,7 @@ func (d *Distributor) PushTraces(ctx context.Context, traces ptrace.Traces) (*te
return &tempopb.PushResponse{}, nil
}
// check limits
// todo - usage tracker include discarded bytes?
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the answer is no? We don't want it to include discarded bytes?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify, this TODO is about creating separate metrics for discards:
`tempo_usage_tracker_bytes_received_total` vs.
`tempo_usage_tracker_bytes_discarded_total`.

I think for now let's proceed without it. We can add it later if needed.

err = d.checkForRateLimits(size, spanCount, userID)
if err != nil {
return nil, err
Expand Down Expand Up @@ -360,6 +373,11 @@ func (d *Distributor) PushTraces(ctx context.Context, traces ptrace.Traces) (*te
statBytesReceived.Inc(int64(size))
statSpansReceived.Inc(int64(spanCount))

// Usage tracking
if d.usage != nil {
d.usage.Observe(userID, batches)
}

keys, rebatchedTraces, err := requestsByTraceID(batches, userID, spanCount)
if err != nil {
overrides.RecordDiscardedSpans(spanCount, reasonInternalError, userID)
Expand Down Expand Up @@ -498,6 +516,14 @@ func (*Distributor) Check(_ context.Context, _ *grpc_health_v1.HealthCheckReques
return &grpc_health_v1.HealthCheckResponse{Status: grpc_health_v1.HealthCheckResponse_SERVING}, nil
}

// UsageTrackerHandler returns the HTTP handler exposed by the distributor's
// usage tracker. When no tracker is attached (d.usage is nil) it returns nil,
// so callers must be prepared for a nil http.Handler.
func (d *Distributor) UsageTrackerHandler() http.Handler {
	tracker := d.usage
	if tracker == nil {
		// No tracker attached: there is nothing to serve.
		return nil
	}

	return tracker.Handler()
}

// requestsByTraceID takes an incoming tempodb.PushRequest and creates a set of keys for the hash ring
// and traces to pass onto the ingesters.
func requestsByTraceID(batches []*v1.ResourceSpans, userID string, spanCount int) ([]uint32, []*rebatchedTrace, error) {
Expand Down
35 changes: 35 additions & 0 deletions modules/distributor/usage/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package usage

import (
"flag"
"time"
)

// Default values assigned to the corresponding Config fields by
// RegisterFlagsAndApplyDefaults and DefaultConfig.
const (
// Default for Config.MaxCardinality.
defaultMaxCardinality = 1000
// Default for Config.StaleDuration.
defaultStaleDuration = 15 * time.Minute
// Default for Config.PurgePeriod.
defaultPurgePeriod = time.Minute
)

// Config holds the usage tracker settings. Every field carries yaml and json
// tags with omitempty, so zero-valued fields are left out when marshalled.
type Config struct {
// Enabled turns the usage tracker on or off.
Enabled bool `yaml:"enabled,omitempty" json:"enabled,omitempty"`
// MaxCardinality defaults to 1000.
// NOTE(review): presumably caps the number of distinct series tracked — confirm against the tracker.
MaxCardinality uint `yaml:"max_cardinality,omitempty" json:"max_cardinality,omitempty"`
// StaleDuration defaults to 15 minutes.
// NOTE(review): presumably how long a series may go unobserved before it counts as stale — confirm.
StaleDuration time.Duration `yaml:"stale_duration,omitempty" json:"stale_duration,omitempty"`
// PurgePeriod defaults to 1 minute.
// NOTE(review): presumably the interval between purges of stale series — confirm.
PurgePeriod time.Duration `yaml:"purge_period,omitempty" json:"purge_period,omitempty"`
}

// RegisterFlagsAndApplyDefaults overwrites every field of c with its default
// value. Both the prefix and the flag set are ignored; no command-line flags
// are registered by this method.
func (c *Config) RegisterFlagsAndApplyDefaults(_ string, _ *flag.FlagSet) {
// NOTE(review): the tracker is enabled by default here — confirm this is the intended default.
c.Enabled = true
Comment thread
mdisibio marked this conversation as resolved.
Outdated
c.MaxCardinality = defaultMaxCardinality
c.StaleDuration = defaultStaleDuration
c.PurgePeriod = defaultPurgePeriod
}

// DefaultConfig returns a Config with the tracker enabled and the package
// defaults for max cardinality, stale duration, and purge period.
func DefaultConfig() Config {
	var cfg Config

	cfg.Enabled = true
	cfg.MaxCardinality = defaultMaxCardinality
	cfg.StaleDuration = defaultStaleDuration
	cfg.PurgePeriod = defaultPurgePeriod

	return cfg
}
Loading