Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## main / unreleased

* [CHANGE] tempo: check configuration returns now a list of warnings [#1663](https://github.com/grafana/tempo/pull/1663) (@frzifus)
* [ENHANCEMENT] metrics-generator: expose span size as a metric [#1662](https://github.com/grafana/tempo/pull/1662) (@ie-pham)
* [ENHANCEMENT] Set Max Idle connections to 100 for Azure, should reduce DNS errors in Azure [#1632](https://github.com/grafana/tempo/pull/1632) (@electron0zero)

Expand Down
69 changes: 53 additions & 16 deletions cmd/tempo/app/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package app

import (
"flag"
"fmt"
"time"

"github.com/go-kit/log/level"
"github.com/grafana/dskit/flagext"
"github.com/grafana/dskit/kv/memberlist"
"github.com/grafana/tempo/modules/compactor"
Expand All @@ -19,7 +19,6 @@ import (
"github.com/grafana/tempo/modules/storage"
"github.com/grafana/tempo/pkg/usagestats"
"github.com/grafana/tempo/pkg/util"
"github.com/grafana/tempo/pkg/util/log"
"github.com/grafana/tempo/tempodb"
"github.com/prometheus/client_golang/prometheus"
"github.com/weaveworks/common/server"
Expand Down Expand Up @@ -117,43 +116,42 @@ func (c *Config) MultitenancyIsEnabled() bool {
return c.MultitenancyEnabled || c.AuthEnabled
}

// CheckConfig checks if config values are suspect.
func (c *Config) CheckConfig() {
// CheckConfig checks if config values are suspect and returns a bundled list of warnings and explanation.
func (c *Config) CheckConfig() []ConfigWarning {
var warnings []ConfigWarning
if c.Target == MetricsGenerator && !c.MetricsGeneratorEnabled {
level.Warn(log.Logger).Log("msg", "target == metrics-generator but metrics_generator_enabled != true",
"explain", "The metrics-generator will only receive data if metrics_generator_enabled is set to true globally")
warnings = append(warnings, warnMetricsGenerator)
}

if c.Ingester.CompleteBlockTimeout < c.StorageConfig.Trace.BlocklistPoll {
level.Warn(log.Logger).Log("msg", "ingester.complete_block_timeout < storage.trace.blocklist_poll",
"explain", "You may receive 404s between the time the ingesters have flushed a trace and the querier is aware of the new block")
warnings = append(warnings, warnCompleteBlockTimeout)
}

if c.Compactor.Compactor.BlockRetention < c.StorageConfig.Trace.BlocklistPoll {
level.Warn(log.Logger).Log("msg", "compactor.compaction.compacted_block_timeout < storage.trace.blocklist_poll",
"explain", "Queriers and Compactors may attempt to read a block that no longer exists")
warnings = append(warnings, warnBlockRetention)
}

if c.Compactor.Compactor.RetentionConcurrency == 0 {
level.Warn(log.Logger).Log("msg", "c.Compactor.Compactor.RetentionConcurrency must be greater than zero. Using default.", "default", tempodb.DefaultRetentionConcurrency)
warnings = append(warnings, warnRetentionConcurrency)
}

if c.StorageConfig.Trace.Backend == "s3" && c.Compactor.Compactor.FlushSizeBytes < 5242880 {
level.Warn(log.Logger).Log("msg", "c.Compactor.Compactor.FlushSizeBytes < 5242880",
"explain", "Compaction flush size should be 5MB or higher for S3 backend")
warnings = append(warnings, warnStorageTraceBackendS3)
}

if c.StorageConfig.Trace.BlocklistPollConcurrency == 0 {
level.Warn(log.Logger).Log("msg", "c.StorageConfig.Trace.BlocklistPollConcurrency must be greater than zero. Using default.", "default", tempodb.DefaultBlocklistPollConcurrency)
warnings = append(warnings, warnBlocklistPollConcurrency)
}

if c.Distributor.LogReceivedTraces {
level.Warn(log.Logger).Log("msg", "c.Distributor.LogReceivedTraces is deprecated. The new flag is c.Distributor.log_received_spans.enabled")
warnings = append(warnings, warnLogReceivedTraces)
}

if c.StorageConfig.Trace.Backend == "local" && c.Target != SingleBinary {
level.Warn(log.Logger).Log("msg", "Local backend will not correctly retrieve traces with a distributed deployment unless all components have access to the same disk. You should probably be using object storage as a backend.")
warnings = append(warnings, warnStorageTraceBackendLocal)
}

return warnings
}

func (c *Config) Describe(ch chan<- *prometheus.Desc) {
Expand Down Expand Up @@ -184,3 +182,42 @@ func (c *Config) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(metricConfigFeatDesc, prometheus.GaugeValue, float64(value), label)
}
}

// ConfigWarning bundles message and explanation strings in one structure.
type ConfigWarning struct {
Message string
Explain string
}

var (
warnMetricsGenerator = ConfigWarning{
Message: "target == metrics-generator but metrics_generator_enabled != true",
Explain: "The metrics-generator will only receive data if metrics_generator_enabled is set to true globally",
}
warnCompleteBlockTimeout = ConfigWarning{
Message: "ingester.complete_block_timeout < storage.trace.blocklist_poll",
Explain: "You may receive 404s between the time the ingesters have flushed a trace and the querier is aware of the new block",
}
warnBlockRetention = ConfigWarning{
Message: "compactor.compaction.compacted_block_timeout < storage.trace.blocklist_poll",
Explain: "Queriers and Compactors may attempt to read a block that no longer exists",
}
warnRetentionConcurrency = ConfigWarning{
Message: "c.Compactor.Compactor.RetentionConcurrency must be greater than zero. Using default.",
Explain: fmt.Sprintf("default=%d", tempodb.DefaultRetentionConcurrency),
}
warnStorageTraceBackendS3 = ConfigWarning{
Message: "c.Compactor.Compactor.FlushSizeBytes < 5242880",
Explain: "Compaction flush size should be 5MB or higher for S3 backend",
}
warnBlocklistPollConcurrency = ConfigWarning{
Message: "c.StorageConfig.Trace.BlocklistPollConcurrency must be greater than zero. Using default.",
Explain: fmt.Sprintf("default=%d", tempodb.DefaultBlocklistPollConcurrency),
}
warnLogReceivedTraces = ConfigWarning{
Message: "c.Distributor.LogReceivedTraces is deprecated. The new flag is c.Distributor.log_received_spans.enabled",
}
warnStorageTraceBackendLocal = ConfigWarning{
Message: "Local backend will not correctly retrieve traces with a distributed deployment unless all components have access to the same disk. You should probably be using object storage as a backend.",
}
)
70 changes: 70 additions & 0 deletions cmd/tempo/app/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package app

import (
"testing"
"time"

"github.com/grafana/tempo/modules/distributor"
"github.com/grafana/tempo/modules/storage"
"github.com/grafana/tempo/tempodb"

"github.com/stretchr/testify/assert"
)

func TestConfig_CheckConfig(t *testing.T) {
tt := []struct {
name string
config *Config
expect []ConfigWarning
}{
{
name: "check default cfg and expect no warnings",
config: newDefaultConfig(),
expect: nil,
},
{
name: "hit all except local backend warnings",
config: &Config{
Target: MetricsGenerator,
StorageConfig: storage.Config{
Trace: tempodb.Config{
Backend: "s3",
BlocklistPoll: time.Minute,
},
},
Distributor: distributor.Config{
LogReceivedTraces: true,
},
},
expect: []ConfigWarning{
warnMetricsGenerator,
warnCompleteBlockTimeout,
warnBlockRetention,
warnRetentionConcurrency,
warnStorageTraceBackendS3,
warnBlocklistPollConcurrency,
warnLogReceivedTraces,
},
},
{
name: "hit local backend warnings",
config: func() *Config {
cfg := newDefaultConfig()
cfg.StorageConfig.Trace = tempodb.Config{
Backend: "local",
BlocklistPollConcurrency: 1,
}
cfg.Target = "something"
return cfg
}(),
expect: []ConfigWarning{warnStorageTraceBackendLocal},
},
}

for _, tc := range tt {
t.Run(tc.name, func(t *testing.T) {
warnings := tc.config.CheckConfig()
assert.Equal(t, tc.expect, warnings)
})
}
}
11 changes: 10 additions & 1 deletion cmd/tempo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,16 @@ func main() {
ballast := make([]byte, *ballastMBs*1024*1024)

// Warn the user for suspect configurations
config.CheckConfig()
if warnings := config.CheckConfig(); len(warnings) != 0 {
level.Warn(log.Logger).Log("-- CONFIGURATION WARNINGS --")
for _, w := range warnings {
output := []any{"msg", w.Message}
if w.Explain != "" {
output = append(output, "explain", w.Explain)
}
level.Warn(log.Logger).Log(output...)
}
}

// Start Tempo
t, err := app.New(*config)
Expand Down