Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
aaf8837
Base bufferer service
mapno Jun 27, 2025
dfe10c3
Basic Kafka functionality
mapno Jul 4, 2025
3c639e1
Basic block functionality
mapno Jul 8, 2025
e0a39e5
Align cutting with Kafka consumption
mapno Jul 8, 2025
33fb555
Make bufferer multi-tenant
mapno Jul 8, 2025
ea8b528
Fix manifest
mapno Jul 8, 2025
60073cd
Commit/watermark + concurrent cutting
mapno Jul 18, 2025
9397891
Remove per-tenant watermark code
mapno Jul 21, 2025
3635522
Add integration tests
mapno Jul 21, 2025
a9a6ee7
Merge remote-tracking branch 'origin/main' into rhythm-bufferer
mapno Jul 21, 2025
7a4fdd5
fmt
mapno Jul 21, 2025
d7b6368
bufferer -> live-store
mapno Jul 21, 2025
c569f06
Bump the opentelemetry-collector group with 19 updates (#5426)
dependabot[bot] Jul 21, 2025
553bf3d
fmt more
mapno Jul 21, 2025
599ca3e
Bump github.com/mark3labs/mcp-go from 0.31.0 to 0.34.0 (#5427)
dependabot[bot] Jul 21, 2025
041d3cc
feature: implement a listOffset by partition client (#5415)
javiermolinar Jul 21, 2025
6735a51
Split docs for MCP (#5417)
joe-elliott Jul 21, 2025
2a8d2d2
Bump the opentelemetry-contrib group with 8 updates (#5425)
dependabot[bot] Jul 22, 2025
f6dfd50
feat: allow configure group lag exporter update time (#5431)
javiermolinar Jul 22, 2025
aa13a3a
Simply mismatch check
mapno Jul 22, 2025
286ae33
docs: remove extra apostrophe (#5433)
javiermolinar Jul 22, 2025
d46d8da
Address review comments
mapno Jul 23, 2025
3099e2a
Merge remote-tracking branch 'origin/main' into rhythm-bufferer
mapno Jul 23, 2025
2ca9704
fmt
mapno Jul 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ Additionally the `compaction_tenant_backoff_total` metric has been renamed to `c
* [ENHANCEMENT] Add endpoint for partition downscaling [#4913](https://github.com/grafana/tempo/pull/4913) (@mapno)
* [ENHANCEMENT] Add alert for high error rate reported by vulture [#5206](https://github.com/grafana/tempo/pull/5206) (@ruslan-mikhailov)
* [ENHANCEMENT] Add backend scheduler and worker to the resources dashboard [#5241](https://github.com/grafana/tempo/pull/5241) (@javiermolinar)
* [ENHANCEMENT] Allow configuring the group lag exporter update time [#5431](https://github.com/grafana/tempo/pull/5431) (@javiermolinar)
* [ENHANCEMENT] TraceQL metrics performance increase for simple queries [#5247](https://github.com/grafana/tempo/pull/5247) (@mdisibio)
* [ENHANCEMENT] TraceQL search and metrics performance increase [#5280](https://github.com/grafana/tempo/pull/5280) (@mdisibio)
* [ENHANCEMENT] TraceQL performance improvement [#5218](https://github.com/grafana/tempo/pull/5218) (@mdisibio)
* [ENHANCEMENT] Implement a listOffset by partition client [#5415](https://github.com/grafana/tempo/pull/5415) (@javiermolinar)
* [ENHANCEMENT] Align traceql attribute struct for better performance [#5240](https://github.com/grafana/tempo/pull/5240) (@mdisibio)
* [ENHANCEMENT] Enable HTTP writes in the multi-tenant example [#5297](https://github.com/grafana/tempo/pull/5297)
* [ENHANCEMENT] Drop invalid prometheus label names in spanmetrics processor [#5122](https://github.com/grafana/tempo/pull/5122) (@KyriosGN0)
Expand Down
2 changes: 2 additions & 0 deletions cmd/tempo/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/grafana/dskit/signals"
"github.com/grafana/tempo/modules/backendworker"
"github.com/grafana/tempo/modules/blockbuilder"
"github.com/grafana/tempo/modules/livestore"
"github.com/jedib0t/go-pretty/v6/table"
"github.com/prometheus/common/version"
"go.uber.org/atomic"
Expand Down Expand Up @@ -84,6 +85,7 @@ type App struct {
MemberlistKV *memberlist.KVInitService
backendScheduler *backendscheduler.BackendScheduler
backendWorker *backendworker.BackendWorker
liveStore *livestore.LiveStore
signalsHandler *signals.Handler

HTTPAuthMiddleware middleware.Interface
Expand Down
3 changes: 3 additions & 0 deletions cmd/tempo/app/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
generator_client "github.com/grafana/tempo/modules/generator/client"
"github.com/grafana/tempo/modules/ingester"
ingester_client "github.com/grafana/tempo/modules/ingester/client"
"github.com/grafana/tempo/modules/livestore"
"github.com/grafana/tempo/modules/overrides"
"github.com/grafana/tempo/modules/querier"
"github.com/grafana/tempo/modules/storage"
Expand Down Expand Up @@ -62,6 +63,7 @@ type Config struct {
BackendScheduler backendscheduler.Config `yaml:"backend_scheduler,omitempty"`
BackenSchedulerClient backendscheduler_client.Config `yaml:"backend_scheduler_client,omitempty"`
BackendWorker backendworker.Config `yaml:"backend_worker,omitempty"`
LiveStore livestore.Config `yaml:"live_store,omitempty"`
}

func NewDefaultConfig() *Config {
Expand Down Expand Up @@ -144,6 +146,7 @@ func (c *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet) {
c.CacheProvider.RegisterFlagsAndApplyDefaults(util.PrefixConfig(prefix, "cache"), f)
c.BackendScheduler.RegisterFlagsAndApplyDefaults(util.PrefixConfig(prefix, "backend-scheduler"), f)
c.BackendWorker.RegisterFlagsAndApplyDefaults(util.PrefixConfig(prefix, "backend-worker"), f)
c.LiveStore.RegisterFlagsAndApplyDefaults(util.PrefixConfig(prefix, "live-store"), f)
}

// MultitenancyIsEnabled checks if multitenancy is enabled
Expand Down
32 changes: 31 additions & 1 deletion cmd/tempo/app/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/grafana/dskit/ring"
"github.com/grafana/dskit/server"
"github.com/grafana/dskit/services"
"github.com/grafana/tempo/modules/livestore"
jsoniter "github.com/json-iterator/go"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
Expand Down Expand Up @@ -82,6 +83,7 @@ const (
BlockBuilder string = "block-builder"
BackendScheduler string = "backend-scheduler"
BackendWorker string = "backend-worker"
LiveStore string = "live-store"

// composite targets
SingleBinary string = "all"
Expand Down Expand Up @@ -285,7 +287,7 @@ func (t *App) initIngester() (services.Service, error) {
t.cfg.Ingester.DedicatedColumns = t.cfg.StorageConfig.Trace.Block.DedicatedColumns
t.cfg.Ingester.IngestStorageConfig = t.cfg.Ingest

// In SingleBinary mode don't try to discover parition from host name. Always use
// In SingleBinary mode don't try to discover partition from host name. Always use
// partition 0. This is for small installs or local/debugging setups.
singlePartition := t.cfg.Target == SingleBinary

Expand Down Expand Up @@ -626,6 +628,7 @@ func (t *App) initMemberlistKV() (services.Service, error) {
t.cfg.Distributor.DistributorRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.cfg.Compactor.ShardingRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.cfg.BackendWorker.Ring.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV
t.cfg.LiveStore.PartitionRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV

// Only the memberlist endpoint uses static files currently
t.Server.HTTPRouter().PathPrefix("/static/").HandlerFunc(http.FileServer(http.FS(staticFiles)).ServeHTTP).Methods("GET")
Expand Down Expand Up @@ -748,6 +751,31 @@ func (t *App) initBackendWorker() (services.Service, error) {
return worker, nil
}

// initLiveStore builds the service backing the live-store module.
//
// When ingest is disabled it returns an idle service and leaves t.liveStore
// unset. Otherwise it copies the top-level ingest configuration into the
// live-store configuration, constructs the live-store, stores it on the App
// and returns it as the module's service.
func (t *App) initLiveStore() (services.Service, error) {
	if ingestEnabled := t.cfg.Ingest.Enabled; !ingestEnabled {
		return services.NewIdleService(nil, nil), nil
	}

	// The live-store is handed the same ingest configuration as the rest of the app.
	t.cfg.LiveStore.IngestConfig = t.cfg.Ingest

	// SingleBinary targets (small installs, local/debugging setups) always use
	// partition 0 instead of discovering the partition from the host name.
	usePartitionZero := t.cfg.Target == SingleBinary

	store, err := livestore.New(t.cfg.LiveStore, t.Overrides, log.Logger, prometheus.DefaultRegisterer, usePartitionZero)
	// Stored before the error check so t.liveStore always holds whatever New returned.
	t.liveStore = store
	if err != nil {
		return nil, fmt.Errorf("failed to create liveStore: %w", err)
	}

	// TODO: Support downscaling
	// t.Server.HTTPRouter().Methods(http.MethodGet, http.MethodPost, http.MethodDelete).
	// Path("/live-store/prepare-partition-downscale").
	// Handler(http.HandlerFunc(t.liveStore.PreparePartitionDownscaleHandler))

	return t.liveStore, nil
}

func (t *App) setupModuleManager() error {
mm := modules.NewManager(log.Logger)

Expand Down Expand Up @@ -781,6 +809,7 @@ func (t *App) setupModuleManager() error {
mm.RegisterModule(BlockBuilder, t.initBlockBuilder)
mm.RegisterModule(BackendScheduler, t.initBackendScheduler)
mm.RegisterModule(BackendWorker, t.initBackendWorker)
mm.RegisterModule(LiveStore, t.initLiveStore)

mm.RegisterModule(SingleBinary, nil)
mm.RegisterModule(ScalableSingleBinary, nil)
Expand Down Expand Up @@ -813,6 +842,7 @@ func (t *App) setupModuleManager() error {
BlockBuilder: {Common, Store, MemberlistKV, PartitionRing},
BackendScheduler: {Common, Store},
BackendWorker: {Common, Store, MemberlistKV},
LiveStore: {Common, MemberlistKV, PartitionRing},

// composite targets
SingleBinary: {Compactor, QueryFrontend, Querier, Ingester, Distributor, MetricsGenerator, BlockBuilder},
Expand Down
9 changes: 0 additions & 9 deletions docs/embed.go

This file was deleted.

90 changes: 90 additions & 0 deletions docs/sources/tempo/configuration/manifest.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ distributor:
producer_max_buffered_bytes: 0
target_consumer_lag_at_startup: 0s
max_consumer_lag_at_startup: 0s
consumer_group_lag_metric_update_interval: 0s
extend_writes: true
retry_after_on_resource_exhausted: 0s
max_attribute_bytes: 2048
Expand Down Expand Up @@ -715,6 +716,7 @@ ingest:
producer_max_buffered_bytes: 1073741824
target_consumer_lag_at_startup: 2s
max_consumer_lag_at_startup: 15s
consumer_group_lag_metric_update_interval: 1m0s
block_builder:
instance_id: hostname
assigned_partitions: {}
Expand Down Expand Up @@ -1100,4 +1102,92 @@ backend_worker:
instance_addr: ""
enable_inet6: false
wait_active_instance_timeout: 10m0s
live_store:
lifecycler:
ring:
kvstore:
store: consul
prefix: collectors/
consul:
host: localhost:8500
acl_token: ""
http_client_timeout: 20s
consistent_reads: false
watch_rate_limit: 1
watch_burst_size: 1
cas_retry_delay: 1s
etcd:
endpoints: []
dial_timeout: 10s
max_retries: 10
tls_enabled: false
tls_cert_path: ""
tls_key_path: ""
tls_ca_path: ""
tls_server_name: ""
tls_insecure_skip_verify: false
tls_cipher_suites: ""
tls_min_version: ""
username: ""
password: ""
multi:
primary: ""
secondary: ""
mirror_enabled: false
mirror_timeout: 2s
heartbeat_timeout: 1m0s
replication_factor: 3
zone_awareness_enabled: false
excluded_zones: ""
num_tokens: 128
heartbeat_period: 5s
heartbeat_timeout: 1m0s
observe_period: 0s
join_after: 0s
min_ready_duration: 15s
interface_names:
- eth0
enable_inet6: false
final_sleep: 0s
tokens_file_path: ""
availability_zone: ""
unregister_on_shutdown: true
readiness_check_ring_health: true
address: ""
port: 0
id: hostname
partition_ring:
kvstore:
store: memberlist
prefix: collectors/
consul:
host: localhost:8500
acl_token: ""
http_client_timeout: 20s
consistent_reads: false
watch_rate_limit: 1
watch_burst_size: 1
cas_retry_delay: 1s
etcd:
endpoints: []
dial_timeout: 10s
max_retries: 10
tls_enabled: false
tls_cert_path: ""
tls_key_path: ""
tls_ca_path: ""
tls_server_name: ""
tls_insecure_skip_verify: false
tls_cipher_suites: ""
tls_min_version: ""
username: ""
password: ""
multi:
primary: ""
secondary: ""
mirror_enabled: false
mirror_timeout: 2s
min_partition_owners_count: 1
min_partition_owners_duration: 10s
delete_inactive_partition_after: 13h0m0s
```
7 changes: 0 additions & 7 deletions docs/sources/tempo/traceql/construct-traceql-queries.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,6 @@ If, for a given trace, this pipeline produces a spanset then it's included in th

Refer to [TraceQL metrics queries](https://grafana.com/docs/tempo/<TEMPO_VERSION>/traceql/metrics-queries/) for examples of TraceQL metrics queries.

<!-- WARNING: This file is loaded by /modules/frontend/mcp.go and served to LLMs through the MCP protocol. It -->
<!-- should be kept terse and to the point. Links and videos and such are bad. Examples are good! The below "mcp-cutoff"
<!-- is used to remove everything above. -->
<!-- TODO: Anthropic suggests using xml tags to organize content delivered to an LLM. Can we tag this content with xml tags -->
<!-- that make both hugo and llms happy? -->
<!-- mcp-cutoff -->

## Examples

The following examples illustrate some commonly used queries.
Expand Down
13 changes: 3 additions & 10 deletions docs/sources/tempo/traceql/metrics-queries/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,15 @@ keywords:

<!-- If you add a new function to this page, make sure you also add it to the _index.md#functions section.-->

<!-- WARNING: This file is loaded by /modules/frontend/mcp.go and served to LLMs through the MCP protocol. It -->
<!-- should be kept terse and to the point. Links and videos and such are bad. Examples are good! The below "mcp-cutoff"
<!-- is used to remove everything above. -->
<!-- TODO: Anthropic suggests using xml tags to organize content delivered to an LLM. Can we tag this content with xml tags -->
<!-- that make both hugo and llms happy? -->
<!-- mcp-cutoff -->

TraceQL supports `rate`, `count_over_time`, `sum_over_time`, `min_over_time`, `avg_over_time`, `quantile_over_time`,
`histogram_over_time`, and `compare` functions. These methods can be appended to any TraceQL query to calculate and
`histogram_over_time`, and `compare` functions. These methods can be appended to any TraceQL query to calculate and
return the desired metrics like:

```
{} | rate()
```

Note that `topk` and `bottomk` are also supported to only return a subset of series. These can only be added
Note that `topk` and `bottomk` are also supported to only return a subset of series. These can only be added
after a metrics query like:

```
Expand Down Expand Up @@ -257,7 +250,7 @@ from 1 through `k` of the number of the top or bottom results.

For example:
```
`{ resource.service.name = "foo" } | rate() by (span.http.url) | topk(10)`
{ resource.service.name = "foo" } | rate() by (span.http.url) | topk(10)
```

The first part, `{ resource.service.name = "foo" }`, takes all spans in the service `foo`.
Expand Down
51 changes: 48 additions & 3 deletions example/docker-compose/ingest-storage/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,62 @@ services:
# environment:
# - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4318

ingester-2:
live-store-zone-a-0:
image: *tempoImage
depends_on:
- kafka
command: "-target=ingester -config.file=/etc/tempo.yaml"
command: "-target=live-store -config.file=/etc/tempo.yaml"
restart: always
volumes:
- ./tempo.yaml:/etc/tempo.yaml
ports:
- "3200" # tempo
hostname: live-store-zone-a-0
# Uncomment the following lines to enable tracing
# environment:
# - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4318

live-store-zone-a-1:
image: *tempoImage
depends_on:
- kafka
command: "-target=live-store -config.file=/etc/tempo.yaml"
restart: always
volumes:
- ./tempo.yaml:/etc/tempo.yaml
ports:
- "3200" # tempo
hostname: live-store-zone-a-1
# Uncomment the following lines to enable tracing
# environment:
# - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4318

live-store-zone-b-0:
image: *tempoImage
depends_on:
- kafka
command: "-target=live-store -config.file=/etc/tempo.yaml"
restart: always
volumes:
- ./tempo.yaml:/etc/tempo.yaml
ports:
- "3200" # tempo
hostname: live-store-zone-b-0
# Uncomment the following lines to enable tracing
# environment:
# - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4318

live-store-zone-b-1:
image: *tempoImage
depends_on:
- kafka
command: "-target=live-store -config.file=/etc/tempo.yaml"
restart: always
volumes:
- ./tempo.yaml:/etc/tempo.yaml
ports:
- "3200" # tempo
hostname: ingester-2
hostname: live-store-zone-b-1
# Uncomment the following lines to enable tracing
# environment:
# - OTEL_EXPORTER_OTLP_ENDPOINT=http://alloy:4318
Expand Down
12 changes: 9 additions & 3 deletions example/docker-compose/ingest-storage/tempo.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
server:
http_listen_port: 3200
log_level: info
log_level: debug

distributor:
kafka_write_path_enabled: true
Expand All @@ -26,7 +26,10 @@ memberlist:
join_members:
- ingester-0:7946
- ingester-1:7946
- ingester-2:7946
- live-store-zone-a-0:7946
- live-store-zone-a-1:7946
- live-store-zone-b-0:7946
- live-store-zone-b-1:7946

compactor:
compaction:
Expand Down Expand Up @@ -84,4 +87,7 @@ block_builder:
consume_cycle_duration: 30s
assigned_partitions:
block-builder-0: [0,2]
block-builder-1: [1]
block-builder-1: [1]

usage_report:
reporting_enabled: false
Loading
Loading