diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a9b102b65c..c0b0e721fd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,39 @@ tolerate_failed_blocks: ``` * [CHANGE] Upgrade memcached version in jsonnet microservices [#2466](https://github.com/grafana/tempo/pull/2466) (@zalegrala) +* [CHANGE] **Breaking Change** Convert metrics generator from deployment to a statefulset in jsonnet [#2533](https://github.com/grafana/tempo/pull/2533) (@zalegrala) +To support a new `processor`, the metrics generator has been converted from a `deployment` into a `statefulset` with a PVC. This will require manual intervention in order to migrate successfully and avoid downtime. Note that currently both a `deployment` and a `statefulset` will be managed by the jsonnet for a period of time, after which we will delete the `deployment` from this repo and you will need to delete user-side references to the `tempo_metrics_generator_deployment`, as well as delete the `deployment` itself. + +First, just as with the `ingester` configuration, you will need to specify a `pvc_size` and a `pvc_storage_class` for the `metrics_generator` PVC configuration. For example: +```jsonnet +{ + _config+:: { + metrics_generator+: { + pvc_size: '10Gi', + pvc_storage_class: 'local-path', + }, + } +} +``` + +Any user-side overrides for the `tempo_metrics_generator_deployment` need to be considered for the `tempo_metrics_generator_statefulset` object. + +Currently, the `deployment` replicas are set to `0` by default in the jsonnet, while the `statefulset` inherits replica configuration from the `$._config` object. To keep the `deployment` replicas around and make the transition without an outage, you can keep the replicas by overriding the following key. + +```jsonnet + tempo_metrics_generator_deployment+: + { spec+: { replicas: $._config.metrics_generator.replicas } }, +``` + +This will maintain the same number of replicas you have specified in the configuration for the `statefulset`. 
Note that this will be approximately double the resource requirements for a period of time while you stabilize the ring and prepare to scale down the `deployment`. + +You can check memberlist either with the `tempo_memberlist_client_cluster_members_count` metric, or you can visit the `http://tempo:3200/memberlist` page to see that metrics generator instances for both the `statefulset` and `deployment` are available. + +Once all instances are healthy, you can begin to scale down your `deployment` and delete the above reference to the `tempo_metrics_generator_deployment`. + +Without handling the above, a brief outage will be incurred for the metrics-generator, but everything should be functioning again once the `statefulset` for the metrics-generator is up and available. + + * [ENHANCEMENT] Add support to filter using negated regex operator `!~` [#2410](https://github.com/grafana/tempo/pull/2410) (@kousikmitra) * [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.azure` and `storage.trace.gcs` [#2386](https://github.com/grafana/tempo/pull/2386) (@kousikmitra) * [ENHANCEMENT] Add `prefix` configuration option to `storage.trace.s3` [#2362](https://github.com/grafana/tempo/pull/2362) (@kousikmitra) diff --git a/operations/jsonnet-compiled/Deployment-metrics-generator.yaml b/operations/jsonnet-compiled/Deployment-metrics-generator.yaml index 65e5cbc5c55..ec1649b8ebe 100644 --- a/operations/jsonnet-compiled/Deployment-metrics-generator.yaml +++ b/operations/jsonnet-compiled/Deployment-metrics-generator.yaml @@ -4,9 +4,7 @@ metadata: name: metrics-generator namespace: tracing spec: - minReadySeconds: 10 replicas: 0 - revisionHistoryLimit: 10 selector: matchLabels: app: metrics-generator @@ -31,7 +29,6 @@ spec: - -mem-ballast-size-mbs=1024 - -target=metrics-generator image: grafana/tempo:latest - imagePullPolicy: IfNotPresent name: metrics-generator ports: - containerPort: 3200 diff --git a/operations/jsonnet-compiled/StatefulSet-metrics-generator.yaml 
b/operations/jsonnet-compiled/StatefulSet-metrics-generator.yaml new file mode 100644 index 00000000000..329a7615923 --- /dev/null +++ b/operations/jsonnet-compiled/StatefulSet-metrics-generator.yaml @@ -0,0 +1,84 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: metrics-generator + namespace: tracing +spec: + podManagementPolicy: Parallel + replicas: 0 + selector: + matchLabels: + app: metrics-generator + name: metrics-generator + tempo-gossip-member: "true" + serviceName: metrics-generator + template: + metadata: + annotations: + config_hash: 5067569ac65e5c3c0d79d48abd17c511 + labels: + app: metrics-generator + name: metrics-generator + tempo-gossip-member: "true" + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + name: metrics-generator + topologyKey: kubernetes.io/hostname + containers: + - args: + - -config.file=/conf/tempo.yaml + - -mem-ballast-size-mbs=1024 + - -target=metrics-generator + image: grafana/tempo:latest + name: metrics-generator + ports: + - containerPort: 3200 + name: prom-metrics + readinessProbe: + httpGet: + path: /ready + port: 3200 + initialDelaySeconds: 15 + timeoutSeconds: 1 + resources: + limits: + cpu: "1" + ephemeral-storage: 11Gi + memory: 2Gi + requests: + cpu: 500m + ephemeral-storage: 10Gi + memory: 1Gi + volumeMounts: + - mountPath: /conf + name: tempo-conf + - mountPath: /var/tempo + name: metrics-generator-data + - mountPath: /overrides + name: overrides + volumes: + - configMap: + name: tempo-metrics-generator + name: tempo-conf + - configMap: + name: tempo-overrides + name: overrides + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + labels: + app: metrics-generator + name: metrics-generator-data + namespace: tracing + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: fast diff --git a/operations/jsonnet-compiled/util/example/main.jsonnet 
b/operations/jsonnet-compiled/util/example/main.jsonnet index 6e15447c2e3..100aa548f3c 100644 --- a/operations/jsonnet-compiled/util/example/main.jsonnet +++ b/operations/jsonnet-compiled/util/example/main.jsonnet @@ -46,6 +46,8 @@ tempo { }, }, metrics_generator+: { + pvc_size: '10Gi', + pvc_storage_class: 'fast', ephemeral_storage_request_size: '10Gi', ephemeral_storage_limit_size: '11Gi', }, diff --git a/operations/jsonnet-compiled/util/jsonnetfile.lock.json b/operations/jsonnet-compiled/util/jsonnetfile.lock.json index 795a8b2add6..6b29e3edc48 100644 --- a/operations/jsonnet-compiled/util/jsonnetfile.lock.json +++ b/operations/jsonnet-compiled/util/jsonnetfile.lock.json @@ -8,7 +8,7 @@ "subdir": "ksonnet-util" } }, - "version": "fe2c78b6090156abcb877a4feb9bd288aef79118", + "version": "ef364ae521bbb2304f23845e36182a908e295789", "sum": "0y3AFX9LQSpfWTxWKSwoLgbt0Wc9nnCwhMH2szKzHv0=" }, { @@ -18,8 +18,8 @@ "subdir": "memcached" } }, - "version": "fe2c78b6090156abcb877a4feb9bd288aef79118", - "sum": "SWywAq4U0MRPMbASU0Ez8O9ArRNeoZzb75sEuReueow=" + "version": "ef364ae521bbb2304f23845e36182a908e295789", + "sum": "Cc715Y3rgTuimgDFIw+FaKzXSJGRYwt1pFTMbdrNBD8=" }, { "source": { @@ -28,7 +28,7 @@ "subdir": "1.21" } }, - "version": "a0779420abf73f90167d449a9e527319ced7d134", + "version": "44a9f3d21c089a01f62b22e25bdf553f488a74e8", "sum": "b8GtKWztbpnnMojHt8A9sfkEgs+2t2rtvZcpDteuLFo=" }, { diff --git a/operations/jsonnet-compiled/util/vendor/github.com/grafana/jsonnet-libs/memcached/memcached.libsonnet b/operations/jsonnet-compiled/util/vendor/github.com/grafana/jsonnet-libs/memcached/memcached.libsonnet index 46219dd06d9..7a3131a76d2 100644 --- a/operations/jsonnet-compiled/util/vendor/github.com/grafana/jsonnet-libs/memcached/memcached.libsonnet +++ b/operations/jsonnet-compiled/util/vendor/github.com/grafana/jsonnet-libs/memcached/memcached.libsonnet @@ -33,6 +33,7 @@ k { std.max(self.memory_limit_mb * 1.5 * 1024 * 1024, self.memory_request_bytes), use_topology_spread:: 
false, topology_spread_max_skew:: 1, + extended_options:: [], local container = $.core.v1.container, local containerPort = $.core.v1.containerPort, @@ -40,12 +41,15 @@ k { memcached_container:: container.new('memcached', $._images.memcached) + container.withPorts([containerPort.new('client', 11211)]) + - container.withArgs([ - '-m %(memory_limit_mb)s' % self, - '-I %(max_item_size)s' % self, - '-c %(connection_limit)s' % self, - '-v', - ]) + + container.withArgs( + [ + '-m %(memory_limit_mb)s' % self, + '-I %(max_item_size)s' % self, + '-c %(connection_limit)s' % self, + '-v', + ] + + if std.length(self.extended_options) != 0 then ['--extended=' + std.join(',', self.extended_options)] else [] + ) + $.util.resourcesRequests(self.cpu_requests, $.util.bytesToK8sQuantity(self.memory_request_bytes)) + $.util.resourcesLimits(self.cpu_limits, $.util.bytesToK8sQuantity(self.memory_limits_bytes)), diff --git a/operations/jsonnet/microservices/Readme.md b/operations/jsonnet/microservices/README.md similarity index 100% rename from operations/jsonnet/microservices/Readme.md rename to operations/jsonnet/microservices/README.md diff --git a/operations/jsonnet/microservices/config.libsonnet b/operations/jsonnet/microservices/config.libsonnet index e6602ed42cf..446bd44e630 100644 --- a/operations/jsonnet/microservices/config.libsonnet +++ b/operations/jsonnet/microservices/config.libsonnet @@ -91,6 +91,8 @@ }, }, metrics_generator: { + pvc_size: error 'Must specify a metrics-generator pvc size', + pvc_storage_class: error 'Must specify a metrics-generator pvc storage class', ephemeral_storage_request_size: error 'Must specify a generator ephemeral_storage_request size', ephemeral_storage_limit_size: error 'Must specify a metrics generator ephemeral_storage_limit size', replicas: 0, diff --git a/operations/jsonnet/microservices/generator.libsonnet b/operations/jsonnet/microservices/generator.libsonnet index 158f5255e6e..10c65cef900 100644 --- 
a/operations/jsonnet/microservices/generator.libsonnet +++ b/operations/jsonnet/microservices/generator.libsonnet @@ -1,15 +1,19 @@ { - local k = import 'ksonnet-util/kausal.libsonnet', + local k = import 'k.libsonnet', + local kausal = import 'ksonnet-util/kausal.libsonnet', local container = k.core.v1.container, - local containerPort = k.core.v1.containerPort, + local containerPort = kausal.core.v1.containerPort, local deployment = k.apps.v1.deployment, + local statefulset = k.apps.v1.statefulSet, local volume = k.core.v1.volume, + local pvc = k.core.v1.persistentVolumeClaim, local volumeMount = k.core.v1.volumeMount, local target_name = 'metrics-generator', local tempo_config_volume = 'tempo-conf', local tempo_generator_wal_volume = 'metrics-generator-wal-data', + local tempo_data_volume = 'metrics-generator-data', local tempo_overrides_config_volume = 'overrides', tempo_metrics_generator_ports:: [containerPort.new('prom-metrics', $._config.port)], @@ -19,13 +23,21 @@ 'mem-ballast-size-mbs': $._config.ballast_size_mbs, }, + tempo_metrics_generator_pvc:: + pvc.new(tempo_data_volume) + + pvc.mixin.spec.resources.withRequests({ storage: $._config.metrics_generator.pvc_size }) + + pvc.mixin.spec.withAccessModes(['ReadWriteOnce']) + + pvc.mixin.spec.withStorageClassName($._config.metrics_generator.pvc_storage_class) + + pvc.mixin.metadata.withLabels({ app: target_name }) + + pvc.mixin.metadata.withNamespace($._config.namespace), + tempo_metrics_generator_container:: container.new(target_name, $._images.tempo) + container.withPorts($.tempo_metrics_generator_ports) + container.withArgs($.util.mapToFlags($.tempo_metrics_generator_args)) + container.withVolumeMounts([ volumeMount.new(tempo_config_volume, '/conf'), - volumeMount.new(tempo_generator_wal_volume, $.tempo_metrics_generator_config.metrics_generator.storage.path), + volumeMount.new(tempo_data_volume, '/var/tempo'), volumeMount.new(tempo_overrides_config_volume, '/overrides'), ]) + 
$.util.withResources($._config.metrics_generator.resources) + @@ -38,8 +50,14 @@ tempo_metrics_generator_deployment: deployment.new( target_name, - $._config.metrics_generator.replicas, - $.tempo_metrics_generator_container, + 0, + $.tempo_metrics_generator_container + + + container.withVolumeMounts([ + volumeMount.new(tempo_config_volume, '/conf'), + volumeMount.new(tempo_generator_wal_volume, $.tempo_metrics_generator_config.metrics_generator.storage.path), + volumeMount.new(tempo_overrides_config_volume, '/overrides'), + ]), { app: target_name, [$._config.gossip_member_label]: 'true', @@ -54,8 +72,35 @@ volume.fromConfigMap(tempo_config_volume, $.tempo_metrics_generator_configmap.metadata.name), volume.fromConfigMap(tempo_overrides_config_volume, $._config.overrides_configmap_name), volume.fromEmptyDir(tempo_generator_wal_volume), - ]), + ]) + , + + newGeneratorStatefulSet(name, container, with_anti_affinity=true):: /* Builds a metrics-generator StatefulSet named `name` that runs `container`; with_anti_affinity=false omits every pod anti-affinity mixin. */ + statefulset.new( + name, + $._config.metrics_generator.replicas, + container, /* use the caller-supplied container instead of ignoring the parameter */ + self.tempo_metrics_generator_pvc, + { + app: target_name, + [$._config.gossip_member_label]: 'true', + }, + ) + + (if with_anti_affinity then kausal.util.antiAffinityStatefulSet else {}) /* gated so with_anti_affinity=false really disables anti-affinity */ + + statefulset.mixin.spec.withServiceName(target_name) + + statefulset.mixin.spec.template.metadata.withAnnotations({ + config_hash: std.md5(std.toString($.tempo_metrics_generator_configmap.data['tempo.yaml'])), + }) + + statefulset.mixin.spec.template.spec.withVolumes([ + volume.fromConfigMap(tempo_config_volume, $.tempo_metrics_generator_configmap.metadata.name), + volume.fromConfigMap(tempo_overrides_config_volume, $._config.overrides_configmap_name), + ]) + + statefulset.mixin.spec.withPodManagementPolicy('Parallel') + + $.util.podPriority('high') + + (if with_anti_affinity then $.util.antiAffinity else {}), + + tempo_metrics_generator_statefulset: $.newGeneratorStatefulSet(target_name, self.tempo_metrics_generator_container) + 
statefulset.mixin.spec.withReplicas($._config.metrics_generator.replicas), tempo_metrics_generator_service: - k.util.serviceFor($.tempo_metrics_generator_deployment), + kausal.util.serviceFor($.tempo_metrics_generator_deployment), } diff --git a/operations/jsonnet/microservices/test/environments/default/main.jsonnet b/operations/jsonnet/microservices/test/environments/default/main.jsonnet index 9767ec9395b..9e37fc7aa76 100644 --- a/operations/jsonnet/microservices/test/environments/default/main.jsonnet +++ b/operations/jsonnet/microservices/test/environments/default/main.jsonnet @@ -27,6 +27,8 @@ tempo { }, }, metrics_generator+: { + pvc_size: '5Gi', + pvc_storage_class: 'local-path', ephemeral_storage_limit_size: '2Gi', ephemeral_storage_request_size: '1Gi', },