Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -62,6 +62,7 @@
* [ENHANCEMENT] Compactor: validation of blocks uploaded via the TSDB block upload feature is now configurable on a per tenant basis: #4585
* `-compactor.block-upload-validation-enabled` has been added, `compactor_block_upload_validation_enabled` can be used to override per tenant
* `-compactor.block-upload.block-validation-enabled` was the previous global flag and has been removed
* [ENHANCEMENT] Query-frontend: add experimental limit to enforce a max query expression size in bytes via `-query-frontend.max-query-expression-size-bytes` or `max_query_expression_size_bytes`. #4604
* [BUGFIX] Querier: Streaming remote read will now continue to return multiple chunks per frame after the first frame. #4423
* [BUGFIX] Store-gateway: the values for `stage="processed"` for the metrics `cortex_bucket_store_series_data_touched` and `cortex_bucket_store_series_data_size_touched_bytes` when using fine-grained chunks caching is now reporting the correct values of chunks held in memory. #4449
* [BUGFIX] Compactor: fixed reporting a compaction error when compactor is correctly shut down while populating blocks. #4580
11 changes: 11 additions & 0 deletions cmd/mimir/config-descriptor.json
@@ -3261,6 +3261,17 @@
"fieldType": "duration",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "max_query_expression_size_bytes",
"required": false,
"desc": "Max size of the raw query, in bytes. 0 to not apply a limit to the size of the query.",
"fieldValue": null,
"fieldDefaultValue": 0,
"fieldFlag": "query-frontend.max-query-expression-size-bytes",
"fieldType": "int",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "cardinality_analysis_enabled",
2 changes: 2 additions & 0 deletions cmd/mimir/help-all.txt.tmpl
@@ -1655,6 +1655,8 @@ Usage of ./cmd/mimir/mimir:
Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux. (default 1m)
-query-frontend.max-queriers-per-tenant int
Maximum number of queriers that can handle requests for a single tenant. If set to 0 or value higher than number of available queriers, *all* queriers will handle requests for the tenant. Each frontend (or query-scheduler, if used) will select the same set of queriers for the same tenant (given that all queriers are connected to all frontends / query-schedulers). This option only works with queriers connecting to the query-frontend / query-scheduler, not when using downstream URL.
-query-frontend.max-query-expression-size-bytes int
[experimental] Max size of the raw query, in bytes. 0 to not apply a limit to the size of the query.
-query-frontend.max-retries-per-request int
Maximum number of retries for a single request; beyond this, the downstream error is returned. (default 5)
-query-frontend.max-total-query-length duration
1 change: 1 addition & 0 deletions docs/sources/mimir/configure/about-versioning.md
@@ -93,6 +93,7 @@ The following features are currently experimental:
- Lower TTL for cache entries overlapping the out-of-order samples ingestion window (re-using `-ingester.out-of-order-allowance` from ingesters)
- Cardinality-based query sharding (`-query-frontend.query-sharding-target-series-per-shard`)
- Use of Redis cache backend (`-query-frontend.results-cache.backend=redis`)
- Query expression size limit (`-query-frontend.max-query-expression-size-bytes`)
- Query-scheduler
- `-query-scheduler.querier-forget-delay`
- Max number of used instances (`-query-scheduler.max-used-instances`)
12 changes: 12 additions & 0 deletions docs/sources/mimir/operators-guide/mimir-runbooks/_index.md
@@ -1534,6 +1534,18 @@ This limit is applied to range queries before they are split (according to time)
To configure the limit on a per-tenant basis, use the `-query-frontend.max-total-query-length` option (or `max_total_query_length` in the runtime configuration).
If this limit is set to 0, it takes its value from `-store.max-query-length`.

### err-mimir-max-query-expression-size-bytes

This error occurs when the size of a raw query exceeds the configured maximum size (in bytes).

This limit protects the system's stability from potential abuse or mistakes when running a large, potentially expensive query.
To configure the limit on a per-tenant basis, use the `-query-frontend.max-query-expression-size-bytes` option (or `max_query_expression_size_bytes` in the runtime configuration).

How to **fix** it:

- Consider reducing the size of the query. It's possible there's a simpler way to select the desired data or a better way to export data from Mimir.
- Consider increasing the per-tenant limit by using the `-query-frontend.max-query-expression-size-bytes` option (or `max_query_expression_size_bytes` in the runtime configuration).
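
The enforcement this PR adds to the query-frontend boils down to a byte-length comparison against the effective limit. A minimal sketch (hypothetical helper name, not the actual Mimir API):

```go
package main

import "fmt"

// checkQuerySize is an illustrative sketch of the check added in
// pkg/frontend/querymiddleware/limits.go: the raw PromQL string's byte
// length is compared against the effective limit; 0 disables the check.
func checkQuerySize(query string, maxSizeBytes int) error {
	// len() on a Go string counts bytes, which matches the limit's unit.
	if maxSizeBytes > 0 && len(query) > maxSizeBytes {
		return fmt.Errorf(
			"err-mimir-max-query-expression-size-bytes: query size %d exceeds limit %d",
			len(query), maxSizeBytes)
	}
	return nil
}

func main() {
	fmt.Println(checkQuerySize(`sum(rate(http_requests_total[5m]))`, 16384)) // <nil>
	fmt.Println(checkQuerySize(`up`, 0))                                     // <nil> (limit disabled)
}
```

Note the check runs before the query is parsed or split, so it rejects oversized expressions cheaply, without spending parser or planner work on them.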

### err-mimir-tenant-max-request-rate

This error occurs when the rate of write requests per second is exceeded for this tenant.
Expand Down
@@ -2737,6 +2737,11 @@ The `limits` block configures default and per-tenant limits imposed by component
# CLI flag: -query-frontend.results-cache-ttl-for-out-of-order-time-window
[results_cache_ttl_for_out_of_order_time_window: <duration> | default = 10m]

# (experimental) Max size of the raw query, in bytes. 0 to not apply a limit to
# the size of the query.
# CLI flag: -query-frontend.max-query-expression-size-bytes
[max_query_expression_size_bytes: <int> | default = 0]

# Enables endpoints used for cardinality analysis.
# CLI flag: -querier.cardinality-analysis-enabled
[cardinality_analysis_enabled: <boolean> | default = false]
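
As with other limits in this block, the new option can also be overridden per tenant via the runtime configuration. A sketch (tenant name and value are illustrative):

```yaml
# Runtime configuration: per-tenant override for the new limit.
overrides:
  tenant-a:
    max_query_expression_size_bytes: 16384
```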
12 changes: 12 additions & 0 deletions pkg/frontend/querymiddleware/limits.go
@@ -39,6 +39,10 @@ type Limits interface {
// frontend will process in parallel.
MaxQueryParallelism(userID string) int

// MaxQueryExpressionSizeBytes returns the maximum size of a raw query, in
// bytes. 0 means "unlimited".
MaxQueryExpressionSizeBytes(userID string) int

// MaxCacheFreshness returns the period after which results are cacheable,
// to prevent caching of very recent results.
MaxCacheFreshness(userID string) time.Duration
@@ -153,6 +157,14 @@ func (l limitsMiddleware) Do(ctx context.Context, r Request) (Response, error) {
}
}

// Enforce max query size, in bytes.
if maxQuerySize := validation.SmallestPositiveNonZeroIntPerTenant(tenantIDs, l.MaxQueryExpressionSizeBytes); maxQuerySize > 0 {
querySize := len(r.GetQuery())
if querySize > maxQuerySize {
return nil, apierror.New(apierror.TypeBadData, validation.NewMaxQueryExpressionSizeBytesError(querySize, maxQuerySize).Error())
}
}

// Enforce the max query length.
if maxQueryLength := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, l.MaxTotalQueryLength); maxQueryLength > 0 {
queryLen := timestamp.Time(r.GetEnd()).Sub(timestamp.Time(r.GetStart()))
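
The middleware resolves the effective limit with `validation.SmallestPositiveNonZeroIntPerTenant`: across all tenants of a federated query, the strictest enabled limit wins, and tenants with the limit set to 0 (disabled) are ignored. A sketch of those semantics (the function below is an illustration, not the actual `validation` implementation):

```go
package main

import "fmt"

// smallestPositiveNonZero mirrors the semantics of
// validation.SmallestPositiveNonZeroIntPerTenant as used above: the
// smallest limit greater than zero applies; zeros (disabled) are skipped.
func smallestPositiveNonZero(values []int) int {
	result := 0
	for _, v := range values {
		if v > 0 && (result == 0 || v < result) {
			result = v
		}
	}
	return result
}

func main() {
	// One tenant at 100 and one disabled (0) still yields an effective
	// limit of 100, which is why such queries fail in the tests.
	fmt.Println(smallestPositiveNonZero([]int{100, 0}))    // 100
	fmt.Println(smallestPositiveNonZero([]int{100, 2000})) // 100
	fmt.Println(smallestPositiveNonZero([]int{0, 0}))      // 0 (limit disabled)
}
```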
147 changes: 147 additions & 0 deletions pkg/frontend/querymiddleware/limits_test.go
@@ -7,12 +7,15 @@ package querymiddleware

import (
"context"
"fmt"
"net/http"
"strings"
"sync"
"testing"
"time"

"github.com/go-kit/log"
"github.com/grafana/dskit/tenant"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
@@ -126,6 +129,77 @@ func TestLimitsMiddleware_MaxQueryLookback(t *testing.T) {
}
}

func TestLimitsMiddleware_MaxQueryExpressionSizeBytes(t *testing.T) {
now := time.Now()

tests := map[string]struct {
query string
queryLimits map[string]int
expectError bool
}{
"should fail for queries longer than the limit": {
query: fmt.Sprintf("up{foo=\"%s\"}", strings.Repeat("a", 1000)),
queryLimits: map[string]int{"test1": 100, "test2": 100},
expectError: true,
},
"should fail for queries longer than one tenant's limit": {
query: fmt.Sprintf("up{foo=\"%s\"}", strings.Repeat("a", 1000)),
queryLimits: map[string]int{"test1": 100, "test2": 2000},
expectError: true,
},
"should fail for queries longer than one tenant's limit when the other tenant's limit is disabled": {
query: fmt.Sprintf("up{foo=\"%s\"}", strings.Repeat("a", 1000)),
queryLimits: map[string]int{"test1": 100, "test2": 0},
expectError: true,
},
"should work for queries under the limit": {
query: fmt.Sprintf("up{foo=\"%s\"}", strings.Repeat("a", 50)),
queryLimits: map[string]int{"test1": 100, "test2": 100},
expectError: false,
},
"should work for queries when the limit is disabled": {
query: fmt.Sprintf("up{foo=\"%s\"}", strings.Repeat("a", 50)),
queryLimits: map[string]int{"test1": 0, "test2": 0},
expectError: false,
},
}

for testName, testData := range tests {
t.Run(testName, func(t *testing.T) {
req := &PrometheusRangeQueryRequest{
Query: testData.query,
Start: util.TimeToMillis(now.Add(-time.Hour * 2)),
End: util.TimeToMillis(now.Add(-time.Hour)),
}

tenant.WithDefaultResolver(tenant.NewMultiResolver())
limits := multiTenantMockLimits{
byTenant: map[string]mockLimits{
"test1": {maxQueryExpressionSizeBytes: testData.queryLimits["test1"]},
"test2": {maxQueryExpressionSizeBytes: testData.queryLimits["test2"]},
},
}
middleware := newLimitsMiddleware(limits, log.NewNopLogger())

innerRes := newEmptyPrometheusResponse()
inner := &mockHandler{}
inner.On("Do", mock.Anything, mock.Anything).Return(innerRes, nil)

ctx := user.InjectOrgID(context.Background(), "test1|test2")
outer := middleware.Wrap(inner)
res, err := outer.Do(ctx, req)

if testData.expectError {
require.Error(t, err)
require.Contains(t, err.Error(), "err-mimir-max-query-expression-size-bytes")
} else {
require.NoError(t, err)
require.Same(t, innerRes, res)
}
})
}
}

func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
const (
thirtyDays = 30 * 24 * time.Hour
@@ -277,10 +351,79 @@ func TestLimitsMiddleware_CreationGracePeriod(t *testing.T) {
}
}

type multiTenantMockLimits struct {
byTenant map[string]mockLimits
}

func (m multiTenantMockLimits) MaxQueryLookback(userID string) time.Duration {
return m.byTenant[userID].maxQueryLookback
}

func (m multiTenantMockLimits) MaxQueryLength(userID string) time.Duration {
return m.byTenant[userID].maxQueryLength
}

func (m multiTenantMockLimits) MaxTotalQueryLength(userID string) time.Duration {
return m.byTenant[userID].maxTotalQueryLength
}

func (m multiTenantMockLimits) MaxQueryExpressionSizeBytes(userID string) int {
return m.byTenant[userID].maxQueryExpressionSizeBytes
}

func (m multiTenantMockLimits) MaxQueryParallelism(userID string) int {
return m.byTenant[userID].maxQueryParallelism
}

func (m multiTenantMockLimits) MaxCacheFreshness(userID string) time.Duration {
return m.byTenant[userID].maxCacheFreshness
}

func (m multiTenantMockLimits) QueryShardingTotalShards(userID string) int {
return m.byTenant[userID].totalShards
}

func (m multiTenantMockLimits) QueryShardingMaxShardedQueries(userID string) int {
return m.byTenant[userID].maxShardedQueries
}

func (m multiTenantMockLimits) SplitInstantQueriesByInterval(userID string) time.Duration {
return m.byTenant[userID].splitInstantQueriesInterval
}

func (m multiTenantMockLimits) CompactorSplitAndMergeShards(userID string) int {
return m.byTenant[userID].compactorShards
}

func (m multiTenantMockLimits) CompactorBlocksRetentionPeriod(userID string) time.Duration {
return m.byTenant[userID].compactorBlocksRetentionPeriod
}

func (m multiTenantMockLimits) OutOfOrderTimeWindow(userID string) time.Duration {
return m.byTenant[userID].outOfOrderTimeWindow
}

func (m multiTenantMockLimits) ResultsCacheTTL(userID string) time.Duration {
return m.byTenant[userID].resultsCacheTTL
}

func (m multiTenantMockLimits) ResultsCacheTTLForOutOfOrderTimeWindow(userID string) time.Duration {
return m.byTenant[userID].resultsCacheOutOfOrderWindowTTL
}

func (m multiTenantMockLimits) CreationGracePeriod(userID string) time.Duration {
return m.byTenant[userID].creationGracePeriod
}

func (m multiTenantMockLimits) NativeHistogramsIngestionEnabled(userID string) bool {
return m.byTenant[userID].nativeHistogramsIngestionEnabled
}

type mockLimits struct {
maxQueryLookback time.Duration
maxQueryLength time.Duration
maxTotalQueryLength time.Duration
maxQueryExpressionSizeBytes int
maxCacheFreshness time.Duration
maxQueryParallelism int
maxShardedQueries int
@@ -310,6 +453,10 @@ func (m mockLimits) MaxTotalQueryLength(string) time.Duration {
return m.maxTotalQueryLength
}

func (m mockLimits) MaxQueryExpressionSizeBytes(string) int {
return m.maxQueryExpressionSizeBytes
}

func (m mockLimits) MaxQueryParallelism(string) int {
if m.maxQueryParallelism == 0 {
return 14 // Flag default.
11 changes: 6 additions & 5 deletions pkg/util/globalerror/errors.go
@@ -49,11 +49,12 @@ const (
MetricMetadataHelpTooLong ID = "help-too-long" // unused, left here to prevent reuse for different purpose
MetricMetadataUnitTooLong ID = "unit-too-long"

MaxQueryLength ID = "max-query-length"
MaxTotalQueryLength ID = "max-total-query-length"
RequestRateLimited ID = "tenant-max-request-rate"
IngestionRateLimited ID = "tenant-max-ingestion-rate"
TooManyHAClusters ID = "tenant-too-many-ha-clusters"
MaxQueryLength ID = "max-query-length"
MaxTotalQueryLength ID = "max-total-query-length"
MaxQueryExpressionSizeBytes ID = "max-query-expression-size-bytes"
RequestRateLimited ID = "tenant-max-request-rate"
IngestionRateLimited ID = "tenant-max-ingestion-rate"
TooManyHAClusters ID = "tenant-too-many-ha-clusters"

SampleTimestampTooOld ID = "sample-timestamp-too-old"
SampleOutOfOrder ID = "sample-out-of-order"
6 changes: 6 additions & 0 deletions pkg/util/validation/errors.go
@@ -258,6 +258,12 @@ func NewMaxTotalQueryLengthError(actualQueryLen, maxTotalQueryLength time.Durati
maxTotalQueryLengthFlag))
}

func NewMaxQueryExpressionSizeBytesError(actualSizeBytes, maxQuerySizeBytes int) LimitError {
return LimitError(globalerror.MaxQueryExpressionSizeBytes.MessageWithPerTenantLimitConfig(
fmt.Sprintf("the raw query size in bytes exceeds the limit (query size: %d, limit: %d)", actualSizeBytes, maxQuerySizeBytes),
maxQueryExpressionSizeBytesFlag))
}

func NewRequestRateLimitedError(limit float64, burst int) LimitError {
return LimitError(globalerror.RequestRateLimited.MessageWithPerTenantLimitConfig(
fmt.Sprintf("the request has been rejected because the tenant exceeded the request rate limit, set to %v requests/s across all distributors with a maximum allowed burst of %d", limit, burst),
8 changes: 8 additions & 0 deletions pkg/util/validation/limits.go
@@ -41,6 +41,7 @@ const (
maxQueryLengthFlag = "store.max-query-length"
maxPartialQueryLengthFlag = "querier.max-partial-query-length"
maxTotalQueryLengthFlag = "query-frontend.max-total-query-length"
maxQueryExpressionSizeBytesFlag = "query-frontend.max-query-expression-size-bytes"
requestRateFlag = "distributor.request-rate-limit"
requestBurstSizeFlag = "distributor.request-burst-size"
ingestionRateFlag = "distributor.ingestion-rate-limit"
@@ -129,6 +130,7 @@ type Limits struct {
MaxTotalQueryLength model.Duration `yaml:"max_total_query_length" json:"max_total_query_length"`
ResultsCacheTTL model.Duration `yaml:"results_cache_ttl" json:"results_cache_ttl" category:"experimental"`
ResultsCacheTTLForOutOfOrderTimeWindow model.Duration `yaml:"results_cache_ttl_for_out_of_order_time_window" json:"results_cache_ttl_for_out_of_order_time_window" category:"experimental"`
MaxQueryExpressionSizeBytes int `yaml:"max_query_expression_size_bytes" json:"max_query_expression_size_bytes" category:"experimental"`

// Cardinality
CardinalityAnalysisEnabled bool `yaml:"cardinality_analysis_enabled" json:"cardinality_analysis_enabled"`
@@ -259,6 +261,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.Var(&l.ResultsCacheTTL, resultsCacheTTLFlag, fmt.Sprintf("Time to live duration for cached query results. If query falls into out-of-order time window, -%s is used instead.", resultsCacheTTLForOutOfOrderWindowFlag))
_ = l.ResultsCacheTTLForOutOfOrderTimeWindow.Set("10m")
f.Var(&l.ResultsCacheTTLForOutOfOrderTimeWindow, resultsCacheTTLForOutOfOrderWindowFlag, fmt.Sprintf("Time to live duration for cached query results if query falls into out-of-order time window. This is lower than -%s so that incoming out-of-order samples are returned in the query results sooner.", resultsCacheTTLFlag))
f.IntVar(&l.MaxQueryExpressionSizeBytes, maxQueryExpressionSizeBytesFlag, 0, "Max size of the raw query, in bytes. 0 to not apply a limit to the size of the query.")

// Store-gateway.
f.IntVar(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The tenant's shard size, used when store-gateway sharding is enabled. Value of 0 disables shuffle sharding for the tenant, that is all tenant blocks are sharded across all store-gateway replicas.")
@@ -508,6 +511,11 @@ func (o *Overrides) MaxTotalQueryLength(userID string) time.Duration {
return t
}

// MaxQueryExpressionSizeBytes returns the limit of the raw query size, in bytes.
func (o *Overrides) MaxQueryExpressionSizeBytes(userID string) int {
return o.getOverridesForUser(userID).MaxQueryExpressionSizeBytes
}

// MaxLabelsQueryLength returns the limit of the length (in time) of a label names or values request.
func (o *Overrides) MaxLabelsQueryLength(userID string) time.Duration {
return time.Duration(o.getOverridesForUser(userID).MaxLabelsQueryLength)