benchmark: use custom duration in prom metric

Arko Dasgupta · Arko Dasgupta · commit fe19e99c8ea7 · 2025-08-20T00:08:57.000-07:00
* Determine the rate window from the test's actual elapsed time instead of a
hard-coded 30s, which may read values that span multiple tests.

Signed-off-by: Arko Dasgupta <arko@tetrate.io>
diff --git a/test/benchmark/suite/report.go b/test/benchmark/suite/report.go
@@ -24,11 +24,12 @@ import (
 )
 
 const (
+	DURATION_FORMATTER         = "%DURATION"
 	controlPlaneContainerMemQL = `process_resident_memory_bytes{namespace="envoy-gateway-system", control_plane="envoy-gateway"}/1024/1024`
 	controlPlaneProcessMemQL   = `go_memstats_heap_inuse_bytes{namespace="envoy-gateway-system", control_plane="envoy-gateway"}/1024/1024`
-	controlPlaneCPUQL          = `rate(process_cpu_seconds_total{namespace="envoy-gateway-system", control_plane="envoy-gateway"}[30s])*100`
+	controlPlaneCPUQL          = `rate(process_cpu_seconds_total{namespace="envoy-gateway-system", control_plane="envoy-gateway"}[%DURATIONs])*100`
 	dataPlaneMemQL             = `container_memory_working_set_bytes{namespace="envoy-gateway-system", container="envoy"}/1024/1024`
-	dataPlaneCPUQL             = `rate(container_cpu_usage_seconds_total{namespace="envoy-gateway-system", container="envoy"}[30s])*100`
+	dataPlaneCPUQL             = `rate(container_cpu_usage_seconds_total{namespace="envoy-gateway-system", container="envoy"}[%DURATIONs])*100`
 )
 
 // BenchmarkMetricSample contains sampled metrics and profiles data.
@@ -63,10 +64,10 @@ func NewBenchmarkReport(name, profilesOutputDir string, kubeClient kube.CLIClien
 	}
 }
 
-func (r *BenchmarkReport) Sample(ctx context.Context) (err error) {
+func (r *BenchmarkReport) Sample(ctx context.Context, startTime time.Time) (err error) {
 	sample := BenchmarkMetricSample{}
 
-	if mErr := r.sampleMetrics(ctx, &sample); mErr != nil {
+	if mErr := r.sampleMetrics(ctx, &sample, startTime); mErr != nil {
 		err = errors.Join(mErr)
 	}
 
@@ -122,11 +123,18 @@ func (r *BenchmarkReport) sampleMetrics(ctx context.Context, sample *BenchmarkMe
 		err = errors.Join(err, fmt.Errorf("failed to query data plane memory: %w", qErr))
 	}
 	// Sample cpu
-	cpCPU, qErr := r.promClient.QuerySum(ctx, controlPlaneCPUQL)
+
+	// Get duration
+	durationSeconds := int(time.Now.Sub(startTime))
+	cpCPUQL := strings.ReplaceAll(controlPlaneCPUQL, DURATION_FORMATTER, durationSeconds)
+
+	cpCPUQL, qErr := r.promClient.QuerySum(ctx, cpCPUQL)
 	if qErr != nil {
 		err = errors.Join(err, fmt.Errorf("failed to query control plane cpu: %w", qErr))
 	}
-	dpCPU, qErr := r.promClient.QueryAvg(ctx, dataPlaneCPUQL)
+
+	dpCPUQL := strings.ReplaceAll(dataPlaneCPUQL, DURATION_FORMATTER, durationSeconds)
+	dpCPUQL, qErr := r.promClient.QueryAvg(ctx, dpCPUQL)
 	if qErr != nil {
 		err = errors.Join(err, fmt.Errorf("failed to query data plane cpu: %w", qErr))
 	}
diff --git a/test/benchmark/suite/suite.go b/test/benchmark/suite/suite.go
@@ -183,7 +183,7 @@ func (b *BenchmarkTestSuite) Run(t *testing.T, tests []BenchmarkTest) {
 // TODO: currently running benchmark test via nighthawk_client,
 // consider switching to gRPC nighthawk-service for benchmark test.
 // ref: https://github.com/envoyproxy/nighthawk/blob/main/api/client/service.proto
-func (b *BenchmarkTestSuite) Benchmark(t *testing.T, ctx context.Context, jobName, resultTitle, gatewayHostPort, hostnamePattern string, host int) (*BenchmarkReport, error) {
+func (b *BenchmarkTestSuite) Benchmark(t *testing.T, ctx context.Context, jobName, resultTitle, gatewayHostPort, hostnamePattern string, host int, startTime time.Time) (*BenchmarkReport, error) {
 	t.Logf("Running benchmark test: %s", resultTitle)
 
 	requestHeaders := make([]string, 0, host)
@@ -230,7 +230,7 @@ func (b *BenchmarkTestSuite) Benchmark(t *testing.T, ctx context.Context, jobNam
 
 		// Sample the metrics and profiles at runtime.
 		// Do not consider it as an error, fail sampling should not affect test running.
-		if err := report.Sample(ctx); err != nil {
+		if err := report.Sample(ctx, startTime); err != nil {
 			t.Logf("Error occurs while sampling metrics or profiles: %v", err)
 		}
 
diff --git a/test/benchmark/tests/scale_httproutes.go b/test/benchmark/tests/scale_httproutes.go
@@ -11,6 +11,7 @@ import (
 	"context"
 	"fmt"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/require"
 	"k8s.io/apimachinery/pkg/types"
@@ -56,6 +57,7 @@ var ScaleHTTPRoutes = suite.BenchmarkTest{
 				testName := fmt.Sprintf("scaling up httproutes to %d with %d routes per hostname", scale, routePerHost)
 
 				t.Run(testName, func(t *testing.T) {
+					startTime := time.Now()
 					err = bSuite.ScaleUpHTTPRoutes(ctx, [2]uint16{start, scale}, routeNameFormat, routeHostnameFormat, gatewayNN.Name, routePerHost-batch, func(route *gwapiv1.HTTPRoute) {
 						routeNN := types.NamespacedName{Name: route.Name, Namespace: route.Namespace}
 						routeNNs = append(routeNNs, routeNN)
@@ -71,7 +73,7 @@ var ScaleHTTPRoutes = suite.BenchmarkTest{
 
 					// Run benchmark test at different scale.
 					jobName := fmt.Sprintf("scale-up-httproutes-%d", scale)
-					report, err := bSuite.Benchmark(t, ctx, jobName, testName, gatewayAddr, routeHostnameFormat, int(totalHosts))
+					report, err := bSuite.Benchmark(t, ctx, jobName, testName, gatewayAddr, routeHostnameFormat, int(totalHosts), startTime)
 					require.NoError(t, err)
 
 					reports = append(reports, report)

Original file line number	Diff line number	Diff line change
`@@ -24,11 +24,12 @@ import (`
`24`	`24`	`)`
`25`	`25`
`26`	`26`	`const (`
	`27`	`+ DURATION_FORMATTER = "%DURATION"`
`27`	`28`	controlPlaneContainerMemQL = `process_resident_memory_bytes{namespace="envoy-gateway-system", control_plane="envoy-gateway"}/1024/1024`
`28`	`29`	controlPlaneProcessMemQL = `go_memstats_heap_inuse_bytes{namespace="envoy-gateway-system", control_plane="envoy-gateway"}/1024/1024`
`29`		- controlPlaneCPUQL = `rate(process_cpu_seconds_total{namespace="envoy-gateway-system", control_plane="envoy-gateway"}[30s])*100`
	`30`	+ controlPlaneCPUQL = `rate(process_cpu_seconds_total{namespace="envoy-gateway-system", control_plane="envoy-gateway"}[%DURATIONs])*100`
`30`	`31`	dataPlaneMemQL = `container_memory_working_set_bytes{namespace="envoy-gateway-system", container="envoy"}/1024/1024`
`31`		- dataPlaneCPUQL = `rate(container_cpu_usage_seconds_total{namespace="envoy-gateway-system", container="envoy"}[30s])*100`
	`32`	+ dataPlaneCPUQL = `rate(container_cpu_usage_seconds_total{namespace="envoy-gateway-system", container="envoy"}[%DURATIONs])*100`
`32`	`33`	`)`
`33`	`34`
`34`	`35`	`// BenchmarkMetricSample contains sampled metrics and profiles data.`
`@@ -63,10 +64,10 @@ func NewBenchmarkReport(name, profilesOutputDir string, kubeClient kube.CLIClien`
`63`	`64`	`}`
`64`	`65`	`}`
`65`	`66`
`66`		`-func (r *BenchmarkReport) Sample(ctx context.Context) (err error) {`
	`67`	`+func (r *BenchmarkReport) Sample(ctx context.Context, startTime time.Time) (err error) {`
`67`	`68`	`sample := BenchmarkMetricSample{}`
`68`	`69`
`69`		`- if mErr := r.sampleMetrics(ctx, &sample); mErr != nil {`
	`70`	`+ if mErr := r.sampleMetrics(ctx, &sample, startTime); mErr != nil {`
`70`	`71`	`err = errors.Join(mErr)`
`71`	`72`	`}`
`72`	`73`
`@@ -122,11 +123,18 @@ func (r *BenchmarkReport) sampleMetrics(ctx context.Context, sample *BenchmarkMe`
`122`	`123`	`err = errors.Join(err, fmt.Errorf("failed to query data plane memory: %w", qErr))`
`123`	`124`	`}`
`124`	`125`	`// Sample cpu`
`125`		`- cpCPU, qErr := r.promClient.QuerySum(ctx, controlPlaneCPUQL)`
	`126`	`+`
	`127`	`+ // Get duration`
	`128`	`+ durationSeconds := int(time.Now.Sub(startTime))`
	`129`	`+ cpCPUQL := strings.ReplaceAll(controlPlaneCPUQL, DURATION_FORMATTER, durationSeconds)`
	`130`	`+`
	`131`	`+ cpCPUQL, qErr := r.promClient.QuerySum(ctx, cpCPUQL)`
`126`	`132`	`if qErr != nil {`
`127`	`133`	`err = errors.Join(err, fmt.Errorf("failed to query control plane cpu: %w", qErr))`
`128`	`134`	`}`
`129`		`- dpCPU, qErr := r.promClient.QueryAvg(ctx, dataPlaneCPUQL)`
	`135`	`+`
	`136`	`+ dpCPUQL := strings.ReplaceAll(dataPlaneCPUQL, DURATION_FORMATTER, durationSeconds)`
	`137`	`+ dpCPUQL, qErr := r.promClient.QueryAvg(ctx, dpCPUQL)`
`130`	`138`	`if qErr != nil {`
`131`	`139`	`err = errors.Join(err, fmt.Errorf("failed to query data plane cpu: %w", qErr))`
`132`	`140`	`}`