Skip to content

Commit 3899cc4

Browse files
authored
Merge pull request #1436 from howard-junec/automated-cherry-pick-of-#1374-upstream-release-1.35
Automated cherry pick of #1374: fix: Add explicit HTTP request timeouts to all AWS SDK
2 parents e88ebc2 + c38f246 commit 3899cc4

7 files changed

Lines changed: 280 additions & 6 deletions

File tree

cmd/ecr-credential-provider/main.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"time"
3030

3131
"github.com/aws/aws-sdk-go-v2/aws"
32+
awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http"
3233
"github.com/aws/aws-sdk-go-v2/config"
3334
"github.com/aws/aws-sdk-go-v2/service/ecr"
3435
"github.com/aws/aws-sdk-go-v2/service/ecrpublic"
@@ -43,6 +44,8 @@ import (
4344

4445
const ecrPublicRegion string = "us-east-1"
4546

47+
var defaultHTTPClient = awshttp.NewBuildableClient().WithTimeout(30 * time.Second)
48+
4649
var ecrPublicHosts []string = []string{"public.ecr.aws", "ecr-public.aws.com"}
4750

4851
var ecrPrivateHostPattern = regexp.MustCompile(`^(\d{12})\.dkr[\.\-]ecr(\-fips)?\.([a-zA-Z0-9][a-zA-Z0-9-_]*)\.(amazonaws\.(?:com(?:\.cn)?|eu)|on\.(?:aws|amazonwebservices\.com\.cn)|sc2s\.sgov\.gov|c2s\.ic\.gov|cloud\.adc-e\.uk|csp\.hci\.ic\.gov)$`)
@@ -74,10 +77,13 @@ func defaultECRProvider(ctx context.Context, region string) (ECR, error) {
7477
if region != "" {
7578
cfg, err = config.LoadDefaultConfig(ctx,
7679
config.WithRegion(region),
80+
config.WithHTTPClient(defaultHTTPClient),
7781
)
7882
} else {
7983
klog.Warningf("No region found in the image reference, the default region will be used. Please refer to AWS SDK documentation for configuration purpose.")
80-
cfg, err = config.LoadDefaultConfig(ctx)
84+
cfg, err = config.LoadDefaultConfig(ctx,
85+
config.WithHTTPClient(defaultHTTPClient),
86+
)
8187
}
8288

8389
if err != nil {
@@ -92,6 +98,7 @@ func publicECRProvider(ctx context.Context) (ECRPublic, error) {
9298
// in the "aws" partition.
9399
cfg, err := config.LoadDefaultConfig(ctx,
94100
config.WithRegion(ecrPublicRegion),
101+
config.WithHTTPClient(defaultHTTPClient),
95102
)
96103
if err != nil {
97104
return nil, err
@@ -106,10 +113,13 @@ func stsProvider(ctx context.Context, region string) (STS, error) {
106113
if region != "" {
107114
cfg, err = config.LoadDefaultConfig(ctx,
108115
config.WithRegion(region),
116+
config.WithHTTPClient(defaultHTTPClient),
109117
)
110118
} else {
111119
klog.Warningf("No region found in the image reference, the default region will be used. Please refer to AWS SDK documentation for configuration purpose.")
112-
cfg, err = config.LoadDefaultConfig(ctx)
120+
cfg, err = config.LoadDefaultConfig(ctx,
121+
config.WithHTTPClient(defaultHTTPClient),
122+
)
113123
}
114124

115125
if err != nil {

pkg/providers/v1/aws.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types"
4242
"github.com/aws/aws-sdk-go-v2/service/kms"
4343
"github.com/aws/smithy-go"
44+
smithymiddleware "github.com/aws/smithy-go/middleware"
4445
"gopkg.in/gcfg.v1"
4546

4647
v1 "k8s.io/api/core/v1"
@@ -518,7 +519,11 @@ func init() {
518519

519520
var creds *stscreds.AssumeRoleProvider
520521
if cfg.Global.RoleARN != "" {
521-
stsClient, err := services.NewStsClient(ctx, regionName, cfg.Global.RoleARN, cfg.Global.SourceARN)
522+
stsClient, err := services.NewStsClient(ctx, regionName, cfg.Global.RoleARN, cfg.Global.SourceARN,
523+
func(stack *smithymiddleware.Stack) error {
524+
return stack.Deserialize.Add(awsAPIMetricsMiddleware(), smithymiddleware.After)
525+
},
526+
)
522527
if err != nil {
523528
return nil, fmt.Errorf("unable to create sts v2 client: %v", err)
524529
}
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/*
2+
Copyright 2026 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package aws
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"net/http"
23+
"testing"
24+
25+
"github.com/aws/smithy-go"
26+
"github.com/aws/smithy-go/middleware"
27+
smithyhttp "github.com/aws/smithy-go/transport/http"
28+
"github.com/stretchr/testify/assert"
29+
"k8s.io/component-base/metrics/testutil"
30+
)
31+
32+
func TestAWSAPIMetricsMiddleware(t *testing.T) {
33+
registerMetrics()
34+
35+
tests := []struct {
36+
name string
37+
statusCode int
38+
err error
39+
expectStatusCode string
40+
}{
41+
{
42+
name: "4xx response records status code",
43+
statusCode: 403,
44+
expectStatusCode: "403",
45+
},
46+
{
47+
name: "5xx response records status code",
48+
statusCode: 500,
49+
expectStatusCode: "500",
50+
},
51+
{
52+
name: "2xx response does not record",
53+
statusCode: 200,
54+
},
55+
{
56+
name: "4xx with API error records status code",
57+
statusCode: 400,
58+
err: &smithy.GenericAPIError{Code: "ThrottlingException", Message: "rate exceeded"},
59+
expectStatusCode: "400",
60+
},
61+
{
62+
name: "5xx with non-API error records status code",
63+
statusCode: 500,
64+
err: fmt.Errorf("connection reset"),
65+
expectStatusCode: "500",
66+
},
67+
}
68+
69+
for _, tc := range tests {
70+
t.Run(tc.name, func(t *testing.T) {
71+
awsAPIResponseStatusTotal.Reset()
72+
73+
mw := awsAPIMetricsMiddleware()
74+
handler := middleware.DeserializeHandlerFunc(
75+
func(ctx context.Context, in middleware.DeserializeInput) (
76+
middleware.DeserializeOutput, middleware.Metadata, error,
77+
) {
78+
return middleware.DeserializeOutput{
79+
RawResponse: &smithyhttp.Response{
80+
Response: &http.Response{StatusCode: tc.statusCode},
81+
},
82+
}, middleware.Metadata{}, tc.err
83+
},
84+
)
85+
86+
_, _, _ = mw.HandleDeserialize(context.Background(), middleware.DeserializeInput{}, handler)
87+
88+
if tc.expectStatusCode != "" {
89+
val, err := testutil.GetCounterMetricValue(awsAPIResponseStatusTotal.With(map[string]string{
90+
"service": "",
91+
"operation": "",
92+
"status_code": tc.expectStatusCode,
93+
}))
94+
assert.NoError(t, err)
95+
assert.Equal(t, float64(1), val)
96+
}
97+
})
98+
}
99+
}

pkg/providers/v1/aws_metrics.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,12 @@ limitations under the License.
1717
package aws
1818

1919
import (
20+
"context"
21+
"strconv"
2022
"sync"
2123

24+
"github.com/aws/smithy-go/middleware"
25+
smithyhttp "github.com/aws/smithy-go/transport/http"
2226
"k8s.io/component-base/metrics"
2327
"k8s.io/component-base/metrics/legacyregistry"
2428
)
@@ -47,6 +51,15 @@ var (
4751
StabilityLevel: metrics.ALPHA,
4852
},
4953
[]string{"operation_name"})
54+
55+
// awsAPIResponseStatusTotal counts AWS API responses by status code.
56+
awsAPIResponseStatusTotal = metrics.NewCounterVec(
57+
&metrics.CounterOpts{
58+
Name: "cloudprovider_aws_api_response_status_total",
59+
Help: "AWS API response status code counts",
60+
StabilityLevel: metrics.ALPHA,
61+
},
62+
[]string{"service", "operation", "status_code"})
5063
)
5164

5265
func recordAWSMetric(actionName string, timeTaken float64, err error) {
@@ -68,5 +81,32 @@ func registerMetrics() {
6881
legacyregistry.MustRegister(awsAPIMetric)
6982
legacyregistry.MustRegister(awsAPIErrorMetric)
7083
legacyregistry.MustRegister(awsAPIThrottlesMetric)
84+
legacyregistry.MustRegister(awsAPIResponseStatusTotal)
7185
})
7286
}
87+
88+
// awsAPIMetricsMiddleware returns a Deserialize middleware that records
89+
// AWS API response status codes as metrics.
90+
func awsAPIMetricsMiddleware() middleware.DeserializeMiddleware {
91+
return middleware.DeserializeMiddlewareFunc(
92+
"k8s/aws-api-metrics",
93+
func(ctx context.Context, in middleware.DeserializeInput, next middleware.DeserializeHandler) (
94+
out middleware.DeserializeOutput, metadata middleware.Metadata, err error,
95+
) {
96+
out, metadata, err = next.HandleDeserialize(ctx, in)
97+
98+
service := middleware.GetServiceID(ctx)
99+
operation := middleware.GetOperationName(ctx)
100+
101+
if response, ok := out.RawResponse.(*smithyhttp.Response); ok && response.StatusCode >= 400 {
102+
awsAPIResponseStatusTotal.With(metrics.Labels{
103+
"service": service,
104+
"operation": operation,
105+
"status_code": strconv.Itoa(response.StatusCode),
106+
}).Inc()
107+
}
108+
109+
return out, metadata, err
110+
},
111+
)
112+
}

pkg/providers/v1/aws_sdk.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ import (
2020
"context"
2121
"fmt"
2222
"sync"
23+
"time"
2324

2425
"github.com/aws/aws-sdk-go-v2/aws"
2526
"github.com/aws/aws-sdk-go-v2/aws/middleware"
2627
"github.com/aws/aws-sdk-go-v2/aws/retry"
28+
awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http"
2729
awsConfig "github.com/aws/aws-sdk-go-v2/config"
2830
stscredsv2 "github.com/aws/aws-sdk-go-v2/credentials/stscreds"
2931
"github.com/aws/aws-sdk-go-v2/feature/ec2/imds"
@@ -40,6 +42,12 @@ import (
4042
"k8s.io/klog/v2"
4143
)
4244

45+
// defaultHTTPClient is shared across all AWS SDK clients to enforce an explicit
46+
// HTTP request timeout and reuse connection pools. Without a timeout, a single
47+
// slow response can trigger the Go SDK's clock skew overcorrection and break all
48+
// subsequent API calls.
49+
var defaultHTTPClient = awshttp.NewBuildableClient().WithTimeout(30 * time.Second)
50+
4351
type awsSDKProvider struct {
4452
creds aws.CredentialsProvider
4553
cfg awsCloudConfigProvider
@@ -77,6 +85,13 @@ func (p *awsSDKProvider) AddMiddleware(ctx context.Context, regionName string, c
7785
}
7886

7987
p.addAPILoggingMiddleware(cfg)
88+
89+
// Record AWS API response status codes and error codes as metrics.
90+
cfg.APIOptions = append(cfg.APIOptions,
91+
func(stack *smithymiddleware.Stack) error {
92+
return stack.Deserialize.Add(awsAPIMetricsMiddleware(), smithymiddleware.After)
93+
},
94+
)
8095
}
8196

8297
// Adds logging middleware for AWS SDK Go V2 clients
@@ -114,6 +129,7 @@ func (p *awsSDKProvider) getCrossRequestRetryDelay(regionName string) *CrossRequ
114129
func (p *awsSDKProvider) Compute(ctx context.Context, regionName string, assumeRoleProvider *stscredsv2.AssumeRoleProvider) (iface.EC2, error) {
115130
cfg, err := awsConfig.LoadDefaultConfig(ctx, awsConfig.WithDefaultsMode(aws.DefaultsModeInRegion),
116131
awsConfig.WithRegion(regionName),
132+
awsConfig.WithHTTPClient(defaultHTTPClient),
117133
)
118134
if assumeRoleProvider != nil {
119135
cfg.Credentials = aws.NewCredentialsCache(assumeRoleProvider)
@@ -142,6 +158,7 @@ func (p *awsSDKProvider) Compute(ctx context.Context, regionName string, assumeR
142158
func (p *awsSDKProvider) LoadBalancing(ctx context.Context, regionName string, assumeRoleProvider *stscredsv2.AssumeRoleProvider) (ELB, error) {
143159
cfg, err := awsConfig.LoadDefaultConfig(ctx, awsConfig.WithDefaultsMode(aws.DefaultsModeInRegion),
144160
awsConfig.WithRegion(regionName),
161+
awsConfig.WithHTTPClient(defaultHTTPClient),
145162
)
146163
if assumeRoleProvider != nil {
147164
cfg.Credentials = aws.NewCredentialsCache(assumeRoleProvider)
@@ -167,6 +184,7 @@ func (p *awsSDKProvider) LoadBalancing(ctx context.Context, regionName string, a
167184
func (p *awsSDKProvider) LoadBalancingV2(ctx context.Context, regionName string, assumeRoleProvider *stscredsv2.AssumeRoleProvider) (ELBV2, error) {
168185
cfg, err := awsConfig.LoadDefaultConfig(ctx, awsConfig.WithDefaultsMode(aws.DefaultsModeInRegion),
169186
awsConfig.WithRegion(regionName),
187+
awsConfig.WithHTTPClient(defaultHTTPClient),
170188
)
171189
if assumeRoleProvider != nil {
172190
cfg.Credentials = aws.NewCredentialsCache(assumeRoleProvider)
@@ -190,7 +208,9 @@ func (p *awsSDKProvider) LoadBalancingV2(ctx context.Context, regionName string,
190208
}
191209

192210
func (p *awsSDKProvider) Metadata(ctx context.Context) (config.EC2Metadata, error) {
193-
cfg, err := awsConfig.LoadDefaultConfig(context.TODO(), awsConfig.WithDefaultsMode(aws.DefaultsModeInRegion))
211+
cfg, err := awsConfig.LoadDefaultConfig(context.TODO(), awsConfig.WithDefaultsMode(aws.DefaultsModeInRegion),
212+
awsConfig.WithHTTPClient(defaultHTTPClient),
213+
)
194214
if err != nil {
195215
return nil, fmt.Errorf("unable to initialize AWS config: %v", err)
196216
}
@@ -226,6 +246,7 @@ func (p *awsSDKProvider) Metadata(ctx context.Context) (config.EC2Metadata, erro
226246
func (p *awsSDKProvider) KeyManagement(ctx context.Context, regionName string, assumeRoleProvider *stscredsv2.AssumeRoleProvider) (KMS, error) {
227247
cfg, err := awsConfig.LoadDefaultConfig(ctx, awsConfig.WithDefaultsMode(aws.DefaultsModeInRegion),
228248
awsConfig.WithRegion(regionName),
249+
awsConfig.WithHTTPClient(defaultHTTPClient),
229250
)
230251
if assumeRoleProvider != nil {
231252
cfg.Credentials = aws.NewCredentialsCache(assumeRoleProvider)

0 commit comments

Comments
 (0)