Skip to content

Commit ddbe248

Browse files
Add Config Knob to allow Pod to use different VPC subnet
1 parent 5751423 commit ddbe248

File tree

17 files changed

+938
-39
lines changed

17 files changed

+938
-39
lines changed

config/v1.2/aws-k8s-cni.yaml

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
---
2+
# ClusterRole for the aws-node CNI daemon: full access to the
# crd.k8s.amazonaws.com API group, read access to pods/nodes/namespaces,
# and list/watch on extensions daemonsets.
apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: aws-node
rules:
- apiGroups:
  - crd.k8s.amazonaws.com
  resources:
  - "*"
  - namespaces
  verbs:
  - "*"
- apiGroups: [""]
  resources:
  - pods
  - nodes
  - namespaces
  verbs: ["list", "watch", "get"]
- apiGroups: ["extensions"]
  resources:
  - daemonsets
  verbs: ["list", "watch"]
25+
---
26+
# Service account the aws-node DaemonSet pods run as.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: kube-system
  name: aws-node
31+
---
32+
# Binds the aws-node ClusterRole to the aws-node ServiceAccount in kube-system.
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: aws-node
subjects:
- kind: ServiceAccount
  namespace: kube-system
  name: aws-node
roleRef:
  kind: ClusterRole
  name: aws-node
  apiGroup: rbac.authorization.k8s.io
45+
---
46+
# DaemonSet that runs the amazon-k8s-cni container (aws-node) in kube-system.
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: aws-node
  namespace: kube-system
  labels:
    k8s-app: aws-node
spec:
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      k8s-app: aws-node
  template:
    metadata:
      labels:
        k8s-app: aws-node
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      serviceAccountName: aws-node
      # Pod shares the node's network namespace.
      hostNetwork: true
      # A toleration with only "operator: Exists" matches every taint.
      tolerations:
      - operator: Exists
      containers:
      - name: aws-node
        image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni:1.2.0
        imagePullPolicy: Always
        ports:
        - name: metrics
          containerPort: 60000
        env:
        - name: AWS_VPC_K8S_CNI_LOGLEVEL
          value: DEBUG
        # Node name and pod namespace are injected through the downward API.
        - name: MY_NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        - name: WATCH_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        resources:
          requests:
            cpu: 10m
        securityContext:
          privileged: true
        # Host directories (see "volumes" below) are mounted under /host,
        # except the Docker socket, which keeps its host path.
        volumeMounts:
        - name: cni-bin-dir
          mountPath: /host/opt/cni/bin
        - name: cni-net-dir
          mountPath: /host/etc/cni/net.d
        - name: log-dir
          mountPath: /host/var/log
        - name: dockersock
          mountPath: /var/run/docker.sock
      volumes:
      - name: cni-bin-dir
        hostPath:
          path: /opt/cni/bin
      - name: cni-net-dir
        hostPath:
          path: /etc/cni/net.d
      - name: log-dir
        hostPath:
          path: /var/log
      - name: dockersock
        hostPath:
          path: /var/run/docker.sock
115+
---
116+
# Registers the cluster-scoped ENIConfig custom resource
# (group crd.k8s.amazonaws.com, version v1alpha1).
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: eniconfigs.crd.k8s.amazonaws.com
spec:
  scope: Cluster
  group: crd.k8s.amazonaws.com
  version: v1alpha1
  # "names" only carries naming fields; the resource scope is set once, on
  # spec.scope above.
  names:
    plural: eniconfigs
    singular: eniconfig
    kind: ENIConfig
129+
130+

ipamd/datastore/data_store.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,8 +351,12 @@ func (ds *DataStore) getDeletableENI() *ENIIPPool {
351351
}
352352

353353
// GetENINeedsIP finds out the eni in datastore which failed to get secondary IP address
354-
func (ds *DataStore) GetENINeedsIP(maxIPperENI int64) *ENIIPPool {
354+
func (ds *DataStore) GetENINeedsIP(maxIPperENI int64, skipPrimary bool) *ENIIPPool {
355355
for _, eni := range ds.eniIPPools {
356+
if skipPrimary && eni.IsPrimary {
357+
log.Debugf("Skip the primary ENI for need IP check")
358+
continue
359+
}
356360
if int64(len(eni.IPv4Addresses)) < maxIPperENI {
357361
log.Debugf("Found eni %s that have less IP address allocated: cur=%d, max=%d",
358362
eni.ID, len(eni.IPv4Addresses), maxIPperENI)

ipamd/introspect.go

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,11 @@ func (c *IPAMContext) SetupHTTP() {
6969

7070
func (c *IPAMContext) setupServer() *http.Server {
7171
serverFunctions := map[string]func(w http.ResponseWriter, r *http.Request){
72-
"/v1/enis": eniV1RequestHandler(c),
73-
"/v1/pods": podV1RequestHandler(c),
74-
"/v1/env-settings": envV1RequestHandler(c),
72+
"/v1/enis": eniV1RequestHandler(c),
73+
"/v1/pods": podV1RequestHandler(c),
74+
"/v1/networkutils-env-settings": networkEnvV1RequestHandler(c),
75+
"/v1/ipamd-env-settings": ipamdEnvV1RequestHandler(c),
76+
"/v1/eni-configs": eniConfigRequestHandler(c),
7577
}
7678
paths := make([]string, 0, len(serverFunctions))
7779
for path := range serverFunctions {
@@ -134,7 +136,19 @@ func podV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Requ
134136
}
135137
}
136138

137-
func envV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
139+
func eniConfigRequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
140+
return func(w http.ResponseWriter, r *http.Request) {
141+
responseJSON, err := json.Marshal(ipam.eniConfig.Getter())
142+
if err != nil {
143+
log.Error("Failed to marshal pod data: %v", err)
144+
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
145+
return
146+
}
147+
w.Write(responseJSON)
148+
}
149+
}
150+
151+
func networkEnvV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
138152
return func(w http.ResponseWriter, r *http.Request) {
139153
responseJSON, err := json.Marshal(networkutils.GetConfigForDebug())
140154
if err != nil {
@@ -146,6 +160,18 @@ func envV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Requ
146160
}
147161
}
148162

163+
func ipamdEnvV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
164+
return func(w http.ResponseWriter, r *http.Request) {
165+
responseJSON, err := json.Marshal(GetConfigForDebug())
166+
if err != nil {
167+
log.Error("Failed to marshal env var data: %v", err)
168+
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
169+
return
170+
}
171+
w.Write(responseJSON)
172+
}
173+
}
174+
149175
func metricsHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
150176
return func(w http.ResponseWriter, r *http.Request) {
151177
promhttp.Handler()

ipamd/ipamd.go

Lines changed: 85 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
package ipamd
1515

1616
import (
17+
"fmt"
1718
"net"
1819
"os"
1920
"strconv"
@@ -31,6 +32,7 @@ import (
3132
"github.com/aws/amazon-vpc-cni-k8s/ipamd/datastore"
3233
"github.com/aws/amazon-vpc-cni-k8s/pkg/awsutils"
3334
"github.com/aws/amazon-vpc-cni-k8s/pkg/docker"
35+
"github.com/aws/amazon-vpc-cni-k8s/pkg/eniconfig"
3436
"github.com/aws/amazon-vpc-cni-k8s/pkg/k8sapi"
3537
"github.com/aws/amazon-vpc-cni-k8s/pkg/networkutils"
3638
)
@@ -43,11 +45,40 @@ const (
4345
ipPoolMonitorInterval = 5 * time.Second
4446
maxRetryCheckENI = 5
4547
eniAttachTime = 10 * time.Second
46-
defaultWarmENITarget = 1
4748
nodeIPPoolReconcileInterval = 60 * time.Second
4849
maxK8SRetries = 12
4950
retryK8SInterval = 5 * time.Second
50-
noWarmIPTarget = 0
51+
52+
// This environment is used to specify the desired number of free IPs always available in "warm pool"
53+
// When it is not set, ipamD defaults to using the number of IPs per ENI for that instance.
54+
// For example, for a m4.4xlarge node,
55+
// if WARM_IP_TARGET is set to 1, and there are 9 pods running on the node, ipamD will try
56+
// to make "warm pool" to have 10 IP address with 9 being assigned to Pod and 1 free IP.
57+
//
58+
// if WARM_IP_TARGET is not set, it defaults to 30 (which is the number of IPs per ENI). If there are 9 pods
59+
// running on the node, ipamD will try to make "warm pool" to have 39 IPs with 9 being assigned to Pod and 30 free IPs.
60+
envWarmIPTarget = "WARM_IP_TARGET"
61+
noWarmIPTarget = 0
62+
63+
// This environment is used to specify the desired number of free ENIs along with all of its IP addresses
64+
// always available in "warm pool".
65+
// When it is not set, it is default to 1.
66+
//
67+
// When WARM_IP_TARGET is defined, ipamD will use the behavior defined for WARM_IP_TARGET.
68+
//
69+
// For example, for a m4.4xlarge node,
70+
// if WARM_ENI_TARGET is set to 2, and there are 9 pods running on the node, ipamD will try to
71+
// make "warm pool" to have 2 extra ENIs and their IP addresses, in other words, 90 IP addresses with 9 IPs assigned to Pod
72+
// and 81 free IPs.
73+
//
74+
// if "WARM_ENI_TARGET" is not set, it defaults to 1. If there are 9 pods running on the node, ipamD will try to
75+
// make "warm pool" to have 1 extra ENI, in other words, 60 IPs with 9 being assigned to Pod and 51 free IPs.
76+
envWarmENITarget = "WARM_ENI_TARGET"
77+
defaultWarmENITarget = 1
78+
79+
// This environment is used to specify whether Pods need to use securitygroup and subnet defined in ENIConfig CRD
80+
// When it is NOT set or set to false, ipamD will use primary interface security group and subnet for Pod network.
81+
envCustomNetworkCfg = "AWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG"
5182
)
5283

5384
var (
@@ -105,6 +136,7 @@ type IPAMContext struct {
105136
awsClient awsutils.APIs
106137
dataStore *datastore.DataStore
107138
k8sClient k8sapi.K8SAPIs
139+
eniConfig eniconfig.ENIConfig
108140
dockerClient docker.APIs
109141
networkClient networkutils.NetworkAPIs
110142

@@ -133,13 +165,14 @@ func prometheusRegister() {
133165

134166
// New retrieves IP address usage information from Instance MetaData service and Kubelet
135167
// then initializes IP address pool data store
136-
func New(k8sapiClient k8sapi.K8SAPIs) (*IPAMContext, error) {
168+
func New(k8sapiClient k8sapi.K8SAPIs, eniConfig *eniconfig.ENIConfigController) (*IPAMContext, error) {
137169
prometheusRegister()
138170
c := &IPAMContext{}
139171

140172
c.k8sClient = k8sapiClient
141173
c.networkClient = networkutils.New()
142174
c.dockerClient = docker.New()
175+
c.eniConfig = eniConfig
143176

144177
client, err := awsutils.New()
145178
if err != nil {
@@ -314,7 +347,7 @@ func (c *IPAMContext) retryAllocENIIP() {
314347
log.Infof("Failed to retrieve ENI IP limit: %v", err)
315348
return
316349
}
317-
eni := c.dataStore.GetENINeedsIP(maxIPLimit)
350+
eni := c.dataStore.GetENINeedsIP(maxIPLimit, useCustomNetworkCfg())
318351
if eni != nil {
319352
log.Debugf("Attempt again to allocate IP address for eni :%s", eni.ID)
320353
var err error
@@ -392,7 +425,30 @@ func (c *IPAMContext) increaseIPPool() {
392425
log.Debugf("Skipping increase IPPOOL due to max ENI already attached to the instance : %d", c.maxENI)
393426
return
394427
}
395-
eni, err := c.awsClient.AllocENI()
428+
429+
var securityGroups []*string
430+
var subnet string
431+
customNetworkCfg := useCustomNetworkCfg()
432+
433+
if customNetworkCfg {
434+
eniCfg, err := c.eniConfig.MyENIConfig()
435+
436+
if err != nil {
437+
log.Errorf("Failed to get pod ENI config")
438+
return
439+
}
440+
441+
log.Infof("ipamd: using custom network config: %v, %s", eniCfg.SecurityGroups, eniCfg.Subnet)
442+
443+
for _, sgID := range eniCfg.SecurityGroups {
444+
log.Debugf("Found security-group id: %s", sgID)
445+
securityGroups = append(securityGroups, aws.String(sgID))
446+
}
447+
448+
subnet = eniCfg.Subnet
449+
}
450+
451+
eni, err := c.awsClient.AllocENI(customNetworkCfg, securityGroups, subnet)
396452
if err != nil {
397453
log.Errorf("Failed to increase pool size due to not able to allocate ENI %v", err)
398454

@@ -546,7 +602,7 @@ func (c *IPAMContext) waitENIAttached(eni string) (awsutils.ENIMetadata, error)
546602
}
547603

548604
func getWarmENITarget() int {
549-
inputStr, found := os.LookupEnv("WARM_ENI_TARGET")
605+
inputStr, found := os.LookupEnv(envWarmENITarget)
550606

551607
if !found {
552608
return defaultWarmENITarget
@@ -722,8 +778,21 @@ func (c *IPAMContext) eniIPPoolReconcile(ipPool map[string]*datastore.AddressInf
722778

723779
}
724780

781+
func useCustomNetworkCfg() bool {
782+
defaultValue := false
783+
if strValue := os.Getenv(envCustomNetworkCfg); strValue != "" {
784+
parsedValue, err := strconv.ParseBool(strValue)
785+
if err != nil {
786+
log.Error("Failed to parse "+envCustomNetworkCfg+"; using default: "+fmt.Sprint(defaultValue), err.Error())
787+
return defaultValue
788+
}
789+
return parsedValue
790+
}
791+
return defaultValue
792+
}
793+
725794
func getWarmIPTarget() int {
726-
inputStr, found := os.LookupEnv("WARM_IP_TARGET")
795+
inputStr, found := os.LookupEnv(envWarmIPTarget)
727796

728797
if !found {
729798
return noWarmIPTarget
@@ -753,3 +822,12 @@ func (c *IPAMContext) getCurWarmIPTarget() (int, bool) {
753822

754823
return curTarget, true
755824
}
825+
826+
// GetConfigForDebug returns the active values of the configuration env vars (for debugging purposes).
827+
func GetConfigForDebug() map[string]interface{} {
828+
return map[string]interface{}{
829+
envWarmIPTarget: getWarmIPTarget(),
830+
envWarmENITarget: getWarmENITarget(),
831+
envCustomNetworkCfg: useCustomNetworkCfg(),
832+
}
833+
}

0 commit comments

Comments
 (0)