Skip to content

Commit ce18199

Browse files
committed
feat: add instancetype controller
1 parent cb89615 commit ce18199

10 files changed

Lines changed: 295 additions & 37 deletions

File tree

cmd/controller/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func main() {
9090
// TODO: still need to refactor ImageProvider side of things.
9191
op.KubernetesVersionProvider,
9292
op.ImageProvider,
93+
op.InstanceTypesProvider,
9394
op.InClusterKubernetesInterface,
9495
op.AZClient.SubnetsClient(),
9596
op.AZClient.DiskEncryptionSetsClient(),

cmd/controller/main_ccp.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func main() {
9090
// TODO: still need to refactor ImageProvider side of things.
9191
op.KubernetesVersionProvider,
9292
op.ImageProvider,
93+
op.InstanceTypesProvider,
9394
op.InClusterKubernetesInterface,
9495
op.AZClient.SubnetsClient(),
9596
op.AZClient.DiskEncryptionSetsClient(),

pkg/cloudprovider/suite_aksmachineapi_offerings_test.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,11 @@ var _ = Describe("CloudProvider", func() {
418418
// Reconcile the NodeClass to ensure status is updated
419419
ExpectObjectReconciled(ctx, env.Client, localStatusController, nodeClass)
420420

421-
azureEnv.SKUsAPI.Error = fmt.Errorf("failed to list SKUs")
421+
// Flush the cache to simulate the controller not having run yet.
422+
// With the instance type controller, SKU API errors happen during
423+
// UpdateInstanceTypes (controller reconcile), not during List.
424+
// When the cache is empty, List returns an error.
425+
azureEnv.InstanceTypesProvider.Reset()
422426

423427
testNodeClaim3 := coretest.NodeClaim(karpv1.NodeClaim{
424428
ObjectMeta: metav1.ObjectMeta{
@@ -439,10 +443,10 @@ var _ = Describe("CloudProvider", func() {
439443
Expect(err).To(HaveOccurred())
440444
Expect(err).To(BeAssignableToTypeOf(&corecloudprovider.CreateError{}))
441445
Expect(claim).To(BeNil())
442-
Expect(err.Error()).To(ContainSubstring("failed to list SKUs"))
446+
Expect(err.Error()).To(ContainSubstring("resolving instance types"))
443447

444-
// Clean up the error for other tests
445-
azureEnv.SKUsAPI.Error = nil
448+
// Repopulate the instance types cache that was flushed above so later tests see a hydrated provider
449+
Expect(azureEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
446450
})
447451

448452
// Ported from VM test: "should return error when instance creation fails"

pkg/controllers/controllers.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,12 @@ import (
3636
nodeclassstatus "github.com/Azure/karpenter-provider-azure/pkg/controllers/nodeclass/status"
3737
nodeclasstermination "github.com/Azure/karpenter-provider-azure/pkg/controllers/nodeclass/termination"
3838

39+
instancetypecontroller "github.com/Azure/karpenter-provider-azure/pkg/controllers/instancetype"
3940
"github.com/Azure/karpenter-provider-azure/pkg/controllers/nodeclaim/inplaceupdate"
4041
"github.com/Azure/karpenter-provider-azure/pkg/providers/azclient"
4142
"github.com/Azure/karpenter-provider-azure/pkg/providers/imagefamily"
4243
"github.com/Azure/karpenter-provider-azure/pkg/providers/instance"
44+
instancetypeprovider "github.com/Azure/karpenter-provider-azure/pkg/providers/instancetype"
4345
"github.com/Azure/karpenter-provider-azure/pkg/providers/kubernetesversion"
4446
)
4547

@@ -53,6 +55,7 @@ func NewControllers(
5355
aksMachineInstanceProvider instance.AKSMachineProvider,
5456
kubernetesVersionProvider kubernetesversion.KubernetesVersionProvider,
5557
nodeImageProvider imagefamily.NodeImageProvider,
58+
instanceTypesProvider instancetypeprovider.Provider,
5659
inClusterKubernetesInterface kubernetes.Interface,
5760
subnetsClient azclient.SubnetsAPI,
5861
diskEncryptionSetsClient azclient.DiskEncryptionSetsAPI,
@@ -69,6 +72,8 @@ func NewControllers(
6972
// TODO: nodeclaim tagging
7073
inplaceupdate.NewController(kubeClient, vmInstanceProvider, aksMachineInstanceProvider),
7174
status.NewController[*v1beta1.AKSNodeClass](kubeClient, mgr.GetEventRecorderFor("karpenter")),
75+
76+
instancetypecontroller.NewController(instanceTypesProvider),
7277
}
7378
return controllers
7479
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
Portions Copyright (c) Microsoft Corporation.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package instancetype
18+
19+
import (
20+
"context"
21+
"time"
22+
23+
"github.com/awslabs/operatorpkg/reconciler"
24+
"github.com/awslabs/operatorpkg/singleton"
25+
controllerruntime "sigs.k8s.io/controller-runtime"
26+
"sigs.k8s.io/controller-runtime/pkg/log"
27+
"sigs.k8s.io/controller-runtime/pkg/manager"
28+
"sigs.k8s.io/karpenter/pkg/operator/injection"
29+
30+
instancetypeprovider "github.com/Azure/karpenter-provider-azure/pkg/providers/instancetype"
31+
)
32+
33+
const (
34+
InstanceTypesRefreshInterval = 12 * time.Hour
35+
)
36+
37+
// Controller periodically updates the instance types cache by fetching
38+
// instance type data from Azure. This removes the need to fetch instance
39+
// types on-demand during List calls.
40+
type Controller struct {
41+
instanceTypeProvider instancetypeprovider.Provider
42+
}
43+
44+
func NewController(instanceTypeProvider instancetypeprovider.Provider) *Controller {
45+
return &Controller{
46+
instanceTypeProvider: instanceTypeProvider,
47+
}
48+
}
49+
50+
func (c *Controller) Reconcile(ctx context.Context) (reconciler.Result, error) {
51+
ctx = injection.WithControllerName(ctx, "instancetype")
52+
53+
if err := c.instanceTypeProvider.UpdateInstanceTypes(ctx); err != nil {
54+
log.FromContext(ctx).Error(err, "updating instance types")
55+
return reconciler.Result{}, err
56+
}
57+
log.FromContext(ctx).V(1).Info("updated instance types")
58+
return reconciler.Result{RequeueAfter: InstanceTypesRefreshInterval}, nil
59+
}
60+
61+
func (c *Controller) Register(_ context.Context, m manager.Manager) error {
62+
return controllerruntime.NewControllerManagedBy(m).
63+
Named("instancetype").
64+
WatchesRawSource(singleton.Source()).
65+
Complete(singleton.AsReconciler(c))
66+
}
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/*
2+
Portions Copyright (c) Microsoft Corporation.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package instancetype_test
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"testing"
23+
24+
. "github.com/onsi/ginkgo/v2"
25+
. "github.com/onsi/gomega"
26+
"github.com/samber/lo"
27+
28+
"github.com/Azure/azure-sdk-for-go/profiles/latest/compute/mgmt/compute"
29+
"github.com/Azure/karpenter-provider-azure/pkg/apis"
30+
instancetypecontroller "github.com/Azure/karpenter-provider-azure/pkg/controllers/instancetype"
31+
"github.com/Azure/karpenter-provider-azure/pkg/fake"
32+
"github.com/Azure/karpenter-provider-azure/pkg/operator/options"
33+
"github.com/Azure/karpenter-provider-azure/pkg/test"
34+
coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
35+
coretest "sigs.k8s.io/karpenter/pkg/test"
36+
. "sigs.k8s.io/karpenter/pkg/test/expectations"
37+
"sigs.k8s.io/karpenter/pkg/test/v1alpha1"
38+
. "sigs.k8s.io/karpenter/pkg/utils/testing"
39+
)
40+
41+
var ctx context.Context
42+
var env *coretest.Environment
43+
var azureEnv *test.Environment
44+
var controller *instancetypecontroller.Controller
45+
46+
func TestController(t *testing.T) {
47+
ctx = TestContextWithLogger(t)
48+
RegisterFailHandler(Fail)
49+
RunSpecs(t, "InstanceTypeController")
50+
}
51+
52+
var _ = BeforeSuite(func() {
53+
ctx = coreoptions.ToContext(ctx, coretest.Options())
54+
ctx = options.ToContext(ctx, test.Options())
55+
env = coretest.NewEnvironment(coretest.WithCRDs(apis.CRDs...), coretest.WithCRDs(v1alpha1.CRDs...))
56+
azureEnv = test.NewEnvironment(ctx, env)
57+
controller = instancetypecontroller.NewController(azureEnv.InstanceTypesProvider)
58+
})
59+
60+
var _ = AfterSuite(func() {
61+
Expect(env.Stop()).To(Succeed(), "Failed to stop environment")
62+
})
63+
64+
var _ = BeforeEach(func() {
65+
azureEnv.Reset()
66+
})
67+
68+
var _ = AfterEach(func() {
69+
ExpectCleanedUp(ctx, env.Client)
70+
})
71+
72+
var _ = Describe("InstanceType Controller", func() {
73+
It("should return a requeue interval of 12 hours", func() {
74+
result := ExpectSingletonReconciled(ctx, controller)
75+
Expect(result.RequeueAfter).To(Equal(instancetypecontroller.InstanceTypesRefreshInterval))
76+
})
77+
78+
It("should List after reconciliation", func() {
79+
// Flush the cache to simulate a cold start
80+
azureEnv.InstanceTypesProvider.Reset()
81+
82+
// Reconcile to populate instance types
83+
ExpectSingletonReconciled(ctx, controller)
84+
85+
nodeClass := test.AKSNodeClass()
86+
instanceTypes, err := azureEnv.InstanceTypesProvider.List(ctx, nodeClass)
87+
Expect(err).To(BeNil())
88+
Expect(instanceTypes).NotTo(BeEmpty())
89+
})
90+
91+
It("should fail reconciliation when the SKU API returns an error", func() {
92+
azureEnv.SKUsAPI.Error = fmt.Errorf("simulated SKU API failure")
93+
94+
err := ExpectSingletonReconcileFailed(ctx, controller)
95+
Expect(err).To(HaveOccurred())
96+
Expect(err.Error()).To(ContainSubstring("fetching SKUs using skewer"))
97+
})
98+
99+
It("should update instance types on subsequent reconciliations", func() {
100+
// First reconcile
101+
ExpectSingletonReconciled(ctx, controller)
102+
103+
nodeClass := test.AKSNodeClass()
104+
instanceTypes, err := azureEnv.InstanceTypesProvider.List(ctx, nodeClass)
105+
Expect(err).To(BeNil())
106+
Expect(instanceTypes).NotTo(BeEmpty())
107+
Expect(instanceTypes).ToNot(ContainElement(HaveField("Name", Equal("Standard_D64s_v6"))))
108+
109+
// create a copy of the slice so we can revert it to its old state afterwards
110+
copy := append([]compute.ResourceSku{}, fake.ResourceSkus[fake.Region]...)
111+
fake.ResourceSkus[fake.Region] = append(fake.ResourceSkus[fake.Region],
112+
compute.ResourceSku{
113+
Name: lo.ToPtr("Standard_D64s_v6"),
114+
Tier: lo.ToPtr("Stanadard"),
115+
Kind: lo.ToPtr(""),
116+
Size: lo.ToPtr("D64s_v6"),
117+
Family: lo.ToPtr("standardD64s_v6Family"),
118+
ResourceType: lo.ToPtr("virtualMachines"),
119+
APIVersions: &[]string{},
120+
Costs: &[]compute.ResourceSkuCosts{},
121+
Restrictions: &[]compute.ResourceSkuRestrictions{},
122+
Capabilities: &[]compute.ResourceSkuCapabilities{
123+
{Name: lo.ToPtr("vCPUs"), Value: lo.ToPtr("64")},
124+
{Name: lo.ToPtr("MemoryGB"), Value: lo.ToPtr("64")},
125+
{Name: lo.ToPtr("CpuArchitectureType"), Value: lo.ToPtr("x64")},
126+
{Name: lo.ToPtr("vCPUsAvailable"), Value: lo.ToPtr("64")},
127+
},
128+
Locations: &[]string{"southcentralus"},
129+
LocationInfo: &[]compute.ResourceSkuLocationInfo{{Location: lo.ToPtr("southcentralus"), Zones: &[]string{}}},
130+
},
131+
)
132+
defer func() {
133+
fake.ResourceSkus[fake.Region] = copy
134+
}()
135+
136+
// Second reconcile should succeed and have new cached data
137+
ExpectSingletonReconciled(ctx, controller)
138+
instanceTypes, err = azureEnv.InstanceTypesProvider.List(ctx, nodeClass)
139+
Expect(err).To(BeNil())
140+
Expect(instanceTypes).NotTo(BeEmpty())
141+
Expect(instanceTypes).To(ContainElement(HaveField("Name", Equal("Standard_D64s_v6"))))
142+
})
143+
})

pkg/operator/operator.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,12 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont
180180
pricingProvider,
181181
unavailableOfferingsCache,
182182
)
183+
184+
// Ensure we're able to hydrate instance types before starting any controllers
185+
// that depend on them. The instance type controller will refresh this list
186+
// periodically once all controllers are running.
187+
lo.Must0(instanceTypeProvider.UpdateInstanceTypes(ctx))
188+
183189
imageResolver := imagefamily.NewDefaultResolver(
184190
operator.GetClient(),
185191
imageProvider,

0 commit comments

Comments
 (0)