@@ -20,10 +20,8 @@ import (
2020 "sync"
2121 "time"
2222
23- "github.com/awslabs/operatorpkg/object"
2423 "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v7"
2524 "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v8"
26- "github.com/Azure/karpenter-provider-azure/pkg/apis/v1beta1"
2725 "github.com/Azure/karpenter-provider-azure/pkg/operator/options"
2826 "github.com/Azure/karpenter-provider-azure/pkg/providers/instance"
2927 "github.com/Azure/karpenter-provider-azure/pkg/providers/launchtemplate"
@@ -34,7 +32,6 @@ import (
3432 . "github.com/onsi/ginkgo/v2"
3533 . "github.com/onsi/gomega"
3634 "github.com/samber/lo"
37- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3835 karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
3936 corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider"
4037 coretest "sigs.k8s.io/karpenter/pkg/test"
@@ -220,264 +217,6 @@ var _ = Describe("Instance Garbage Collection", func() {
220217 ExpectNotFound (ctx , env .Client , node )
221218 })
222219
223- // AKS-specific: malformed/missing timestamp tests
224- It ("should delete an AKS machine if there is no NodeClaim owner, and with malformed timestamp tag" , func () {
225- aksMachine .Properties .Tags ["karpenter.azure.com_aksmachine_creationtimestamp" ] = lo .ToPtr ("malformed-timestamp" )
226- azureEnv .AKSDataStorage .AKSMachines .Store (lo .FromPtr (aksMachine .ID ), * aksMachine )
227-
228- ExpectSingletonReconciled (ctx , InstanceGCController )
229-
230- _ , err = cloudProvider .Get (ctx , providerID )
231- Expect (err ).To (HaveOccurred ())
232- Expect (corecloudprovider .IsNodeClaimNotFoundError (err )).To (BeTrue ())
233- })
234-
235- It ("should delete an AKS machine if there is no NodeClaim owner, and without timestamp tag" , func () {
236- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_creationtimestamp" )
237- azureEnv .AKSDataStorage .AKSMachines .Store (lo .FromPtr (aksMachine .ID ), * aksMachine )
238-
239- ExpectSingletonReconciled (ctx , InstanceGCController )
240-
241- _ , err = cloudProvider .Get (ctx , providerID )
242- Expect (err ).To (HaveOccurred ())
243- Expect (corecloudprovider .IsNodeClaimNotFoundError (err )).To (BeTrue ())
244- })
245-
246- var _ = Context ("Complex tags manipulation scenarios with in-place updates" , func () {
247- var nodeClaim * karpv1.NodeClaim
248- var nodeClass * v1beta1.AKSNodeClass
249-
250- BeforeEach (func () {
251- // Set up agent pool for AKS machines
252- opts := options .FromContext (ctx )
253- agentPool := test .AKSAgentPool (test.AKSAgentPoolOptions {
254- Name : opts .AKSMachinesPoolName ,
255- ResourceGroup : opts .NodeResourceGroup ,
256- ClusterName : opts .ClusterName ,
257- })
258- azureEnv .AKSDataStorage .AgentPools .Store (lo .FromPtr (agentPool .ID ), * agentPool )
259-
260- // Create AKS machine
261- aksMachine = test .AKSMachine (test.AKSMachineOptions {Name : "corner-case-machine" , MachinesPoolName : opts .AKSMachinesPoolName })
262- providerID = utils .VMResourceIDToProviderID (ctx , lo .FromPtr (aksMachine .Properties .ResourceID ))
263-
264- // Create corresponding NodeClaim, not launched yet
265- nodeClass = test .AKSNodeClass ()
266- nodeClaim = coretest .NodeClaim (karpv1.NodeClaim {
267- ObjectMeta : metav1.ObjectMeta {
268- Name : "corner-case-nodeclaim" ,
269- Annotations : map [string ]string {
270- v1beta1 .AnnotationAKSMachineResourceID : lo .FromPtr (aksMachine .ID ),
271- },
272- },
273- Spec : karpv1.NodeClaimSpec {
274- NodeClassRef : & karpv1.NodeClassReference {
275- Group : object .GVK (nodeClass ).Group ,
276- Kind : object .GVK (nodeClass ).Kind ,
277- Name : nodeClass .Name ,
278- },
279- },
280- })
281- })
282-
283- AfterEach (func () {
284- ExpectCleanedUp (ctx , env .Client )
285- })
286-
287- It ("Instance created -> Tag deleted -> In-place update -> Garbage collection false positive -> Create() completed -> Registered -> In-place update" , func () {
288- // Blank NodeClaim is there from the core.
289- ExpectApplied (ctx , env .Client , nodeClaim , nodeClass )
290-
291- // AKS machine created, but the user somehow deleted the timestamp tag
292- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_creationtimestamp" )
293- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_nodeclaim" )
294- azureEnv .AKSDataStorage .AKSMachines .Store (lo .FromPtr (aksMachine .ID ), * aksMachine )
295-
296- // Provider still waiting for Create() to complete. No change to NodeClaim.
297-
298- // In-place update reconciles - should not do anything as no ProviderID yet
299- ExpectObjectReconciled (ctx , env .Client , inPlaceUpdateController , nodeClaim )
300- // Verify no update calls and the timestamp tag stays broken.
301- Expect (azureEnv .AKSMachinesAPI .AKSMachineCreateOrUpdateBehavior .CalledWithInput .Len ()).To (Equal (0 ))
302- unchangedAKSMachine , err := azureEnv .AKSMachineProvider .Get (ctx , * aksMachine .Name )
303- Expect (err ).ToNot (HaveOccurred ())
304- Expect (unchangedAKSMachine .Properties .Tags ).ToNot (HaveKey ("karpenter.azure.com_aksmachine_creationtimestamp" ))
305- Expect (unchangedAKSMachine .Properties .Tags ).ToNot (HaveKey ("karpenter.azure.com_aksmachine_nodeclaim" ))
306-
307- // Garbage collection reconciles - should garbage collect due to no NodeClaim owner + timestamp defaulting to epoch
308- // This is expected, but not what we really wanted... See suggestions in respective modules.
309- ExpectSingletonReconciled (ctx , InstanceGCController )
310- _ , err = cloudProvider .Get (ctx , providerID )
311- Expect (err ).To (HaveOccurred ())
312- Expect (corecloudprovider .IsNodeClaimNotFoundError (err )).To (BeTrue ())
313-
314- // Provider Create() completes, setting the ProviderID on the NodeClaim
315- // Assume this comes at unfortunate time and just went in effect...
316- nodeClaim .Status .ProviderID = providerID
317- nodeClaim .StatusConditions ().SetTrue (karpv1 .ConditionTypeLaunched )
318- ExpectApplied (ctx , env .Client , nodeClaim )
319-
320- // NodeClaim gets registered
321- nodeClaim .StatusConditions ().SetTrue (karpv1 .ConditionTypeRegistered )
322- ExpectApplied (ctx , env .Client , nodeClaim )
323-
324- // In-place update reconciles again - should error NodeClaim not found, as instance is gone
325- _ , err = inPlaceUpdateController .Reconcile (ctx , nodeClaim )
326- Expect (err ).To (HaveOccurred ())
327- Expect (corecloudprovider .IsNodeClaimNotFoundError (err )).To (BeTrue ())
328- // Verify no additional update calls and no instance
329- Expect (azureEnv .AKSMachinesAPI .AKSMachineCreateOrUpdateBehavior .CalledWithInput .Len ()).To (Equal (0 ))
330- _ , err = azureEnv .AKSMachineProvider .Get (ctx , * aksMachine .Name )
331- Expect (err ).To (HaveOccurred ()) // Still gone
332- Expect (corecloudprovider .IsNodeClaimNotFoundError (err )).To (BeTrue ())
333-
334- // Core will eventually clean up the orphaned NodeClaim
335- })
336-
337- It ("Instance created -> Tag deleted -> Create() completed -> Registered -> In-place update -> Garbage collection negative -> In-place update -> Garbage collection negative -> Tag deleted -> Garbage collection negative" , func () {
338- // Blank NodeClaim is there from the core.
339- ExpectApplied (ctx , env .Client , nodeClaim , nodeClass )
340-
341- // AKS machine created, but the user somehow deleted the timestamp tag
342- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_creationtimestamp" )
343- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_nodeclaim" )
344- azureEnv .AKSDataStorage .AKSMachines .Store (lo .FromPtr (aksMachine .ID ), * aksMachine )
345-
346- // Provider Create() completes, setting the ProviderID on the NodeClaim
347- nodeClaim .Status .ProviderID = providerID
348- nodeClaim .StatusConditions ().SetTrue (karpv1 .ConditionTypeLaunched )
349- ExpectApplied (ctx , env .Client , nodeClaim )
350-
351- // NodeClaim gets registered
352- nodeClaim .StatusConditions ().SetTrue (karpv1 .ConditionTypeRegistered )
353- ExpectApplied (ctx , env .Client , nodeClaim )
354-
355- // In-place update reconciles - should repair timestamp tag to epoch
356- ExpectObjectReconciled (ctx , env .Client , inPlaceUpdateController , nodeClaim )
357- // Verify update call and the timestamp tag is repaired to epoch.
358- Expect (azureEnv .AKSMachinesAPI .AKSMachineCreateOrUpdateBehavior .CalledWithInput .Len ()).To (Equal (1 ))
359- updatedAKSMachine , err := azureEnv .AKSMachineProvider .Get (ctx , * aksMachine .Name )
360- Expect (err ).ToNot (HaveOccurred ())
361- Expect (updatedAKSMachine .Properties .Tags ).To (HaveKey ("karpenter.azure.com_aksmachine_creationtimestamp" ))
362- Expect (* updatedAKSMachine .Properties .Tags ["karpenter.azure.com_aksmachine_creationtimestamp" ]).To (Equal (instance .AKSMachineTimestampToTag (instance .ZeroAKSMachineTimestamp ())))
363- Expect (updatedAKSMachine .Properties .Tags ).To (HaveKey ("karpenter.azure.com_aksmachine_nodeclaim" ))
364- Expect (* updatedAKSMachine .Properties .Tags ["karpenter.azure.com_aksmachine_nodeclaim" ]).To (Equal ("corner-case-nodeclaim" ))
365-
366- // Garbage collection reconciles - should not garbage collect due to NodeClaim owner
367- ExpectSingletonReconciled (ctx , InstanceGCController )
368- _ , err = cloudProvider .Get (ctx , providerID )
369- Expect (err ).ToNot (HaveOccurred ())
370- Expect (azureEnv .AKSAgentPoolsAPI .AgentPoolDeleteMachinesBehavior .CalledWithInput .Len ()).To (Equal (0 ))
371-
372- // In-place update reconciles again - should preserve existing timestamp tag
373- ExpectObjectReconciled (ctx , env .Client , inPlaceUpdateController , nodeClaim )
374- // Verify no additional update calls and the timestamp tag stays unchanged.
375- Expect (azureEnv .AKSMachinesAPI .AKSMachineCreateOrUpdateBehavior .CalledWithInput .Len ()).To (Equal (1 ))
376- unchangedAKSMachine , err := azureEnv .AKSMachineProvider .Get (ctx , * aksMachine .Name )
377- Expect (err ).ToNot (HaveOccurred ())
378- Expect (unchangedAKSMachine .Properties .Tags ).To (HaveKey ("karpenter.azure.com_aksmachine_creationtimestamp" ))
379- Expect (* unchangedAKSMachine .Properties .Tags ["karpenter.azure.com_aksmachine_creationtimestamp" ]).To (Equal (instance .AKSMachineTimestampToTag (instance .ZeroAKSMachineTimestamp ())))
380- Expect (unchangedAKSMachine .Properties .Tags ).To (HaveKey ("karpenter.azure.com_aksmachine_nodeclaim" ))
381- Expect (* unchangedAKSMachine .Properties .Tags ["karpenter.azure.com_aksmachine_nodeclaim" ]).To (Equal ("corner-case-nodeclaim" ))
382-
383- // Garbage collection reconciles - should not garbage collect due to NodeClaim owner
384- ExpectSingletonReconciled (ctx , InstanceGCController )
385- _ , err = cloudProvider .Get (ctx , providerID )
386- Expect (err ).ToNot (HaveOccurred ())
387- Expect (azureEnv .AKSAgentPoolsAPI .AgentPoolDeleteMachinesBehavior .CalledWithInput .Len ()).To (Equal (0 ))
388-
389- // The user somehow deleted the timestamp tag again
390- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_creationtimestamp" )
391- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_nodeclaim" )
392- azureEnv .AKSDataStorage .AKSMachines .Store (lo .FromPtr (aksMachine .ID ), * aksMachine )
393-
394- // Garbage collection reconciles - should not garbage collect due to NodeClaim owner
395- ExpectSingletonReconciled (ctx , InstanceGCController )
396- _ , err = cloudProvider .Get (ctx , providerID )
397- Expect (err ).ToNot (HaveOccurred ())
398- Expect (azureEnv .AKSAgentPoolsAPI .AgentPoolDeleteMachinesBehavior .CalledWithInput .Len ()).To (Equal (0 ))
399- })
400-
401- It ("Instance created -> Tag deleted -> In-place update -> Create() completed -> Registered -> Garbage collection negative" , func () {
402- // Blank NodeClaim is there from the core.
403- ExpectApplied (ctx , env .Client , nodeClaim , nodeClass )
404-
405- // AKS machine created, but the user somehow deleted the timestamp tag
406- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_creationtimestamp" )
407- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_nodeclaim" )
408- azureEnv .AKSDataStorage .AKSMachines .Store (lo .FromPtr (aksMachine .ID ), * aksMachine )
409-
410- // In-place update reconciles - should not do anything as no ProviderID yet
411- ExpectObjectReconciled (ctx , env .Client , inPlaceUpdateController , nodeClaim )
412- // Verify no update calls and the timestamp tag stays broken.
413- Expect (azureEnv .AKSMachinesAPI .AKSMachineCreateOrUpdateBehavior .CalledWithInput .Len ()).To (Equal (0 ))
414- unchangedAKSMachine , err := azureEnv .AKSMachineProvider .Get (ctx , * aksMachine .Name )
415- Expect (err ).ToNot (HaveOccurred ())
416- Expect (unchangedAKSMachine .Properties .Tags ).ToNot (HaveKey ("karpenter.azure.com_aksmachine_creationtimestamp" ))
417- Expect (unchangedAKSMachine .Properties .Tags ).ToNot (HaveKey ("karpenter.azure.com_aksmachine_nodeclaim" ))
418-
419- // Provider Create() completes, setting the ProviderID on the NodeClaim
420- nodeClaim .Status .ProviderID = providerID
421- nodeClaim .StatusConditions ().SetTrue (karpv1 .ConditionTypeLaunched )
422- ExpectApplied (ctx , env .Client , nodeClaim )
423-
424- // NodeClaim gets registered
425- nodeClaim .StatusConditions ().SetTrue (karpv1 .ConditionTypeRegistered )
426- ExpectApplied (ctx , env .Client , nodeClaim )
427-
428- // Garbage collection reconciles - should not garbage collect due to NodeClaim owner
429- ExpectSingletonReconciled (ctx , InstanceGCController )
430- _ , err = cloudProvider .Get (ctx , providerID )
431- Expect (err ).ToNot (HaveOccurred ())
432- Expect (azureEnv .AKSAgentPoolsAPI .AgentPoolDeleteMachinesBehavior .CalledWithInput .Len ()).To (Equal (0 ))
433- })
434-
435- It ("Instance created -> Tag deleted -> Create() completed -> Garbage collection negative" , func () {
436- // Blank NodeClaim is there from the core.
437- ExpectApplied (ctx , env .Client , nodeClaim , nodeClass )
438-
439- // AKS machine created, but the user somehow deleted the timestamp tag
440- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_creationtimestamp" )
441- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_nodeclaim" )
442- azureEnv .AKSDataStorage .AKSMachines .Store (lo .FromPtr (aksMachine .ID ), * aksMachine )
443-
444- // Provider Create() completes, setting the ProviderID on the NodeClaim
445- nodeClaim .Status .ProviderID = providerID
446- nodeClaim .StatusConditions ().SetTrue (karpv1 .ConditionTypeLaunched )
447- ExpectApplied (ctx , env .Client , nodeClaim )
448-
449- // Garbage collection reconciles - should not garbage collect due to NodeClaim owner
450- ExpectSingletonReconciled (ctx , InstanceGCController )
451- _ , err = cloudProvider .Get (ctx , providerID )
452- Expect (err ).ToNot (HaveOccurred ())
453- Expect (azureEnv .AKSAgentPoolsAPI .AgentPoolDeleteMachinesBehavior .CalledWithInput .Len ()).To (Equal (0 ))
454- })
455-
456- It ("Instance created -> Tag deleted -> Create() completed -> Registered -> Garbage collection negative" , func () {
457- // Blank NodeClaim is there from the core.
458- ExpectApplied (ctx , env .Client , nodeClaim , nodeClass )
459-
460- // AKS machine created, but the user somehow deleted the timestamp tag
461- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_creationtimestamp" )
462- delete (aksMachine .Properties .Tags , "karpenter.azure.com_aksmachine_nodeclaim" )
463- azureEnv .AKSDataStorage .AKSMachines .Store (lo .FromPtr (aksMachine .ID ), * aksMachine )
464-
465- // Provider Create() completes, setting the ProviderID on the NodeClaim
466- nodeClaim .Status .ProviderID = providerID
467- nodeClaim .StatusConditions ().SetTrue (karpv1 .ConditionTypeLaunched )
468- ExpectApplied (ctx , env .Client , nodeClaim )
469-
470- // NodeClaim gets registered
471- nodeClaim .StatusConditions ().SetTrue (karpv1 .ConditionTypeRegistered )
472- ExpectApplied (ctx , env .Client , nodeClaim )
473-
474- // Garbage collection reconciles - should not garbage collect due to NodeClaim owner
475- ExpectSingletonReconciled (ctx , InstanceGCController )
476- _ , err = cloudProvider .Get (ctx , providerID )
477- Expect (err ).ToNot (HaveOccurred ())
478- Expect (azureEnv .AKSAgentPoolsAPI .AgentPoolDeleteMachinesBehavior .CalledWithInput .Len ()).To (Equal (0 ))
479- })
480- })
481220 })
482221
483222 var _ = Context ("Mixed VM and AKS machine instances" , func () {
0 commit comments