@@ -36,9 +36,15 @@ import (
3636)
3737
3838const (
39- router = "router"
40- prefill = "prefill"
41- decode = "decode"
39+ router = "router"
40+ prefill = "prefill"
41+ decode = "decode"
42+ prefillImageVersionV1 = "prefill:v1"
43+ prefillImageVersionV2 = "prefill:v2"
44+ decodeImageVersionV1 = "decode:v1"
45+ decodeImageVersionV2 = "decode:v2"
46+ routerImageVersionV1 = "router:v1"
47+ routerImageVersionV2 = "router:v2"
4248)
4349
4450var _ = ginkgo .Describe ("RoleSet controller test" , func () {
@@ -162,15 +168,19 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
162168 podList := & corev1.PodList {}
163169 if err := k8sClient .List (ctx , podList ,
164170 client .InNamespace (ns .Name ),
165- client.MatchingLabels {constants .RoleSetNameLabelKey : rs .Name }); err == nil {
166- for i := range podList .Items {
167- pod := & podList .Items [i ]
168- if pod .DeletionTimestamp != nil {
169- continue
170- }
171- if pod .Status .Phase != corev1 .PodRunning {
172- makePodReady (pod )
173- _ = k8sClient .Status ().Update (ctx , pod )
171+ client.MatchingLabels {constants .RoleSetNameLabelKey : rs .Name }); err != nil {
172+ ginkgo .GinkgoLogr .Error (err , "Failed to list pods in startPodReadyHelper" )
173+ continue
174+ }
175+ for i := range podList .Items {
176+ pod := & podList .Items [i ]
177+ if pod .DeletionTimestamp != nil {
178+ continue
179+ }
180+ if pod .Status .Phase != corev1 .PodRunning {
181+ makePodReady (pod )
182+ if err := k8sClient .Status ().Update (ctx , pod ); err != nil {
183+ ginkgo .GinkgoLogr .Error (err , "Failed to update pod status in startPodReadyHelper" , "pod" , pod .Name )
174184 }
175185 }
176186 }
@@ -346,7 +356,7 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
346356 Name : router ,
347357 Replicas : int32Ptr (1 ),
348358 UpgradeOrder : int32Ptr (1 ),
349- Template : makePodTemplate ("router:v1" ),
359+ Template : makePodTemplate (routerImageVersionV1 ),
350360 UpdateStrategy : orchestrationapi.RoleUpdateStrategy {
351361 MaxUnavailable : & maxUnavailable ,
352362 },
@@ -356,7 +366,7 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
356366 Name : prefill ,
357367 Replicas : int32Ptr (2 ),
358368 UpgradeOrder : int32Ptr (2 ),
359- Template : makePodTemplate ("prefill:v1" ),
369+ Template : makePodTemplate (prefillImageVersionV1 ),
360370 UpdateStrategy : orchestrationapi.RoleUpdateStrategy {
361371 MaxUnavailable : & maxUnavailable ,
362372 },
@@ -366,7 +376,7 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
366376 Name : decode ,
367377 Replicas : int32Ptr (2 ),
368378 UpgradeOrder : int32Ptr (3 ),
369- Template : makePodTemplate ("decode:v1" ),
379+ Template : makePodTemplate (decodeImageVersionV1 ),
370380 UpdateStrategy : orchestrationapi.RoleUpdateStrategy {
371381 MaxUnavailable : & maxUnavailable ,
372382 },
@@ -415,11 +425,11 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
415425 for i := range latest .Spec .Roles {
416426 switch latest .Spec .Roles [i ].Name {
417427 case router :
418- latest .Spec .Roles [i ].Template = makePodTemplate ("router:v2" )
428+ latest .Spec .Roles [i ].Template = makePodTemplate (routerImageVersionV2 )
419429 case prefill :
420- latest .Spec .Roles [i ].Template = makePodTemplate ("prefill:v2" )
430+ latest .Spec .Roles [i ].Template = makePodTemplate (prefillImageVersionV2 )
421431 case decode :
422- latest .Spec .Roles [i ].Template = makePodTemplate ("decode:v2" )
432+ latest .Spec .Roles [i ].Template = makePodTemplate (decodeImageVersionV2 )
423433 }
424434 }
425435
@@ -455,20 +465,21 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
455465 if routerHasNewImage {
456466 prefillPods := getPodsForRole (rs , prefill )
457467 for _ , pod := range prefillPods {
458- if pod .Spec .Containers [0 ].Image == "prefill:v2" {
468+ if pod .Spec .Containers [0 ].Image == prefillImageVersionV2 {
459469 return true
460470 }
461471 }
462472 }
463473 return false
464- }, time .Second * 45 , time .Millisecond * 500 ).Should (gomega .BeTrue (), "prefill should start upgrading after router starts" )
474+ }, time .Second * 45 , time .Millisecond * 500 ).Should (gomega .BeTrue (),
475+ "prefill should start upgrading after router starts" )
465476
466477 // Decode should upgrade after prefill
467478 gomega .Eventually (func () bool {
468479 prefillPods := getPodsForRole (rs , prefill )
469480 prefillHasNewImage := false
470481 for _ , pod := range prefillPods {
471- if pod .Spec .Containers [0 ].Image == "prefill:v2" {
482+ if pod .Spec .Containers [0 ].Image == prefillImageVersionV2 {
472483 prefillHasNewImage = true
473484 break
474485 }
@@ -477,13 +488,14 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
477488 if prefillHasNewImage {
478489 decodePods := getPodsForRole (rs , decode )
479490 for _ , pod := range decodePods {
480- if pod .Spec .Containers [0 ].Image == "decode:v2" {
491+ if pod .Spec .Containers [0 ].Image == decodeImageVersionV2 {
481492 return true
482493 }
483494 }
484495 }
485496 return false
486- }, time .Second * 45 , time .Millisecond * 500 ).Should (gomega .BeTrue (), "decode should start upgrading after prefill starts" )
497+ }, time .Second * 45 , time .Millisecond * 500 ).Should (gomega .BeTrue (),
498+ "decode should start upgrading after prefill starts" )
487499
488500 // Verify final state - all roles should be upgraded
489501 gomega .Eventually (func () error {
@@ -506,27 +518,28 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
506518 // Verify all pods have the new images
507519 routerPods := getPodsForRole (rs , router )
508520 for _ , pod := range routerPods {
509- if pod .Spec .Containers [0 ].Image != "router:v2" {
521+ if pod .Spec .Containers [0 ].Image != routerImageVersionV2 {
510522 return fmt .Errorf ("router pod still has old image: %s" , pod .Spec .Containers [0 ].Image )
511523 }
512524 }
513525
514526 prefillPods := getPodsForRole (rs , prefill )
515527 for _ , pod := range prefillPods {
516- if pod .Spec .Containers [0 ].Image != "prefill:v2" {
528+ if pod .Spec .Containers [0 ].Image != prefillImageVersionV2 {
517529 return fmt .Errorf ("prefill pod still has old image: %s" , pod .Spec .Containers [0 ].Image )
518530 }
519531 }
520532
521533 decodePods := getPodsForRole (rs , decode )
522534 for _ , pod := range decodePods {
523- if pod .Spec .Containers [0 ].Image != "decode:v2" {
535+ if pod .Spec .Containers [0 ].Image != decodeImageVersionV2 {
524536 return fmt .Errorf ("decode pod still has old image: %s" , pod .Spec .Containers [0 ].Image )
525537 }
526538 }
527539
528540 return nil
529- }, time .Second * 45 , time .Millisecond * 500 ).Should (gomega .Succeed (), "All roles should be fully upgraded with new images" )
541+ }, time .Second * 45 , time .Millisecond * 500 ).Should (gomega .Succeed (),
542+ "All roles should be fully upgraded with new images" )
530543 },
531544 },
532545 },
@@ -543,7 +556,7 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
543556 Name : prefill ,
544557 Replicas : int32Ptr (1 ),
545558 UpgradeOrder : int32Ptr (1 ),
546- Template : makePodTemplate ("prefill:v1" ),
559+ Template : makePodTemplate (prefillImageVersionV1 ),
547560 UpdateStrategy : orchestrationapi.RoleUpdateStrategy {
548561 MaxUnavailable : & maxUnavailable ,
549562 },
@@ -553,7 +566,7 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
553566 Name : decode ,
554567 Replicas : int32Ptr (1 ),
555568 UpgradeOrder : int32Ptr (1 ),
556- Template : makePodTemplate ("decode:v1" ),
569+ Template : makePodTemplate (decodeImageVersionV1 ),
557570 UpdateStrategy : orchestrationapi.RoleUpdateStrategy {
558571 MaxUnavailable : & maxUnavailable ,
559572 },
@@ -607,40 +620,49 @@ var _ = ginkgo.Describe("RoleSet controller test", func() {
607620 for i := range latest .Spec .Roles {
608621 switch latest .Spec .Roles [i ].Name {
609622 case prefill :
610- latest .Spec .Roles [i ].Template = makePodTemplate ("prefill:v2" )
623+ latest .Spec .Roles [i ].Template = makePodTemplate (prefillImageVersionV2 )
611624 case decode :
612- latest .Spec .Roles [i ].Template = makePodTemplate ("decode:v2" )
625+ latest .Spec .Roles [i ].Template = makePodTemplate (decodeImageVersionV2 )
613626 }
614627 }
615628 g .Expect (k8sClient .Update (ctx , latest )).To (gomega .Succeed ())
616629 }, time .Second * 5 , time .Millisecond * 250 ).Should (gomega .Succeed ())
617630 },
618631 checkFunc : func (ctx context.Context , k8sClient client.Client , rs * orchestrationapi.RoleSet ) {
619- gomega .Eventually (func () bool {
632+ // Wait for prefill to be upgraded first, as it's defined first in the RoleSet
633+ gomega .Eventually (func (g gomega.Gomega ) bool {
620634 prefillPods := getPodsForRole (rs , prefill )
621- decodePods := getPodsForRole (rs , decode )
622-
623- prefillUpgraded := false
624- decodeUpgraded := false
625-
626635 for _ , pod := range prefillPods {
627- if pod .Spec .Containers [0 ].Image == "prefill:v2" {
628- prefillUpgraded = true
636+ if pod .Spec .Containers [0 ].Image == prefillImageVersionV2 {
629637 makePodReady (pod )
630- _ = k8sClient .Status ().Update (ctx , pod )
638+ g .Expect (k8sClient .Status ().Update (ctx , pod )).To (gomega .Succeed ())
639+ return true
631640 }
632641 }
642+ return false
643+ }, time .Second * 15 , time .Millisecond * 500 ).Should (gomega .BeTrue (),
644+ "prefill role should upgrade first" )
645+
646+ // Verify decode role is not upgraded yet
647+ decodePods := getPodsForRole (rs , decode )
648+ for _ , pod := range decodePods {
649+ gomega .Expect (pod .Spec .Containers [0 ].Image ).To (gomega .Equal (decodeImageVersionV1 ),
650+ "decode role should not be upgraded yet" )
651+ }
633652
653+ // Now, wait for decode role to be upgraded
654+ gomega .Eventually (func (g gomega.Gomega ) bool {
655+ decodePods := getPodsForRole (rs , decode )
634656 for _ , pod := range decodePods {
635- if pod .Spec .Containers [0 ].Image == "decode:v2" {
636- decodeUpgraded = true
657+ if pod .Spec .Containers [0 ].Image == decodeImageVersionV2 {
637658 makePodReady (pod )
638- _ = k8sClient .Status ().Update (ctx , pod )
659+ g .Expect (k8sClient .Status ().Update (ctx , pod )).To (gomega .Succeed ())
660+ return true
639661 }
640662 }
641-
642- return prefillUpgraded && decodeUpgraded
643- }, time . Second * 30 , time . Millisecond * 500 ). Should ( gomega . BeTrue (), "Both services with same upgrade order should be upgraded " )
663+ return false
664+ }, time . Second * 15 , time . Millisecond * 500 ). Should ( gomega . BeTrue (),
665+ "decode role should upgrade after prefill " )
644666 },
645667 },
646668 },
0 commit comments