@@ -26,7 +26,11 @@ const (
2626 allocatedStatusMessage = "pending task scheduling"
2727)
2828
29- var errNoChanges = errors .New ("task unchanged" )
29+ var (
30+ errNoChanges = errors .New ("task unchanged" )
31+
32+ retryInterval = 5 * time .Minute
33+ )
3034
3135func newIngressNetwork () * api.Network {
3236 return & api.Network {
@@ -57,19 +61,28 @@ type networkContext struct {
5761 // the actual network allocation.
5862 nwkAllocator * networkallocator.NetworkAllocator
5963
60- // A table of unallocated tasks which will be revisited if any thing
64+ // A set of tasks which are ready to be allocated as a batch. This is
65+ // distinct from "unallocatedTasks" which are tasks that failed to
66+ // allocate on the first try, being held for a future retry.
67+ pendingTasks map [string ]* api.Task
68+
69+ // A set of unallocated tasks which will be revisited if anything
6170 // changes in system state that might help task allocation.
6271 unallocatedTasks map [string ]* api.Task
6372
64- // A table of unallocated services which will be revisited if
73+ // A set of unallocated services which will be revisited if
6574 // any thing changes in system state that might help service
6675 // allocation.
6776 unallocatedServices map [string ]* api.Service
6877
69- // A table of unallocated networks which will be revisited if
78+ // A set of unallocated networks which will be revisited if
7079 // any thing changes in system state that might help network
7180 // allocation.
7281 unallocatedNetworks map [string ]* api.Network
82+
83+ // lastRetry is the last timestamp when unallocated
84+ // tasks/services/networks were retried.
85+ lastRetry time.Time
7386}
7487
7588func (a * Allocator ) doNetworkInit (ctx context.Context ) (err error ) {
@@ -80,10 +93,12 @@ func (a *Allocator) doNetworkInit(ctx context.Context) (err error) {
8093
8194 nc := & networkContext {
8295 nwkAllocator : na ,
96+ pendingTasks : make (map [string ]* api.Task ),
8397 unallocatedTasks : make (map [string ]* api.Task ),
8498 unallocatedServices : make (map [string ]* api.Service ),
8599 unallocatedNetworks : make (map [string ]* api.Network ),
86100 ingressNetwork : newIngressNetwork (),
101+ lastRetry : time .Now (),
87102 }
88103 a .netCtx = nc
89104 defer func () {
@@ -401,12 +416,22 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) {
401416 case state.EventCreateNode , state.EventUpdateNode , state.EventDeleteNode :
402417 a .doNodeAlloc (ctx , ev )
403418 case state.EventCreateTask , state.EventUpdateTask , state.EventDeleteTask :
404- a .doTaskAlloc (ctx , ev )
419+ a .doTaskAlloc (ctx , ev , nc . pendingTasks )
405420 case state.EventCommit :
406- a .procUnallocatedNetworks (ctx )
407- a .procUnallocatedServices (ctx )
408- a .procUnallocatedTasksNetwork (ctx )
409- return
421+ a .procTasksNetwork (ctx , nc .pendingTasks , false )
422+
423+ if time .Since (nc .lastRetry ) > retryInterval {
424+ a .procUnallocatedNetworks (ctx )
425+ a .procUnallocatedServices (ctx )
426+ a .procTasksNetwork (ctx , nc .unallocatedTasks , true )
427+ nc .lastRetry = time .Now ()
428+ }
429+
430+ // Any leftover tasks are moved to the unallocated set
431+ for _ , t := range nc .pendingTasks {
432+ nc .unallocatedTasks [t .ID ] = t
433+ }
434+ nc .pendingTasks = make (map [string ]* api.Task )
410435 }
411436}
412437
@@ -551,7 +576,7 @@ func (a *Allocator) taskCreateNetworkAttachments(t *api.Task, s *api.Service) {
551576 taskUpdateNetworks (t , networks )
552577}
553578
554- func (a * Allocator ) doTaskAlloc (ctx context.Context , ev events.Event ) {
579+ func (a * Allocator ) doTaskAlloc (ctx context.Context , ev events.Event , toAllocate map [ string ] * api. Task ) {
555580 var (
556581 isDelete bool
557582 t * api.Task
@@ -579,14 +604,16 @@ func (a *Allocator) doTaskAlloc(ctx context.Context, ev events.Event) {
579604 }
580605 }
581606
582- // Cleanup any task references that might exist in unallocatedTasks
607+ // Cleanup any task references that might exist
608+ delete (toAllocate , t .ID )
583609 delete (nc .unallocatedTasks , t .ID )
584610 return
585611 }
586612
587613 // If we are already in allocated state, there is
588614 // absolutely nothing else to do.
589615 if t .Status .State >= api .TaskStatePending {
616+ delete (toAllocate , t .ID )
590617 delete (nc .unallocatedTasks , t .ID )
591618 return
592619 }
@@ -616,7 +643,7 @@ func (a *Allocator) doTaskAlloc(ctx context.Context, ev events.Event) {
616643 // based on service spec.
617644 a .taskCreateNetworkAttachments (t , s )
618645
619- nc . unallocatedTasks [t .ID ] = t
646+ toAllocate [t .ID ] = t
620647}
621648
622649func (a * Allocator ) allocateNode (ctx context.Context , node * api.Node ) error {
@@ -948,15 +975,18 @@ func (a *Allocator) procUnallocatedServices(ctx context.Context) {
948975 }
949976}
950977
951- func (a * Allocator ) procUnallocatedTasksNetwork (ctx context.Context ) {
952- nc := a .netCtx
953- allocatedTasks := make ([]* api.Task , 0 , len (nc .unallocatedTasks ))
978+ func (a * Allocator ) procTasksNetwork (ctx context.Context , toAllocate map [string ]* api.Task , quiet bool ) {
979+ allocatedTasks := make ([]* api.Task , 0 , len (toAllocate ))
954980
955- for _ , t := range nc . unallocatedTasks {
981+ for _ , t := range toAllocate {
956982 if err := a .allocateTask (ctx , t ); err == nil {
957983 allocatedTasks = append (allocatedTasks , t )
958984 } else if err != errNoChanges {
959- log .G (ctx ).WithError (err ).Error ("task allocation failure" )
985+ if quiet {
986+ log .G (ctx ).WithError (err ).Debug ("task allocation failure" )
987+ } else {
988+ log .G (ctx ).WithError (err ).Error ("task allocation failure" )
989+ }
960990 }
961991 }
962992
@@ -978,11 +1008,11 @@ func (a *Allocator) procUnallocatedTasksNetwork(ctx context.Context) {
9781008 })
9791009
9801010 if err != nil {
981- log .G (ctx ).WithError (err ).Error ("failed a store batch operation while processing unallocated tasks" )
1011+ log .G (ctx ).WithError (err ).Error ("failed a store batch operation while processing tasks" )
9821012 }
9831013
9841014 for _ , t := range allocatedTasks [:committed ] {
985- delete (nc . unallocatedTasks , t .ID )
1015+ delete (toAllocate , t .ID )
9861016 }
9871017}
9881018
0 commit comments