Skip to content

Commit a8066c1

Browse files
author
Aaron Lehmann
committed
allocator: Fix panic when allocations happen at init time
a.netCtx is initialized too late, so if allocations happen as part of doNetworkInit, a nil pointer dereference will cause a panic. Initialize a.netCtx earlier and use a.netCtx directly in member functions instead of passing the network context separately, so there is no confusion about which to use. Also change allocator.go to have separate entries in the waitgroup for initialization and actually running the allocator, and defer `Done` for both. This should prevent a panic like this from leading to a deadlock, since the deferred `Done` will be reached. Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com>
1 parent 0424477 commit a8066c1

File tree

2 files changed

+52
-29
lines changed

2 files changed

+52
-29
lines changed

manager/allocator/allocator.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ func (a *Allocator) Run(ctx context.Context) error {
125125
aaCopy := aa
126126
actor := func() error {
127127
wg.Add(1)
128+
defer wg.Done()
129+
128130
// init might return an allocator specific context
129131
// which is a child of the passed in context to hold
130132
// allocator specific state
@@ -133,10 +135,10 @@ func (a *Allocator) Run(ctx context.Context) error {
133135
// if we are failing in the init of
134136
// this allocator.
135137
aa.cancel()
136-
wg.Done()
137138
return err
138139
}
139140

141+
wg.Add(1)
140142
go func() {
141143
defer wg.Done()
142144
a.run(ctx, aaCopy)

manager/allocator/network.go

Lines changed: 49 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ type networkContext struct {
6868
unallocatedNetworks map[string]*api.Network
6969
}
7070

71-
func (a *Allocator) doNetworkInit(ctx context.Context) error {
71+
func (a *Allocator) doNetworkInit(ctx context.Context) (err error) {
7272
na, err := networkallocator.New()
7373
if err != nil {
7474
return err
@@ -81,6 +81,13 @@ func (a *Allocator) doNetworkInit(ctx context.Context) error {
8181
unallocatedNetworks: make(map[string]*api.Network),
8282
ingressNetwork: newIngressNetwork(),
8383
}
84+
a.netCtx = nc
85+
defer func() {
86+
// Clear a.netCtx if initialization was unsuccessful.
87+
if err != nil {
88+
a.netCtx = nil
89+
}
90+
}()
8491

8592
// Check if we have the ingress network. If not found create
8693
// it before reading all network objects for allocation.
@@ -125,7 +132,7 @@ func (a *Allocator) doNetworkInit(ctx context.Context) error {
125132
// that we can get the preferred subnet for ingress
126133
// network.
127134
if !na.IsAllocated(nc.ingressNetwork) {
128-
if err := a.allocateNetwork(ctx, nc, nc.ingressNetwork); err != nil {
135+
if err := a.allocateNetwork(ctx, nc.ingressNetwork); err != nil {
129136
log.G(ctx).WithError(err).Error("failed allocating ingress network during init")
130137
}
131138

@@ -155,7 +162,7 @@ func (a *Allocator) doNetworkInit(ctx context.Context) error {
155162
continue
156163
}
157164

158-
if err := a.allocateNetwork(ctx, nc, n); err != nil {
165+
if err := a.allocateNetwork(ctx, n); err != nil {
159166
log.G(ctx).WithError(err).Errorf("failed allocating network %s during init", n.ID)
160167
}
161168
}
@@ -179,7 +186,7 @@ func (a *Allocator) doNetworkInit(ctx context.Context) error {
179186
}
180187

181188
node.Attachment.Network = nc.ingressNetwork.Copy()
182-
if err := a.allocateNode(ctx, nc, node); err != nil {
189+
if err := a.allocateNode(ctx, node); err != nil {
183190
log.G(ctx).WithError(err).Errorf("Failed to allocate network resources for node %s during init", node.ID)
184191
}
185192
}
@@ -198,7 +205,7 @@ func (a *Allocator) doNetworkInit(ctx context.Context) error {
198205
continue
199206
}
200207

201-
if err := a.allocateService(ctx, nc, s); err != nil {
208+
if err := a.allocateService(ctx, s); err != nil {
202209
log.G(ctx).WithError(err).Errorf("failed allocating service %s during init", s.ID)
203210
}
204211
}
@@ -260,7 +267,7 @@ func (a *Allocator) doNetworkInit(ctx context.Context) error {
260267
}
261268

262269
err := batch.Update(func(tx store.Tx) error {
263-
_, err := a.allocateTask(ctx, nc, tx, t)
270+
_, err := a.allocateTask(ctx, tx, t)
264271
return err
265272
})
266273
if err != nil {
@@ -274,7 +281,6 @@ func (a *Allocator) doNetworkInit(ctx context.Context) error {
274281
return err
275282
}
276283

277-
a.netCtx = nc
278284
return nil
279285
}
280286

@@ -288,7 +294,7 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) {
288294
break
289295
}
290296

291-
if err := a.allocateNetwork(ctx, nc, n); err != nil {
297+
if err := a.allocateNetwork(ctx, n); err != nil {
292298
log.G(ctx).WithError(err).Errorf("Failed allocation for network %s", n.ID)
293299
break
294300
}
@@ -309,7 +315,7 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) {
309315
break
310316
}
311317

312-
if err := a.allocateService(ctx, nc, s); err != nil {
318+
if err := a.allocateService(ctx, s); err != nil {
313319
log.G(ctx).WithError(err).Errorf("Failed allocation for service %s", s.ID)
314320
break
315321
}
@@ -320,7 +326,7 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) {
320326
break
321327
}
322328

323-
if err := a.allocateService(ctx, nc, s); err != nil {
329+
if err := a.allocateService(ctx, s); err != nil {
324330
log.G(ctx).WithError(err).Errorf("Failed allocation during update of service %s", s.ID)
325331
break
326332
}
@@ -335,18 +341,18 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) {
335341
// it's still there.
336342
delete(nc.unallocatedServices, s.ID)
337343
case state.EventCreateNode, state.EventUpdateNode, state.EventDeleteNode:
338-
a.doNodeAlloc(ctx, nc, ev)
344+
a.doNodeAlloc(ctx, ev)
339345
case state.EventCreateTask, state.EventUpdateTask, state.EventDeleteTask:
340-
a.doTaskAlloc(ctx, nc, ev)
346+
a.doTaskAlloc(ctx, ev)
341347
case state.EventCommit:
342-
a.procUnallocatedNetworks(ctx, nc)
343-
a.procUnallocatedServices(ctx, nc)
344-
a.procUnallocatedTasksNetwork(ctx, nc)
348+
a.procUnallocatedNetworks(ctx)
349+
a.procUnallocatedServices(ctx)
350+
a.procUnallocatedTasksNetwork(ctx)
345351
return
346352
}
347353
}
348354

349-
func (a *Allocator) doNodeAlloc(ctx context.Context, nc *networkContext, ev events.Event) {
355+
func (a *Allocator) doNodeAlloc(ctx context.Context, ev events.Event) {
350356
var (
351357
isDelete bool
352358
node *api.Node
@@ -362,6 +368,8 @@ func (a *Allocator) doNodeAlloc(ctx context.Context, nc *networkContext, ev even
362368
node = v.Node.Copy()
363369
}
364370

371+
nc := a.netCtx
372+
365373
if isDelete {
366374
if nc.nwkAllocator.IsNodeAllocated(node) {
367375
if err := nc.nwkAllocator.DeallocateNode(node); err != nil {
@@ -377,7 +385,7 @@ func (a *Allocator) doNodeAlloc(ctx context.Context, nc *networkContext, ev even
377385
}
378386

379387
node.Attachment.Network = nc.ingressNetwork.Copy()
380-
if err := a.allocateNode(ctx, nc, node); err != nil {
388+
if err := a.allocateNode(ctx, node); err != nil {
381389
log.G(ctx).WithError(err).Errorf("Failed to allocate network resources for node %s", node.ID)
382390
}
383391
}
@@ -460,7 +468,7 @@ func (a *Allocator) taskCreateNetworkAttachments(t *api.Task, s *api.Service) {
460468
taskUpdateNetworks(t, networks)
461469
}
462470

463-
func (a *Allocator) doTaskAlloc(ctx context.Context, nc *networkContext, ev events.Event) {
471+
func (a *Allocator) doTaskAlloc(ctx context.Context, ev events.Event) {
464472
var (
465473
isDelete bool
466474
t *api.Task
@@ -476,6 +484,8 @@ func (a *Allocator) doTaskAlloc(ctx context.Context, nc *networkContext, ev even
476484
t = v.Task.Copy()
477485
}
478486

487+
nc := a.netCtx
488+
479489
// If the task has stopped running or it's being deleted then
480490
// we should free the network resources associated with the
481491
// task right away.
@@ -526,7 +536,9 @@ func (a *Allocator) doTaskAlloc(ctx context.Context, nc *networkContext, ev even
526536
nc.unallocatedTasks[t.ID] = t
527537
}
528538

529-
func (a *Allocator) allocateNode(ctx context.Context, nc *networkContext, node *api.Node) error {
539+
func (a *Allocator) allocateNode(ctx context.Context, node *api.Node) error {
540+
nc := a.netCtx
541+
530542
if err := nc.nwkAllocator.AllocateNode(node); err != nil {
531543
return err
532544
}
@@ -559,7 +571,9 @@ func (a *Allocator) allocateNode(ctx context.Context, nc *networkContext, node *
559571
return nil
560572
}
561573

562-
func (a *Allocator) allocateService(ctx context.Context, nc *networkContext, s *api.Service) error {
574+
func (a *Allocator) allocateService(ctx context.Context, s *api.Service) error {
575+
nc := a.netCtx
576+
563577
if s.Spec.Endpoint != nil {
564578
// service has user-defined endpoint
565579
if s.Endpoint == nil {
@@ -644,7 +658,9 @@ func (a *Allocator) allocateService(ctx context.Context, nc *networkContext, s *
644658
return nil
645659
}
646660

647-
func (a *Allocator) allocateNetwork(ctx context.Context, nc *networkContext, n *api.Network) error {
661+
func (a *Allocator) allocateNetwork(ctx context.Context, n *api.Network) error {
662+
nc := a.netCtx
663+
648664
if err := nc.nwkAllocator.Allocate(n); err != nil {
649665
nc.unallocatedNetworks[n.ID] = n
650666
return errors.Wrapf(err, "failed during network allocation for network %s", n.ID)
@@ -666,7 +682,7 @@ func (a *Allocator) allocateNetwork(ctx context.Context, nc *networkContext, n *
666682
return nil
667683
}
668684

669-
func (a *Allocator) allocateTask(ctx context.Context, nc *networkContext, tx store.Tx, t *api.Task) (*api.Task, error) {
685+
func (a *Allocator) allocateTask(ctx context.Context, tx store.Tx, t *api.Task) (*api.Task, error) {
670686
taskUpdated := false
671687

672688
// Get the latest task state from the store before updating.
@@ -675,6 +691,8 @@ func (a *Allocator) allocateTask(ctx context.Context, nc *networkContext, tx sto
675691
return nil, fmt.Errorf("could not find task %s while trying to update network allocation", t.ID)
676692
}
677693

694+
nc := a.netCtx
695+
678696
// We might be here even if a task allocation has already
679697
// happened but wasn't successfully committed to store. In such
680698
// cases skip allocation and go straight ahead to updating the
@@ -734,10 +752,11 @@ func (a *Allocator) allocateTask(ctx context.Context, nc *networkContext, tx sto
734752
return storeT, nil
735753
}
736754

737-
func (a *Allocator) procUnallocatedNetworks(ctx context.Context, nc *networkContext) {
755+
func (a *Allocator) procUnallocatedNetworks(ctx context.Context) {
756+
nc := a.netCtx
738757
for _, n := range nc.unallocatedNetworks {
739758
if !nc.nwkAllocator.IsAllocated(n) {
740-
if err := a.allocateNetwork(ctx, nc, n); err != nil {
759+
if err := a.allocateNetwork(ctx, n); err != nil {
741760
log.G(ctx).Debugf("Failed allocation of unallocated network %s: %v", n.ID, err)
742761
continue
743762
}
@@ -747,10 +766,11 @@ func (a *Allocator) procUnallocatedNetworks(ctx context.Context, nc *networkCont
747766
}
748767
}
749768

750-
func (a *Allocator) procUnallocatedServices(ctx context.Context, nc *networkContext) {
769+
func (a *Allocator) procUnallocatedServices(ctx context.Context) {
770+
nc := a.netCtx
751771
for _, s := range nc.unallocatedServices {
752772
if !nc.nwkAllocator.IsServiceAllocated(s) {
753-
if err := a.allocateService(ctx, nc, s); err != nil {
773+
if err := a.allocateService(ctx, s); err != nil {
754774
log.G(ctx).Debugf("Failed allocation of unallocated service %s: %v", s.ID, err)
755775
continue
756776
}
@@ -760,15 +780,16 @@ func (a *Allocator) procUnallocatedServices(ctx context.Context, nc *networkCont
760780
}
761781
}
762782

763-
func (a *Allocator) procUnallocatedTasksNetwork(ctx context.Context, nc *networkContext) {
783+
func (a *Allocator) procUnallocatedTasksNetwork(ctx context.Context) {
784+
nc := a.netCtx
764785
tasks := make([]*api.Task, 0, len(nc.unallocatedTasks))
765786

766787
committed, err := a.store.Batch(func(batch *store.Batch) error {
767788
for _, t := range nc.unallocatedTasks {
768789
var allocatedT *api.Task
769790
err := batch.Update(func(tx store.Tx) error {
770791
var err error
771-
allocatedT, err = a.allocateTask(ctx, nc, tx, t)
792+
allocatedT, err = a.allocateTask(ctx, tx, t)
772793
return err
773794
})
774795

0 commit comments

Comments
 (0)