Skip to content

Commit 8c896af

Browse files
author
Aaron Lehmann
committed
constraintenforcer: Trigger task restarts when appropriate
The constraint enforcer currently sets task desired state to "shutdown" directly, which means the orchestrator will consider these tasks already processed, and won't trigger restarts. While this is appropriate for global services, replicated services should keep the desired number of replicas running, so each task shut down by the constraint enforcer should be restarted somewhere else. This changes the constraint enforcer to trigger a task shutdown by updating the actual state rather than desired state. This will cause the orchestrator to restart the task when necessary. It's not a perfect solution, because it bends rules about field ownership, and may cause the replacement task to start before the old one stops. However, it's a good compromise solution for this problem that doesn't require absorbing the constraint enforcer into each orchestrator (which wouldn't fit the model well), or adding a third type of state to every task. Also, update the global orchestrator to only restart a task when the node still meets the constraints. Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com>
1 parent 03d5b15 commit 8c896af

File tree

3 files changed

+32
-5
lines changed

3 files changed

+32
-5
lines changed

manager/orchestrator/constraintenforcer/constraint_enforcer.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
package constraintenforcer
22

33
import (
4+
"time"
5+
46
"github.com/docker/swarmkit/api"
57
"github.com/docker/swarmkit/log"
68
"github.com/docker/swarmkit/manager/constraint"
79
"github.com/docker/swarmkit/manager/state"
810
"github.com/docker/swarmkit/manager/state/store"
11+
"github.com/docker/swarmkit/protobuf/ptypes"
912
)
1013

1114
// ConstraintEnforcer watches for updates to nodes and shuts down tasks that no
@@ -134,7 +137,16 @@ func (ce *ConstraintEnforcer) shutdownNoncompliantTasks(node *api.Node) {
134137
return nil
135138
}
136139

137-
t.DesiredState = api.TaskStateShutdown
140+
// We set the observed state to
141+
// REJECTED, rather than the desired
142+
// state. Desired state is owned by the
143+
// orchestrator, and setting it directly
144+
// will bypass actions such as
145+
// restarting the task on another node
146+
// (if applicable).
147+
t.Status.State = api.TaskStateRejected
148+
t.Status.Message = "assigned node no longer meets constraints"
149+
t.Status.Timestamp = ptypes.MustTimestampProto(time.Now())
138150
return store.UpdateTask(tx, t)
139151
})
140152
if err != nil {

manager/orchestrator/constraintenforcer/constraint_enforcer_test.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,10 @@ func TestConstraintEnforcer(t *testing.T) {
131131

132132
go constraintEnforcer.Run()
133133

134-
// id0 should be killed immediately
135-
shutdown1 := testutils.WatchShutdownTask(t, watch)
134+
// id0 should be rejected immediately
135+
shutdown1 := testutils.WatchTaskUpdate(t, watch)
136136
assert.Equal(t, "id0", shutdown1.ID)
137+
assert.Equal(t, api.TaskStateRejected, shutdown1.Status.State)
137138

138139
// Change node id1 to a manager
139140
err = s.Update(func(tx store.Tx) error {
@@ -147,8 +148,9 @@ func TestConstraintEnforcer(t *testing.T) {
147148
})
148149
assert.NoError(t, err)
149150

150-
shutdown2 := testutils.WatchShutdownTask(t, watch)
151+
shutdown2 := testutils.WatchTaskUpdate(t, watch)
151152
assert.Equal(t, "id2", shutdown2.ID)
153+
assert.Equal(t, api.TaskStateRejected, shutdown2.Status.State)
152154

153155
// Change resources on node id2
154156
err = s.Update(func(tx store.Tx) error {
@@ -162,6 +164,7 @@ func TestConstraintEnforcer(t *testing.T) {
162164
})
163165
assert.NoError(t, err)
164166

165-
shutdown3 := testutils.WatchShutdownTask(t, watch)
167+
shutdown3 := testutils.WatchTaskUpdate(t, watch)
166168
assert.Equal(t, "id4", shutdown3.ID)
169+
assert.Equal(t, api.TaskStateRejected, shutdown3.Status.State)
167170
}

manager/orchestrator/global/global.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,10 +513,22 @@ func (g *Orchestrator) tickTasks(ctx context.Context) {
513513
if t == nil || t.DesiredState > api.TaskStateRunning {
514514
return nil
515515
}
516+
516517
service := store.GetService(tx, t.ServiceID)
517518
if service == nil {
518519
return nil
519520
}
521+
522+
node, nodeExists := g.nodes[t.NodeID]
523+
serviceEntry, serviceExists := g.globalServices[t.ServiceID]
524+
if !nodeExists || !serviceExists {
525+
return nil
526+
}
527+
if !constraint.NodeMatches(serviceEntry.constraints, node) {
528+
t.DesiredState = api.TaskStateShutdown
529+
return store.UpdateTask(tx, t)
530+
}
531+
520532
return g.restarts.Restart(ctx, tx, g.cluster, service, *t)
521533
})
522534
if err != nil {

0 commit comments

Comments
 (0)