Skip to content

Commit e68c0ff

Browse files
committed
Change kill and stop implementation to enhance compatibility
1 parent 2e7fed9 commit e68c0ff

File tree

6 files changed

+75
-25
lines changed

6 files changed

+75
-25
lines changed

lib/portlayer/exec/base.go

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/vmware/govmomi/vim25/types"
3030
"github.com/vmware/vic/lib/config/executor"
3131
"github.com/vmware/vic/lib/migration"
32+
"github.com/vmware/vic/lib/tether/shared"
3233
"github.com/vmware/vic/pkg/trace"
3334
"github.com/vmware/vic/pkg/vsphere/extraconfig"
3435
"github.com/vmware/vic/pkg/vsphere/extraconfig/vmomi"
@@ -220,7 +221,7 @@ func (c *containerBase) State(op trace.Operation) State {
220221
func (c *containerBase) ReloadConfig(op trace.Operation) error {
221222
defer trace.End(trace.Begin(c.ExecConfig.ID, op))
222223

223-
return c.startGuestProgram(op, "reload", "")
224+
return c.startGuestProgram(op, shared.GuestActionReload, "")
224225
}
225226

226227
// WaitForExec waits exec'ed task to set started field or timeout
@@ -319,7 +320,7 @@ func (c *containerBase) kill(op trace.Operation) error {
319320
sig := string(ssh.SIGKILL)
320321
timeout.Infof("sending kill -%s %s", sig, c)
321322

322-
err := c.startGuestProgram(timeout, "kill", sig)
323+
err := c.startGuestProgram(timeout, shared.GuestActionKill, sig)
323324
if err == nil && timeout.Err() != nil {
324325
timeout.Warnf("timeout (%s) waiting for %s to power off via SIG%s", wait, c, sig)
325326
}
@@ -361,21 +362,30 @@ func (c *containerBase) shutdown(op trace.Operation, waitTime *int32) error {
361362
}
362363

363364
cs := c.ExecConfig.Sessions[c.ExecConfig.ID]
364-
stop := []string{cs.StopSignal, string(ssh.SIGKILL)}
365-
if stop[0] == "" {
366-
stop[0] = string(ssh.SIGTERM)
365+
366+
stopSignal := cs.StopSignal
367+
if stopSignal == "" {
368+
stopSignal = string(ssh.SIGTERM)
369+
}
370+
371+
actionSignals := []struct {
372+
action string
373+
sig string
374+
}{
375+
{shared.GuestActionKill, stopSignal},
376+
{shared.GuestActionGroupKill, string(ssh.SIGKILL)},
367377
}
368378

369379
var killed bool
370380

371-
for _, sig := range stop {
372-
msg := fmt.Sprintf("sending kill -%s %s", sig, c)
381+
for _, actionSignal := range actionSignals {
382+
msg := fmt.Sprintf("sending %s -%s %s", actionSignal.action, actionSignal.sig, c)
373383
op.Infof(msg)
374384

375385
timeout, cancel := trace.WithTimeout(&op, wait, "shutdown")
376386
defer cancel()
377387

378-
err := c.startGuestProgram(timeout, "kill", sig)
388+
err := c.startGuestProgram(timeout, actionSignal.action, actionSignal.sig)
379389
if err != nil {
380390
// Just warn and proceed to waiting for power state per issue https://github.com/vmware/vic/issues/5803
381391
// Description above in function kill()
@@ -399,14 +409,14 @@ func (c *containerBase) shutdown(op trace.Operation, waitTime *int32) error {
399409
return err // error other than timeout
400410
}
401411

402-
timeout.Warnf("timeout (%s) waiting for %s to power off via SIG%s", wait, c, sig)
412+
timeout.Warnf("timeout (%s) waiting for %s to power off via SIG%s", wait, c, actionSignal.sig)
403413

404414
if killed {
405415
return nil
406416
}
407417
}
408418

409-
return fmt.Errorf("failed to shutdown %s via kill signals %s", c, stop)
419+
return fmt.Errorf("failed to shutdown %s via kill signals %s and %s", c, stopSignal, string(ssh.SIGKILL))
410420
}
411421

412422
func (c *containerBase) poweroff(op trace.Operation) error {

lib/tether/shared/constants.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,7 @@ const (
2222
FilterSpecQueryName = "filter-spec"
2323
SkipRecurseQueryName = "skip-recurse"
2424
SkipDataQueryName = "skip-data"
25+
GuestActionReload = "reload"
26+
GuestActionKill = "kill"
27+
GuestActionGroupKill = "group-kill"
2528
)

lib/tether/tether.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -677,10 +677,6 @@ func (t *tether) cleanupSession(session *SessionConfig) {
677677
func (t *tether) handleSessionExit(session *SessionConfig) {
678678
defer trace.End(trace.Begin("handling exit of session " + session.ID))
679679

680-
log.Debugf("Waiting on session.wait")
681-
session.wait.Wait()
682-
log.Debugf("Wait on session.wait completed")
683-
684680
log.Debugf("Calling wait on cmd")
685681
if err := session.Cmd.Wait(); err != nil {
686682
// we expect this to get an error because the child reaper will have gathered it

lib/tether/tether_linux.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,15 @@ func (t *tether) childReaper() error {
138138
log.Debugf("Removed child pid: %d", pid)
139139
session.Lock()
140140
session.ExitStatus = exitCode
141+
session.Unlock()
142+
143+
// Don't hold the lock while waiting for the file descriptors
144+
// to close as these can be held open by child processes
145+
log.Debugf("Waiting on session.wait")
146+
session.wait.Wait()
147+
log.Debugf("Wait on session.wait completed")
141148

149+
session.Lock()
142150
t.handleSessionExit(session)
143151
session.Unlock()
144152
continue

lib/tether/toolbox.go

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232

3333
log "github.com/Sirupsen/logrus"
3434
dar "github.com/docker/docker/pkg/archive"
35+
3536
"golang.org/x/crypto/ssh"
3637

3738
"github.com/vmware/govmomi/toolbox"
@@ -181,10 +182,21 @@ func (t *Toolbox) kill(_ context.Context, name string) error {
181182

182183
session.Lock()
183184
defer session.Unlock()
184-
return t.killHelper(session, name)
185+
return t.killHelper(session, name, false)
186+
}
187+
188+
func (t *Toolbox) groupKill(_ context.Context, name string) error {
189+
session := t.session()
190+
if session == nil {
191+
return fmt.Errorf("failed to kill container: process not found")
192+
}
193+
194+
session.Lock()
195+
defer session.Unlock()
196+
return t.killHelper(session, name, true)
185197
}
186198

187-
func (t *Toolbox) killHelper(session *SessionConfig, name string) error {
199+
func (t *Toolbox) killHelper(session *SessionConfig, name string, groupKill bool) error {
188200
if name == "" {
189201
name = string(ssh.SIGTERM)
190202
}
@@ -201,9 +213,19 @@ func (t *Toolbox) killHelper(session *SessionConfig, name string) error {
201213

202214
num := syscall.Signal(sig.Signum())
203215

204-
log.Infof("sending signal %s (%d) to process group for %s", sig.Signal, num, session.ID)
205-
if err := syscall.Kill(-session.Cmd.Process.Pid, num); err != nil {
206-
return fmt.Errorf("failed to signal %s group: %s", session.ID, err)
216+
var pid int
217+
if groupKill == false {
218+
// Send signal to a single process
219+
pid = session.Cmd.Process.Pid
220+
log.Infof("sending signal %s (%d) to process %d for %s", sig.Signal, num, session.Cmd.Process.Pid, session.ID)
221+
} else {
222+
// Send signal to the whole process group
223+
pid = -session.Cmd.Process.Pid
224+
log.Infof("sending signal %s (%d) to process group %d for %s", sig.Signal, num, session.Cmd.Process.Pid, session.ID)
225+
}
226+
227+
if err := syscall.Kill(pid, num); err != nil {
228+
return fmt.Errorf("failed to signal %s: %s", session.ID, err)
207229
}
208230

209231
return nil
@@ -220,7 +242,9 @@ func (t *Toolbox) containerAuthenticate(_ vix.CommandRequestHeader, data []byte)
220242
return errors.New("not yet initialized")
221243
}
222244

245+
log.Debugf("containerAuthenticate, acquiring session lock: %s ..", session.ID)
223246
session.Lock()
247+
log.Debugf("containerAuthenticate, acquired session lock: %s", session.ID)
224248
defer session.Unlock()
225249

226250
// no authentication yet, just using container ID and device IDs as a sanity check for now
@@ -235,9 +259,11 @@ func (t *Toolbox) containerStartCommand(m *toolbox.ProcessManager, r *vix.StartP
235259
var p *toolbox.Process
236260

237261
switch r.ProgramPath {
238-
case "kill":
262+
case shared.GuestActionKill:
239263
p = toolbox.NewProcessFunc(t.kill)
240-
case "reload":
264+
case shared.GuestActionGroupKill:
265+
p = toolbox.NewProcessFunc(t.groupKill)
266+
case shared.GuestActionReload:
241267
p = toolbox.NewProcessFunc(func(_ context.Context, _ string) error {
242268
return ReloadConfig()
243269
})
@@ -263,7 +289,7 @@ func (t *Toolbox) halt() error {
263289

264290
log.Infof("stopping %s", session.ID)
265291

266-
if err := t.killHelper(session, session.StopSignal); err != nil {
292+
if err := t.killHelper(session, session.StopSignal, false); err != nil {
267293
return err
268294
}
269295

tests/test-cases/Group1-Docker-Commands/1-14-Docker-Kill.robot

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ Assert Container Output
3737
Should Be Equal As Integers ${rc} 0
3838
Should Contain ${output} ${match}
3939

40+
Assert Not In Container Output
41+
[Arguments] ${id} ${match}
42+
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} logs ${id}
43+
Should Be Equal As Integers ${rc} 0
44+
Should Not Contain ${output} ${match}
45+
4046
Check That Container Was Killed
4147
[Arguments] ${container}
4248
${rc} ${out}= Run And Return Rc And Output docker %{VCH-PARAMS} inspect -f {{.State.Running}} ${container}
@@ -82,7 +88,7 @@ Signal a container with SIGHUP
8288
Should Be Equal As Integers ${rc} 0
8389
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} logs --follow ${id}
8490

85-
Confirm signal delivered to entire process group
91+
Confirm signal is not delivered to entire process group
8692
${rc}= Run And Return Rc docker %{VCH-PARAMS} pull ${busybox}
8793
Should Be Equal As Integers ${rc} 0
8894
${trap}= Nested Trap Signal Command HUP
@@ -96,9 +102,10 @@ Confirm signal delivered to entire process group
96102
Should Be Equal As Integers ${rc} 1
97103
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} kill -s HUP ${id}
98104
Should Be Equal As Integers ${rc} 0
99-
Wait Until Keyword Succeeds 20x 200 milliseconds Assert Container Output ${id} KillSignalChildHUP
100-
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} kill -s TERM ${id}
105+
Wait Until Keyword Succeeds 20x 200 milliseconds Assert Container Output ${id} KillSignalParentHUP
106+
${rc}= Run And Return Rc docker %{VCH-PARAMS} stop ${id}
101107
Should Be Equal As Integers ${rc} 0
108+
Assert Not In Container Output ${id} KillSignalChildHUP
102109
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} logs --follow ${id}
103110

104111
Signal a non-existent container

0 commit comments

Comments
 (0)