Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions lib/portlayer/exec/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/vmware/govmomi/vim25/types"
"github.com/vmware/vic/lib/config/executor"
"github.com/vmware/vic/lib/migration"
"github.com/vmware/vic/lib/tether/shared"
"github.com/vmware/vic/pkg/trace"
"github.com/vmware/vic/pkg/vsphere/extraconfig"
"github.com/vmware/vic/pkg/vsphere/extraconfig/vmomi"
Expand Down Expand Up @@ -220,7 +221,7 @@ func (c *containerBase) State(op trace.Operation) State {
func (c *containerBase) ReloadConfig(op trace.Operation) error {
defer trace.End(trace.Begin(c.ExecConfig.ID, op))

return c.startGuestProgram(op, "reload", "")
return c.startGuestProgram(op, shared.GuestActionReload, "")
}

// WaitForExec waits exec'ed task to set started field or timeout
Expand Down Expand Up @@ -319,7 +320,7 @@ func (c *containerBase) kill(op trace.Operation) error {
sig := string(ssh.SIGKILL)
timeout.Infof("sending kill -%s %s", sig, c)

err := c.startGuestProgram(timeout, "kill", sig)
err := c.startGuestProgram(timeout, shared.GuestActionKill, sig)
if err == nil && timeout.Err() != nil {
timeout.Warnf("timeout (%s) waiting for %s to power off via SIG%s", wait, c, sig)
}
Expand Down Expand Up @@ -361,21 +362,29 @@ func (c *containerBase) shutdown(op trace.Operation, waitTime *int32) error {
}

cs := c.ExecConfig.Sessions[c.ExecConfig.ID]
stop := []string{cs.StopSignal, string(ssh.SIGKILL)}
if stop[0] == "" {
stop[0] = string(ssh.SIGTERM)

stopSignal := cs.StopSignal
if stopSignal == "" {
stopSignal = string(ssh.SIGTERM)
}

var killed bool
actionSignals := []struct {
action string
sig string
}{
{shared.GuestActionKill, stopSignal},
{shared.GuestActionGroupKill, string(ssh.SIGKILL)},
}

for _, sig := range stop {
msg := fmt.Sprintf("sending kill -%s %s", sig, c)
var killed bool
for _, actionSignal := range actionSignals {
msg := fmt.Sprintf("sending %s -%s %s", actionSignal.action, actionSignal.sig, c)
op.Infof(msg)

timeout, cancel := trace.WithTimeout(&op, wait, "shutdown")
defer cancel()

err := c.startGuestProgram(timeout, "kill", sig)
err := c.startGuestProgram(timeout, actionSignal.action, actionSignal.sig)
if err != nil {
// Just warn and proceed to waiting for power state per issue https://github.com/vmware/vic/issues/5803
// Description above in function kill()
Expand All @@ -399,14 +408,14 @@ func (c *containerBase) shutdown(op trace.Operation, waitTime *int32) error {
return err // error other than timeout
}

timeout.Warnf("timeout (%s) waiting for %s to power off via SIG%s", wait, c, sig)
timeout.Warnf("timeout (%s) waiting for %s to power off via SIG%s", wait, c, actionSignal.sig)

if killed {
return nil
}
}

return fmt.Errorf("failed to shutdown %s via kill signals %s", c, stop)
return fmt.Errorf("failed to shutdown %s via kill signals %s and %s", c, stopSignal, string(ssh.SIGKILL))
}

func (c *containerBase) poweroff(op trace.Operation) error {
Expand Down
3 changes: 3 additions & 0 deletions lib/tether/shared/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,7 @@ const (
FilterSpecQueryName = "filter-spec"
SkipRecurseQueryName = "skip-recurse"
SkipDataQueryName = "skip-data"
GuestActionReload = "reload"
GuestActionKill = "kill"
GuestActionGroupKill = "group-kill"
)
4 changes: 0 additions & 4 deletions lib/tether/tether.go
Original file line number Diff line number Diff line change
Expand Up @@ -677,10 +677,6 @@ func (t *tether) cleanupSession(session *SessionConfig) {
func (t *tether) handleSessionExit(session *SessionConfig) {
defer trace.End(trace.Begin("handling exit of session " + session.ID))

log.Debugf("Waiting on session.wait")
session.wait.Wait()
log.Debugf("Wait on session.wait completed")

log.Debugf("Calling wait on cmd")
if err := session.Cmd.Wait(); err != nil {
// we expect this to get an error because the child reaper will have gathered it
Expand Down
8 changes: 8 additions & 0 deletions lib/tether/tether_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,15 @@ func (t *tether) childReaper() error {
log.Debugf("Removed child pid: %d", pid)
session.Lock()
session.ExitStatus = exitCode
session.Unlock()

// Don't hold the lock while waiting for the file descriptors
// to close as these can be held open by child processes
log.Debugf("Waiting on session.wait")
session.wait.Wait()
log.Debugf("Wait on session.wait completed")

session.Lock()
t.handleSessionExit(session)
session.Unlock()
continue
Expand Down
42 changes: 34 additions & 8 deletions lib/tether/toolbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (

log "github.com/Sirupsen/logrus"
dar "github.com/docker/docker/pkg/archive"

"golang.org/x/crypto/ssh"

"github.com/vmware/govmomi/toolbox"
Expand Down Expand Up @@ -181,10 +182,21 @@ func (t *Toolbox) kill(_ context.Context, name string) error {

session.Lock()
defer session.Unlock()
return t.killHelper(session, name)
return t.killHelper(session, name, false)
}

func (t *Toolbox) groupKill(_ context.Context, name string) error {
session := t.session()
if session == nil {
return fmt.Errorf("failed to kill container: process not found")
}

session.Lock()
defer session.Unlock()
return t.killHelper(session, name, true)
}

func (t *Toolbox) killHelper(session *SessionConfig, name string) error {
func (t *Toolbox) killHelper(session *SessionConfig, name string, groupKill bool) error {
if name == "" {
name = string(ssh.SIGTERM)
}
Expand All @@ -201,9 +213,19 @@ func (t *Toolbox) killHelper(session *SessionConfig, name string) error {

num := syscall.Signal(sig.Signum())

log.Infof("sending signal %s (%d) to process group for %s", sig.Signal, num, session.ID)
if err := syscall.Kill(-session.Cmd.Process.Pid, num); err != nil {
return fmt.Errorf("failed to signal %s group: %s", session.ID, err)
var pid int
if groupKill == false {
// Send signal to a single process
pid = session.Cmd.Process.Pid
log.Infof("sending signal %s (%d) to process %d for %s", sig.Signal, num, session.Cmd.Process.Pid, session.ID)
} else {
// Send signal to the whole process group
pid = -session.Cmd.Process.Pid
log.Infof("sending signal %s (%d) to process group %d for %s", sig.Signal, num, session.Cmd.Process.Pid, session.ID)
}

if err := syscall.Kill(pid, num); err != nil {
return fmt.Errorf("failed to signal %s: %s", session.ID, err)
}

return nil
Expand All @@ -220,7 +242,9 @@ func (t *Toolbox) containerAuthenticate(_ vix.CommandRequestHeader, data []byte)
return errors.New("not yet initialized")
}

log.Debugf("containerAuthenticate, acquiring session lock: %s ..", session.ID)
session.Lock()
log.Debugf("containerAuthenticate, acquired session lock: %s", session.ID)
defer session.Unlock()

// no authentication yet, just using container ID and device IDs as a sanity check for now
Expand All @@ -235,9 +259,11 @@ func (t *Toolbox) containerStartCommand(m *toolbox.ProcessManager, r *vix.StartP
var p *toolbox.Process

switch r.ProgramPath {
case "kill":
case shared.GuestActionKill:
p = toolbox.NewProcessFunc(t.kill)
case "reload":
case shared.GuestActionGroupKill:
p = toolbox.NewProcessFunc(t.groupKill)
case shared.GuestActionReload:
p = toolbox.NewProcessFunc(func(_ context.Context, _ string) error {
return ReloadConfig()
})
Expand All @@ -263,7 +289,7 @@ func (t *Toolbox) halt() error {

log.Infof("stopping %s", session.ID)

if err := t.killHelper(session, session.StopSignal); err != nil {
if err := t.killHelper(session, session.StopSignal, false); err != nil {
return err
}

Expand Down
15 changes: 11 additions & 4 deletions tests/test-cases/Group1-Docker-Commands/1-14-Docker-Kill.robot
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,20 @@ Nested Trap Signal Command
# Container command runs an infinite loop, trapping and logging the given signal name in a nested shell
# This is to test process group behaviours - same command as above, but nested in another shell
[Arguments] ${sig}
[Return] ${busybox} sh -c "trap 'echo KillSignalParent${sig}' ${sig}; sh -c \\"trap 'echo KillSignalChild${sig}' ${sig}; echo READY; while true; do date && sleep 1; done\\""
[Return] ${busybox} sh -c "trap 'echo KillSignalParent${sig}' ${sig}; sh -c \\"trap 'echo KillSignalChild${sig}' ${sig}; echo READY_CHILD; while true; do date && sleep 1; done\\" & echo READY_PARENT; while true; do date && sleep 1; done"

Assert Container Output
[Arguments] ${id} ${match}
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} logs ${id}
Should Be Equal As Integers ${rc} 0
Should Contain ${output} ${match}

Assert Not In Container Output
[Arguments] ${id} ${match}
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} logs ${id}
Should Be Equal As Integers ${rc} 0
Should Not Contain ${output} ${match}

Check That Container Was Killed
[Arguments] ${container}
${rc} ${out}= Run And Return Rc And Output docker %{VCH-PARAMS} inspect -f {{.State.Running}} ${container}
Expand Down Expand Up @@ -82,7 +88,7 @@ Signal a container with SIGHUP
Should Be Equal As Integers ${rc} 0
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} logs --follow ${id}

Confirm signal delivered to entire process group
Confirm signal is not delivered to entire process group
${rc}= Run And Return Rc docker %{VCH-PARAMS} pull ${busybox}
Should Be Equal As Integers ${rc} 0
${trap}= Nested Trap Signal Command HUP
Expand All @@ -96,9 +102,10 @@ Confirm signal delivered to entire process group
Should Be Equal As Integers ${rc} 1
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} kill -s HUP ${id}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may want to wait for the HUP signal to be confirmed before we call stop:

Wait Until Keyword Succeeds  20x  200 milliseconds  Assert In Container Output  ${id}  KillSignalHUP

THEN we stop it and assert that the child didn't print output.

Should Be Equal As Integers ${rc} 0
Wait Until Keyword Succeeds 20x 200 milliseconds Assert Container Output ${id} KillSignalChildHUP
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} kill -s TERM ${id}
Wait Until Keyword Succeeds 20x 200 milliseconds Assert Container Output ${id} KillSignalParentHUP
${rc}= Run And Return Rc docker %{VCH-PARAMS} stop ${id}
Should Be Equal As Integers ${rc} 0
Assert Not In Container Output ${id} KillSignalChildHUP
${rc} ${output}= Run And Return Rc And Output docker %{VCH-PARAMS} logs --follow ${id}

Signal a non-existent container
Expand Down