Skip to content

make sure worker nodes actually join control plane on restart #9476

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 16, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions pkg/minikube/bootstrapper/kubeadm/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -675,16 +675,12 @@ func (k *Bootstrapper) JoinCluster(cc config.ClusterConfig, n config.Node, joinC

out, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", joinCmd))
if err != nil {
if strings.Contains(err.Error(), "status \"Ready\" already exists in the cluster") {
klog.Infof("Node %s already joined the cluster, skip failure.", n.Name)
} else {
return errors.Wrapf(err, "cmd failed: %s\n%+v\n", joinCmd, out.Output())
}
return errors.Wrapf(err, "cmd failed: %s\n%+v\n", joinCmd, out.Output())
Copy link
Member

@medyagh medyagh Oct 16, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd do the same check for the error message as before, but this time retry and also let the user know we are retrying:

already exists in the cluster. You must delete the existing Node or change the name of this new joining Node

If that error is there, we will klog.Info (so that IDE users, who usually don't like to wait without any logs, know we are retrying):

if rr.Output() has that ^^ error, {
klog.Info("retrying to re-register worker node to the controlplane.... ")
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, RunCmd already includes rr.Output in the error message, so
`return errors.Wrapf(err, "join cmd")` is enough.

}
return nil
}

if err := retry.Expo(join, 10*time.Second, 1*time.Minute); err != nil {
if err := retry.Expo(join, 10*time.Second, 3*time.Minute); err != nil {
return errors.Wrap(err, "joining cp")
}

Expand Down
7 changes: 3 additions & 4 deletions pkg/minikube/node/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ func Start(starter Starter, apiServer bool) (*kubeconfig.Settings, error) {
return nil, errors.Wrap(err, "setting up certs")
}

if err := bs.UpdateNode(*starter.Cfg, *starter.Node, cr); err != nil {
return nil, errors.Wrap(err, "update node")
}
}

var wg sync.WaitGroup
Expand Down Expand Up @@ -167,10 +170,6 @@ func Start(starter Starter, apiServer bool) (*kubeconfig.Settings, error) {
prepareNone()
}
} else {
if err := bs.UpdateNode(*starter.Cfg, *starter.Node, cr); err != nil {
return nil, errors.Wrap(err, "update node")
}

// Make sure to use the command runner for the control plane to generate the join token
cpBs, cpr, err := cluster.ControlPlaneBootstrapper(starter.MachineAPI, starter.Cfg, viper.GetString(cmdcfg.Bootstrapper))
if err != nil {
Expand Down