Skip to content

Commit 3a14881

Browse files
✨ Allow remediation of multiple failures in KCP (#13352)
* Allow remediation of multiple failures in KCP * Address feedback * more feedback * more feedback
1 parent cf8c264 commit 3a14881

16 files changed

+2014
-680
lines changed

controlplane/kubeadm/internal/control_plane.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ type ControlPlane struct {
8282
// NOTE: Those info are specifically designed for computing KCP's Available condition.
8383
EtcdMembers []*etcd.Member
8484
EtcdMembersAndMachinesAreMatching bool
85+
EtcdMembersAlarms []etcd.MemberAlarm
8586

8687
managementCluster ManagementCluster
8788
workloadCluster WorkloadCluster

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 156 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -468,9 +468,8 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl
468468
}
469469

470470
// Ensures the number of etcd members is in sync with the number of machines/nodes.
471-
// NOTE: This is usually required after a machine deletion.
472-
if err := r.reconcileEtcdMembers(ctx, controlPlane); err != nil {
473-
return ctrl.Result{}, err
471+
if result, err := r.reconcileEtcdMembers(ctx, controlPlane); err != nil || !result.IsZero() {
472+
return result, err
474473
}
475474

476475
// Handle machines in deletion phase; when drain and wait for volume detach completed, forward etcd leadership
@@ -1207,62 +1206,109 @@ func maxTime(t1, t2 time.Time) time.Time {
12071206
return t2
12081207
}
12091208

1210-
// reconcileEtcdMembers ensures the number of etcd members is in sync with the number of machines/nodes.
1211-
// This is usually required after a machine deletion.
1209+
// reconcileEtcdMembers ensures that the list of etcd members is consistent with the list of Machine/Node.
1210+
// If etcd members are not consistent with the list of Machine/Node "unknown" etcd members are removed and a new
1211+
// reconcile is triggered.
12121212
//
1213-
// NOTE: this func uses KCP conditions, it is required to call reconcileControlPlaneAndMachinesConditions before this.
1214-
func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context, controlPlane *internal.ControlPlane) error {
1213+
// NOTE: This operation is a safeguard in case for some reason the etcd member removal operation that is performed
1214+
// as part of the Machine deletion workflow doesn't work as expected (see reconcilePreTerminateHook).
1215+
// It is also a safeguard against "unmanaged" etcd members being added to the etcd cluster.
1216+
func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
12151217
log := ctrl.LoggerFrom(ctx)
12161218

12171219
// If etcd is not managed by KCP this is a no-op.
12181220
if !controlPlane.IsEtcdManaged() {
1219-
return nil
1221+
return ctrl.Result{}, nil
12201222
}
12211223

12221224
// If there is no KCP-owned control-plane machines, then control-plane has not been initialized yet.
12231225
if controlPlane.Machines.Len() == 0 {
1224-
return nil
1226+
return ctrl.Result{}, nil
12251227
}
12261228

12271229
// No op if for any reason the etcdMember list is not populated at this stage.
1228-
if controlPlane.EtcdMembers == nil {
1229-
return nil
1230+
if len(controlPlane.EtcdMembers) == 0 {
1231+
return ctrl.Result{}, nil
12301232
}
12311233

1232-
// Potential inconsistencies between the list of members and the list of machines/nodes are
1234+
// Potential inconsistencies between the list of members and the list of Machine/Node are
12331235
// surfaced using the EtcdClusterHealthyCondition; if this condition is true, meaning no inconsistencies exists, return early.
12341236
if conditions.IsTrue(controlPlane.KCP, controlplanev1.KubeadmControlPlaneEtcdClusterHealthyCondition) {
1235-
return nil
1237+
return ctrl.Result{}, nil
12361238
}
12371239

1238-
// Collect all the node names.
1239-
// Note: EtcdClusterHealthyCondition true also implies that there are no machines still provisioning,
1240-
// so we can ignore this case.
1240+
// Collect all the Node names from control plane Machines.
12411241
nodeNames := []string{}
12421242
for _, machine := range controlPlane.Machines {
12431243
if !machine.Status.NodeRef.IsDefined() {
1244-
// If there are provisioning machines (machines without a node yet), return.
1245-
return nil
1244+
// If there are provisioning machines (machines without a Node yet), KCP must wait for the control plane
1245+
// to become stable before acting on etcd members.
1246+
return ctrl.Result{}, nil
12461247
}
12471248
nodeNames = append(nodeNames, machine.Status.NodeRef.Name)
12481249
}
12491250

12501251
workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
12511252
if err != nil {
1252-
// Failing at connecting to the workload cluster can mean workload cluster is unhealthy for a variety of reasons such as etcd quorum loss.
1253-
return errors.Wrap(err, "cannot get remote client to workload cluster")
1253+
return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster")
12541254
}
12551255

1256-
removedMembers, err := workloadCluster.ReconcileEtcdMembersAndControlPlaneNodes(ctx, controlPlane.EtcdMembers, nodeNames)
1257-
if err != nil {
1258-
return errors.Wrap(err, "failed attempt to reconcile etcd members")
1256+
// Check if there are etcd member without the corresponding Node/Machines.
1257+
// If any, delete it with best effort
1258+
removedMembers := []string{}
1259+
allErrors := []error{}
1260+
1261+
for _, member := range controlPlane.EtcdMembers {
1262+
// If this member is just added, it has a empty name until the etcd pod starts. Ignore this member until a name will show up.
1263+
if member.Name == "" {
1264+
continue
1265+
}
1266+
1267+
// If the member have a corresponding Node/Machine, continue.
1268+
nodeFound := false
1269+
for _, nodeName := range nodeNames {
1270+
if member.Name == nodeName {
1271+
nodeFound = true
1272+
break
1273+
}
1274+
}
1275+
if nodeFound {
1276+
continue
1277+
}
1278+
1279+
// If the Node/Machineg corresponding to an etcd member cannot be found, KCP must remove it.
1280+
//
1281+
// Before deleting the etcd member, KCP should assess the potential effects of this operation.
1282+
//
1283+
// Most specifically KCP should determine if this operation is going to leave the Kubernetes control plane components
1284+
// and the etcd cluster in operational state or not.
1285+
//
1286+
// In this case, the Target cluster will have the same list of machines as the current clusters
1287+
// but we are going to remove the etcd members without a corresponding Node/Machine (no etcd member are going to be added).
1288+
etcdMemberToBeDeleted := member.Name
1289+
addEtcdMember := false
1290+
if !r.targetEtcdClusterHealthy(ctx, controlPlane, addEtcdMember, etcdMemberToBeDeleted) {
1291+
allErrors = append(allErrors, errors.Errorf("etcd member %s does not have a corresponding Machine, it must be removed from the cluster but this operation can lead to quorum loss. Please check the etcd status", etcdMemberToBeDeleted))
1292+
break
1293+
}
1294+
1295+
if err := workloadCluster.RemoveEtcdMember(ctx, member.Name); err != nil {
1296+
allErrors = append(allErrors, err)
1297+
continue
1298+
}
1299+
removedMembers = append(removedMembers, member.Name)
12591300
}
12601301

12611302
if len(removedMembers) > 0 {
1262-
log.Info("Etcd members without nodes removed from the cluster", "members", removedMembers)
1303+
log.Info("Etcd members without a corresponding Machines removed from the cluster", "members", removedMembers)
1304+
1305+
// If there are no errors, force a new reconcile after removing an etcd member.
1306+
if len(allErrors) == 0 {
1307+
return ctrl.Result{RequeueAfter: 1 * time.Second}, nil
1308+
}
12631309
}
12641310

1265-
return nil
1311+
return ctrl.Result{}, kerrors.NewAggregate(allErrors)
12661312
}
12671313

12681314
func (r *KubeadmControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
@@ -1307,34 +1353,97 @@ func (r *KubeadmControlPlaneReconciler) reconcilePreTerminateHook(ctx context.Co
13071353
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
13081354
}
13091355

1310-
// The following will execute and remove the pre-terminate hook from the Machine.
1356+
// If removing the last control plane machine, no need of additional checks/operations.
1357+
if controlPlane.Machines.Len() <= 1 {
1358+
if err := r.removePreTerminateHookAnnotationFromMachine(ctx, deletingMachine); err != nil {
1359+
return ctrl.Result{}, err
1360+
}
13111361

1312-
// If we have more than 1 Machine and etcd is managed we forward etcd leadership and remove the member
1313-
// to keep the etcd cluster healthy.
1314-
if controlPlane.Machines.Len() > 1 && controlPlane.IsEtcdManaged() {
1315-
workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
1316-
if err != nil {
1317-
return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s: failed to create client to workload cluster", klog.KObj(deletingMachine))
1362+
log.Info("Waiting for Machines to be deleted", "machines", strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", "))
1363+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
1364+
}
1365+
1366+
// Before removing the etcd member/removing the pre-terminate hook on KCP machines
1367+
// KCP determines again if deleting this machine is going to leave the Kubernetes control plane components
1368+
// and the etcd cluster in operational state or not.
1369+
//
1370+
// NOTE: This check acts as additional safeguard preventing deletion to complete.
1371+
// Similar checks are also performed before triggering deletion when remediating unhealthy machines and
1372+
// also before triggering deletion when scaling down the number of control plane replicas.
1373+
// (in case of scale down KCP actually checks that everything must be in healthy state).
1374+
//
1375+
// Target list of machines will have current machines -1 machine (the deletingMachine).
1376+
// As a consequence:
1377+
// - KubernetesControlPlane on the deletingMachine is going to be deleted, no KubernetesControlPlane is going to be added.
1378+
kubernetesControlPlaneToBeDeleted := deletingMachine.Name
1379+
addKubernetesControlPlane := false
1380+
1381+
// Check target Kubernetes control plane components.
1382+
if !r.targetKubernetesControlPlaneComponentsHealthy(ctx, controlPlane, addKubernetesControlPlane, kubernetesControlPlaneToBeDeleted) {
1383+
log.Info(fmt.Sprintf("Cannot delete control plane Machine %s when there are no control plane Machines with all Kubernetes control plane components in healthy state. Please check Kubernetes control plane component status", klog.KObj(deletingMachine)))
1384+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
1385+
}
1386+
1387+
// The following will execute and remove the pre-terminate hook from the Machine.
1388+
if !controlPlane.IsEtcdManaged() {
1389+
if err := r.removePreTerminateHookAnnotationFromMachine(ctx, deletingMachine); err != nil {
1390+
return ctrl.Result{}, err
13181391
}
13191392

1320-
// Note: In regular deletion cases (remediation, scale down) the leader should have been already moved.
1321-
// We're doing this again here in case the Machine became leader again or the Machine deletion was
1322-
// triggered in another way (e.g. a user running kubectl delete machine)
1323-
etcdLeaderCandidate := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)).Newest()
1324-
if etcdLeaderCandidate != nil {
1325-
if err := workloadCluster.ForwardEtcdLeadership(ctx, deletingMachine, etcdLeaderCandidate); err != nil {
1326-
return ctrl.Result{}, errors.Wrapf(err, "failed to move leadership to candidate Machine %s", etcdLeaderCandidate.Name)
1327-
}
1328-
} else {
1329-
log.Info("Skip forwarding etcd leadership, because there is no other control plane Machine without a deletionTimestamp")
1393+
log.Info("Waiting for Machines to be deleted", "machines", strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", "))
1394+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
1395+
}
1396+
1397+
if len(controlPlane.EtcdMembers) == 0 {
1398+
log.Info("Cannot check etcd cluster health before remediation, etcd member list is empty")
1399+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
1400+
}
1401+
1402+
// If etcd is managed by KCP, check target etcd cluster.
1403+
// Target list of machines will have current machines -1 machine (the deletingMachine).
1404+
// As a consequence:
1405+
// - etcd member on the deletingMachine is going to be deleted, no etcd member are going to be added.
1406+
etcdMemberToBeDeleted := r.tryGetEtcdMemberName(ctx, controlPlane, deletingMachine)
1407+
addEtcdMember := false
1408+
1409+
// If it was not possible to get the etcd member, no other checks can be performed. Continue with deletion.
1410+
// Note: reconcileEtcMembers acts a safeguard if an etcdMember is added/reported after this point.
1411+
if etcdMemberToBeDeleted == "" {
1412+
if err := r.removePreTerminateHookAnnotationFromMachine(ctx, deletingMachine); err != nil {
1413+
return ctrl.Result{}, err
13301414
}
13311415

1332-
// Note: Removing the etcd member will lead to the etcd and the kube-apiserver Pod on the Machine shutting down.
1333-
// If ControlPlaneKubeletLocalMode is used, the kubelet is communicating with the local apiserver and thus now
1334-
// won't be able to see any updates to e.g. Pods anymore.
1335-
if err := workloadCluster.RemoveEtcdMemberForMachine(ctx, deletingMachine); err != nil {
1336-
return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s", klog.KObj(deletingMachine))
1416+
log.Info("Waiting for Machines to be deleted", "machines", strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", "))
1417+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
1418+
}
1419+
1420+
if !r.targetEtcdClusterHealthy(ctx, controlPlane, addEtcdMember, etcdMemberToBeDeleted) {
1421+
log.Info(fmt.Sprintf("Cannot delete control plane Machine %s, the operation can lead to etcd quorum loss. Please check the etcd status", klog.KObj(deletingMachine)))
1422+
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
1423+
}
1424+
1425+
workloadCluster, err := controlPlane.GetWorkloadCluster(ctx)
1426+
if err != nil {
1427+
return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s: failed to create client to workload cluster", klog.KObj(deletingMachine))
1428+
}
1429+
1430+
// Note: In regular deletion cases (remediation, scale down) the leader should have been already moved.
1431+
// We're doing this again here in case the Machine became leader again or the Machine deletion was
1432+
// triggered in another way (e.g. a user running kubectl delete machine)
1433+
etcdLeaderCandidate := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp)).Newest()
1434+
if etcdLeaderCandidate != nil {
1435+
if err := workloadCluster.ForwardEtcdLeadership(ctx, deletingMachine, etcdLeaderCandidate); err != nil {
1436+
return ctrl.Result{}, errors.Wrapf(err, "failed to move leadership to candidate Machine %s", etcdLeaderCandidate.Name)
13371437
}
1438+
} else {
1439+
log.Info("Skip forwarding etcd leadership, there is no other control plane Machine without a deletionTimestamp")
1440+
}
1441+
1442+
// Note: Removing the etcd member will lead to the etcd and the kube-apiserver Pod on the Machine shutting down.
1443+
// If ControlPlaneKubeletLocalMode is used, the kubelet is communicating with the local apiserver and thus now
1444+
// won't be able to see any updates to e.g. Pods anymore.
1445+
if err := workloadCluster.RemoveEtcdMember(ctx, etcdMemberToBeDeleted); err != nil {
1446+
return ctrl.Result{}, errors.Wrapf(err, "failed to remove etcd member for deleting Machine %s", klog.KObj(deletingMachine))
13381447
}
13391448

13401449
if err := r.removePreTerminateHookAnnotationFromMachine(ctx, deletingMachine); err != nil {

0 commit comments

Comments
 (0)