fix(scheduler): roll back scale / deploy when the desired number of pods cannot be brought up in a timely manner

helgi · helgi · commit 6406e88db144 · 2016-07-15T22:24:03.000-07:00
Prior to this change, if a deploy failed to bring up its pods, the old release would still be scaled down instead of the deploy being rolled back. This would leave users with a broken app whenever the new pods, for whatever reason, remained stuck in the Pending state. Fixes #706
diff --git a/rootfs/scheduler/__init__.py b/rootfs/scheduler/__init__.py
@@ -975,7 +975,11 @@ def _wait_until_pods_are_ready(self, namespace, containers, labels, desired, tim
         if waited > timeout:
             self.log(namespace, 'timed out ({}s) waiting for pods to come up in namespace {}'.format(timeout, namespace))  # noqa
 
-        self.log(namespace, "{} out of {} pods are in service".format(count, desired))  # noqa
+        self.log(namespace, "{} out of {} pods are in service".format(count, desired))
+        if count != desired:
+            # raising to allow operations to rollback
+            raise KubeException('Not enough pods in namespace {} came into service. '
+                                '{} out of {}'.format(namespace, count, desired))
 
     def _scale_rc(self, namespace, name, desired, timeout):
         rc = self.get_rc(namespace, name).json()