Skip to content
This repository was archived by the owner on May 6, 2020. It is now read-only.

Commit 4f3117d

Browse files
authored
Merge pull request #887 from helgi/configurable_deploy_timeout
feat(app): make deploy timeout configurable globally/per-app via DEIS_DEPLOY_TIMEOUT, default is 2 minutes
2 parents 26a75f2 + 84b8080 commit 4f3117d

4 files changed

Lines changed: 43 additions & 18 deletions

File tree

rootfs/api/models/app.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,9 @@ def _scale_pods(self, scale_types):
404404
# see if the app config has deploy batch preference, otherwise use global
405405
batches = release.config.values.get('DEIS_DEPLOY_BATCHES', settings.DEIS_DEPLOY_BATCHES)
406406

407+
# see if the app config has deploy timeout preference, otherwise use global
408+
deploy_timeout = release.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT) # noqa
409+
407410
# see if there is a global or app specific setting to specify Deployments usage
408411
deployments = bool(envs.get('DEIS_KUBERNETES_DEPLOYMENTS', settings.DEIS_KUBERNETES_DEPLOYMENTS)) # noqa
409412

@@ -430,7 +433,7 @@ def _scale_pods(self, scale_types):
430433
'routable': routable,
431434
'deployments': deployments,
432435
'deploy_batches': batches,
433-
'deploy_timeout': 120, # 2 minutes
436+
'deploy_timeout': deploy_timeout,
434437
}
435438

436439
command = self._get_command(scale_type)
@@ -469,6 +472,9 @@ def deploy(self, release, force_deploy=False):
469472
# see if there is a global or app specific setting to specify Deployments usage
470473
deployments = bool(release.config.values.get('DEIS_KUBERNETES_DEPLOYMENTS', settings.DEIS_KUBERNETES_DEPLOYMENTS)) # noqa
471474

475+
# see if the app config has deploy timeout preference, otherwise use global
476+
deploy_timeout = release.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT) # noqa
477+
472478
deployment_history = release.config.values.get('KUBERNETES_DEPLOYMENTS_REVISION_HISTORY_LIMIT', settings.KUBERNETES_DEPLOYMENTS_REVISION_HISTORY_LIMIT) # noqa
473479

474480
# deploy application to k8s. Also handles initial scaling
@@ -497,7 +503,7 @@ def deploy(self, release, force_deploy=False):
497503
'healthcheck': release.config.healthcheck,
498504
'routable': routable,
499505
'deploy_batches': batches,
500-
'deploy_timeout': 120, # 2 minutes
506+
'deploy_timeout': deploy_timeout,
501507
'deployment_history_limit': deployment_history,
502508
'deployments': deployments,
503509
'release_summary': release.summary
@@ -672,6 +678,9 @@ def pod_name(size=5, chars=string.ascii_lowercase + string.digits):
672678
if release.build is None:
673679
raise DeisException('No build associated with this release to run this command')
674680

681+
# see if the app config has deploy timeout preference, otherwise use global
682+
deploy_timeout = release.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT) # noqa
683+
675684
# TODO: add support for interactive shell
676685
entrypoint, command = self._get_command_run(command)
677686

@@ -686,6 +695,7 @@ def pod_name(size=5, chars=string.ascii_lowercase + string.digits):
686695
'registry': release.config.registry,
687696
'version': "v{}".format(release.version),
688697
'build_type': release.build.type,
698+
'deploy_timeout': deploy_timeout
689699
}
690700

691701
try:

rootfs/api/models/release.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,9 +359,12 @@ def _delete_release_in_scheduler(self, namespace, version):
359359
'version': version
360360
}
361361

362+
# see if the app config has deploy timeout preference, otherwise use global
363+
deploy_timeout = self.config.values.get('DEIS_DEPLOY_TIMEOUT', settings.DEIS_DEPLOY_TIMEOUT) # noqa
364+
362365
controllers = self._scheduler.get_rcs(namespace, labels=labels).json()
363366
for controller in controllers['items']:
364-
self._scheduler.cleanup_release(namespace, controller)
367+
self._scheduler.cleanup_release(namespace, controller, deploy_timeout)
365368

366369
# remove secret that contains env vars for the release
367370
try:

rootfs/api/settings/production.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,12 @@
265265
# Can also be overwritten on per app basis if desired
266266
DEIS_DEPLOY_BATCHES = os.environ.get('DEIS_DEPLOY_BATCHES', None)
267267

268+
# For old-style deploys (RCs), defines how long each batch
269+
# (as defined by DEIS_DEPLOY_BATCHES) can take before giving up
270+
# For Kubernetes Deployments it is part of the global timeout
271+
# where it roughly goes BATCHES * TIMEOUT = global timeout
272+
DEIS_DEPLOY_TIMEOUT = os.environ.get('DEIS_DEPLOY_TIMEOUT', 120)
273+
268274
# If the k8s Deployments object should be used instead of ReplicationController
269275
DEIS_KUBERNETES_DEPLOYMENTS = bool(os.environ.get('DEIS_KUBERNETES_DEPLOYMENTS', False))
270276

rootfs/scheduler/__init__.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def log(self, namespace, message, level=logging.INFO):
109109

110110
def deploy(self, namespace, name, image, command, **kwargs): # noqa
111111
"""Scale RC or Deployment depending on what's requested"""
112-
self.deploy_timeout = kwargs.get('deploy_timeout', 120)
112+
self.deploy_timeout = kwargs.get('deploy_timeout')
113113
if kwargs.get('deployments', False):
114114
self.deploy_deployment(namespace, name, image, command, **kwargs)
115115
else:
@@ -163,6 +163,7 @@ def deploy_rc(self, namespace, name, image, command, **kwargs): # noqa
163163
routable = kwargs.get('routable', False)
164164
envs = kwargs.get('envs', {})
165165
port = envs.get('PORT', None)
166+
timeout = kwargs.get('deploy_timeout')
166167

167168
# Fetch old RC and create the new one for a release
168169
old_rc = self.get_old_rc(namespace, app_type)
@@ -199,23 +200,23 @@ def deploy_rc(self, namespace, name, image, command, **kwargs): # noqa
199200
self.log(namespace, 'scaling release {} to {} out of final {}'.format(
200201
new_name, count, desired
201202
))
202-
self._scale_rc(namespace, new_name, count)
203+
self._scale_rc(namespace, new_name, count, timeout)
203204

204205
if old_rc:
205206
old_name = old_rc["metadata"]["name"]
206207
self.log(namespace, 'scaling old release {} from original {} to {}'.format(
207208
old_name, desired, (desired-count))
208209
)
209-
self._scale_rc(namespace, old_name, (desired-count))
210+
self._scale_rc(namespace, old_name, (desired-count), timeout)
210211
except Exception as e:
211212
# New release is broken. Clean up
212213

213214
# Remove new release of the RC
214-
self.cleanup_release(namespace, new_rc)
215+
self.cleanup_release(namespace, new_rc, timeout)
215216

216217
# If there was a previous release then bring that back
217218
if old_rc:
218-
self._scale_rc(namespace, old_rc["metadata"]["name"], desired)
219+
self._scale_rc(namespace, old_rc["metadata"]["name"], desired, timeout)
219220

220221
raise KubeException(
221222
'Could not scale {} to {}. '
@@ -226,20 +227,20 @@ def deploy_rc(self, namespace, name, image, command, **kwargs): # noqa
226227

227228
# New release is live and kicking. Clean up old release
228229
if old_rc:
229-
self.cleanup_release(namespace, old_rc)
230+
self.cleanup_release(namespace, old_rc, timeout)
230231

231232
# Make sure the application is routable and uses the correct port
232233
# Done after the fact to let initial deploy settle before routing
233234
# traffic to the application
234235
self._update_application_service(namespace, name, app_type, port, routable)
235236

236-
def cleanup_release(self, namespace, controller):
237+
def cleanup_release(self, namespace, controller, timeout):
237238
"""
238239
Cleans up resources related to an application deployment
239240
"""
240241
# Deployment takes care of this in the API, RC does not
241242
# Have the RC scale down pods and delete itself
242-
self._scale_rc(namespace, controller['metadata']['name'], 0)
243+
self._scale_rc(namespace, controller['metadata']['name'], 0, timeout)
243244
self.delete_rc(namespace, controller['metadata']['name'])
244245

245246
# Remove stray pods that the scale down will have missed (this can occasionally happen)
@@ -302,10 +303,10 @@ def _update_application_service(self, namespace, name, app_type, port, routable=
302303

303304
def scale(self, namespace, name, image, command, **kwargs):
304305
"""Scale RC or Deployment depending on what's requested"""
306+
self.deploy_timeout = kwargs.get('deploy_timeout')
305307
if kwargs.get('deployments', False):
306308
self.scale_deployment(namespace, name, image, command, **kwargs)
307309
else:
308-
self.deploy_timeout = kwargs.get('deploy_timeout', 120)
309310
self.scale_rc(namespace, name, image, command, **kwargs)
310311

311312
def scale_deployment(self, namespace, name, image, command, **kwargs):
@@ -339,7 +340,7 @@ def scale_rc(self, namespace, name, image, command, **kwargs):
339340
raise
340341

341342
# let the scale failure bubble up
342-
self._scale_rc(namespace, name, replicas)
343+
self._scale_rc(namespace, name, replicas, kwargs.get('deploy_timeout'))
343344

344345
def _build_pod_manifest(self, namespace, name, image, **kwargs):
345346
app_type = kwargs.get('app_type')
@@ -474,7 +475,13 @@ def run(self, namespace, name, image, entrypoint, command, **kwargs):
474475
# wait for run pod to start - use the same function as scale
475476
labels = manifest['metadata']['labels']
476477
containers = manifest['spec']['containers']
477-
self._wait_until_pods_are_ready(namespace, containers, labels, desired=1)
478+
self._wait_until_pods_are_ready(
479+
namespace,
480+
containers,
481+
labels,
482+
desired=1,
483+
timeout=kwargs.get('deploy_timeout')
484+
)
478485

479486
try:
480487
# give pod 20 minutes to execute (after it got into ready state)
@@ -906,12 +913,11 @@ def _deploy_probe_timeout(self, timeout, namespace, labels, containers):
906913

907914
return timeout
908915

909-
def _wait_until_pods_are_ready(self, namespace, containers, labels, desired): # noqa
916+
def _wait_until_pods_are_ready(self, namespace, containers, labels, desired, timeout): # noqa
910917
# If desired is 0 then there is no ready state to check on
911918
if desired == 0:
912919
return
913920

914-
timeout = self.deploy_timeout
915921
timeout = self._deploy_probe_timeout(timeout, namespace, labels, containers)
916922
self.log(namespace, "waiting for {} pods in {} namespace to be in services ({}s timeout)".format(desired, namespace, timeout)) # noqa
917923

@@ -957,7 +963,7 @@ def _wait_until_pods_are_ready(self, namespace, containers, labels, desired): #
957963

958964
self.log(namespace, "{} out of {} pods are in service".format(count, desired)) # noqa
959965

960-
def _scale_rc(self, namespace, name, desired):
966+
def _scale_rc(self, namespace, name, desired, timeout):
961967
rc = self.get_rc(namespace, name).json()
962968

963969
current = int(rc['spec']['replicas'])
@@ -976,7 +982,7 @@ def _scale_rc(self, namespace, name, desired):
976982
# Double check enough pods are in the required state to service the application
977983
labels = rc['metadata']['labels']
978984
containers = rc['spec']['template']['spec']['containers']
979-
self._wait_until_pods_are_ready(namespace, containers, labels, desired)
985+
self._wait_until_pods_are_ready(namespace, containers, labels, desired, timeout)
980986

981987
# if it was a scale down operation, wait until terminating pods are done
982988
if int(desired) < int(current):

0 commit comments

Comments
 (0)