summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorScott Dodson <sdodson@redhat.com>2017-08-10 09:25:36 -0400
committerScott Dodson <sdodson@redhat.com>2018-01-10 11:01:11 -0500
commit0841917f05cfad2701164edbb271167c277d3300 (patch)
tree97ea4e657da1a3513d0ffe5b3f5518521af466d9
parent31d19c5e68e61d004a93db738772c120e7dd0eb5 (diff)
downloadopenshift-0841917f05cfad2701164edbb271167c277d3300.tar.gz
openshift-0841917f05cfad2701164edbb271167c277d3300.tar.bz2
openshift-0841917f05cfad2701164edbb271167c277d3300.tar.xz
openshift-0841917f05cfad2701164edbb271167c277d3300.zip
Add the ability to specify a timeout for node drain operations
-rw-r--r--inventory/hosts.example8
-rw-r--r--playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml12
-rw-r--r--playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml12
-rw-r--r--playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml12
-rw-r--r--playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml4
5 files changed, 37 insertions, 11 deletions
diff --git a/inventory/hosts.example b/inventory/hosts.example
index bc85d1020..b07e0d159 100644
--- a/inventory/hosts.example
+++ b/inventory/hosts.example
@@ -991,6 +991,14 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true',
# where as this would not
# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50
#
+# A timeout to wait for nodes to drain pods can be specified to ensure that the
+# upgrade continues even if nodes fail to drain pods in the allowed time. The
+# default value of 0 will wait indefinitely allowing the admin to investigate
+# the root cause and ensuring that disruption budgets are respected. If a
+# timeout of 0 is used there will also be one attempt to retry draining the
+# node. If a non-zero timeout is specified there will be no attempt to retry.
+#openshift_upgrade_nodes_drain_timeout=0
+#
# Multiple data migrations take place and if they fail they will fail the upgrade
# You may wish to disable these or make them non fatal
#
diff --git a/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml
index ffb11670d..8392e21ee 100644
--- a/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml
+++ b/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml
@@ -51,13 +51,19 @@
- name: Drain Node for Kubelet upgrade
command: >
- {{ openshift_client_binary }} adm drain {{ openshift.node.nodename }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets
+ {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }}
+ --config={{ openshift.common.config_base }}/master/admin.kubeconfig
+ --force --delete-local-data --ignore-daemonsets
+ --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s
delegate_to: "{{ groups.oo_first_master.0 }}"
when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade
register: l_docker_upgrade_drain_result
until: not (l_docker_upgrade_drain_result is failed)
- retries: 60
- delay: 60
+ retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}"
+ delay: 5
+ failed_when:
+ - l_docker_upgrade_drain_result is failed
+ - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0
- include_tasks: tasks/upgrade.yml
when: l_docker_upgrade is defined and l_docker_upgrade | bool
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
index 91d496ff4..3f2ba8969 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
@@ -291,12 +291,18 @@
- name: Drain Node for Kubelet upgrade
command: >
- {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets
+ {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }}
+ --config={{ openshift.common.config_base }}/master/admin.kubeconfig
+ --force --delete-local-data --ignore-daemonsets
+ --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s
delegate_to: "{{ groups.oo_first_master.0 }}"
register: l_upgrade_control_plane_drain_result
until: not (l_upgrade_control_plane_drain_result is failed)
- retries: 60
- delay: 60
+ retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}"
+ delay: 5
+ failed_when:
+ - l_upgrade_control_plane_drain_result is failed
+ - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0
roles:
- openshift_facts
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index aba179c2b..856c8328c 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -35,12 +35,18 @@
- name: Drain Node for Kubelet upgrade
command: >
- {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets
+ {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }}
+ --config={{ openshift.common.config_base }}/master/admin.kubeconfig
+ --force --delete-local-data --ignore-daemonsets
+ --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s
delegate_to: "{{ groups.oo_first_master.0 }}"
register: l_upgrade_nodes_drain_result
until: not (l_upgrade_nodes_drain_result is failed)
- retries: 60
- delay: 60
+ retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}"
+ delay: 5
+ failed_when:
+ - l_upgrade_nodes_drain_result is failed
+ - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0
post_tasks:
- import_role:
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml
index 6d59bfd0b..e259b5d09 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml
@@ -50,11 +50,11 @@
delegate_to: "{{ groups.oo_first_master.0 }}"
register: l_upgrade_nodes_drain_result
until: not (l_upgrade_nodes_drain_result is failed)
- retries: "{{ 1 if openshift_upgrade_nodes_drain_timeout | default(0) == '0' else 0 | int }}"
+ retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}"
delay: 5
failed_when:
- l_upgrade_nodes_drain_result is failed
- - openshift_upgrade_nodes_drain_timeout | default(0) == '0'
+ - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0
# Alright, let's clean up!
- name: clean up the old scale group