Merge pull request #2562 from sdodson/etcd3

etcd upgrade playbooks
author: Scott Dodson <sdodson@redhat.com> 2016-11-14 15:16:11 -0500
committer: GitHub <noreply@github.com> 2016-11-14 15:16:11 -0500
commit: ae34dc20b9e797d6b73576b30b73931beca693ec (patch)
tree: d8e3f3d52ae078fbecd19df3cacb4d1bab65986f /playbooks
parent: 8e872d99d99c6451ecd28f56931926f30fa1b613 (diff)
parent: 371c36eae16644bda921550371da713e63e44fac (diff)
download: openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.tar.gz
openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.tar.bz2
openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.tar.xz
openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.zip
12 files changed, 322 insertions, 73 deletions
diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml
index 789f66b14..4ea639cbe 100644
--- a/playbooks/adhoc/uninstall.yml
+++ b/playbooks/adhoc/uninstall.yml
@@ -338,6 +338,7 @@
     failed_when: False
     with_items:
     - etcd
+    - etcd3
     - firewalld
 
   - name: Stop additional atomic services
@@ -352,6 +353,7 @@
     when: not is_atomic | bool
     with_items:
     - etcd
+    - etcd3
 
   - shell: systemctl reset-failed
     changed_when: False
@@ -365,6 +367,7 @@
     - /etc/ansible/facts.d/openshift.fact
     - /etc/etcd
     - /etc/systemd/system/etcd_container.service
+    - /etc/profile.d/etcdctl.sh
 
   # Intenationally using rm command over file module because if someone had mounted a filesystem
   # at /var/lib/etcd then the contents was not removed correctly
diff --git a/playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml b/playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml
new file mode 100644
index 000000000..c25f96212
--- /dev/null
+++ b/playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml
@@ -0,0 +1,26 @@
+---
+- include: ../../../common/openshift-cluster/verify_ansible_version.yml
+
+- name: Create initial host groups for localhost
+  hosts: localhost
+  connection: local
+  become: no
+  gather_facts: no
+  tags:
+  - always
+  tasks:
+  - include_vars: ../cluster_hosts.yml
+  - add_host:
+      name: "{{ item }}"
+      groups: l_oo_all_hosts
+    with_items: "{{ g_all_hosts | default([]) }}"
+
+- name: Create initial host groups for all hosts
+  hosts: l_oo_all_hosts
+  gather_facts: no
+  tags:
+  - always
+  tasks:
+  - include_vars: ../cluster_hosts.yml
+
+- include: ../../../common/openshift-cluster/upgrades/etcd/main.yml
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml
new file mode 100644
index 000000000..57b156b1c
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml
@@ -0,0 +1,73 @@
+- name: Backup etcd
+  hosts: etcd_hosts_to_backup
+  vars:
+    embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}"
+    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
+  roles:
+  - openshift_facts
+  tasks:
+  # Ensure we persist the etcd role for this host in openshift_facts
+  - openshift_facts:
+      role: etcd
+      local_facts: {}
+    when: "'etcd' not in openshift"
+
+  - stat: path=/var/lib/openshift
+    register: var_lib_openshift
+
+  - stat: path=/var/lib/origin
+    register: var_lib_origin
+
+  - name: Create origin symlink if necessary
+    file: src=/var/lib/openshift/ dest=/var/lib/origin state=link
+    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False
+
+  # TODO: replace shell module with command and update later checks
+  # We assume to be using the data dir for all backups.
+  - name: Check available disk space for etcd backup
+    shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1
+    register: avail_disk
+
+  # TODO: replace shell module with command and update later checks
+  - name: Check current embedded etcd disk usage
+    shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1
+    register: etcd_disk_usage
+    when: embedded_etcd | bool
+
+  - name: Abort if insufficient disk space for etcd backup
+    fail:
+      msg: >
+        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
+        {{ avail_disk.stdout }} Kb available.
+    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)
+
+  - name: Install etcd (for etcdctl)
+    action: "{{ ansible_pkg_mgr }} name=etcd state=present"
+    when: not openshift.common.is_atomic | bool
+
+  - name: Generate etcd backup
+    command: >
+      etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }}
+      --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}
+
+  - set_fact:
+      etcd_backup_complete: True
+
+  - name: Display location of etcd backup
+    debug:
+      msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}"
+
+- name: Gate on etcd backup
+  hosts: localhost
+  connection: local
+  become: no
+  tasks:
+  - set_fact:
+      etcd_backup_completed: "{{ hostvars
+                                 | oo_select_keys(groups.etcd_hosts_to_backup)
+                                 | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}"
+  - set_fact:
+      etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}"
+  - fail:
+      msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
+    when: etcd_backup_failed | length > 0
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml
new file mode 100644
index 000000000..35f391f8c
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml
@@ -0,0 +1,47 @@
+---
+- name: Verify cluster is healthy pre-upgrade
+  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
+
+- name: Get current image
+  shell: grep 'ExecStart=' /etc/systemd/system/etcd_container.service | awk '{print $NF}'
+  register: current_image
+
+- name: Set new_etcd_image
+  set_fact:
+    new_etcd_image: "{{ current_image.stdout | regex_replace('/etcd.*$','/etcd3:' ~ upgrade_version ) if upgrade_version | version_compare('3.0','>=')
+                        else current_image.stdout.split(':')[0] ~ ':' ~ upgrade_version }}"
+
+- name: Pull new etcd image
+  command: "docker pull {{ new_etcd_image }}"
+
+- name: Update to latest etcd image
+  replace:
+    dest: /etc/systemd/system/etcd_container.service
+    regexp: "{{ current_image.stdout }}$"
+    replace: "{{ new_etcd_image }}"
+
+- name: Restart etcd_container
+  systemd:
+    name: etcd_container
+    daemon_reload: yes
+    state: restarted
+
+## TODO: probably should just move this into the backup playbooks, also this
+## will fail on atomic host. We need to revisit how to do etcd backups there as
+## the container may be newer than etcdctl on the host. Assumes etcd3 obsoletes etcd (7.3.1)
+- name: Upgrade etcd for etcdctl when not atomic
+  action: "{{ ansible_pkg_mgr }} name=etcd ensure=latest"
+  when: not openshift.common.is_atomic | bool
+
+- name: Verify cluster is healthy
+  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
+  register: etcdctl
+  until: etcdctl.rc == 0
+  retries: 3
+  delay: 10
+
+- name: Store new etcd_image
+  openshift_facts:
+    role: etcd
+    local_facts:
+      etcd_image: "{{ new_etcd_image }}"
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml
new file mode 100644
index 000000000..30232110e
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml
@@ -0,0 +1,23 @@
+---
+# F23 GA'd with etcd 2.0, currently has 2.2 in updates
+# F24 GA'd with etcd-2.2, currently has 2.2 in updates
+# F25 Beta currently has etcd 3.0
+- name: Verify cluster is healthy pre-upgrade
+  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
+
+- name: Update etcd
+  package:
+    name: "etcd"
+    state: "latest"
+
+- name: Restart etcd
+  service:
+    name: etcd
+    state: restarted
+
+- name: Verify cluster is healthy
+  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
+  register: etcdctl
+  until: etcdctl.rc == 0
+  retries: 3
+  delay: 10
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh
new file mode 120000
index 000000000..641e04e44
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh
@@ -0,0 +1 @@
+../roles/etcd/files/etcdctl.sh
+\ No newline at end of file
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins
new file mode 120000
index 000000000..27ddaa18b
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins
@@ -0,0 +1 @@
+../../../../../filter_plugins
+\ No newline at end of file
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins
new file mode 120000
index 000000000..cf407f69b
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins
@@ -0,0 +1 @@
+../../../../../lookup_plugins
+\ No newline at end of file
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml
new file mode 100644
index 000000000..cce844403
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml
@@ -0,0 +1,122 @@
+---
+# For 1.4/3.4 we want to upgrade everyone to etcd-3.0. etcd docs say to
+# upgrade from 2.0.x to 2.1.x to 2.2.x to 2.3.x to 3.0.x. While this is a tedius
+# task for RHEL and CENTOS it's simply not possible in Fedora unless you've
+# mirrored packages on your own because only the GA and latest versions are
+# available in the repos. So for Fedora we'll simply skip this, sorry.
+
+- include: ../../evaluate_groups.yml
+  tags:
+  - always
+
+- name: Evaluate additional groups for upgrade
+  hosts: localhost
+  connection: local
+  become: no
+  tasks:
+  - name: Evaluate etcd_hosts_to_upgrade
+    add_host:
+      name: "{{ item }}"
+      groups: etcd_hosts_to_upgrade, etcd_hosts_to_backup
+    with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master }}"
+
+- name: Backup etcd before upgrading anything
+  include: backup.yml
+  vars:
+    backup_tag: "pre-upgrade-"
+
+- name: Drop etcdctl profiles
+  hosts: etcd_hosts_to_upgrade
+  tasks:
+  - include: roles/etcd/tasks/etcdctl.yml
+
+- name: Determine etcd version
+  hosts: etcd_hosts_to_upgrade
+  tasks:
+  - name: Record RPM based etcd version
+    command: rpm -qa --qf '%{version}' etcd\*
+    register: etcd_installed_version
+    failed_when: false
+    when: not openshift.common.is_containerized | bool
+  - name: Record containerized etcd version
+    command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\*
+    register: etcd_installed_version
+    failed_when: false
+    when: openshift.common.is_containerized | bool
+
+# I really dislike this copy/pasta but I wasn't able to find a way to get it to loop
+# through hosts, then loop through tasks only when appropriate
+- name: Upgrade to 2.1
+  hosts: etcd_hosts_to_upgrade
+  serial: 1
+  vars:
+    upgrade_version: '2.1'
+  tasks:
+  - include: rhel_tasks.yml
+    when: etcd_installed_version.stdout | default('99') | version_compare('2.1','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool
+
+- name: Upgrade RPM hosts to 2.2
+  hosts: etcd_hosts_to_upgrade
+  serial: 1
+  vars:
+    upgrade_version: '2.2'
+  tasks:
+  - include: rhel_tasks.yml
+    when: etcd_installed_version.stdout | default('99') | version_compare('2.2','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool
+
+- name: Upgrade containerized hosts to 2.2.5
+  hosts: etcd_hosts_to_upgrade
+  serial: 1
+  vars:
+    upgrade_version: 2.2.5
+  tasks:
+  - include: containerized_tasks.yml
+    when: etcd_installed_version.stdout | default('99') | version_compare('2.2','<') and openshift.common.is_containerized | bool
+
+- name: Upgrade RPM hosts to 2.3
+  hosts: etcd_hosts_to_upgrade
+  serial: 1
+  vars:
+    upgrade_version: '2.3'
+  tasks:
+  - include: rhel_tasks.yml
+    when: etcd_installed_version.stdout | default('99') | version_compare('2.3','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool
+
+- name: Upgrade containerized hosts to 2.3.7
+  hosts: etcd_hosts_to_upgrade
+  serial: 1
+  vars:
+    upgrade_version: 2.3.7
+  tasks:
+  - include: containerized_tasks.yml
+    when: etcd_installed_version.stdout | default('99') | version_compare('2.3','<') and openshift.common.is_containerized | bool
+
+- name: Upgrade RPM hosts to 3.0
+  hosts: etcd_hosts_to_upgrade
+  serial: 1
+  vars:
+    upgrade_version: '3.0'
+  tasks:
+  - include: rhel_tasks.yml
+    when: etcd_installed_version.stdout | default('99') | version_compare('3.0','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool
+
+- name: Upgrade containerized hosts to etcd3 image
+  hosts: etcd_hosts_to_upgrade
+  serial: 1
+  vars:
+    upgrade_version: 3.0.3
+  tasks:
+  - include: containerized_tasks.yml
+    when: etcd_installed_version.stdout | default('99') | version_compare('3.0','<') and openshift.common.is_containerized | bool
+
+- name: Upgrade fedora to latest
+  hosts: etcd_hosts_to_upgrade
+  serial: 1
+  tasks:
+  - include: fedora_tasks.yml
+    when: ansible_distribution == 'Fedora' and not openshift.common.is_containerized | bool
+
+- name: Backup etcd
+  include: backup.yml
+  vars:
+    backup_tag: "post-3.0-"
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml
new file mode 100644
index 000000000..8e7dc9d9b
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml
@@ -0,0 +1,23 @@
+---
+- name: Verify cluster is healthy pre-upgrade
+  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
+
+- name: Update etcd package but exclude etcd3
+  command: "{{ ansible_pkg_mgr }} install -y etcd-{{ upgrade_version }}\\* --exclude etcd3"
+  when: upgrade_version | version_compare('3.0','<')
+
+- name: Update etcd package not excluding etcd3
+  command: "{{ ansible_pkg_mgr }} install -y etcd3-{{ upgrade_version }}\\*"
+  when: not upgrade_version | version_compare('3.0','<')
+
+- name: Restart etcd
+  service:
+    name: etcd
+    state: restarted
+
+- name: Verify cluster is healthy
+  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health"
+  register: etcdctl
+  until: etcdctl.rc == 0
+  retries: 3
+  delay: 10
diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/roles b/playbooks/common/openshift-cluster/upgrades/etcd/roles
new file mode 120000
index 000000000..6bc1a7aef
--- /dev/null
+++ b/playbooks/common/openshift-cluster/upgrades/etcd/roles
@@ -0,0 +1 @@
+../../../../../roles
+\ No newline at end of file
diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
index 1ed451af8..57c25aa41 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml
@@ -28,79 +28,7 @@
         debug_level: "{{ openshift_master_debug_level | default(openshift.common.debug_level | default(2)) }}"
 
 - name: Backup etcd
-  hosts: etcd_hosts_to_backup
-  vars:
-    embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"
-    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}"
-  roles:
-  - openshift_facts
-  tasks:
-  # Ensure we persist the etcd role for this host in openshift_facts
-  - openshift_facts:
-      role: etcd
-      local_facts: {}
-    when: "'etcd' not in openshift"
-
-  - stat: path=/var/lib/openshift
-    register: var_lib_openshift
-
-  - stat: path=/var/lib/origin
-    register: var_lib_origin
-
-  - name: Create origin symlink if necessary
-    file: src=/var/lib/openshift/ dest=/var/lib/origin state=link
-    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False
-
-  # TODO: replace shell module with command and update later checks
-  # We assume to be using the data dir for all backups.
-  - name: Check available disk space for etcd backup
-    shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1
-    register: avail_disk
-
-  # TODO: replace shell module with command and update later checks
-  - name: Check current embedded etcd disk usage
-    shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1
-    register: etcd_disk_usage
-    when: embedded_etcd | bool
-
-  - name: Abort if insufficient disk space for etcd backup
-    fail:
-      msg: >
-        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup,
-        {{ avail_disk.stdout }} Kb available.
-    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int)
-
-  - name: Install etcd (for etcdctl)
-    action: "{{ ansible_pkg_mgr }} name=etcd state=installed"
-    when: not openshift.common.is_atomic | bool
-
-  - name: Generate etcd backup
-    command: >
-      etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }}
-      --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}
-
-  - set_fact:
-      etcd_backup_complete: True
-
-  - name: Display location of etcd backup
-    debug:
-      msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}"
-
-
-- name: Gate on etcd backup
-  hosts: localhost
-  connection: local
-  become: no
-  tasks:
-  - set_fact:
-      etcd_backup_completed: "{{ hostvars
-                                 | oo_select_keys(groups.etcd_hosts_to_backup)
-                                 | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}"
-  - set_fact:
-      etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}"
-  - fail:
-      msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}"
-    when: etcd_backup_failed | length > 0
+  include: ./etcd/backup.yml
 
 - name: Upgrade master packages
   hosts: oo_masters_to_config
author	Scott Dodson <sdodson@redhat.com>	2016-11-14 15:16:11 -0500
committer	GitHub <noreply@github.com>	2016-11-14 15:16:11 -0500
commit	ae34dc20b9e797d6b73576b30b73931beca693ec (patch)
tree	d8e3f3d52ae078fbecd19df3cacb4d1bab65986f /playbooks
parent	8e872d99d99c6451ecd28f56931926f30fa1b613 (diff)
parent	371c36eae16644bda921550371da713e63e44fac (diff)
download	openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.tar.gz openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.tar.bz2 openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.tar.xz openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.zip