diff options
| author | Scott Dodson <sdodson@redhat.com> | 2016-11-14 15:16:11 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2016-11-14 15:16:11 -0500 | 
| commit | ae34dc20b9e797d6b73576b30b73931beca693ec (patch) | |
| tree | d8e3f3d52ae078fbecd19df3cacb4d1bab65986f /playbooks | |
| parent | 8e872d99d99c6451ecd28f56931926f30fa1b613 (diff) | |
| parent | 371c36eae16644bda921550371da713e63e44fac (diff) | |
| download | openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.tar.gz openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.tar.bz2 openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.tar.xz openshift-ae34dc20b9e797d6b73576b30b73931beca693ec.zip | |
Merge pull request #2562 from sdodson/etcd3
etcd upgrade playbooks
Diffstat (limited to 'playbooks')
12 files changed, 322 insertions, 73 deletions
| diff --git a/playbooks/adhoc/uninstall.yml b/playbooks/adhoc/uninstall.yml index 789f66b14..4ea639cbe 100644 --- a/playbooks/adhoc/uninstall.yml +++ b/playbooks/adhoc/uninstall.yml @@ -338,6 +338,7 @@      failed_when: False      with_items:      - etcd +    - etcd3      - firewalld    - name: Stop additional atomic services @@ -352,6 +353,7 @@      when: not is_atomic | bool      with_items:      - etcd +    - etcd3    - shell: systemctl reset-failed      changed_when: False @@ -365,6 +367,7 @@      - /etc/ansible/facts.d/openshift.fact      - /etc/etcd      - /etc/systemd/system/etcd_container.service +    - /etc/profile.d/etcdctl.sh    # Intenationally using rm command over file module because if someone had mounted a filesystem    # at /var/lib/etcd then the contents was not removed correctly diff --git a/playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml b/playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml new file mode 100644 index 000000000..c25f96212 --- /dev/null +++ b/playbooks/byo/openshift-cluster/upgrades/upgrade_etcd.yml @@ -0,0 +1,26 @@ +--- +- include: ../../../common/openshift-cluster/verify_ansible_version.yml + +- name: Create initial host groups for localhost +  hosts: localhost +  connection: local +  become: no +  gather_facts: no +  tags: +  - always +  tasks: +  - include_vars: ../cluster_hosts.yml +  - add_host: +      name: "{{ item }}" +      groups: l_oo_all_hosts +    with_items: "{{ g_all_hosts | default([]) }}" + +- name: Create initial host groups for all hosts +  hosts: l_oo_all_hosts +  gather_facts: no +  tags: +  - always +  tasks: +  - include_vars: ../cluster_hosts.yml + +- include: ../../../common/openshift-cluster/upgrades/etcd/main.yml diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml new file mode 100644 index 000000000..57b156b1c --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -0,0 +1,73 @@ +- name: Backup etcd +  hosts: etcd_hosts_to_backup +  vars: +    embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" +    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" +  roles: +  - openshift_facts +  tasks: +  # Ensure we persist the etcd role for this host in openshift_facts +  - openshift_facts: +      role: etcd +      local_facts: {} +    when: "'etcd' not in openshift" + +  - stat: path=/var/lib/openshift +    register: var_lib_openshift + +  - stat: path=/var/lib/origin +    register: var_lib_origin + +  - name: Create origin symlink if necessary +    file: src=/var/lib/openshift/ dest=/var/lib/origin state=link +    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False + +  # TODO: replace shell module with command and update later checks +  # We assume to be using the data dir for all backups. +  - name: Check available disk space for etcd backup +    shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 +    register: avail_disk + +  # TODO: replace shell module with command and update later checks +  - name: Check current embedded etcd disk usage +    shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 +    register: etcd_disk_usage +    when: embedded_etcd | bool + +  - name: Abort if insufficient disk space for etcd backup +    fail: +      msg: > +        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, +        {{ avail_disk.stdout }} Kb available. +    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + +  - name: Install etcd (for etcdctl) +    action: "{{ ansible_pkg_mgr }} name=etcd state=present" +    when: not openshift.common.is_atomic | bool + +  - name: Generate etcd backup +    command: > +      etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} +      --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }} + +  - set_fact: +      etcd_backup_complete: True + +  - name: Display location of etcd backup +    debug: +      msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}" + +- name: Gate on etcd backup +  hosts: localhost +  connection: local +  become: no +  tasks: +  - set_fact: +      etcd_backup_completed: "{{ hostvars +                                 | oo_select_keys(groups.etcd_hosts_to_backup) +                                 | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" +  - set_fact: +      etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" +  - fail: +      msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" +    when: etcd_backup_failed | length > 0 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml new file mode 100644 index 000000000..35f391f8c --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml @@ -0,0 +1,47 @@ +--- +- name: Verify cluster is healthy pre-upgrade +  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Get current image +  shell: grep 'ExecStart=' /etc/systemd/system/etcd_container.service | awk '{print $NF}' +  register: current_image + +- name: Set new_etcd_image +  set_fact: +    new_etcd_image: "{{ current_image.stdout | regex_replace('/etcd.*$','/etcd3:' ~ upgrade_version ) if upgrade_version | version_compare('3.0','>=') +                        else current_image.stdout.split(':')[0] ~ ':' ~ upgrade_version }}" + +- name: Pull new etcd image +  command: "docker pull {{ new_etcd_image }}" + +- name: Update to latest etcd image +  replace: +    dest: /etc/systemd/system/etcd_container.service +    regexp: "{{ current_image.stdout }}$" +    replace: "{{ new_etcd_image }}" + +- name: Restart etcd_container +  systemd: +    name: etcd_container +    daemon_reload: yes +    state: restarted + +## TODO: probably should just move this into the backup playbooks, also this +## will fail on atomic host. We need to revisit how to do etcd backups there as +## the container may be newer than etcdctl on the host. Assumes etcd3 obsoletes etcd (7.3.1) +- name: Upgrade etcd for etcdctl when not atomic +  action: "{{ ansible_pkg_mgr }} name=etcd ensure=latest" +  when: not openshift.common.is_atomic | bool + +- name: Verify cluster is healthy +  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" +  register: etcdctl +  until: etcdctl.rc == 0 +  retries: 3 +  delay: 10 + +- name: Store new etcd_image +  openshift_facts: +    role: etcd +    local_facts: +      etcd_image: "{{ new_etcd_image }}" diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml new file mode 100644 index 000000000..30232110e --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml @@ -0,0 +1,23 @@ +--- +# F23 GA'd with etcd 2.0, currently has 2.2 in updates +# F24 GA'd with etcd-2.2, currently has 2.2 in updates +# F25 Beta currently has etcd 3.0 +- name: Verify cluster is healthy pre-upgrade +  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Update etcd +  package: +    name: "etcd" +    state: "latest" + +- name: Restart etcd +  service: +    name: etcd +    state: restarted + +- name: Verify cluster is healthy +  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" +  register: etcdctl +  until: etcdctl.rc == 0 +  retries: 3 +  delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh new file mode 120000 index 000000000..641e04e44 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh @@ -0,0 +1 @@ +../roles/etcd/files/etcdctl.sh
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins new file mode 120000 index 000000000..27ddaa18b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins @@ -0,0 +1 @@ +../../../../../filter_plugins
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins new file mode 120000 index 000000000..cf407f69b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins @@ -0,0 +1 @@ +../../../../../lookup_plugins
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml new file mode 100644 index 000000000..cce844403 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml @@ -0,0 +1,122 @@ +--- +# For 1.4/3.4 we want to upgrade everyone to etcd-3.0. etcd docs say to +# upgrade from 2.0.x to 2.1.x to 2.2.x to 2.3.x to 3.0.x. While this is a tedius +# task for RHEL and CENTOS it's simply not possible in Fedora unless you've +# mirrored packages on your own because only the GA and latest versions are +# available in the repos. So for Fedora we'll simply skip this, sorry. + +- include: ../../evaluate_groups.yml +  tags: +  - always + +- name: Evaluate additional groups for upgrade +  hosts: localhost +  connection: local +  become: no +  tasks: +  - name: Evaluate etcd_hosts_to_upgrade +    add_host: +      name: "{{ item }}" +      groups: etcd_hosts_to_upgrade, etcd_hosts_to_backup +    with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master }}" + +- name: Backup etcd before upgrading anything +  include: backup.yml +  vars: +    backup_tag: "pre-upgrade-" + +- name: Drop etcdctl profiles +  hosts: etcd_hosts_to_upgrade +  tasks: +  - include: roles/etcd/tasks/etcdctl.yml + +- name: Determine etcd version +  hosts: etcd_hosts_to_upgrade +  tasks: +  - name: Record RPM based etcd version +    command: rpm -qa --qf '%{version}' etcd\* +    register: etcd_installed_version +    failed_when: false +    when: not openshift.common.is_containerized | bool +  - name: Record containerized etcd version +    command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\* +    register: etcd_installed_version +    failed_when: false +    when: openshift.common.is_containerized | bool + +# I really dislike this copy/pasta but I wasn't able to find a way to get it to loop +# through hosts, then loop through tasks only when appropriate +- name: Upgrade to 2.1 +  hosts: etcd_hosts_to_upgrade +  serial: 1 +  vars: +    upgrade_version: '2.1' +  tasks: +  - include: rhel_tasks.yml +    when: etcd_installed_version.stdout | default('99') | version_compare('2.1','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 2.2 +  hosts: etcd_hosts_to_upgrade +  serial: 1 +  vars: +    upgrade_version: '2.2' +  tasks: +  - include: rhel_tasks.yml +    when: etcd_installed_version.stdout | default('99') | version_compare('2.2','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to 2.2.5 +  hosts: etcd_hosts_to_upgrade +  serial: 1 +  vars: +    upgrade_version: 2.2.5 +  tasks: +  - include: containerized_tasks.yml +    when: etcd_installed_version.stdout | default('99') | version_compare('2.2','<') and openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 2.3 +  hosts: etcd_hosts_to_upgrade +  serial: 1 +  vars: +    upgrade_version: '2.3' +  tasks: +  - include: rhel_tasks.yml +    when: etcd_installed_version.stdout | default('99') | version_compare('2.3','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to 2.3.7 +  hosts: etcd_hosts_to_upgrade +  serial: 1 +  vars: +    upgrade_version: 2.3.7 +  tasks: +  - include: containerized_tasks.yml +    when: etcd_installed_version.stdout | default('99') | version_compare('2.3','<') and openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 3.0 +  hosts: etcd_hosts_to_upgrade +  serial: 1 +  vars: +    upgrade_version: '3.0' +  tasks: +  - include: rhel_tasks.yml +    when: etcd_installed_version.stdout | default('99') | version_compare('3.0','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to etcd3 image +  hosts: etcd_hosts_to_upgrade +  serial: 1 +  vars: +    upgrade_version: 3.0.3 +  tasks: +  - include: containerized_tasks.yml +    when: etcd_installed_version.stdout | default('99') | version_compare('3.0','<') and openshift.common.is_containerized | bool + +- name: Upgrade fedora to latest +  hosts: etcd_hosts_to_upgrade +  serial: 1 +  tasks: +  - include: fedora_tasks.yml +    when: ansible_distribution == 'Fedora' and not openshift.common.is_containerized | bool + +- name: Backup etcd +  include: backup.yml +  vars: +    backup_tag: "post-3.0-" diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml new file mode 100644 index 000000000..8e7dc9d9b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml @@ -0,0 +1,23 @@ +--- +- name: Verify cluster is healthy pre-upgrade +  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Update etcd package but exclude etcd3 +  command: "{{ ansible_pkg_mgr }} install -y etcd-{{ upgrade_version }}\\* --exclude etcd3" +  when: upgrade_version | version_compare('3.0','<') + +- name: Update etcd package not excluding etcd3 +  command: "{{ ansible_pkg_mgr }} install -y etcd3-{{ upgrade_version }}\\*" +  when: not upgrade_version | version_compare('3.0','<') + +- name: Restart etcd +  service: +    name: etcd +    state: restarted + +- name: Verify cluster is healthy +  command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" +  register: etcdctl +  until: etcdctl.rc == 0 +  retries: 3 +  delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/roles b/playbooks/common/openshift-cluster/upgrades/etcd/roles new file mode 120000 index 000000000..6bc1a7aef --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/roles @@ -0,0 +1 @@ +../../../../../roles
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 1ed451af8..57c25aa41 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -28,79 +28,7 @@          debug_level: "{{ openshift_master_debug_level | default(openshift.common.debug_level | default(2)) }}"  - name: Backup etcd -  hosts: etcd_hosts_to_backup -  vars: -    embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" -    timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" -  roles: -  - openshift_facts -  tasks: -  # Ensure we persist the etcd role for this host in openshift_facts -  - openshift_facts: -      role: etcd -      local_facts: {} -    when: "'etcd' not in openshift" - -  - stat: path=/var/lib/openshift -    register: var_lib_openshift - -  - stat: path=/var/lib/origin -    register: var_lib_origin - -  - name: Create origin symlink if necessary -    file: src=/var/lib/openshift/ dest=/var/lib/origin state=link -    when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False - -  # TODO: replace shell module with command and update later checks -  # We assume to be using the data dir for all backups. -  - name: Check available disk space for etcd backup -    shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 -    register: avail_disk - -  # TODO: replace shell module with command and update later checks -  - name: Check current embedded etcd disk usage -    shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 -    register: etcd_disk_usage -    when: embedded_etcd | bool - -  - name: Abort if insufficient disk space for etcd backup -    fail: -      msg: > -        {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, -        {{ avail_disk.stdout }} Kb available. -    when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) - -  - name: Install etcd (for etcdctl) -    action: "{{ ansible_pkg_mgr }} name=etcd state=installed" -    when: not openshift.common.is_atomic | bool - -  - name: Generate etcd backup -    command: > -      etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} -      --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }} - -  - set_fact: -      etcd_backup_complete: True - -  - name: Display location of etcd backup -    debug: -      msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}" - - -- name: Gate on etcd backup -  hosts: localhost -  connection: local -  become: no -  tasks: -  - set_fact: -      etcd_backup_completed: "{{ hostvars -                                 | oo_select_keys(groups.etcd_hosts_to_backup) -                                 | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" -  - set_fact: -      etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" -  - fail: -      msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" -    when: etcd_backup_failed | length > 0 +  include: ./etcd/backup.yml  - name: Upgrade master packages    hosts: oo_masters_to_config | 
