diff options
| author | Scott Dodson <sdodson@redhat.com> | 2017-08-01 12:55:47 -0400 | 
|---|---|---|
| committer | Scott Dodson <sdodson@redhat.com> | 2017-08-22 12:57:57 -0400 | 
| commit | 4b5d8d2dc25dbca20be59f3d5d111d737fd865bc (patch) | |
| tree | b2707928f22b8a308ff8b110b2ce466e0a9b3c95 /playbooks/common/openshift-etcd | |
| parent | b107f677a3563f6db9a1ef17fe10518d321d495c (diff) | |
| download | openshift-4b5d8d2dc25dbca20be59f3d5d111d737fd865bc.tar.gz openshift-4b5d8d2dc25dbca20be59f3d5d111d737fd865bc.tar.bz2 openshift-4b5d8d2dc25dbca20be59f3d5d111d737fd865bc.tar.xz openshift-4b5d8d2dc25dbca20be59f3d5d111d737fd865bc.zip | |
Switch to migrating one host and forming a new cluster
With large datasets where there are many keys with TTLs, the expiry was
creating a data inconsistency problem. The hope is that, by performing
the migration once and then forming a new cluster, this inconsistency is avoided.
Fixes https://bugzilla.redhat.com/show_bug.cgi?id=1475351
Diffstat (limited to 'playbooks/common/openshift-etcd')
| -rw-r--r-- | playbooks/common/openshift-etcd/migrate.yml | 65 | ||||
| -rw-r--r-- | playbooks/common/openshift-etcd/scaleup.yml | 13 | 
2 files changed, 68 insertions, 10 deletions
| diff --git a/playbooks/common/openshift-etcd/migrate.yml b/playbooks/common/openshift-etcd/migrate.yml index 3e7a48669..311ff84b6 100644 --- a/playbooks/common/openshift-etcd/migrate.yml +++ b/playbooks/common/openshift-etcd/migrate.yml @@ -17,18 +17,14 @@    tags:    - always +# TODO: This will be different for release-3.6 branch  - name: Prepare masters for etcd data migration    hosts: oo_masters_to_config    tasks:    - set_fact:        master_services: -      - "{{ openshift.common.service_type + '-master' }}" -  - set_fact: -      master_services:        - "{{ openshift.common.service_type + '-master-controllers' }}"        - "{{ openshift.common.service_type + '-master-api' }}" -    when: -    - (openshift_master_cluster_method is defined and openshift_master_cluster_method == "native") or openshift.common.is_master_system_container | bool    - debug:        msg: "master service name: {{ master_services }}"    - name: Stop masters @@ -67,16 +63,59 @@      when:      - etcd_backup_failed | length > 0 -- name: Migrate etcd data from v2 to v3 +- name: Stop etcd    hosts: oo_etcd_to_migrate    gather_facts: no    tags:    - always +  pre_tasks: +  - set_fact: +      l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}" +  - name: Disable etcd members +    service: +      name: "{{ l_etcd_service }}" +      state: stopped + +- name: Migrate data on first etcd +  hosts: oo_etcd_to_migrate[0] +  gather_facts: no +  tags: +  - always    roles:    - role: etcd_migrate      r_etcd_migrate_action: migrate      r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}"      etcd_peer: "{{ ansible_default_ipv4.address }}" +    etcd_url_scheme: "https" +    etcd_peer_url_scheme: "https" + +- name: Clean data stores on remaining etcd hosts +  hosts: oo_etcd_to_migrate[1:] +  gather_facts: no +  tags: +  - always +  roles: +  - role: etcd_migrate +    r_etcd_migrate_action: clean_data +    
r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" +    etcd_peer: "{{ ansible_default_ipv4.address }}" +    etcd_url_scheme: "https" +    etcd_peer_url_scheme: "https" +  post_tasks: +  - name: Add etcd hosts +    delegate_to: localhost +    add_host: +      name: "{{ item }}" +      groups: oo_new_etcd_to_config +      ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" +      ansible_become: "{{ g_sudo | default(omit) }}" +    with_items: "{{ groups.oo_etcd_to_migrate[1:] | default([]) }}" +    changed_when: no +  - name: Set success +    set_fact: +      r_etcd_migrate_success: true + +- include: ./scaleup.yml  - name: Gate on etcd migration    hosts: oo_masters_to_config @@ -89,6 +128,16 @@    - set_fact:        etcd_migration_failed: "{{ groups.oo_etcd_to_migrate | difference(etcd_migration_completed) }}" +- name: Add TTLs on the first master +  hosts: oo_first_master[0] +  roles: +  - role: etcd_migrate +    r_etcd_migrate_action: add_ttls +    etcd_peer: "{{ hostvars[groups.oo_etcd_to_migrate.0].ansible_default_ipv4.address }}" +    etcd_url_scheme: "https" +    etcd_peer_url_scheme: "https" +    when: etcd_migration_failed | length == 0 +  - name: Configure masters if etcd data migration is succesfull    hosts: oo_masters_to_config    roles: @@ -100,10 +149,6 @@        msg: "Skipping master re-configuration since migration failed."      
when:      - etcd_migration_failed | length > 0 - -- name: Start masters after etcd data migration -  hosts: oo_masters_to_config -  tasks:    - name: Start master services      service:        name: "{{ item }}" diff --git a/playbooks/common/openshift-etcd/scaleup.yml b/playbooks/common/openshift-etcd/scaleup.yml index 192305bc8..52b90daca 100644 --- a/playbooks/common/openshift-etcd/scaleup.yml +++ b/playbooks/common/openshift-etcd/scaleup.yml @@ -24,6 +24,9 @@                         member add {{ etcd_hostname }} {{ etcd_peer_url_scheme }}://{{ etcd_ip }}:{{ etcd_peer_port }}      delegate_to: "{{ etcd_ca_host }}"      register: etcd_add_check +    retries: 3 +    delay: 10 +    until: etcd_add_check.rc == 0    roles:    - role: openshift_etcd      when: etcd_add_check.rc == 0 @@ -36,3 +39,13 @@      r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}"    - role: nickhammond.logrotate      when: etcd_add_check.rc == 0 +  post_tasks: +  - name: Verify cluster is stable +    command: > +      /usr/bin/etcdctl --cert-file {{ etcd_peer_cert_file }} +                       --key-file {{ etcd_peer_key_file }} +                       --ca-file {{ etcd_peer_ca_file }} +                       -C {{ etcd_peer_url_scheme }}://{{ hostvars[etcd_ca_host].etcd_hostname }}:{{ etcd_client_port }} +                       cluster-health +    retries: 1 +    delay: 30 | 
