diff options
Diffstat (limited to 'roles')
39 files changed, 2404 insertions, 131 deletions
| diff --git a/roles/calico/defaults/main.yaml b/roles/calico/defaults/main.yaml index 03c612982..c7eea46f2 100644 --- a/roles/calico/defaults/main.yaml +++ b/roles/calico/defaults/main.yaml @@ -1,15 +1,10 @@  ---  kubeconfig: "{{openshift.common.config_base}}/node/{{ 'system:node:' +  openshift.common.hostname }}.kubeconfig" -etcd_endpoints: "{{ hostvars[groups.oo_first_master.0].openshift.master.etcd_urls | join(',') }}"  cni_conf_dir: "/etc/cni/net.d/"  cni_bin_dir: "/opt/cni/bin/"  cni_url: "https://github.com/containernetworking/cni/releases/download/v0.4.0/cni-amd64-v0.4.0.tgz" -calico_etcd_ca_cert_file: "/etc/origin/calico/calico.etcd-ca.crt" -calico_etcd_cert_file: "/etc/origin/calico/calico.etcd-client.crt" -calico_etcd_key_file: "/etc/origin/calico/calico.etcd-client.key" -  calico_url_cni: "https://github.com/projectcalico/cni-plugin/releases/download/v1.5.5/calico"  calico_url_ipam: "https://github.com/projectcalico/cni-plugin/releases/download/v1.5.5/calico-ipam" diff --git a/roles/calico/tasks/gen_certs.yml b/roles/calico/tasks/gen_certs.yml new file mode 100644 index 000000000..2e6aa114e --- /dev/null +++ b/roles/calico/tasks/gen_certs.yml @@ -0,0 +1,17 @@ +--- +- name: Calico Node | Generate OpenShift-etcd certs +  include: ../../../roles/etcd_client_certificates/tasks/main.yml +  vars: +    etcd_cert_prefix: calico.etcd- +    etcd_cert_config_dir: "{{ openshift.common.config_base }}/calico" +    embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" +    etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}" +    etcd_cert_subdir: "openshift-calico-{{ openshift.common.hostname }}" + +- name: Calico Node | Set etcd cert location facts +  set_fact: +    calico_etcd_ca_cert_file: "/etc/origin/calico/calico.etcd-ca.crt" +    calico_etcd_cert_file: "/etc/origin/calico/calico.etcd-client.crt" +    calico_etcd_key_file: "/etc/origin/calico/calico.etcd-client.key" +    calico_etcd_endpoints: "{{ hostvars[groups.oo_first_master.0].openshift.master.etcd_urls | join(',') }}" +    calico_etcd_cert_dir: "/etc/origin/calico/" diff --git a/roles/calico/tasks/main.yml b/roles/calico/tasks/main.yml index fa5e338b3..8a7a61dc9 100644 --- a/roles/calico/tasks/main.yml +++ b/roles/calico/tasks/main.yml @@ -1,19 +1,36 @@  --- -- include: ../../../roles/etcd_client_certificates/tasks/main.yml -  vars: -    etcd_cert_prefix: calico.etcd- -    etcd_cert_config_dir: "{{ openshift.common.config_base }}/calico" -    embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" -    etcd_ca_host: "{{ groups.oo_etcd_to_config.0 }}" -    etcd_cert_subdir: "openshift-calico-{{ openshift.common.hostname }}" +- name: Calico Node | Error if invalid cert arguments +  fail: +    msg: "Must provide all or none for the following etcd params: calico_etcd_cert_dir, calico_etcd_ca_cert_file, calico_etcd_cert_file, calico_etcd_key_file, calico_etcd_endpoints" +  when: (calico_etcd_cert_dir is defined or calico_etcd_ca_cert_file is defined or calico_etcd_cert_file is defined or calico_etcd_key_file is defined or calico_etcd_endpoints is defined) and not (calico_etcd_cert_dir is defined and calico_etcd_ca_cert_file is defined and calico_etcd_cert_file is defined and calico_etcd_key_file is defined and calico_etcd_endpoints is defined) -- name: Calico Node | Assure the calico certs have been generated +- name: Calico Node | Generate certs if not provided +  include: gen_certs.yml +  when: item is not defined +  with_items: +    - calico_etcd_ca_cert_file +    - calico_etcd_cert_file +    - calico_etcd_key_file +    - calico_etcd_endpoints +    - calico_etcd_cert_dir + +- name: Calico Node | Error if no certs set. +  fail: +    msg: "Invalid etcd configuration for calico." +  when: item is not defined or item == '' +  with_items: +    - calico_etcd_ca_cert_file +    - calico_etcd_cert_file +    - calico_etcd_key_file +    - calico_etcd_endpoints + +- name: Calico Node | Assure the calico certs are present    stat:      path: "{{ item }}"    with_items: -  - "{{ calico_etcd_ca_cert_file }}" -  - "{{ calico_etcd_cert_file}}" -  - "{{ calico_etcd_key_file }}" +    - "{{ calico_etcd_ca_cert_file }}" +    - "{{ calico_etcd_cert_file}}" +    - "{{ calico_etcd_key_file }}"  - name: Calico Node | Configure Calico service unit file    template: diff --git a/roles/calico/templates/10-calico.conf.j2 b/roles/calico/templates/10-calico.conf.j2 index 3c8c6b046..1ec569cff 100644 --- a/roles/calico/templates/10-calico.conf.j2 +++ b/roles/calico/templates/10-calico.conf.j2 @@ -4,7 +4,7 @@    "ipam": {      "type": "calico-ipam"    }, -  "etcd_endpoints": "{{ etcd_endpoints }}", +  "etcd_endpoints": "{{ calico_etcd_endpoints }}",    "etcd_key_file": "{{ calico_etcd_key_file }}",    "etcd_cert_file": "{{ calico_etcd_cert_file }}",    "etcd_ca_cert_file": "{{ calico_etcd_ca_cert_file }}", diff --git a/roles/calico/templates/calico.service.j2 b/roles/calico/templates/calico.service.j2 index 719d7ba0d..302c5f34e 100644 --- a/roles/calico/templates/calico.service.j2 +++ b/roles/calico/templates/calico.service.j2 @@ -13,8 +13,8 @@ ExecStart=/usr/bin/docker run --net=host --privileged \   -e CALICO_IPV4POOL_IPIP={{ calico_ipv4pool_ipip }} \   -e CALICO_IPV4POOL_CIDR={{ calico_ipv4pool_cidr }} \   -e FELIX_IPV6SUPPORT=false \ - -e ETCD_ENDPOINTS={{ etcd_endpoints }} \ - -v /etc/origin/calico:/etc/origin/calico \ + -e ETCD_ENDPOINTS={{ calico_etcd_endpoints }} \ + -v {{ calico_etcd_cert_dir }}:{{ calico_etcd_cert_dir }}  \   -e ETCD_CA_CERT_FILE={{ calico_etcd_ca_cert_file }} \   -e ETCD_CERT_FILE={{ calico_etcd_cert_file }} \   -e ETCD_KEY_FILE={{ calico_etcd_key_file }} \ diff --git a/roles/calico/templates/calicoctl.cfg.j2 b/roles/calico/templates/calicoctl.cfg.j2 index 722385ed8..a00ea27dc 100644 --- a/roles/calico/templates/calicoctl.cfg.j2 +++ b/roles/calico/templates/calicoctl.cfg.j2 @@ -3,7 +3,7 @@ kind: calicoApiConfig  metadata:  spec:    datastoreType: "etcdv2" -  etcdEndpoints: "{{ etcd_endpoints }}" +  etcdEndpoints: "{{ calico_etcd_endpoints }}"    etcdKeyFile: "{{ calico_etcd_key_file }}"    etcdCertFile: "{{ calico_etcd_cert_file }}"    etcdCaCertFile: "{{ calico_etcd_ca_cert_file }}" diff --git a/roles/calico_master/templates/calico-policy-controller.yml.j2 b/roles/calico_master/templates/calico-policy-controller.yml.j2 index 3fb1abf0d..1b87758ce 100644 --- a/roles/calico_master/templates/calico-policy-controller.yml.j2 +++ b/roles/calico_master/templates/calico-policy-controller.yml.j2 @@ -78,7 +78,7 @@ spec:            env:              # The location of the Calico etcd cluster.              - name: ETCD_ENDPOINTS -              value: {{ etcd_endpoints }} +              value: {{ calico_etcd_endpoints }}              # Location of the CA certificate for etcd.              - name: ETCD_CA_CERT_FILE                value: {{ calico_etcd_ca_cert_file }} @@ -96,10 +96,10 @@ spec:            volumeMounts:              # Mount in the etcd TLS secrets.              - name: certs -              mountPath: /etc/origin/calico +              mountPath: {{ calico_etcd_cert_dir }}        volumes:          # Mount in the etcd TLS secrets.          - name: certs            hostPath: -            path: /etc/origin/calico +            path: {{ calico_etcd_cert_dir }} diff --git a/roles/docker/tasks/package_docker.yml b/roles/docker/tasks/package_docker.yml index e101730d2..c82d8659a 100644 --- a/roles/docker/tasks/package_docker.yml +++ b/roles/docker/tasks/package_docker.yml @@ -102,6 +102,21 @@    notify:    - restart docker +- stat: path=/etc/sysconfig/docker-network +  register: sysconfig_docker_network_check + +- name: Configure Docker Network OPTIONS +  lineinfile: +    dest: /etc/sysconfig/docker-network +    regexp: '^DOCKER_NETWORK_OPTIONS=.*$' +    line: "DOCKER_NETWORK_OPTIONS='\ +      {% if openshift.node is defined and openshift.node.sdn_mtu is defined %} --mtu={{ openshift.node.sdn_mtu }}{% endif %}'" +  when: +  - sysconfig_docker_network_check.stat.isreg is defined +  - sysconfig_docker_network_check.stat.isreg +  notify: +  - restart docker +  - name: Start the Docker service    systemd:      name: docker diff --git a/roles/openshift_ca/tasks/main.yml b/roles/openshift_ca/tasks/main.yml index c7b906949..b9a7ec32f 100644 --- a/roles/openshift_ca/tasks/main.yml +++ b/roles/openshift_ca/tasks/main.yml @@ -108,6 +108,38 @@    delegate_to: "{{ openshift_ca_host }}"    run_once: true +- name: Test local loopback context +  command: > +    {{ hostvars[openshift_ca_host].openshift.common.client_binary }} config view +    --config={{ openshift_master_loopback_config }} +  changed_when: false +  register: loopback_config +  delegate_to: "{{ openshift_ca_host }}" +  run_once: true + +- name: Generate the loopback master client config +  command: > +    {{ hostvars[openshift_ca_host].openshift.common.client_binary }} adm create-api-client-config +      {% for named_ca_certificate in openshift.master.named_certificates | default([]) | oo_collect('cafile') %} +      --certificate-authority {{ named_ca_certificate }} +      {% endfor %} +      --certificate-authority={{ openshift_ca_cert }} +      --client-dir={{ openshift_ca_config_dir }} +      --groups=system:masters,system:openshift-master +      --master={{ hostvars[openshift_ca_host].openshift.master.loopback_api_url }} +      --public-master={{ hostvars[openshift_ca_host].openshift.master.loopback_api_url }} +      --signer-cert={{ openshift_ca_cert }} +      --signer-key={{ openshift_ca_key }} +      --signer-serial={{ openshift_ca_serial }} +      --user=system:openshift-master +      --basename=openshift-master +      {% if openshift_version | oo_version_gte_3_5_or_1_5(openshift.common.deployment_type) | bool %} +      --expire-days={{ openshift_master_cert_expire_days }} +      {% endif %} +  when: loopback_context_string not in loopback_config.stdout +  delegate_to: "{{ openshift_ca_host }}" +  run_once: true +  - name: Restore original serviceaccount keys    copy:      src: "{{ item }}.keep" diff --git a/roles/openshift_ca/vars/main.yml b/roles/openshift_ca/vars/main.yml index a32e385ec..d04c1766d 100644 --- a/roles/openshift_ca/vars/main.yml +++ b/roles/openshift_ca/vars/main.yml @@ -4,3 +4,6 @@ openshift_ca_cert: "{{ openshift_ca_config_dir }}/ca.crt"  openshift_ca_key: "{{ openshift_ca_config_dir }}/ca.key"  openshift_ca_serial: "{{ openshift_ca_config_dir }}/ca.serial.txt"  openshift_version: "{{ openshift_pkg_version | default('') }}" + +openshift_master_loopback_config: "{{ openshift_ca_config_dir }}/openshift-master.kubeconfig" +loopback_context_string: "current-context: {{ openshift.master.loopback_context_name }}" diff --git a/roles/openshift_docker_facts/tasks/main.yml b/roles/openshift_docker_facts/tasks/main.yml index 350512452..95e94171d 100644 --- a/roles/openshift_docker_facts/tasks/main.yml +++ b/roles/openshift_docker_facts/tasks/main.yml @@ -17,6 +17,9 @@        hosted_registry_insecure: "{{ openshift_docker_hosted_registry_insecure | default(openshift.docker.hosted_registry_insecure | default(False)) }}"        hosted_registry_network: "{{ openshift_docker_hosted_registry_network | default(None) }}"        use_system_container: "{{ openshift_docker_use_system_container | default(False) }}" +  - role: node +    local_facts: +      sdn_mtu: "{{ openshift_node_sdn_mtu | default(None) }}"  - set_fact:      docker_additional_registries: "{{ openshift.docker.additional_registries diff --git a/roles/openshift_health_checker/library/aos_version.py b/roles/openshift_health_checker/library/aos_version.py index 4460ec324..4c205e48c 100755 --- a/roles/openshift_health_checker/library/aos_version.py +++ b/roles/openshift_health_checker/library/aos_version.py @@ -16,8 +16,6 @@ of release availability already. Without duplicating all that, we would  like the user to have a helpful error message if we detect things will  not work out right. Note that if openshift_release is not specified in  the inventory, the version comparison checks just pass. - -TODO: fail gracefully on non-yum systems (dnf in Fedora)  '''  from ansible.module_utils.basic import AnsibleModule @@ -26,7 +24,7 @@ IMPORT_EXCEPTION = None  try:      import yum  # pylint: disable=import-error  except ImportError as err: -    IMPORT_EXCEPTION = err  # in tox test env, yum import fails +    IMPORT_EXCEPTION = err  class AosVersionException(Exception): @@ -37,12 +35,10 @@ class AosVersionException(Exception):  def main(): -    '''Entrypoint for this Ansible module''' +    """Entrypoint for this Ansible module"""      module = AnsibleModule(          argument_spec=dict( -            requested_openshift_release=dict(type="str", default=''), -            openshift_deployment_type=dict(required=True), -            rpm_prefix=dict(required=True),  # atomic-openshift, origin, ...? +            package_list=dict(type="list", required=True),          ),          supports_check_mode=True      ) @@ -51,32 +47,37 @@ def main():          module.fail_json(msg="aos_version module could not import yum: %s" % IMPORT_EXCEPTION)      # determine the packages we will look for -    rpm_prefix = module.params['rpm_prefix'] -    if not rpm_prefix: -        module.fail_json(msg="rpm_prefix must not be empty") -    expected_pkgs = set([ -        rpm_prefix, -        rpm_prefix + '-master', -        rpm_prefix + '-node', -    ]) - -    # determine what level of precision the user specified for the openshift version. -    # should look like a version string with possibly many segments e.g. "3.4.1": -    requested_openshift_release = module.params['requested_openshift_release'] +    package_list = module.params['package_list'] +    if not package_list: +        module.fail_json(msg="package_list must not be empty") + +    # generate set with only the names of expected packages +    expected_pkg_names = [p["name"] for p in package_list] + +    # gather packages that require a multi_minor_release check +    multi_minor_pkgs = [p for p in package_list if p["check_multi"]] + +    # generate list of packages with a specified (non-empty) version +    # should look like a version string with possibly many segments e.g. "3.4.1" +    versioned_pkgs = [p for p in package_list if p["version"]]      # get the list of packages available and complain if anything is wrong      try: -        pkgs = _retrieve_available_packages(expected_pkgs) -        if requested_openshift_release: -            _check_precise_version_found(pkgs, expected_pkgs, requested_openshift_release) -            _check_higher_version_found(pkgs, expected_pkgs, requested_openshift_release) -        if module.params['openshift_deployment_type'] in ['openshift-enterprise']: -            _check_multi_minor_release(pkgs, expected_pkgs) +        pkgs = _retrieve_available_packages(expected_pkg_names) +        if versioned_pkgs: +            _check_precise_version_found(pkgs, _to_dict(versioned_pkgs)) +            _check_higher_version_found(pkgs, _to_dict(versioned_pkgs)) +        if multi_minor_pkgs: +            _check_multi_minor_release(pkgs, _to_dict(multi_minor_pkgs))      except AosVersionException as excinfo:          module.fail_json(msg=str(excinfo))      module.exit_json(changed=False) +def _to_dict(pkg_list): +    return {pkg["name"]: pkg for pkg in pkg_list} + +  def _retrieve_available_packages(expected_pkgs):      # search for package versions available for openshift pkgs      yb = yum.YumBase()  # pylint: disable=invalid-name @@ -104,56 +105,60 @@ def _retrieve_available_packages(expected_pkgs):  class PreciseVersionNotFound(AosVersionException): -    '''Exception for reporting packages not available at given release''' -    def __init__(self, requested_release, not_found): -        msg = ['Not all of the required packages are available at requested version %s:' % requested_release] -        msg += ['  ' + name for name in not_found] +    """Exception for reporting packages not available at given version""" +    def __init__(self, not_found): +        msg = ['Not all of the required packages are available at their requested version'] +        msg += ['{}:{} '.format(pkg["name"], pkg["version"]) for pkg in not_found]          msg += ['Please check your subscriptions and enabled repositories.']          AosVersionException.__init__(self, '\n'.join(msg), not_found) -def _check_precise_version_found(pkgs, expected_pkgs, requested_openshift_release): +def _check_precise_version_found(pkgs, expected_pkgs_dict):      # see if any packages couldn't be found at requested release version      # we would like to verify that the latest available pkgs have however specific a version is given.      # so e.g. if there is a package version 3.4.1.5 the check passes; if only 3.4.0, it fails. -    pkgs_precise_version_found = {} +    pkgs_precise_version_found = set()      for pkg in pkgs: -        if pkg.name not in expected_pkgs: +        if pkg.name not in expected_pkgs_dict:              continue          # does the version match, to the precision requested?          # and, is it strictly greater, at the precision requested? -        match_version = '.'.join(pkg.version.split('.')[:requested_openshift_release.count('.') + 1]) -        if match_version == requested_openshift_release: -            pkgs_precise_version_found[pkg.name] = True +        expected_pkg_version = expected_pkgs_dict[pkg.name]["version"] +        match_version = '.'.join(pkg.version.split('.')[:expected_pkg_version.count('.') + 1]) +        if match_version == expected_pkg_version: +            pkgs_precise_version_found.add(pkg.name)      not_found = [] -    for name in expected_pkgs: +    for name, pkg in expected_pkgs_dict.items():          if name not in pkgs_precise_version_found: -            not_found.append(name) +            not_found.append(pkg)      if not_found: -        raise PreciseVersionNotFound(requested_openshift_release, not_found) +        raise PreciseVersionNotFound(not_found)  class FoundHigherVersion(AosVersionException): -    '''Exception for reporting that a higher version than requested is available''' -    def __init__(self, requested_release, higher_found): +    """Exception for reporting that a higher version than requested is available""" +    def __init__(self, higher_found):          msg = ['Some required package(s) are available at a version', -               'that is higher than requested %s:' % requested_release] +               'that is higher than requested']          msg += ['  ' + name for name in higher_found]          msg += ['This will prevent installing the version you requested.']          msg += ['Please check your enabled repositories or adjust openshift_release.']          AosVersionException.__init__(self, '\n'.join(msg), higher_found) -def _check_higher_version_found(pkgs, expected_pkgs, requested_openshift_release): -    req_release_arr = [int(segment) for segment in requested_openshift_release.split(".")] +def _check_higher_version_found(pkgs, expected_pkgs_dict): +    expected_pkg_names = list(expected_pkgs_dict) +      # see if any packages are available in a version higher than requested      higher_version_for_pkg = {}      for pkg in pkgs: -        if pkg.name not in expected_pkgs: +        if pkg.name not in expected_pkg_names:              continue +        expected_pkg_version = expected_pkgs_dict[pkg.name]["version"] +        req_release_arr = [int(segment) for segment in expected_pkg_version.split(".")]          version = [int(segment) for segment in pkg.version.split(".")]          too_high = version[:len(req_release_arr)] > req_release_arr          higher_than_seen = version > higher_version_for_pkg.get(pkg.name, []) @@ -164,11 +169,11 @@ def _check_higher_version_found(pkgs, expected_pkgs, requested_openshift_release          higher_found = []          for name, version in higher_version_for_pkg.items():              higher_found.append(name + '-' + '.'.join(str(segment) for segment in version)) -        raise FoundHigherVersion(requested_openshift_release, higher_found) +        raise FoundHigherVersion(higher_found)  class FoundMultiRelease(AosVersionException): -    '''Exception for reporting multiple minor releases found for same package''' +    """Exception for reporting multiple minor releases found for same package"""      def __init__(self, multi_found):          msg = ['Multiple minor versions of these packages are available']          msg += ['  ' + name for name in multi_found] @@ -176,18 +181,18 @@ class FoundMultiRelease(AosVersionException):          AosVersionException.__init__(self, '\n'.join(msg), multi_found) -def _check_multi_minor_release(pkgs, expected_pkgs): +def _check_multi_minor_release(pkgs, expected_pkgs_dict):      # see if any packages are available in more than one minor version      pkgs_by_name_version = {}      for pkg in pkgs:          # keep track of x.y (minor release) versions seen          minor_release = '.'.join(pkg.version.split('.')[:2])          if pkg.name not in pkgs_by_name_version: -            pkgs_by_name_version[pkg.name] = {} -        pkgs_by_name_version[pkg.name][minor_release] = True +            pkgs_by_name_version[pkg.name] = set() +        pkgs_by_name_version[pkg.name].add(minor_release)      multi_found = [] -    for name in expected_pkgs: +    for name in expected_pkgs_dict:          if name in pkgs_by_name_version and len(pkgs_by_name_version[name]) > 1:              multi_found.append(name) diff --git a/roles/openshift_health_checker/library/ocutil.py b/roles/openshift_health_checker/library/ocutil.py new file mode 100644 index 000000000..2e60735d6 --- /dev/null +++ b/roles/openshift_health_checker/library/ocutil.py @@ -0,0 +1,74 @@ +#!/usr/bin/python +"""Interface to OpenShift oc command""" + +import os +import shlex +import shutil +import subprocess + +from ansible.module_utils.basic import AnsibleModule + + +ADDITIONAL_PATH_LOOKUPS = ['/usr/local/bin', os.path.expanduser('~/bin')] + + +def locate_oc_binary(): +    """Find and return oc binary file""" +    # https://github.com/openshift/openshift-ansible/issues/3410 +    # oc can be in /usr/local/bin in some cases, but that may not +    # be in $PATH due to ansible/sudo +    paths = os.environ.get("PATH", os.defpath).split(os.pathsep) + ADDITIONAL_PATH_LOOKUPS + +    oc_binary = 'oc' + +    # Use shutil.which if it is available, otherwise fallback to a naive path search +    try: +        which_result = shutil.which(oc_binary, path=os.pathsep.join(paths)) +        if which_result is not None: +            oc_binary = which_result +    except AttributeError: +        for path in paths: +            if os.path.exists(os.path.join(path, oc_binary)): +                oc_binary = os.path.join(path, oc_binary) +                break + +    return oc_binary + + +def main(): +    """Module that executes commands on a remote OpenShift cluster""" + +    module = AnsibleModule( +        argument_spec=dict( +            namespace=dict(type="str", required=True), +            config_file=dict(type="str", required=True), +            cmd=dict(type="str", required=True), +            extra_args=dict(type="list", default=[]), +        ), +    ) + +    cmd = [ +        locate_oc_binary(), +        '--config', module.params["config_file"], +        '-n', module.params["namespace"], +    ] + shlex.split(module.params["cmd"]) + +    failed = True +    try: +        cmd_result = subprocess.check_output(list(cmd), stderr=subprocess.STDOUT) +        failed = False +    except subprocess.CalledProcessError as exc: +        cmd_result = '[rc {}] {}\n{}'.format(exc.returncode, ' '.join(exc.cmd), exc.output) +    except OSError as exc: +        # we get this when 'oc' is not there +        cmd_result = str(exc) + +    module.exit_json( +        changed=False, +        failed=failed, +        result=cmd_result, +    ) + + +if __name__ == '__main__': +    main() diff --git a/roles/openshift_health_checker/library/rpm_version.py b/roles/openshift_health_checker/library/rpm_version.py new file mode 100644 index 000000000..8ea223055 --- /dev/null +++ b/roles/openshift_health_checker/library/rpm_version.py @@ -0,0 +1,127 @@ +#!/usr/bin/python +""" +Ansible module for rpm-based systems determining existing package version information in a host. +""" + +from ansible.module_utils.basic import AnsibleModule + +IMPORT_EXCEPTION = None +try: +    import rpm  # pylint: disable=import-error +except ImportError as err: +    IMPORT_EXCEPTION = err  # in tox test env, rpm import fails + + +class RpmVersionException(Exception): +    """Base exception class for package version problems""" +    def __init__(self, message, problem_pkgs=None): +        Exception.__init__(self, message) +        self.problem_pkgs = problem_pkgs + + +def main(): +    """Entrypoint for this Ansible module""" +    module = AnsibleModule( +        argument_spec=dict( +            package_list=dict(type="list", required=True), +        ), +        supports_check_mode=True +    ) + +    if IMPORT_EXCEPTION: +        module.fail_json(msg="rpm_version module could not import rpm: %s" % IMPORT_EXCEPTION) + +    # determine the packages we will look for +    pkg_list = module.params['package_list'] +    if not pkg_list: +        module.fail_json(msg="package_list must not be empty") + +    # get list of packages available and complain if any +    # of them are missing or if any errors occur +    try: +        pkg_versions = _retrieve_expected_pkg_versions(_to_dict(pkg_list)) +        _check_pkg_versions(pkg_versions, _to_dict(pkg_list)) +    except RpmVersionException as excinfo: +        module.fail_json(msg=str(excinfo)) +    module.exit_json(changed=False) + + +def _to_dict(pkg_list): +    return {pkg["name"]: pkg for pkg in pkg_list} + + +def _retrieve_expected_pkg_versions(expected_pkgs_dict): +    """Search for installed packages matching given pkg names +    and versions. Returns a dictionary: {pkg_name: [versions]}""" + +    transaction = rpm.TransactionSet() +    pkgs = {} + +    for pkg_name in expected_pkgs_dict: +        matched_pkgs = transaction.dbMatch("name", pkg_name) +        if not matched_pkgs: +            continue + +        for header in matched_pkgs: +            if header['name'] == pkg_name: +                if pkg_name not in pkgs: +                    pkgs[pkg_name] = [] + +                pkgs[pkg_name].append(header['version']) + +    return pkgs + + +def _check_pkg_versions(found_pkgs_dict, expected_pkgs_dict): +    invalid_pkg_versions = {} +    not_found_pkgs = [] + +    for pkg_name, pkg in expected_pkgs_dict.items(): +        if not found_pkgs_dict.get(pkg_name): +            not_found_pkgs.append(pkg_name) +            continue + +        found_versions = [_parse_version(version) for version in found_pkgs_dict[pkg_name]] +        expected_version = _parse_version(pkg["version"]) +        if expected_version not in found_versions: +            invalid_pkg_versions[pkg_name] = { +                "found_versions": found_versions, +                "required_version": expected_version, +            } + +    if not_found_pkgs: +        raise RpmVersionException( +            '\n'.join([ +                "The following packages were not found to be installed: {}".format('\n    '.join([ +                    "{}".format(pkg) +                    for pkg in not_found_pkgs +                ])) +            ]), +            not_found_pkgs, +        ) + +    if invalid_pkg_versions: +        raise RpmVersionException( +            '\n    '.join([ +                "The following packages were found to be installed with an incorrect version: {}".format('\n'.join([ +                    "    \n{}\n    Required version: {}\n    Found versions: {}".format( +                        pkg_name, +                        pkg["required_version"], +                        ', '.join([version for version in pkg["found_versions"]])) +                    for pkg_name, pkg in invalid_pkg_versions.items() +                ])) +            ]), +            invalid_pkg_versions, +        ) + + +def _parse_version(version_str): +    segs = version_str.split('.') +    if not segs or len(segs) <= 2: +        return version_str + +    return '.'.join(segs[0:2]) + + +if __name__ == '__main__': +    main() diff --git a/roles/openshift_health_checker/openshift_checks/__init__.py b/roles/openshift_health_checker/openshift_checks/__init__.py index be63d864a..5c9949ced 100644 --- a/roles/openshift_health_checker/openshift_checks/__init__.py +++ b/roles/openshift_health_checker/openshift_checks/__init__.py @@ -66,16 +66,26 @@ class OpenShiftCheck(object):  LOADER_EXCLUDES = (      "__init__.py",      "mixins.py", +    "logging.py",  ) -def load_checks(): +def load_checks(path=None, subpkg=""):      """Dynamically import all check modules for the side effect of registering checks.""" -    return [ -        import_module(__package__ + "." + name[:-3]) -        for name in os.listdir(os.path.dirname(__file__)) -        if name.endswith(".py") and name not in LOADER_EXCLUDES -    ] +    if path is None: +        path = os.path.dirname(__file__) + +    modules = [] + +    for name in os.listdir(path): +        if os.path.isdir(os.path.join(path, name)): +            modules = modules + load_checks(os.path.join(path, name), subpkg + "." + name) +            continue + +        if name.endswith(".py") and name not in LOADER_EXCLUDES: +            modules.append(import_module(__package__ + subpkg + "." + name[:-3])) + +    return modules  def get_var(task_vars, *keys, **kwargs): diff --git a/roles/openshift_health_checker/openshift_checks/logging/__init__.py b/roles/openshift_health_checker/openshift_checks/logging/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/logging/__init__.py diff --git a/roles/openshift_health_checker/openshift_checks/logging/curator.py b/roles/openshift_health_checker/openshift_checks/logging/curator.py new file mode 100644 index 000000000..c9fc59896 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/logging/curator.py @@ -0,0 +1,61 @@ +""" +Module for performing checks on an Curator logging deployment +""" + +from openshift_checks import get_var +from openshift_checks.logging.logging import LoggingCheck + + +class Curator(LoggingCheck): +    """Module that checks an integrated logging Curator deployment""" + +    name = "curator" +    tags = ["health", "logging"] + +    logging_namespace = None + +    def run(self, tmp, task_vars): +        """Check various things and gather errors. Returns: result as hash""" + +        self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging") +        curator_pods, error = super(Curator, self).get_pods_for_component( +            self.module_executor, +            self.logging_namespace, +            "curator", +            task_vars +        ) +        if error: +            return {"failed": True, "changed": False, "msg": error} +        check_error = self.check_curator(curator_pods) + +        if check_error: +            msg = ("The following Curator deployment issue was found:" +                   "\n-------\n" +                   "{}".format(check_error)) +            return {"failed": True, "changed": False, "msg": msg} + +        # TODO(lmeyer): run it all again for the ops cluster +        return {"failed": False, "changed": False, "msg": 'No problems found with Curator deployment.'} + +    def check_curator(self, pods): +        """Check to see if curator is up and working. Returns: error string""" +        if not pods: +            return ( +                "There are no Curator pods for the logging stack,\n" +                "so nothing will prune Elasticsearch indexes.\n" +                "Is Curator correctly deployed?" +            ) + +        not_running = super(Curator, self).not_running_pods(pods) +        if len(not_running) == len(pods): +            return ( +                "The Curator pod is not currently in a running state,\n" +                "so Elasticsearch indexes may increase without bound." +            ) +        if len(pods) - len(not_running) > 1: +            return ( +                "There is more than one Curator pod running. This should not normally happen.\n" +                "Although this doesn't cause any problems, you may want to investigate." +            ) + +        return None diff --git a/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py new file mode 100644 index 000000000..01cb35b81 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/logging/elasticsearch.py @@ -0,0 +1,217 @@ +""" +Module for performing checks on an Elasticsearch logging deployment +""" + +import json +import re + +from openshift_checks import get_var +from openshift_checks.logging.logging import LoggingCheck + + +class Elasticsearch(LoggingCheck): +    """Module that checks an integrated logging Elasticsearch deployment""" + +    name = "elasticsearch" +    tags = ["health", "logging"] + +    logging_namespace = None + +    def run(self, tmp, task_vars): +        """Check various things and gather errors. Returns: result as hash""" + +        self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging") +        es_pods, error = super(Elasticsearch, self).get_pods_for_component( +            self.execute_module, +            self.logging_namespace, +            "es", +            task_vars, +        ) +        if error: +            return {"failed": True, "changed": False, "msg": error} +        check_error = self.check_elasticsearch(es_pods, task_vars) + +        if check_error: +            msg = ("The following Elasticsearch deployment issue was found:" +                   "\n-------\n" +                   "{}".format(check_error)) +            return {"failed": True, "changed": False, "msg": msg} + +        # TODO(lmeyer): run it all again for the ops cluster +        return {"failed": False, "changed": False, "msg": 'No problems found with Elasticsearch deployment.'} + +    def _not_running_elasticsearch_pods(self, es_pods): +        """Returns: list of running pods, list of errors about non-running pods""" +        not_running = super(Elasticsearch, self).not_running_pods(es_pods) +        if not_running: +            return not_running, [( +                'The following Elasticsearch pods are not running:\n' +                '{pods}' +                'These pods will not aggregate logs from their nodes.' +            ).format(pods=''.join( +                "  {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None')) +                for pod in not_running +            ))] +        return not_running, [] + +    def check_elasticsearch(self, es_pods, task_vars): +        """Various checks for elasticsearch. Returns: error string""" +        not_running_pods, error_msgs = self._not_running_elasticsearch_pods(es_pods) +        running_pods = [pod for pod in es_pods if pod not in not_running_pods] +        pods_by_name = { +            pod['metadata']['name']: pod for pod in running_pods +            # Filter out pods that are not members of a DC +            if pod['metadata'].get('labels', {}).get('deploymentconfig') +        } +        if not pods_by_name: +            return 'No logging Elasticsearch pods were found. Is logging deployed?' +        error_msgs += self._check_elasticsearch_masters(pods_by_name, task_vars) +        error_msgs += self._check_elasticsearch_node_list(pods_by_name, task_vars) +        error_msgs += self._check_es_cluster_health(pods_by_name, task_vars) +        error_msgs += self._check_elasticsearch_diskspace(pods_by_name, task_vars) +        return '\n'.join(error_msgs) + +    @staticmethod +    def _build_es_curl_cmd(pod_name, url): +        base = "exec {name} -- curl -s --cert {base}cert --key {base}key --cacert {base}ca -XGET '{url}'" +        return base.format(base="/etc/elasticsearch/secret/admin-", name=pod_name, url=url) + +    def _check_elasticsearch_masters(self, pods_by_name, task_vars): +        """Check that Elasticsearch masters are sane. Returns: list of error strings""" +        es_master_names = set() +        error_msgs = [] +        for pod_name in pods_by_name.keys(): +            # Compare what each ES node reports as master and compare for split brain +            get_master_cmd = self._build_es_curl_cmd(pod_name, "https://localhost:9200/_cat/master") +            master_name_str = self._exec_oc(get_master_cmd, [], task_vars) +            master_names = (master_name_str or '').split(' ') +            if len(master_names) > 1: +                es_master_names.add(master_names[1]) +            else: +                error_msgs.append( +                    'No master? Elasticsearch {pod} returned bad string when asked master name:\n' +                    '  {response}'.format(pod=pod_name, response=master_name_str) +                ) + +        if not es_master_names: +            error_msgs.append('No logging Elasticsearch masters were found. Is logging deployed?') +            return '\n'.join(error_msgs) + +        if len(es_master_names) > 1: +            error_msgs.append( +                'Found multiple Elasticsearch masters according to the pods:\n' +                '{master_list}\n' +                'This implies that the masters have "split brain" and are not correctly\n' +                'replicating data for the logging cluster. Log loss is likely to occur.' +                .format(master_list='\n'.join('  ' + master for master in es_master_names)) +            ) + +        return error_msgs + +    def _check_elasticsearch_node_list(self, pods_by_name, task_vars): +        """Check that reported ES masters are accounted for by pods. Returns: list of error strings""" + +        if not pods_by_name: +            return ['No logging Elasticsearch masters were found. Is logging deployed?'] + +        # get ES cluster nodes +        node_cmd = self._build_es_curl_cmd(list(pods_by_name.keys())[0], 'https://localhost:9200/_nodes') +        cluster_node_data = self._exec_oc(node_cmd, [], task_vars) +        try: +            cluster_nodes = json.loads(cluster_node_data)['nodes'] +        except (ValueError, KeyError): +            return [ +                'Failed to query Elasticsearch for the list of ES nodes. The output was:\n' + +                cluster_node_data +            ] + +        # Try to match all ES-reported node hosts to known pods. +        error_msgs = [] +        for node in cluster_nodes.values(): +            # Note that with 1.4/3.4 the pod IP may be used as the master name +            if not any(node['host'] in (pod_name, pod['status'].get('podIP')) +                       for pod_name, pod in pods_by_name.items()): +                error_msgs.append( +                    'The Elasticsearch cluster reports a member node "{node}"\n' +                    'that does not correspond to any known ES pod.'.format(node=node['host']) +                ) + +        return error_msgs + +    def _check_es_cluster_health(self, pods_by_name, task_vars): +        """Exec into the elasticsearch pods and check the cluster health. Returns: list of errors""" +        error_msgs = [] +        for pod_name in pods_by_name.keys(): +            cluster_health_cmd = self._build_es_curl_cmd(pod_name, 'https://localhost:9200/_cluster/health?pretty=true') +            cluster_health_data = self._exec_oc(cluster_health_cmd, [], task_vars) +            try: +                health_res = json.loads(cluster_health_data) +                if not health_res or not health_res.get('status'): +                    raise ValueError() +            except ValueError: +                error_msgs.append( +                    'Could not retrieve cluster health status from logging ES pod "{pod}".\n' +                    'Response was:\n{output}'.format(pod=pod_name, output=cluster_health_data) +                ) +                continue + +            if health_res['status'] not in ['green', 'yellow']: +                error_msgs.append( +                    'Elasticsearch cluster health status is RED according to pod "{}"'.format(pod_name) +                ) + +        return error_msgs + +    def _check_elasticsearch_diskspace(self, pods_by_name, task_vars): +        """ +        Exec into an ES pod and query the diskspace on the persistent volume. +        Returns: list of errors +        """ +        error_msgs = [] +        for pod_name in pods_by_name.keys(): +            df_cmd = 'exec {} -- df --output=ipcent,pcent /elasticsearch/persistent'.format(pod_name) +            disk_output = self._exec_oc(df_cmd, [], task_vars) +            lines = disk_output.splitlines() +            # expecting one header looking like 'IUse% Use%' and one body line +            body_re = r'\s*(\d+)%?\s+(\d+)%?\s*$' +            if len(lines) != 2 or len(lines[0].split()) != 2 or not re.match(body_re, lines[1]): +                error_msgs.append( +                    'Could not retrieve storage usage from logging ES pod "{pod}".\n' +                    'Response to `df` command was:\n{output}'.format(pod=pod_name, output=disk_output) +                ) +                continue +            inode_pct, disk_pct = re.match(body_re, lines[1]).groups() + +            inode_pct_thresh = get_var(task_vars, 'openshift_check_efk_es_inode_pct', default='90') +            if int(inode_pct) >= int(inode_pct_thresh): +                error_msgs.append( +                    'Inode percent usage on the storage volume for logging ES pod "{pod}"\n' +                    '  is {pct}, greater than threshold {limit}.\n' +                    '  Note: threshold can be specified in inventory with {param}'.format( +                        pod=pod_name, +                        pct=str(inode_pct), +                        limit=str(inode_pct_thresh), +                        param='openshift_check_efk_es_inode_pct', +                    )) +            disk_pct_thresh = get_var(task_vars, 'openshift_check_efk_es_storage_pct', default='80') +            if int(disk_pct) >= int(disk_pct_thresh): +                error_msgs.append( +                    'Disk percent usage on the storage volume for logging ES pod "{pod}"\n' +                    '  is {pct}, greater than threshold {limit}.\n' +                    '  Note: threshold can be specified in inventory with {param}'.format( +                        pod=pod_name, +                        pct=str(disk_pct), +                        limit=str(disk_pct_thresh), +                        param='openshift_check_efk_es_storage_pct', +                    )) + +        return error_msgs + +    def _exec_oc(self, cmd_str, extra_args, task_vars): +        return super(Elasticsearch, self).exec_oc( +            self.execute_module, +            self.logging_namespace, +            cmd_str, +            extra_args, +            task_vars, +        ) diff --git a/roles/openshift_health_checker/openshift_checks/logging/fluentd.py b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py new file mode 100644 index 000000000..627567293 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/logging/fluentd.py @@ -0,0 +1,170 @@ +""" +Module for performing checks on an Fluentd logging deployment +""" + +import json + +from openshift_checks import get_var +from openshift_checks.logging.logging import LoggingCheck + + +class Fluentd(LoggingCheck): +    """Module that checks an integrated logging Fluentd deployment""" +    name = "fluentd" +    tags = ["health", "logging"] + +    logging_namespace = None + +    def run(self, tmp, task_vars): +        """Check various things and gather errors. Returns: result as hash""" + +        self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging") +        fluentd_pods, error = super(Fluentd, self).get_pods_for_component( +            self.execute_module, +            self.logging_namespace, +            "fluentd", +            task_vars, +        ) +        if error: +            return {"failed": True, "changed": False, "msg": error} +        check_error = self.check_fluentd(fluentd_pods, task_vars) + +        if check_error: +            msg = ("The following Fluentd deployment issue was found:" +                   "\n-------\n" +                   "{}".format(check_error)) +            return {"failed": True, "changed": False, "msg": msg} + +        # TODO(lmeyer): run it all again for the ops cluster +        return {"failed": False, "changed": False, "msg": 'No problems found with Fluentd deployment.'} + +    @staticmethod +    def _filter_fluentd_labeled_nodes(nodes_by_name, node_selector): +        """Filter to all nodes with fluentd label. Returns dict(name: node), error string""" +        label, value = node_selector.split('=', 1) +        fluentd_nodes = { +            name: node for name, node in nodes_by_name.items() +            if node['metadata']['labels'].get(label) == value +        } +        if not fluentd_nodes: +            return None, ( +                'There are no nodes with the fluentd label {label}.\n' +                'This means no logs will be aggregated from the nodes.' +            ).format(label=node_selector) +        return fluentd_nodes, None + +    @staticmethod +    def _check_node_labeling(nodes_by_name, fluentd_nodes, node_selector, task_vars): +        """Note if nodes are not labeled as expected. Returns: error string""" +        intended_nodes = get_var(task_vars, 'openshift_logging_fluentd_hosts', default=['--all']) +        if not intended_nodes or '--all' in intended_nodes: +            intended_nodes = nodes_by_name.keys() +        nodes_missing_labels = set(intended_nodes) - set(fluentd_nodes.keys()) +        if nodes_missing_labels: +            return ( +                'The following nodes are supposed to be labeled with {label} but are not:\n' +                '  {nodes}\n' +                'Fluentd will not aggregate logs from these nodes.' +            ).format(label=node_selector, nodes=', '.join(nodes_missing_labels)) +        return None + +    @staticmethod +    def _check_nodes_have_fluentd(pods, fluentd_nodes): +        """Make sure fluentd is on all the labeled nodes. Returns: error string""" +        unmatched_nodes = fluentd_nodes.copy() +        node_names_by_label = { +            node['metadata']['labels']['kubernetes.io/hostname']: name +            for name, node in fluentd_nodes.items() +        } +        node_names_by_internal_ip = { +            address['address']: name +            for name, node in fluentd_nodes.items() +            for address in node['status']['addresses'] +            if address['type'] == "InternalIP" +        } +        for pod in pods: +            for name in [ +                    pod['spec']['nodeName'], +                    node_names_by_internal_ip.get(pod['spec']['nodeName']), +                    node_names_by_label.get(pod.get('spec', {}).get('host')), +            ]: +                unmatched_nodes.pop(name, None) +        if unmatched_nodes: +            return ( +                'The following nodes are supposed to have a Fluentd pod but do not:\n' +                '{nodes}' +                'These nodes will not have their logs aggregated.' +            ).format(nodes=''.join( +                "  {}\n".format(name) +                for name in unmatched_nodes.keys() +            )) +        return None + +    def _check_fluentd_pods_running(self, pods): +        """Make sure all fluentd pods are running. Returns: error string""" +        not_running = super(Fluentd, self).not_running_pods(pods) +        if not_running: +            return ( +                'The following Fluentd pods are supposed to be running but are not:\n' +                '{pods}' +                'These pods will not aggregate logs from their nodes.' +            ).format(pods=''.join( +                "  {} ({})\n".format(pod['metadata']['name'], pod['spec'].get('host', 'None')) +                for pod in not_running +            )) +        return None + +    def check_fluentd(self, pods, task_vars): +        """Verify fluentd is running everywhere. Returns: error string""" + +        node_selector = get_var(task_vars, 'openshift_logging_fluentd_nodeselector', +                                default='logging-infra-fluentd=true') + +        nodes_by_name, error = self.get_nodes_by_name(task_vars) + +        if error: +            return error +        fluentd_nodes, error = self._filter_fluentd_labeled_nodes(nodes_by_name, node_selector) +        if error: +            return error + +        error_msgs = [] +        error = self._check_node_labeling(nodes_by_name, fluentd_nodes, node_selector, task_vars) +        if error: +            error_msgs.append(error) +        error = self._check_nodes_have_fluentd(pods, fluentd_nodes) +        if error: +            error_msgs.append(error) +        error = self._check_fluentd_pods_running(pods) +        if error: +            error_msgs.append(error) + +        # Make sure there are no extra fluentd pods +        if len(pods) > len(fluentd_nodes): +            error_msgs.append( +                'There are more Fluentd pods running than nodes labeled.\n' +                'This may not cause problems with logging but it likely indicates something wrong.' +            ) + +        return '\n'.join(error_msgs) + +    def get_nodes_by_name(self, task_vars): +        """Retrieve all the node definitions. Returns: dict(name: node), error string""" +        nodes_json = self._exec_oc("get nodes -o json", [], task_vars) +        try: +            nodes = json.loads(nodes_json) +        except ValueError:  # no valid json - should not happen +            return None, "Could not obtain a list of nodes to validate fluentd. Output from oc get:\n" + nodes_json +        if not nodes or not nodes.get('items'):  # also should not happen +            return None, "No nodes appear to be defined according to the API." +        return { +            node['metadata']['name']: node +            for node in nodes['items'] +        }, None + +    def _exec_oc(self, cmd_str, extra_args, task_vars): +        return super(Fluentd, self).exec_oc(self.execute_module, +                                            self.logging_namespace, +                                            cmd_str, +                                            extra_args, +                                            task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/logging/kibana.py b/roles/openshift_health_checker/openshift_checks/logging/kibana.py new file mode 100644 index 000000000..442f407b1 --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/logging/kibana.py @@ -0,0 +1,229 @@ +""" +Module for performing checks on a Kibana logging deployment +""" + +import json +import ssl + +try: +    from urllib2 import HTTPError, URLError +    import urllib2 +except ImportError: +    from urllib.error import HTTPError, URLError +    import urllib.request as urllib2 + +from openshift_checks import get_var +from openshift_checks.logging.logging import LoggingCheck + + +class Kibana(LoggingCheck): +    """Module that checks an integrated logging Kibana deployment""" + +    name = "kibana" +    tags = ["health", "logging"] + +    logging_namespace = None + +    def run(self, tmp, task_vars): +        """Check various things and gather errors. Returns: result as hash""" + +        self.logging_namespace = get_var(task_vars, "openshift_logging_namespace", default="logging") +        kibana_pods, error = super(Kibana, self).get_pods_for_component( +            self.execute_module, +            self.logging_namespace, +            "kibana", +            task_vars, +        ) +        if error: +            return {"failed": True, "changed": False, "msg": error} +        check_error = self.check_kibana(kibana_pods) + +        if not check_error: +            check_error = self._check_kibana_route(task_vars) + +        if check_error: +            msg = ("The following Kibana deployment issue was found:" +                   "\n-------\n" +                   "{}".format(check_error)) +            return {"failed": True, "changed": False, "msg": msg} + +        # TODO(lmeyer): run it all again for the ops cluster +        return {"failed": False, "changed": False, "msg": 'No problems found with Kibana deployment.'} + +    def _verify_url_internal(self, url, task_vars): +        """ +        Try to reach a URL from the host. +        Returns: success (bool), reason (for failure) +        """ +        args = dict( +            url=url, +            follow_redirects='none', +            validate_certs='no',  # likely to be signed with internal CA +            # TODO(lmeyer): give users option to validate certs +            status_code=302, +        ) +        result = self.execute_module('uri', args, task_vars) +        if result.get('failed'): +            return result['msg'] +        return None + +    @staticmethod +    def _verify_url_external(url): +        """ +        Try to reach a URL from ansible control host. +        Returns: success (bool), reason (for failure) +        """ +        # This actually checks from the ansible control host, which may or may not +        # really be "external" to the cluster. + +        # Disable SSL cert validation to work around internally signed certs +        ctx = ssl.create_default_context() +        ctx.check_hostname = False  # or setting CERT_NONE is refused +        ctx.verify_mode = ssl.CERT_NONE + +        # Verify that the url is returning a valid response +        try: +            # We only care if the url connects and responds +            return_code = urllib2.urlopen(url, context=ctx).getcode() +        except HTTPError as httperr: +            return httperr.reason +        except URLError as urlerr: +            return str(urlerr) + +        # there appears to be no way to prevent urlopen from following redirects +        if return_code != 200: +            return 'Expected success (200) but got return code {}'.format(int(return_code)) + +        return None + +    def check_kibana(self, pods): +        """Check to see if Kibana is up and working. Returns: error string.""" + +        if not pods: +            return "There are no Kibana pods deployed, so no access to the logging UI." + +        not_running = self.not_running_pods(pods) +        if len(not_running) == len(pods): +            return "No Kibana pod is in a running state, so there is no access to the logging UI." +        elif not_running: +            return ( +                "The following Kibana pods are not currently in a running state:\n" +                "{pods}" +                "However at least one is, so service may not be impacted." +            ).format(pods="".join("  " + pod['metadata']['name'] + "\n" for pod in not_running)) + +        return None + +    def _get_kibana_url(self, task_vars): +        """ +        Get kibana route or report error. +        Returns: url (or empty), reason for failure +        """ + +        # Get logging url +        get_route = self._exec_oc("get route logging-kibana -o json", [], task_vars) +        if not get_route: +            return None, 'no_route_exists' + +        route = json.loads(get_route) + +        # check that the route has been accepted by a router +        ingress = route["status"]["ingress"] +        # ingress can be null if there is no router, or empty if not routed +        if not ingress or not ingress[0]: +            return None, 'route_not_accepted' + +        host = route.get("spec", {}).get("host") +        if not host: +            return None, 'route_missing_host' + +        return 'https://{}/'.format(host), None + +    def _check_kibana_route(self, task_vars): +        """ +        Check to see if kibana route is up and working. +        Returns: error string +        """ +        known_errors = dict( +            no_route_exists=( +                'No route is defined for Kibana in the logging namespace,\n' +                'so the logging stack is not accessible. Is logging deployed?\n' +                'Did something remove the logging-kibana route?' +            ), +            route_not_accepted=( +                'The logging-kibana route is not being routed by any router.\n' +                'Is the router deployed and working?' +            ), +            route_missing_host=( +                'The logging-kibana route has no hostname defined,\n' +                'which should never happen. Did something alter its definition?' +            ), +        ) + +        kibana_url, error = self._get_kibana_url(task_vars) +        if not kibana_url: +            return known_errors.get(error, error) + +        # first, check that kibana is reachable from the master. +        error = self._verify_url_internal(kibana_url, task_vars) +        if error: +            if 'urlopen error [Errno 111] Connection refused' in error: +                error = ( +                    'Failed to connect from this master to Kibana URL {url}\n' +                    'Is kibana running, and is at least one router routing to it?' +                ).format(url=kibana_url) +            elif 'urlopen error [Errno -2] Name or service not known' in error: +                error = ( +                    'Failed to connect from this master to Kibana URL {url}\n' +                    'because the hostname does not resolve.\n' +                    'Is DNS configured for the Kibana hostname?' +                ).format(url=kibana_url) +            elif 'Status code was not' in error: +                error = ( +                    'A request from this master to the Kibana URL {url}\n' +                    'did not return the correct status code (302).\n' +                    'This could mean that Kibana is malfunctioning, the hostname is\n' +                    'resolving incorrectly, or other network issues. The output was:\n' +                    '  {error}' +                ).format(url=kibana_url, error=error) +            return 'Error validating the logging Kibana route:\n' + error + +        # in production we would like the kibana route to work from outside the +        # cluster too; but that may not be the case, so allow disabling just this part. +        if not get_var(task_vars, "openshift_check_efk_kibana_external", default=True): +            return None +        error = self._verify_url_external(kibana_url) +        if error: +            if 'urlopen error [Errno 111] Connection refused' in error: +                error = ( +                    'Failed to connect from the Ansible control host to Kibana URL {url}\n' +                    'Is the router for the Kibana hostname exposed externally?' +                ).format(url=kibana_url) +            elif 'urlopen error [Errno -2] Name or service not known' in error: +                error = ( +                    'Failed to resolve the Kibana hostname in {url}\n' +                    'from the Ansible control host.\n' +                    'Is DNS configured to resolve this Kibana hostname externally?' +                ).format(url=kibana_url) +            elif 'Expected success (200)' in error: +                error = ( +                    'A request to Kibana at {url}\n' +                    'returned the wrong error code:\n' +                    '  {error}\n' +                    'This could mean that Kibana is malfunctioning, the hostname is\n' +                    'resolving incorrectly, or other network issues.' +                ).format(url=kibana_url, error=error) +            error = ( +                'Error validating the logging Kibana route:\n{error}\n' +                'To disable external Kibana route validation, set in your inventory:\n' +                '  openshift_check_efk_kibana_external=False' +            ).format(error=error) +            return error +        return None + +    def _exec_oc(self, cmd_str, extra_args, task_vars): +        return super(Kibana, self).exec_oc(self.execute_module, +                                           self.logging_namespace, +                                           cmd_str, +                                           extra_args, +                                           task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/logging/logging.py b/roles/openshift_health_checker/openshift_checks/logging/logging.py new file mode 100644 index 000000000..05b4d300c --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/logging/logging.py @@ -0,0 +1,96 @@ +""" +Util functions for performing checks on an Elasticsearch, Fluentd, and Kibana stack +""" + +import json +import os + +from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var + + +class LoggingCheck(OpenShiftCheck): +    """Base class for logging component checks""" + +    name = "logging" + +    @classmethod +    def is_active(cls, task_vars): +        return super(LoggingCheck, cls).is_active(task_vars) and cls.is_first_master(task_vars) + +    @staticmethod +    def is_first_master(task_vars): +        """Run only on first master and only when logging is configured. Returns: bool""" +        logging_deployed = get_var(task_vars, "openshift_hosted_logging_deploy", default=True) +        # Note: It would be nice to use membership in oo_first_master group, however for now it +        # seems best to avoid requiring that setup and just check this is the first master. +        hostname = get_var(task_vars, "ansible_ssh_host") or [None] +        masters = get_var(task_vars, "groups", "masters", default=None) or [None] +        return logging_deployed and masters[0] == hostname + +    def run(self, tmp, task_vars): +        pass + +    def get_pods_for_component(self, execute_module, namespace, logging_component, task_vars): +        """Get all pods for a given component. Returns: list of pods for component, error string""" +        pod_output = self.exec_oc( +            execute_module, +            namespace, +            "get pods -l component={} -o json".format(logging_component), +            [], +            task_vars +        ) +        try: +            pods = json.loads(pod_output) +            if not pods or not pods.get('items'): +                raise ValueError() +        except ValueError: +            # successful run but non-parsing data generally means there were no pods in the namespace +            return None, 'There are no pods in the {} namespace. Is logging deployed?'.format(namespace) + +        return pods['items'], None + +    @staticmethod +    def not_running_pods(pods): +        """Returns: list of pods not in a ready and running state""" +        return [ +            pod for pod in pods +            if any( +                container['ready'] is False +                for container in pod['status']['containerStatuses'] +            ) or not any( +                condition['type'] == 'Ready' and condition['status'] == 'True' +                for condition in pod['status']['conditions'] +            ) +        ] + +    @staticmethod +    def exec_oc(execute_module=None, namespace="logging", cmd_str="", extra_args=None, task_vars=None): +        """ +        Execute an 'oc' command in the remote host. +        Returns: output of command and namespace, +        or raises OpenShiftCheckException on error +        """ +        config_base = get_var(task_vars, "openshift", "common", "config_base") +        args = { +            "namespace": namespace, +            "config_file": os.path.join(config_base, "master", "admin.kubeconfig"), +            "cmd": cmd_str, +            "extra_args": list(extra_args) if extra_args else [], +        } + +        result = execute_module("ocutil", args, task_vars) +        if result.get("failed"): +            msg = ( +                'Unexpected error using `oc` to validate the logging stack components.\n' +                'Error executing `oc {cmd}`:\n' +                '{error}' +            ).format(cmd=args['cmd'], error=result['result']) + +            if result['result'] == '[Errno 2] No such file or directory': +                msg = ( +                    "This host is supposed to be a master but does not have the `oc` command where expected.\n" +                    "Has an installation been run on this host yet?" +                ) +            raise OpenShiftCheckException(msg) + +        return result.get("result", "") diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py new file mode 100644 index 000000000..1e45ae3af --- /dev/null +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -0,0 +1,78 @@ +""" +Ansible module for determining if an installed version of Open vSwitch is incompatible with the +currently installed version of OpenShift. +""" + +from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var +from openshift_checks.mixins import NotContainerizedMixin + + +class OvsVersion(NotContainerizedMixin, OpenShiftCheck): +    """Check that packages in a package_list are installed on the host +    and are the correct version as determined by an OpenShift installation. +    """ + +    name = "ovs_version" +    tags = ["health"] + +    openshift_to_ovs_version = { +        "3.6": "2.6", +        "3.5": "2.6", +        "3.4": "2.4", +    } + +    # map major release versions across releases +    # to a common major version +    openshift_major_release_version = { +        "1": "3", +    } + +    @classmethod +    def is_active(cls, task_vars): +        """Skip hosts that do not have package requirements.""" +        group_names = get_var(task_vars, "group_names", default=[]) +        master_or_node = 'masters' in group_names or 'nodes' in group_names +        return super(OvsVersion, cls).is_active(task_vars) and master_or_node + +    def run(self, tmp, task_vars): +        args = { +            "package_list": [ +                { +                    "name": "openvswitch", +                    "version": self.get_required_ovs_version(task_vars), +                }, +            ], +        } +        return self.execute_module("rpm_version", args, task_vars) + +    def get_required_ovs_version(self, task_vars): +        """Return the correct Open vSwitch version for the current OpenShift version""" +        openshift_version = self._get_openshift_version(task_vars) + +        if float(openshift_version) < 3.5: +            return self.openshift_to_ovs_version["3.4"] + +        ovs_version = self.openshift_to_ovs_version.get(str(openshift_version)) +        if ovs_version: +            return self.openshift_to_ovs_version[str(openshift_version)] + +        msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}" +        raise OpenShiftCheckException(msg.format(openshift_version)) + +    def _get_openshift_version(self, task_vars): +        openshift_version = get_var(task_vars, "openshift_image_tag") +        if openshift_version and openshift_version[0] == 'v': +            openshift_version = openshift_version[1:] + +        return self._parse_version(openshift_version) + +    def _parse_version(self, version): +        components = version.split(".") +        if not components or len(components) < 2: +            msg = "An invalid version of OpenShift was found for this host: {}" +            raise OpenShiftCheckException(msg.format(version)) + +        if components[0] in self.openshift_major_release_version: +            components[0] = self.openshift_major_release_version[components[0]] + +        return '.'.join(components[:2]) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 682f6bd40..2e737818b 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -1,5 +1,5 @@  # pylint: disable=missing-docstring -from openshift_checks import OpenShiftCheck, get_var +from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var  from openshift_checks.mixins import NotContainerizedMixin @@ -9,6 +9,25 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):      name = "package_version"      tags = ["preflight"] +    openshift_to_ovs_version = { +        "3.6": "2.6", +        "3.5": "2.6", +        "3.4": "2.4", +    } + +    openshift_to_docker_version = { +        "3.1": "1.8", +        "3.2": "1.10", +        "3.3": "1.10", +        "3.4": "1.12", +    } + +    # map major release versions across releases +    # to a common major version +    openshift_major_release_version = { +        "1": "3", +    } +      @classmethod      def is_active(cls, task_vars):          """Skip hosts that do not have package requirements.""" @@ -17,9 +36,90 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck):          return super(PackageVersion, cls).is_active(task_vars) and master_or_node      def run(self, tmp, task_vars): +        rpm_prefix = get_var(task_vars, "openshift", "common", "service_type") +        openshift_release = get_var(task_vars, "openshift_release", default='') +        deployment_type = get_var(task_vars, "openshift_deployment_type") +        check_multi_minor_release = deployment_type in ['openshift-enterprise'] +          args = { -            "requested_openshift_release": get_var(task_vars, "openshift_release", default=''), -            "openshift_deployment_type": get_var(task_vars, "openshift_deployment_type"), -            "rpm_prefix": get_var(task_vars, "openshift", "common", "service_type"), +            "package_list": [ +                { +                    "name": "openvswitch", +                    "version": self.get_required_ovs_version(task_vars), +                    "check_multi": False, +                }, +                { +                    "name": "docker", +                    "version": self.get_required_docker_version(task_vars), +                    "check_multi": False, +                }, +                { +                    "name": "{}".format(rpm_prefix), +                    "version": openshift_release, +                    "check_multi": check_multi_minor_release, +                }, +                { +                    "name": "{}-master".format(rpm_prefix), +                    "version": openshift_release, +                    "check_multi": check_multi_minor_release, +                }, +                { +                    "name": "{}-node".format(rpm_prefix), +                    "version": openshift_release, +                    "check_multi": check_multi_minor_release, +                }, +            ],          } +          return self.execute_module("aos_version", args, tmp, task_vars) + +    def get_required_ovs_version(self, task_vars): +        """Return the correct Open vSwitch version for the current OpenShift version. +        If the current OpenShift version is >= 3.5, ensure Open vSwitch version 2.6, +        Else ensure Open vSwitch version 2.4""" +        openshift_version = self.get_openshift_version(task_vars) + +        if float(openshift_version) < 3.5: +            return self.openshift_to_ovs_version["3.4"] + +        ovs_version = self.openshift_to_ovs_version.get(str(openshift_version)) +        if ovs_version: +            return ovs_version + +        msg = "There is no recommended version of Open vSwitch for the current version of OpenShift: {}" +        raise OpenShiftCheckException(msg.format(openshift_version)) + +    def get_required_docker_version(self, task_vars): +        """Return the correct Docker version for the current OpenShift version. +        If the OpenShift version is 3.1, ensure Docker version 1.8. +        If the OpenShift version is 3.2 or 3.3, ensure Docker version 1.10. +        If the current OpenShift version is >= 3.4, ensure Docker version 1.12.""" +        openshift_version = self.get_openshift_version(task_vars) + +        if float(openshift_version) >= 3.4: +            return self.openshift_to_docker_version["3.4"] + +        docker_version = self.openshift_to_docker_version.get(str(openshift_version)) +        if docker_version: +            return docker_version + +        msg = "There is no recommended version of Docker for the current version of OpenShift: {}" +        raise OpenShiftCheckException(msg.format(openshift_version)) + +    def get_openshift_version(self, task_vars): +        openshift_version = get_var(task_vars, "openshift_image_tag") +        if openshift_version and openshift_version[0] == 'v': +            openshift_version = openshift_version[1:] + +        return self.parse_version(openshift_version) + +    def parse_version(self, version): +        components = version.split(".") +        if not components or len(components) < 2: +            msg = "An invalid version of OpenShift was found for this host: {}" +            raise OpenShiftCheckException(msg.format(version)) + +        if components[0] in self.openshift_major_release_version: +            components[0] = self.openshift_major_release_version[components[0]] + +        return '.'.join(components[:2]) diff --git a/roles/openshift_health_checker/test/aos_version_test.py b/roles/openshift_health_checker/test/aos_version_test.py index 39c86067a..697805dd2 100644 --- a/roles/openshift_health_checker/test/aos_version_test.py +++ b/roles/openshift_health_checker/test/aos_version_test.py @@ -4,89 +4,118 @@ import aos_version  from collections import namedtuple  Package = namedtuple('Package', ['name', 'version']) -expected_pkgs = set(['spam', 'eggs']) +expected_pkgs = { +    "spam": { +        "name": "spam", +        "version": "3.2.1", +        "check_multi": False, +    }, +    "eggs": { +        "name": "eggs", +        "version": "3.2.1", +        "check_multi": False, +    }, +} -@pytest.mark.parametrize('pkgs, requested_release, expect_not_found', [ +@pytest.mark.parametrize('pkgs, expect_not_found', [      (          [], -        '3.2.1', -        expected_pkgs,  # none found +        { +            "spam": { +                "name": "spam", +                "version": "3.2.1", +                "check_multi": False, +            }, +            "eggs": { +                "name": "eggs", +                "version": "3.2.1", +                "check_multi": False, +            } +        },  # none found      ),      (          [Package('spam', '3.2.1')], -        '3.2', -        ['eggs'],  # completely missing +        { +            "eggs": { +                "name": "eggs", +                "version": "3.2.1", +                "check_multi": False, +            } +        },  # completely missing      ),      (          [Package('spam', '3.2.1'), Package('eggs', '3.3.2')], -        '3.2', -        ['eggs'],  # not the right version +        { +            "eggs": { +                "name": "eggs", +                "version": "3.2.1", +                "check_multi": False, +            } +        },  # not the right version      ),      (          [Package('spam', '3.2.1'), Package('eggs', '3.2.1')], -        '3.2', -        [],  # all found +        {},  # all found      ),      (          [Package('spam', '3.2.1'), Package('eggs', '3.2.1.5')], -        '3.2.1', -        [],  # found with more specific version +        {},  # found with more specific version      ),      (          [Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5')], -        '3.2.1', -        ['spam'],  # eggs found with multiple versions +        { +            "spam": { +                "name": "spam", +                "version": "3.2.1", +                "check_multi": False, +            } +        },  # eggs found with multiple versions      ),  ]) -def test_check_pkgs_for_precise_version(pkgs, requested_release, expect_not_found): +def test_check_pkgs_for_precise_version(pkgs, expect_not_found):      if expect_not_found:          with pytest.raises(aos_version.PreciseVersionNotFound) as e: -            aos_version._check_precise_version_found(pkgs, expected_pkgs, requested_release) -        assert set(expect_not_found) == set(e.value.problem_pkgs) +            aos_version._check_precise_version_found(pkgs, expected_pkgs) + +        assert list(expect_not_found.values()) == e.value.problem_pkgs      else: -        aos_version._check_precise_version_found(pkgs, expected_pkgs, requested_release) +        aos_version._check_precise_version_found(pkgs, expected_pkgs) -@pytest.mark.parametrize('pkgs, requested_release, expect_higher', [ +@pytest.mark.parametrize('pkgs, expect_higher', [      (          [], -        '3.2.1',          [],      ),      ( -        [Package('spam', '3.2.1')], -        '3.2', +        [Package('spam', '3.2.1.9')],          [],  # more precise but not strictly higher      ),      (          [Package('spam', '3.3')], -        '3.2.1',          ['spam-3.3'],  # lower precision, but higher      ),      (          [Package('spam', '3.2.1'), Package('eggs', '3.3.2')], -        '3.2',          ['eggs-3.3.2'],  # one too high      ),      (          [Package('eggs', '1.2.3'), Package('eggs', '3.2.1.5'), Package('eggs', '3.4')], -        '3.2.1',          ['eggs-3.4'],  # multiple versions, one is higher      ),      (          [Package('eggs', '3.2.1'), Package('eggs', '3.4'), Package('eggs', '3.3')], -        '3.2.1',          ['eggs-3.4'],  # multiple versions, two are higher      ),  ]) -def test_check_pkgs_for_greater_version(pkgs, requested_release, expect_higher): +def test_check_pkgs_for_greater_version(pkgs, expect_higher):      if expect_higher:          with pytest.raises(aos_version.FoundHigherVersion) as e: -            aos_version._check_higher_version_found(pkgs, expected_pkgs, requested_release) +            aos_version._check_higher_version_found(pkgs, expected_pkgs)          assert set(expect_higher) == set(e.value.problem_pkgs)      else: -        aos_version._check_higher_version_found(pkgs, expected_pkgs, requested_release) +        aos_version._check_higher_version_found(pkgs, expected_pkgs)  @pytest.mark.parametrize('pkgs, expect_to_flag_pkgs', [ diff --git a/roles/openshift_health_checker/test/curator_test.py b/roles/openshift_health_checker/test/curator_test.py new file mode 100644 index 000000000..ae108c96e --- /dev/null +++ b/roles/openshift_health_checker/test/curator_test.py @@ -0,0 +1,68 @@ +import pytest + +from openshift_checks.logging.curator import Curator + + +def canned_curator(exec_oc=None): +    """Create a Curator check object with canned exec_oc method""" +    check = Curator("dummy")  # fails if a module is actually invoked +    if exec_oc: +        check._exec_oc = exec_oc +    return check + + +def assert_error(error, expect_error): +    if expect_error: +        assert error +        assert expect_error in error +    else: +        assert not error + + +plain_curator_pod = { +    "metadata": { +        "labels": {"component": "curator", "deploymentconfig": "logging-curator"}, +        "name": "logging-curator-1", +    }, +    "status": { +        "containerStatuses": [{"ready": True}], +        "conditions": [{"status": "True", "type": "Ready"}], +        "podIP": "10.10.10.10", +    } +} + +not_running_curator_pod = { +    "metadata": { +        "labels": {"component": "curator", "deploymentconfig": "logging-curator"}, +        "name": "logging-curator-2", +    }, +    "status": { +        "containerStatuses": [{"ready": False}], +        "conditions": [{"status": "False", "type": "Ready"}], +        "podIP": "10.10.10.10", +    } +} + + +@pytest.mark.parametrize('pods, expect_error', [ +    ( +        [], +        "no Curator pods", +    ), +    ( +        [plain_curator_pod], +        None, +    ), +    ( +        [not_running_curator_pod], +        "not currently in a running state", +    ), +    ( +        [plain_curator_pod, plain_curator_pod], +        "more than one Curator pod", +    ), +]) +def test_get_curator_pods(pods, expect_error): +    check = canned_curator() +    error = check.check_curator(pods) +    assert_error(error, expect_error) diff --git a/roles/openshift_health_checker/test/elasticsearch_test.py b/roles/openshift_health_checker/test/elasticsearch_test.py new file mode 100644 index 000000000..b9d375d8c --- /dev/null +++ b/roles/openshift_health_checker/test/elasticsearch_test.py @@ -0,0 +1,180 @@ +import pytest +import json + +from openshift_checks.logging.elasticsearch import Elasticsearch + +task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin'))) + + +def canned_elasticsearch(exec_oc=None): +    """Create an Elasticsearch check object with canned exec_oc method""" +    check = Elasticsearch("dummy")  # fails if a module is actually invoked +    if exec_oc: +        check._exec_oc = exec_oc +    return check + + +def assert_error(error, expect_error): +    if expect_error: +        assert error +        assert expect_error in error +    else: +        assert not error + + +plain_es_pod = { +    "metadata": { +        "labels": {"component": "es", "deploymentconfig": "logging-es"}, +        "name": "logging-es", +    }, +    "status": { +        "conditions": [{"status": "True", "type": "Ready"}], +        "containerStatuses": [{"ready": True}], +        "podIP": "10.10.10.10", +    }, +    "_test_master_name_str": "name logging-es", +} + +split_es_pod = { +    "metadata": { +        "labels": {"component": "es", "deploymentconfig": "logging-es-2"}, +        "name": "logging-es-2", +    }, +    "status": { +        "conditions": [{"status": "True", "type": "Ready"}], +        "containerStatuses": [{"ready": True}], +        "podIP": "10.10.10.10", +    }, +    "_test_master_name_str": "name logging-es-2", +} + + +def test_check_elasticsearch(): +    assert 'No logging Elasticsearch pods' in canned_elasticsearch().check_elasticsearch([], {}) + +    # canned oc responses to match so all the checks pass +    def _exec_oc(cmd, args, task_vars): +        if '_cat/master' in cmd: +            return 'name logging-es' +        elif '/_nodes' in cmd: +            return json.dumps(es_node_list) +        elif '_cluster/health' in cmd: +            return '{"status": "green"}' +        elif ' df ' in cmd: +            return 'IUse% Use%\n 3%  4%\n' +        else: +            raise Exception(cmd) + +    assert not canned_elasticsearch(_exec_oc).check_elasticsearch([plain_es_pod], {}) + + +def pods_by_name(pods): +    return {pod['metadata']['name']: pod for pod in pods} + + +@pytest.mark.parametrize('pods, expect_error', [ +    ( +        [], +        'No logging Elasticsearch masters', +    ), +    ( +        [plain_es_pod], +        None, +    ), +    ( +        [plain_es_pod, split_es_pod], +        'Found multiple Elasticsearch masters', +    ), +]) +def test_check_elasticsearch_masters(pods, expect_error): +    test_pods = list(pods) +    check = canned_elasticsearch(lambda cmd, args, task_vars: test_pods.pop(0)['_test_master_name_str']) + +    errors = check._check_elasticsearch_masters(pods_by_name(pods), task_vars_config_base) +    assert_error(''.join(errors), expect_error) + + +es_node_list = { +    'nodes': { +        'random-es-name': { +            'host': 'logging-es', +        }}} + + +@pytest.mark.parametrize('pods, node_list, expect_error', [ +    ( +        [], +        {}, +        'No logging Elasticsearch masters', +    ), +    ( +        [plain_es_pod], +        es_node_list, +        None, +    ), +    ( +        [plain_es_pod], +        {},  # empty list of nodes triggers KeyError +        "Failed to query", +    ), +    ( +        [split_es_pod], +        es_node_list, +        'does not correspond to any known ES pod', +    ), +]) +def test_check_elasticsearch_node_list(pods, node_list, expect_error): +    check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(node_list)) + +    errors = check._check_elasticsearch_node_list(pods_by_name(pods), task_vars_config_base) +    assert_error(''.join(errors), expect_error) + + +@pytest.mark.parametrize('pods, health_data, expect_error', [ +    ( +        [plain_es_pod], +        [{"status": "green"}], +        None, +    ), +    ( +        [plain_es_pod], +        [{"no-status": "should bomb"}], +        'Could not retrieve cluster health status', +    ), +    ( +        [plain_es_pod, split_es_pod], +        [{"status": "green"}, {"status": "red"}], +        'Elasticsearch cluster health status is RED', +    ), +]) +def test_check_elasticsearch_cluster_health(pods, health_data, expect_error): +    test_health_data = list(health_data) +    check = canned_elasticsearch(lambda cmd, args, task_vars: json.dumps(test_health_data.pop(0))) + +    errors = check._check_es_cluster_health(pods_by_name(pods), task_vars_config_base) +    assert_error(''.join(errors), expect_error) + + +@pytest.mark.parametrize('disk_data, expect_error', [ +    ( +        'df: /elasticsearch/persistent: No such file or directory\n', +        'Could not retrieve storage usage', +    ), +    ( +        'IUse% Use%\n 3%  4%\n', +        None, +    ), +    ( +        'IUse% Use%\n 95%  40%\n', +        'Inode percent usage on the storage volume', +    ), +    ( +        'IUse% Use%\n 3%  94%\n', +        'Disk percent usage on the storage volume', +    ), +]) +def test_check_elasticsearch_diskspace(disk_data, expect_error): +    check = canned_elasticsearch(lambda cmd, args, task_vars: disk_data) + +    errors = check._check_elasticsearch_diskspace(pods_by_name([plain_es_pod]), task_vars_config_base) +    assert_error(''.join(errors), expect_error) diff --git a/roles/openshift_health_checker/test/fluentd_test.py b/roles/openshift_health_checker/test/fluentd_test.py new file mode 100644 index 000000000..d151c0b19 --- /dev/null +++ b/roles/openshift_health_checker/test/fluentd_test.py @@ -0,0 +1,109 @@ +import pytest +import json + +from openshift_checks.logging.fluentd import Fluentd + + +def canned_fluentd(exec_oc=None): +    """Create a Fluentd check object with canned exec_oc method""" +    check = Fluentd("dummy")  # fails if a module is actually invoked +    if exec_oc: +        check._exec_oc = exec_oc +    return check + + +def assert_error(error, expect_error): +    if expect_error: +        assert error +        assert expect_error in error +    else: +        assert not error + + +fluentd_pod_node1 = { +    "metadata": { +        "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"}, +        "name": "logging-fluentd-1", +    }, +    "spec": {"host": "node1", "nodeName": "node1"}, +    "status": { +        "containerStatuses": [{"ready": True}], +        "conditions": [{"status": "True", "type": "Ready"}], +    } +} +fluentd_pod_node2_down = { +    "metadata": { +        "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"}, +        "name": "logging-fluentd-2", +    }, +    "spec": {"host": "node2", "nodeName": "node2"}, +    "status": { +        "containerStatuses": [{"ready": False}], +        "conditions": [{"status": "False", "type": "Ready"}], +    } +} +fluentd_node1 = { +    "metadata": { +        "labels": {"logging-infra-fluentd": "true", "kubernetes.io/hostname": "node1"}, +        "name": "node1", +    }, +    "status": {"addresses": [{"type": "InternalIP", "address": "10.10.1.1"}]}, +} +fluentd_node2 = { +    "metadata": { +        "labels": {"logging-infra-fluentd": "true", "kubernetes.io/hostname": "hostname"}, +        "name": "node2", +    }, +    "status": {"addresses": [{"type": "InternalIP", "address": "10.10.1.2"}]}, +} +fluentd_node3_unlabeled = { +    "metadata": { +        "labels": {"kubernetes.io/hostname": "hostname"}, +        "name": "node3", +    }, +    "status": {"addresses": [{"type": "InternalIP", "address": "10.10.1.3"}]}, +} + + +@pytest.mark.parametrize('pods, nodes, expect_error', [ +    ( +        [], +        [], +        'No nodes appear to be defined', +    ), +    ( +        [], +        [fluentd_node3_unlabeled], +        'There are no nodes with the fluentd label', +    ), +    ( +        [], +        [fluentd_node1, fluentd_node3_unlabeled], +        'Fluentd will not aggregate logs from these nodes.', +    ), +    ( +        [], +        [fluentd_node2], +        "nodes are supposed to have a Fluentd pod but do not", +    ), +    ( +        [fluentd_pod_node1, fluentd_pod_node1], +        [fluentd_node1], +        'more Fluentd pods running than nodes labeled', +    ), +    ( +        [fluentd_pod_node2_down], +        [fluentd_node2], +        "Fluentd pods are supposed to be running", +    ), +    ( +        [fluentd_pod_node1], +        [fluentd_node1], +        None, +    ), +]) +def test_get_fluentd_pods(pods, nodes, expect_error): +    check = canned_fluentd(lambda cmd, args, task_vars: json.dumps(dict(items=nodes))) + +    error = check.check_fluentd(pods, {}) +    assert_error(error, expect_error) diff --git a/roles/openshift_health_checker/test/kibana_test.py b/roles/openshift_health_checker/test/kibana_test.py new file mode 100644 index 000000000..19140a1b6 --- /dev/null +++ b/roles/openshift_health_checker/test/kibana_test.py @@ -0,0 +1,218 @@ +import pytest +import json + +try: +    import urllib2 +    from urllib2 import HTTPError, URLError +except ImportError: +    from urllib.error import HTTPError, URLError +    import urllib.request as urllib2 + +from openshift_checks.logging.kibana import Kibana + + +def canned_kibana(exec_oc=None): +    """Create a Kibana check object with canned exec_oc method""" +    check = Kibana("dummy")  # fails if a module is actually invoked +    if exec_oc: +        check._exec_oc = exec_oc +    return check + + +def assert_error(error, expect_error): +    if expect_error: +        assert error +        assert expect_error in error +    else: +        assert not error + + +plain_kibana_pod = { +    "metadata": { +        "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, +        "name": "logging-kibana-1", +    }, +    "status": { +        "containerStatuses": [{"ready": True}, {"ready": True}], +        "conditions": [{"status": "True", "type": "Ready"}], +    } +} +not_running_kibana_pod = { +    "metadata": { +        "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, +        "name": "logging-kibana-2", +    }, +    "status": { +        "containerStatuses": [{"ready": True}, {"ready": False}], +        "conditions": [{"status": "True", "type": "Ready"}], +    } +} + + +@pytest.mark.parametrize('pods, expect_error', [ +    ( +        [], +        "There are no Kibana pods deployed", +    ), +    ( +        [plain_kibana_pod], +        None, +    ), +    ( +        [not_running_kibana_pod], +        "No Kibana pod is in a running state", +    ), +    ( +        [plain_kibana_pod, not_running_kibana_pod], +        "The following Kibana pods are not currently in a running state", +    ), +]) +def test_check_kibana(pods, expect_error): +    check = canned_kibana() +    error = check.check_kibana(pods) +    assert_error(error, expect_error) + + +@pytest.mark.parametrize('route, expect_url, expect_error', [ +    ( +        None, +        None, +        'no_route_exists', +    ), + +    # test route with no ingress +    ( +        { +            "metadata": { +                "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, +                "name": "logging-kibana", +            }, +            "status": { +                "ingress": [], +            }, +            "spec": { +                "host": "hostname", +            } +        }, +        None, +        'route_not_accepted', +    ), + +    # test route with no host +    ( +        { +            "metadata": { +                "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, +                "name": "logging-kibana", +            }, +            "status": { +                "ingress": [{ +                    "status": True, +                }], +            }, +            "spec": {}, +        }, +        None, +        'route_missing_host', +    ), + +    # test route that looks fine +    ( +        { +            "metadata": { +                "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, +                "name": "logging-kibana", +            }, +            "status": { +                "ingress": [{ +                    "status": True, +                }], +            }, +            "spec": { +                "host": "hostname", +            }, +        }, +        "https://hostname/", +        None, +    ), +]) +def test_get_kibana_url(route, expect_url, expect_error): +    check = canned_kibana(lambda cmd, args, task_vars: json.dumps(route) if route else "") + +    url, error = check._get_kibana_url({}) +    if expect_url: +        assert url == expect_url +    else: +        assert not url +    if expect_error: +        assert error == expect_error +    else: +        assert not error + + +@pytest.mark.parametrize('exec_result, expect', [ +    ( +        'urlopen error [Errno 111] Connection refused', +        'at least one router routing to it?', +    ), +    ( +        'urlopen error [Errno -2] Name or service not known', +        'DNS configured for the Kibana hostname?', +    ), +    ( +        'Status code was not [302]: HTTP Error 500: Server error', +        'did not return the correct status code', +    ), +    ( +        'bork bork bork', +        'bork bork bork',  # should pass through +    ), +]) +def test_verify_url_internal_failure(exec_result, expect): +    check = Kibana(execute_module=lambda module_name, args, task_vars: dict(failed=True, msg=exec_result)) +    check._get_kibana_url = lambda task_vars: ('url', None) + +    error = check._check_kibana_route({}) +    assert_error(error, expect) + + +@pytest.mark.parametrize('lib_result, expect', [ +    ( +        HTTPError('url', 500, "it broke", hdrs=None, fp=None), +        'it broke', +    ), +    ( +        URLError('it broke'), +        'it broke', +    ), +    ( +        302, +        'returned the wrong error code', +    ), +    ( +        200, +        None, +    ), +]) +def test_verify_url_external_failure(lib_result, expect, monkeypatch): + +    class _http_return: + +        def __init__(self, code): +            self.code = code + +        def getcode(self): +            return self.code + +    def urlopen(url, context): +        if type(lib_result) is int: +            return _http_return(lib_result) +        raise lib_result +    monkeypatch.setattr(urllib2, 'urlopen', urlopen) + +    check = canned_kibana() +    check._get_kibana_url = lambda task_vars: ('url', None) +    check._verify_url_internal = lambda url, task_vars: None + +    error = check._check_kibana_route({}) +    assert_error(error, expect) diff --git a/roles/openshift_health_checker/test/logging_check_test.py b/roles/openshift_health_checker/test/logging_check_test.py new file mode 100644 index 000000000..b6db34fe3 --- /dev/null +++ b/roles/openshift_health_checker/test/logging_check_test.py @@ -0,0 +1,137 @@ +import pytest +import json + +from openshift_checks.logging.logging import LoggingCheck, OpenShiftCheckException + +task_vars_config_base = dict(openshift=dict(common=dict(config_base='/etc/origin'))) + + +logging_namespace = "logging" + + +def canned_loggingcheck(exec_oc=None): +    """Create a LoggingCheck object with canned exec_oc method""" +    check = LoggingCheck("dummy")  # fails if a module is actually invoked +    check.logging_namespace = 'logging' +    if exec_oc: +        check.exec_oc = exec_oc +    return check + + +def assert_error(error, expect_error): +    if expect_error: +        assert error +        assert expect_error in error +    else: +        assert not error + + +plain_es_pod = { +    "metadata": { +        "labels": {"component": "es", "deploymentconfig": "logging-es"}, +        "name": "logging-es", +    }, +    "status": { +        "conditions": [{"status": "True", "type": "Ready"}], +        "containerStatuses": [{"ready": True}], +        "podIP": "10.10.10.10", +    }, +    "_test_master_name_str": "name logging-es", +} + +plain_kibana_pod = { +    "metadata": { +        "labels": {"component": "kibana", "deploymentconfig": "logging-kibana"}, +        "name": "logging-kibana-1", +    }, +    "status": { +        "containerStatuses": [{"ready": True}, {"ready": True}], +        "conditions": [{"status": "True", "type": "Ready"}], +    } +} + +fluentd_pod_node1 = { +    "metadata": { +        "labels": {"component": "fluentd", "deploymentconfig": "logging-fluentd"}, +        "name": "logging-fluentd-1", +    }, +    "spec": {"host": "node1", "nodeName": "node1"}, +    "status": { +        "containerStatuses": [{"ready": True}], +        "conditions": [{"status": "True", "type": "Ready"}], +    } +} + +plain_curator_pod = { +    "metadata": { +        "labels": {"component": "curator", "deploymentconfig": "logging-curator"}, +        "name": "logging-curator-1", +    }, +    "status": { +        "containerStatuses": [{"ready": True}], +        "conditions": [{"status": "True", "type": "Ready"}], +        "podIP": "10.10.10.10", +    } +} + + +@pytest.mark.parametrize('problem, expect', [ +    ("[Errno 2] No such file or directory", "supposed to be a master"), +    ("Permission denied", "Unexpected error using `oc`"), +]) +def test_oc_failure(problem, expect): +    def execute_module(module_name, args, task_vars): +        if module_name == "ocutil": +            return dict(failed=True, result=problem) +        return dict(changed=False) + +    check = LoggingCheck({}) + +    with pytest.raises(OpenShiftCheckException) as excinfo: +        check.exec_oc(execute_module, logging_namespace, 'get foo', [], task_vars=task_vars_config_base) +    assert expect in str(excinfo) + + +groups_with_first_master = dict(masters=['this-host', 'other-host']) +groups_with_second_master = dict(masters=['other-host', 'this-host']) +groups_not_a_master = dict(masters=['other-host']) + + +@pytest.mark.parametrize('groups, logging_deployed, is_active', [ +    (groups_with_first_master, True, True), +    (groups_with_first_master, False, False), +    (groups_not_a_master, True, False), +    (groups_with_second_master, True, False), +    (groups_not_a_master, True, False), +]) +def test_is_active(groups, logging_deployed, is_active): +    task_vars = dict( +        ansible_ssh_host='this-host', +        groups=groups, +        openshift_hosted_logging_deploy=logging_deployed, +    ) + +    assert LoggingCheck.is_active(task_vars=task_vars) == is_active + + +@pytest.mark.parametrize('pod_output, expect_pods, expect_error', [ +    ( +        'No resources found.', +        None, +        'There are no pods in the logging namespace', +    ), +    ( +        json.dumps({'items': [plain_kibana_pod, plain_es_pod, plain_curator_pod, fluentd_pod_node1]}), +        [plain_es_pod], +        None, +    ), +]) +def test_get_pods_for_component(pod_output, expect_pods, expect_error): +    check = canned_loggingcheck(lambda exec_module, namespace, cmd, args, task_vars: pod_output) +    pods, error = check.get_pods_for_component( +        lambda name, args, task_vars: {}, +        logging_namespace, +        "es", +        {} +    ) +    assert_error(error, expect_error) diff --git a/roles/openshift_health_checker/test/ovs_version_test.py b/roles/openshift_health_checker/test/ovs_version_test.py new file mode 100644 index 000000000..6494e1c06 --- /dev/null +++ b/roles/openshift_health_checker/test/ovs_version_test.py @@ -0,0 +1,89 @@ +import pytest + +from openshift_checks.ovs_version import OvsVersion, OpenShiftCheckException + + +def test_openshift_version_not_supported(): +    def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): +        return {} + +    openshift_release = '111.7.0' + +    task_vars = dict( +        openshift=dict(common=dict(service_type='origin')), +        openshift_release=openshift_release, +        openshift_image_tag='v' + openshift_release, +        openshift_deployment_type='origin', +    ) + +    check = OvsVersion(execute_module=execute_module) +    with pytest.raises(OpenShiftCheckException) as excinfo: +        check.run(tmp=None, task_vars=task_vars) + +    assert "no recommended version of Open vSwitch" in str(excinfo.value) + + +def test_invalid_openshift_release_format(): +    def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): +        return {} + +    task_vars = dict( +        openshift=dict(common=dict(service_type='origin')), +        openshift_image_tag='v0', +        openshift_deployment_type='origin', +    ) + +    check = OvsVersion(execute_module=execute_module) +    with pytest.raises(OpenShiftCheckException) as excinfo: +        check.run(tmp=None, task_vars=task_vars) +    assert "invalid version" in str(excinfo.value) + + +@pytest.mark.parametrize('openshift_release,expected_ovs_version', [ +    ("3.5", "2.6"), +    ("3.6", "2.6"), +    ("3.4", "2.4"), +    ("3.3", "2.4"), +    ("1.0", "2.4"), +]) +def test_ovs_package_version(openshift_release, expected_ovs_version): +    task_vars = dict( +        openshift=dict(common=dict(service_type='origin')), +        openshift_release=openshift_release, +        openshift_image_tag='v' + openshift_release, +    ) +    return_value = object() + +    def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): +        assert module_name == 'rpm_version' +        assert "package_list" in module_args + +        for pkg in module_args["package_list"]: +            if pkg["name"] == "openvswitch": +                assert pkg["version"] == expected_ovs_version + +        return return_value + +    check = OvsVersion(execute_module=execute_module) +    result = check.run(tmp=None, task_vars=task_vars) +    assert result is return_value + + +@pytest.mark.parametrize('group_names,is_containerized,is_active', [ +    (['masters'], False, True), +    # ensure check is skipped on containerized installs +    (['masters'], True, False), +    (['nodes'], False, True), +    (['masters', 'nodes'], False, True), +    (['masters', 'etcd'], False, True), +    ([], False, False), +    (['etcd'], False, False), +    (['lb'], False, False), +    (['nfs'], False, False), +]) +def test_ovs_version_skip_when_not_master_nor_node(group_names, is_containerized, is_active): +    task_vars = dict( +        group_names=group_names, +        openshift=dict(common=dict(is_containerized=is_containerized)), +    ) +    assert OvsVersion.is_active(task_vars=task_vars) == is_active diff --git a/roles/openshift_health_checker/test/package_version_test.py b/roles/openshift_health_checker/test/package_version_test.py index 196d9816a..91eace512 100644 --- a/roles/openshift_health_checker/test/package_version_test.py +++ b/roles/openshift_health_checker/test/package_version_test.py @@ -1,24 +1,132 @@  import pytest -from openshift_checks.package_version import PackageVersion +from openshift_checks.package_version import PackageVersion, OpenShiftCheckException -def test_package_version(): +@pytest.mark.parametrize('openshift_release, extra_words', [ +    ('111.7.0', ["no recommended version of Open vSwitch"]), +    ('0.0.0', ["no recommended version of Docker"]), +]) +def test_openshift_version_not_supported(openshift_release, extra_words): +    def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): +        return {} + +    task_vars = dict( +        openshift=dict(common=dict(service_type='origin')), +        openshift_release=openshift_release, +        openshift_image_tag='v' + openshift_release, +        openshift_deployment_type='origin', +    ) + +    check = PackageVersion(execute_module=execute_module) +    with pytest.raises(OpenShiftCheckException) as excinfo: +        check.run(tmp=None, task_vars=task_vars) + +    for word in extra_words: +        assert word in str(excinfo.value) + + +def test_invalid_openshift_release_format(): +    def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): +        return {} + +    task_vars = dict( +        openshift=dict(common=dict(service_type='origin')), +        openshift_image_tag='v0', +        openshift_deployment_type='origin', +    ) + +    check = PackageVersion(execute_module=execute_module) +    with pytest.raises(OpenShiftCheckException) as excinfo: +        check.run(tmp=None, task_vars=task_vars) +    assert "invalid version" in str(excinfo.value) + + +@pytest.mark.parametrize('openshift_release', [ +    "3.5", +    "3.6", +    "3.4", +    "3.3", +]) +def test_package_version(openshift_release):      task_vars = dict(          openshift=dict(common=dict(service_type='origin')), -        openshift_release='3.5', +        openshift_release=openshift_release, +        openshift_image_tag='v' + openshift_release,          openshift_deployment_type='origin',      )      return_value = object()      def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None):          assert module_name == 'aos_version' -        assert 'requested_openshift_release' in module_args -        assert 'openshift_deployment_type' in module_args -        assert 'rpm_prefix' in module_args -        assert module_args['requested_openshift_release'] == task_vars['openshift_release'] -        assert module_args['openshift_deployment_type'] == task_vars['openshift_deployment_type'] -        assert module_args['rpm_prefix'] == task_vars['openshift']['common']['service_type'] +        assert "package_list" in module_args + +        for pkg in module_args["package_list"]: +            if "-master" in pkg["name"] or "-node" in pkg["name"]: +                assert pkg["version"] == task_vars["openshift_release"] + +        return return_value + +    check = PackageVersion(execute_module=execute_module) +    result = check.run(tmp=None, task_vars=task_vars) +    assert result is return_value + + +@pytest.mark.parametrize('deployment_type,openshift_release,expected_ovs_version', [ +    ("openshift-enterprise", "3.5", "2.6"), +    ("origin", "3.6", "2.6"), +    ("openshift-enterprise", "3.4", "2.4"), +    ("origin", "3.3", "2.4"), +]) +def test_ovs_package_version(deployment_type, openshift_release, expected_ovs_version): +    task_vars = dict( +        openshift=dict(common=dict(service_type='origin')), +        openshift_release=openshift_release, +        openshift_image_tag='v' + openshift_release, +        openshift_deployment_type=deployment_type, +    ) +    return_value = object() + +    def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): +        assert module_name == 'aos_version' +        assert "package_list" in module_args + +        for pkg in module_args["package_list"]: +            if pkg["name"] == "openvswitch": +                assert pkg["version"] == expected_ovs_version + +        return return_value + +    check = PackageVersion(execute_module=execute_module) +    result = check.run(tmp=None, task_vars=task_vars) +    assert result is return_value + + +@pytest.mark.parametrize('deployment_type,openshift_release,expected_docker_version', [ +    ("origin", "3.5", "1.12"), +    ("openshift-enterprise", "3.4", "1.12"), +    ("origin", "3.3", "1.10"), +    ("openshift-enterprise", "3.2", "1.10"), +    ("origin", "3.1", "1.8"), +    ("openshift-enterprise", "3.1", "1.8"), +]) +def test_docker_package_version(deployment_type, openshift_release, expected_docker_version): +    task_vars = dict( +        openshift=dict(common=dict(service_type='origin')), +        openshift_release=openshift_release, +        openshift_image_tag='v' + openshift_release, +        openshift_deployment_type=deployment_type, +    ) +    return_value = object() + +    def execute_module(module_name=None, module_args=None, tmp=None, task_vars=None): +        assert module_name == 'aos_version' +        assert "package_list" in module_args + +        for pkg in module_args["package_list"]: +            if pkg["name"] == "docker": +                assert pkg["version"] == expected_docker_version +          return return_value      check = PackageVersion(execute_module=execute_module) diff --git a/roles/openshift_health_checker/test/rpm_version_test.py b/roles/openshift_health_checker/test/rpm_version_test.py new file mode 100644 index 000000000..2f09ef965 --- /dev/null +++ b/roles/openshift_health_checker/test/rpm_version_test.py @@ -0,0 +1,82 @@ +import pytest +import rpm_version + +expected_pkgs = { +    "spam": { +        "name": "spam", +        "version": "3.2.1", +    }, +    "eggs": { +        "name": "eggs", +        "version": "3.2.1", +    }, +} + + +@pytest.mark.parametrize('pkgs, expect_not_found', [ +    ( +        {}, +        ["spam", "eggs"],  # none found +    ), +    ( +        {"spam": ["3.2.1", "4.5.1"]}, +        ["eggs"],  # completely missing +    ), +    ( +        { +            "spam": ["3.2.1", "4.5.1"], +            "eggs": ["3.2.1"], +        }, +        [],  # all found +    ), +]) +def test_check_pkg_found(pkgs, expect_not_found): +    if expect_not_found: +        with pytest.raises(rpm_version.RpmVersionException) as e: +            rpm_version._check_pkg_versions(pkgs, expected_pkgs) + +        assert "not found to be installed" in str(e.value) +        assert set(expect_not_found) == set(e.value.problem_pkgs) +    else: +        rpm_version._check_pkg_versions(pkgs, expected_pkgs) + + +@pytest.mark.parametrize('pkgs, expect_not_found', [ +    ( +        { +            'spam': ['3.2.1'], +            'eggs': ['3.3.2'], +        }, +        { +            "eggs": { +                "required_version": "3.2", +                "found_versions": ["3.3"], +            } +        },  # not the right version +    ), +    ( +        { +            'spam': ['3.1.2', "3.3.2"], +            'eggs': ['3.3.2', "1.2.3"], +        }, +        { +            "eggs": { +                "required_version": "3.2", +                "found_versions": ["3.3", "1.2"], +            }, +            "spam": { +                "required_version": "3.2", +                "found_versions": ["3.1", "3.3"], +            } +        },  # not the right version +    ), +]) +def test_check_pkg_version_found(pkgs, expect_not_found): +    if expect_not_found: +        with pytest.raises(rpm_version.RpmVersionException) as e: +            rpm_version._check_pkg_versions(pkgs, expected_pkgs) + +        assert "found to be installed with an incorrect version" in str(e.value) +        assert expect_not_found == e.value.problem_pkgs +    else: +        rpm_version._check_pkg_versions(pkgs, expected_pkgs) diff --git a/roles/openshift_logging/tasks/delete_logging.yaml b/roles/openshift_logging/tasks/delete_logging.yaml index 0c7152b16..6d023a02d 100644 --- a/roles/openshift_logging/tasks/delete_logging.yaml +++ b/roles/openshift_logging/tasks/delete_logging.yaml @@ -32,9 +32,8 @@  # delete our old secrets  - name: delete logging secrets -  oc_obj: +  oc_secret:      state: absent -    kind: secret      namespace: "{{ openshift_logging_namespace }}"      name: "{{ item }}"    with_items: diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml index dde76b142..7c1062b77 100644 --- a/roles/openshift_logging/tasks/install_logging.yaml +++ b/roles/openshift_logging/tasks/install_logging.yaml @@ -10,7 +10,7 @@      name: "{{ openshift_logging_namespace }}"      node_selector: "{{ openshift_logging_nodeselector | default(null) }}" -- name: Labelling logging project +- name: Labeling logging project    oc_label:      state: present      kind: namespace @@ -23,7 +23,7 @@    - openshift_logging_labels is defined    - openshift_logging_labels is dict -- name: Labelling logging project +- name: Labeling logging project    oc_label:      state: present      kind: namespace @@ -78,6 +78,8 @@    - "{{ openshift_logging_facts.elasticsearch.deploymentconfigs }}"    - "{{ openshift_logging_facts.elasticsearch.pvcs }}"    - "{{ es_indices }}" +  when: +  - openshift_logging_facts.elasticsearch.deploymentconfigs.keys() | count > 0  # Create any new DC that may be required  - include_role: @@ -124,6 +126,7 @@    - "{{ es_ops_indices }}"    when:    - openshift_logging_use_ops | bool +  - openshift_logging_facts.elasticsearch_ops.deploymentconfigs.keys() | count > 0  # Create any new DC that may be required  - include_role: diff --git a/roles/openshift_logging_kibana/tasks/main.yaml b/roles/openshift_logging_kibana/tasks/main.yaml index 55b28ee24..d13255386 100644 --- a/roles/openshift_logging_kibana/tasks/main.yaml +++ b/roles/openshift_logging_kibana/tasks/main.yaml @@ -179,7 +179,7 @@      #  path: "{{ generated_certs_dir }}/kibana-internal.key"      #- name: server-cert      #  path: "{{ generated_certs_dir }}/kibana-internal.crt" -    #- name: server-tls +    #- name: server-tls.json      #  path: "{{ generated_certs_dir }}/server-tls.json"      contents:      - path: oauth-secret @@ -190,7 +190,7 @@        data: "{{ key_pairs | entry_from_named_pair('kibana_internal_key') | b64decode }}"      - path: server-cert        data: "{{ key_pairs | entry_from_named_pair('kibana_internal_cert') | b64decode }}" -    - path: server-tls +    - path: server-tls.json        data: "{{ key_pairs | entry_from_named_pair('server_tls') | b64decode }}"  # create Kibana DC diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2 index 770a2bfbd..502cd3347 100644 --- a/roles/openshift_logging_mux/templates/mux.j2 +++ b/roles/openshift_logging_mux/templates/mux.j2 @@ -109,7 +109,7 @@ spec:            name: logging-mux        - name: certs          secret: -          secretName: logging-mux +          secretName: logging-fluentd        - name: dockerhostname          hostPath:            path: /etc/hostname diff --git a/roles/openshift_master/tasks/main.yml b/roles/openshift_master/tasks/main.yml index 5522fef26..aed5598c0 100644 --- a/roles/openshift_master/tasks/main.yml +++ b/roles/openshift_master/tasks/main.yml @@ -177,6 +177,7 @@  # https://github.com/openshift/origin/issues/6447  - name: Start and enable master    systemd: +    daemon_reload: yes      name: "{{ openshift.common.service_type }}-master"      enabled: yes      state: started diff --git a/roles/openshift_master_certificates/tasks/main.yml b/roles/openshift_master_certificates/tasks/main.yml index 9706da24b..62413536b 100644 --- a/roles/openshift_master_certificates/tasks/main.yml +++ b/roles/openshift_master_certificates/tasks/main.yml @@ -71,7 +71,7 @@    delegate_to: "{{ openshift_ca_host }}"    run_once: true -- name: Generate the master client config +- name: Generate the loopback master client config    command: >      {{ hostvars[openshift_ca_host].openshift.common.client_binary }} adm create-api-client-config        {% for named_ca_certificate in openshift.master.named_certificates | default([]) | oo_collect('cafile') %} @@ -80,8 +80,8 @@        --certificate-authority={{ openshift_ca_cert }}        --client-dir={{ openshift_generated_configs_dir }}/master-{{ hostvars[item].openshift.common.hostname }}        --groups=system:masters,system:openshift-master -      --master={{ openshift.master.api_url }} -      --public-master={{ openshift.master.public_api_url }} +      --master={{ hostvars[item].openshift.master.loopback_api_url }} +      --public-master={{ hostvars[item].openshift.master.loopback_api_url }}        --signer-cert={{ openshift_ca_cert }}        --signer-key={{ openshift_ca_key }}        --signer-serial={{ openshift_ca_serial }} diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index a8beaa060..573051504 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -206,6 +206,7 @@  - name: Start and enable node dep    systemd: +    daemon_reload: yes      name: "{{ openshift.common.service_type }}-node-dep"      enabled: yes      state: started | 
