diff options
Diffstat (limited to 'roles')
7 files changed, 173 insertions, 47 deletions
| diff --git a/roles/openshift_health_checker/action_plugins/openshift_health_check.py b/roles/openshift_health_checker/action_plugins/openshift_health_check.py index 03c40b78b..a62e4331e 100644 --- a/roles/openshift_health_checker/action_plugins/openshift_health_check.py +++ b/roles/openshift_health_checker/action_plugins/openshift_health_check.py @@ -25,9 +25,11 @@ class ActionModule(ActionBase):      def run(self, tmp=None, task_vars=None):          result = super(ActionModule, self).run(tmp, task_vars) +        task_vars = task_vars or {} -        if task_vars is None: -            task_vars = {} +        # vars are not supportably available in the callback plugin, +        # so record any it will need in the result. +        result['playbook_context'] = task_vars.get('r_openshift_health_checker_playbook_context')          if "openshift" not in task_vars:              result["failed"] = True @@ -46,19 +48,27 @@ class ActionModule(ActionBase):          result["checks"] = check_results = {} +        user_disabled_checks = [ +            check.strip() +            for check in task_vars.get("openshift_disable_check", "").split(",") +        ] +          for check_name in resolved_checks:              display.banner("CHECK [{} : {}]".format(check_name, task_vars["ansible_host"]))              check = known_checks[check_name] -            if check.is_active(task_vars): +            if not check.is_active(task_vars): +                r = dict(skipped=True, skipped_reason="Not active for this host") +            elif check_name in user_disabled_checks: +                r = dict(skipped=True, skipped_reason="Disabled by user request") +            else:                  try:                      r = check.run(tmp, task_vars)                  except OpenShiftCheckException as e: -                    r = {} -                    r["failed"] = True -                    r["msg"] = str(e) -            else: -                r = {"skipped": True} +                    r = dict( +                        failed=True, +                        msg=str(e), +                    )              check_results[check_name] = r diff --git a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py index 7bce7f107..64c29a8d9 100644 --- a/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py +++ b/roles/openshift_health_checker/callback_plugins/zz_failure_summary.py @@ -2,6 +2,12 @@  Ansible callback plugin.  ''' +# Reason: In several locations below we disable pylint protected-access +#         for Ansible objects that do not give us any public way +#         to access the full details we need to report check failures. +# Status: disabled permanently or until Ansible object has a public API. +# This does leave the code more likely to be broken by future Ansible changes. +  from pprint import pformat  from ansible.plugins.callback import CallbackBase @@ -20,38 +26,37 @@ class CallbackModule(CallbackBase):      CALLBACK_TYPE = 'aggregate'      CALLBACK_NAME = 'failure_summary'      CALLBACK_NEEDS_WHITELIST = False +    _playbook_file = None      def __init__(self):          super(CallbackModule, self).__init__()          self.__failures = [] +    def v2_playbook_on_start(self, playbook): +        super(CallbackModule, self).v2_playbook_on_start(playbook) +        # re: playbook attrs see top comment  # pylint: disable=protected-access +        self._playbook_file = playbook._file_name +      def v2_runner_on_failed(self, result, ignore_errors=False):          super(CallbackModule, self).v2_runner_on_failed(result, ignore_errors)          self.__failures.append(dict(result=result, ignore_errors=ignore_errors))      def v2_playbook_on_stats(self, stats):          super(CallbackModule, self).v2_playbook_on_stats(stats) -        # TODO: update condition to consider a host var or env var to -        # enable/disable the summary, so that we can control the output from a -        # play.          if self.__failures: -            self._print_failure_summary() +            self._print_failure_details(self.__failures) -    def _print_failure_summary(self): -        '''Print a summary of failed tasks (including ignored failures).''' +    def _print_failure_details(self, failures): +        '''Print a summary of failed tasks or checks.'''          self._display.display(u'\nFailure summary:\n') -        # TODO: group failures by host or by task. If grouped by host, it is -        # easy to see all problems of a given host. If grouped by task, it is -        # easy to see what hosts needs the same fix. - -        width = len(str(len(self.__failures))) +        width = len(str(len(failures)))          initial_indent_format = u'  {{:>{width}}}. '.format(width=width)          initial_indent_len = len(initial_indent_format.format(0))          subsequent_indent = u' ' * initial_indent_len          subsequent_extra_indent = u' ' * (initial_indent_len + 10) -        for i, failure in enumerate(self.__failures, 1): +        for i, failure in enumerate(failures, 1):              entries = _format_failure(failure)              self._display.display(u'\n{}{}'.format(initial_indent_format.format(i), entries[0]))              for entry in entries[1:]: @@ -59,11 +64,52 @@ class CallbackModule(CallbackBase):                  indented = u'{}{}'.format(subsequent_indent, entry)                  self._display.display(indented) - -# Reason: disable pylint protected-access because we need to access _* -#         attributes of a task result to implement this method. -# Status: permanently disabled unless Ansible's API changes. -# pylint: disable=protected-access +        failed_checks = set() +        playbook_context = None +        # re: result attrs see top comment  # pylint: disable=protected-access +        for failure in failures: +            # get context from check task result since callback plugins cannot access task vars +            playbook_context = playbook_context or failure['result']._result.get('playbook_context') +            failed_checks.update( +                name +                for name, result in failure['result']._result.get('checks', {}).items() +                if result.get('failed') +            ) +        if failed_checks: +            self._print_check_failure_summary(failed_checks, playbook_context) + +    def _print_check_failure_summary(self, failed_checks, context): +        checks = ','.join(sorted(failed_checks)) +        # NOTE: context is not set if all failures occurred prior to checks task +        summary = ( +            '\n' +            'The execution of "{playbook}"\n' +            'includes checks designed to fail early if the requirements\n' +            'of the playbook are not met. One or more of these checks\n' +            'failed. To disregard these results, you may choose to\n' +            'disable failing checks by setting an Ansible variable:\n\n' +            '   openshift_disable_check={checks}\n\n' +            'Failing check names are shown in the failure details above.\n' +            'Some checks may be configurable by variables if your requirements\n' +            'are different from the defaults; consult check documentation.\n' +            'Variables can be set in the inventory or passed on the\n' +            'command line using the -e flag to ansible-playbook.\n' +        ).format(playbook=self._playbook_file, checks=checks) +        if context in ['pre-install', 'health']: +            summary = ( +                '\n' +                'You may choose to configure or disable failing checks by\n' +                'setting Ansible variables. To disable those above:\n\n' +                '    openshift_disable_check={checks}\n\n' +                'Consult check documentation for configurable variables.\n' +                'Variables can be set in the inventory or passed on the\n' +                'command line using the -e flag to ansible-playbook.\n' +            ).format(checks=checks) +        # other expected contexts: install, upgrade +        self._display.display(summary) + + +# re: result attrs see top comment  # pylint: disable=protected-access  def _format_failure(failure):      '''Return a list of pretty-formatted text entries describing a failure, including      relevant information about it. Expect that the list of text entries will be joined @@ -100,11 +146,8 @@ def _format_failed_checks(checks):          return stringc(pformat(checks), C.COLOR_ERROR) -# Reason: disable pylint protected-access because we need to access _* -#         attributes of obj to implement this function. -#         This is inspired by ansible.playbook.base.Base.dump_me. -# Status: permanently disabled unless Ansible's API changes. -# pylint: disable=protected-access +# This is inspired by ansible.playbook.base.Base.dump_me. +# re: play/task/block attrs see top comment  # pylint: disable=protected-access  def _get_play(obj):      '''Given a task or block, recursively tries to find its parent play.'''      if hasattr(obj, '_play'): diff --git a/roles/openshift_health_checker/openshift_checks/disk_availability.py b/roles/openshift_health_checker/openshift_checks/disk_availability.py index c2792a0fe..962148cb8 100644 --- a/roles/openshift_health_checker/openshift_checks/disk_availability.py +++ b/roles/openshift_health_checker/openshift_checks/disk_availability.py @@ -27,10 +27,12 @@ class DiskAvailability(NotContainerizedMixin, OpenShiftCheck):      def run(self, tmp, task_vars):          group_names = get_var(task_vars, "group_names")          ansible_mounts = get_var(task_vars, "ansible_mounts") - -        min_free_bytes = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names)          free_bytes = self.openshift_available_disk(ansible_mounts) +        recommended_min = max(self.recommended_disk_space_bytes.get(name, 0) for name in group_names) +        configured_min = int(get_var(task_vars, "openshift_check_min_host_disk_gb", default=0)) * 10**9 +        min_free_bytes = configured_min or recommended_min +          if free_bytes < min_free_bytes:              return {                  'failed': True, diff --git a/roles/openshift_health_checker/openshift_checks/memory_availability.py b/roles/openshift_health_checker/openshift_checks/memory_availability.py index 28805dc37..8b1a58ef4 100644 --- a/roles/openshift_health_checker/openshift_checks/memory_availability.py +++ b/roles/openshift_health_checker/openshift_checks/memory_availability.py @@ -13,7 +13,7 @@ class MemoryAvailability(OpenShiftCheck):      recommended_memory_bytes = {          "masters": 16 * 10**9,          "nodes": 8 * 10**9, -        "etcd": 20 * 10**9, +        "etcd": 8 * 10**9,      }      @classmethod @@ -27,7 +27,9 @@ class MemoryAvailability(OpenShiftCheck):          group_names = get_var(task_vars, "group_names")          total_memory_bytes = get_var(task_vars, "ansible_memtotal_mb") * 10**6 -        min_memory_bytes = max(self.recommended_memory_bytes.get(name, 0) for name in group_names) +        recommended_min = max(self.recommended_memory_bytes.get(name, 0) for name in group_names) +        configured_min = int(get_var(task_vars, "openshift_check_min_host_memory_gb", default=0)) * 10**9 +        min_memory_bytes = configured_min or recommended_min          if total_memory_bytes < min_memory_bytes:              return { diff --git a/roles/openshift_health_checker/test/action_plugin_test.py b/roles/openshift_health_checker/test/action_plugin_test.py index 2693ae37b..6ebf0ebb2 100644 --- a/roles/openshift_health_checker/test/action_plugin_test.py +++ b/roles/openshift_health_checker/test/action_plugin_test.py @@ -67,6 +67,7 @@ def changed(result):      return result.get('changed', False) +# tests whether task is skipped, not individual checks  def skipped(result):      return result.get('skipped', False) @@ -101,7 +102,20 @@ def test_action_plugin_skip_non_active_checks(plugin, task_vars, monkeypatch):      result = plugin.run(tmp=None, task_vars=task_vars) -    assert result['checks']['fake_check'] == {'skipped': True} +    assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Not active for this host") +    assert not failed(result) +    assert not changed(result) +    assert not skipped(result) + + +def test_action_plugin_skip_disabled_checks(plugin, task_vars, monkeypatch): +    checks = [fake_check('fake_check', is_active=True)] +    monkeypatch.setattr('openshift_checks.OpenShiftCheck.subclasses', classmethod(lambda cls: checks)) + +    task_vars['openshift_disable_check'] = 'fake_check' +    result = plugin.run(tmp=None, task_vars=task_vars) + +    assert result['checks']['fake_check'] == dict(skipped=True, skipped_reason="Disabled by user request")      assert not failed(result)      assert not changed(result)      assert not skipped(result) diff --git a/roles/openshift_health_checker/test/disk_availability_test.py b/roles/openshift_health_checker/test/disk_availability_test.py index 970b474d7..b353fa610 100644 --- a/roles/openshift_health_checker/test/disk_availability_test.py +++ b/roles/openshift_health_checker/test/disk_availability_test.py @@ -42,9 +42,10 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):          assert word in str(excinfo.value) -@pytest.mark.parametrize('group_names,ansible_mounts', [ +@pytest.mark.parametrize('group_names,configured_min,ansible_mounts', [      (          ['masters'], +        0,          [{              'mount': '/',              'size_available': 40 * 10**9 + 1, @@ -52,6 +53,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):      ),      (          ['nodes'], +        0,          [{              'mount': '/',              'size_available': 15 * 10**9 + 1, @@ -59,6 +61,7 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):      ),      (          ['etcd'], +        0,          [{              'mount': '/',              'size_available': 20 * 10**9 + 1, @@ -66,6 +69,15 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):      ),      (          ['etcd'], +        1,  # configure lower threshold +        [{ +            'mount': '/', +            'size_available': 1 * 10**9 + 1,  # way smaller than recommended +        }], +    ), +    ( +        ['etcd'], +        0,          [{              # not enough space on / ...              'mount': '/', @@ -77,9 +89,10 @@ def test_cannot_determine_available_disk(ansible_mounts, extra_words):          }],      ),  ]) -def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts): +def test_succeeds_with_recommended_disk_space(group_names, configured_min, ansible_mounts):      task_vars = dict(          group_names=group_names, +        openshift_check_min_host_disk_gb=configured_min,          ansible_mounts=ansible_mounts,      ) @@ -89,9 +102,10 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):      assert not result.get('failed', False) -@pytest.mark.parametrize('group_names,ansible_mounts,extra_words', [ +@pytest.mark.parametrize('group_names,configured_min,ansible_mounts,extra_words', [      (          ['masters'], +        0,          [{              'mount': '/',              'size_available': 1, @@ -99,7 +113,17 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):          ['0.0 GB'],      ),      ( +        ['masters'], +        100,  # set a higher threshold +        [{ +            'mount': '/', +            'size_available': 50 * 10**9,  # would normally be enough... +        }], +        ['100.0 GB'], +    ), +    (          ['nodes'], +        0,          [{              'mount': '/',              'size_available': 1 * 10**9, @@ -108,6 +132,7 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):      ),      (          ['etcd'], +        0,          [{              'mount': '/',              'size_available': 1, @@ -116,6 +141,7 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):      ),      (          ['nodes', 'masters'], +        0,          [{              'mount': '/',              # enough space for a node, not enough for a master @@ -125,6 +151,7 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):      ),      (          ['etcd'], +        0,          [{              # enough space on / ...              'mount': '/', @@ -137,9 +164,10 @@ def test_succeeds_with_recommended_disk_space(group_names, ansible_mounts):          ['0.0 GB'],      ),  ]) -def test_fails_with_insufficient_disk_space(group_names, ansible_mounts, extra_words): +def test_fails_with_insufficient_disk_space(group_names, configured_min, ansible_mounts, extra_words):      task_vars = dict(          group_names=group_names, +        openshift_check_min_host_disk_gb=configured_min,          ansible_mounts=ansible_mounts,      ) diff --git a/roles/openshift_health_checker/test/memory_availability_test.py b/roles/openshift_health_checker/test/memory_availability_test.py index e161a5b9e..1db203854 100644 --- a/roles/openshift_health_checker/test/memory_availability_test.py +++ b/roles/openshift_health_checker/test/memory_availability_test.py @@ -20,27 +20,37 @@ def test_is_active(group_names, is_active):      assert MemoryAvailability.is_active(task_vars=task_vars) == is_active -@pytest.mark.parametrize('group_names,ansible_memtotal_mb', [ +@pytest.mark.parametrize('group_names,configured_min,ansible_memtotal_mb', [      (          ['masters'], +        0,          17200,      ),      (          ['nodes'], +        0,          8200,      ),      ( +        ['nodes'], +        1,  # configure lower threshold +        2000,  # too low for recommended but not for configured +    ), +    (          ['etcd'], -        22200, +        0, +        8200,      ),      (          ['masters', 'nodes'], +        0,          17000,      ),  ]) -def test_succeeds_with_recommended_memory(group_names, ansible_memtotal_mb): +def test_succeeds_with_recommended_memory(group_names, configured_min, ansible_memtotal_mb):      task_vars = dict(          group_names=group_names, +        openshift_check_min_host_memory_gb=configured_min,          ansible_memtotal_mb=ansible_memtotal_mb,      ) @@ -50,39 +60,56 @@ def test_succeeds_with_recommended_memory(group_names, ansible_memtotal_mb):      assert not result.get('failed', False) -@pytest.mark.parametrize('group_names,ansible_memtotal_mb,extra_words', [ +@pytest.mark.parametrize('group_names,configured_min,ansible_memtotal_mb,extra_words', [      (          ['masters'],          0, +        0,          ['0.0 GB'],      ),      (          ['nodes'], +        0,          100,          ['0.1 GB'],      ),      ( +        ['nodes'], +        24,  # configure higher threshold +        20000,  # enough to meet recommended but not configured +        ['20.0 GB'], +    ), +    (          ['etcd'], -        -1, -        ['0.0 GB'], +        0, +        7000, +        ['7.0 GB'], +    ), +    ( +        ['etcd', 'masters'], +        0, +        9000,  # enough memory for etcd, not enough for a master +        ['9.0 GB'],      ),      (          ['nodes', 'masters'], +        0,          # enough memory for a node, not enough for a master          11000,          ['11.0 GB'],      ),  ]) -def test_fails_with_insufficient_memory(group_names, ansible_memtotal_mb, extra_words): +def test_fails_with_insufficient_memory(group_names, configured_min, ansible_memtotal_mb, extra_words):      task_vars = dict(          group_names=group_names, +        openshift_check_min_host_memory_gb=configured_min,          ansible_memtotal_mb=ansible_memtotal_mb,      )      check = MemoryAvailability(execute_module=fake_execute_module)      result = check.run(tmp=None, task_vars=task_vars) -    assert result['failed'] +    assert result.get('failed', False)      for word in 'below recommended'.split() + extra_words:          assert word in result['msg'] | 
