diff options
Diffstat (limited to 'roles')
3 files changed, 534 insertions, 0 deletions
# ---------------------------------------------------------------------------
# new file: roles/openshift_health_checker/library/etcdkeysize.py
# ---------------------------------------------------------------------------
#!/usr/bin/python
"""Ansible module that recursively determines if the size of a key in an etcd cluster exceeds a given limit."""

from ansible.module_utils.basic import AnsibleModule


try:
    import etcd

    IMPORT_EXCEPTION_MSG = None
except ImportError as err:
    IMPORT_EXCEPTION_MSG = str(err)

    # Stand-in exposing only the exception type referenced below, so that this
    # file still loads (and check_etcd_key_size stays usable with a fake
    # client) when python-etcd is not installed.
    from collections import namedtuple
    EtcdMock = namedtuple("etcd", ["EtcdKeyNotFound"])
    etcd = EtcdMock(KeyError)


# pylint: disable=too-many-arguments
def check_etcd_key_size(client, key, size_limit, total_size=0, depth=0, depth_limit=1000, visited=None):
    """Recursively sum the length of the values stored under an etcd key.

    Args:
        client: object whose ``read(key, recursive=False)`` returns a result
            with a ``leaves`` iterable. Every leaf exposes ``dir`` and ``key``;
            non-directory leaves also expose ``value``.
        key: key at which the walk starts.
        size_limit: limit that ``total_size`` plus the size found by this
            call is compared against.
        total_size: size already accumulated by the caller.
        depth: current recursion depth.
        depth_limit: depth at which the walk is aborted.
        visited: set of keys already read; a key is never read twice, which
            keeps a tree that refers back to itself from recursing forever.

    Returns:
        Tuple ``(size, limit_exceeded)``: ``size`` is an int holding the size
        counted by this call (partial if the walk stopped early) and
        ``limit_exceeded`` is a bool.

    Raises:
        Exception: if ``depth`` has reached ``depth_limit`` while there is
            still a leaf to process.
    """
    if visited is None:
        visited = set()

    if key in visited:
        return 0, False

    visited.add(key)

    try:
        result = client.read(key, recursive=False)
    except etcd.EtcdKeyNotFound:
        # a missing key simply contributes nothing
        return 0, False

    size = 0
    limit_exceeded = False

    for node in result.leaves:
        if depth >= depth_limit:
            raise Exception("Maximum recursive stack depth ({}) exceeded.".format(depth_limit))

        # stop walking as soon as the running total is already over the limit
        if size_limit and total_size + size > size_limit:
            return size, True

        if not node.dir:
            # a leaf without a value (None) contributes nothing instead of
            # crashing len()
            size += len(node.value or "")
            continue

        key_size, limit_exceeded = check_etcd_key_size(client, node.key,
                                                       size_limit,
                                                       total_size + size,
                                                       depth + 1,
                                                       depth_limit, visited)
        size += key_size

    # Deliberately not guarded by "if size_limit": a limit of 0 is exceeded by
    # any amount of data.
    max_limit_exceeded = limit_exceeded or (total_size + size > size_limit)
    return size, max_limit_exceeded


def main():  # pylint: disable=too-many-branches
    """Module entry point: report whether the given etcd paths exceed ``size_limit_bytes``.

    Every module parameter other than ``size_limit_bytes`` and ``paths`` is
    passed to ``etcd.Client`` as a keyword argument. The result always carries
    ``size_limit_exceeded``; when the client cannot be created it also carries
    ``failed=True`` and a ``msg``.
    """
    module = AnsibleModule(
        argument_spec=dict(
            size_limit_bytes=dict(type="int", default=0),
            paths=dict(type="list", default=["/openshift.io/images"]),
            host=dict(type="str", default="127.0.0.1"),
            port=dict(type="int", default=4001),
            protocol=dict(type="str", default="http"),
            version_prefix=dict(type="str", default=""),
            allow_redirect=dict(type="bool", default=False),
            cert=dict(type="dict", default=None),
            ca_cert=dict(type="str", default=None),
        ),
        supports_check_mode=True
    )

    # "cert" arrives as {"cert": ..., "key": ...}; hand it to etcd.Client as a
    # (cert, key) tuple, or as None when no client certificate was supplied
    # (indexing the former "" default raised TypeError).
    cert = module.params["cert"]
    module.params["cert"] = (cert["cert"], cert["key"]) if cert else None

    size_limit = module.params.pop("size_limit_bytes")
    paths = module.params.pop("paths")

    limit_exceeded = False

    try:
        # pylint: disable=no-member
        client = etcd.Client(**module.params)
    except AttributeError as attrerr:
        # The EtcdMock stand-in has no "Client" attribute, so a failed import
        # of python-etcd surfaces here.
        msg = str(attrerr)
        if IMPORT_EXCEPTION_MSG:
            msg = IMPORT_EXCEPTION_MSG
            # Python 2 reports "No module named etcd", Python 3 reports
            # "No module named 'etcd'"; match both.
            if "No module named" in IMPORT_EXCEPTION_MSG and "etcd" in IMPORT_EXCEPTION_MSG:
                # pylint: disable=redefined-variable-type
                msg = ('Unable to import the python "etcd" dependency. '
                       'Make sure python-etcd is installed on the host.')

        module.exit_json(
            failed=True,
            changed=False,
            size_limit_exceeded=limit_exceeded,
            msg=msg,
        )

        return

    size = 0
    for path in paths:
        # each path only gets the part of the limit not used by earlier paths
        path_size, limit_exceeded = check_etcd_key_size(client, path, size_limit - size)
        size += path_size

        if limit_exceeded:
            break

    module.exit_json(
        changed=False,
        size_limit_exceeded=limit_exceeded,
    )


if __name__ == '__main__':
    main()


# ---------------------------------------------------------------------------
# new file: roles/openshift_health_checker/openshift_checks/etcd_imagedata_size.py
# ---------------------------------------------------------------------------
"""
Ansible module for determining if the size of OpenShift image data exceeds a specified limit in an etcd cluster.
"""

from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var


class EtcdImageDataSize(OpenShiftCheck):
    """Check that total size of OpenShift image data does not exceed the recommended limit in an etcd cluster"""

    name = "etcd_imagedata_size"
    tags = ["etcd"]

    def run(self, tmp, task_vars):
        """Run the ``etcdkeysize`` module against every etcd host.

        The limit is ``etcd_max_image_data_size_bytes`` when set, otherwise
        half of the space currently in use on the etcd mount.

        Returns a result dict; it has ``failed: True`` and a ``msg`` when a
        host cannot be queried or holds more image data than the limit.
        Raises OpenShiftCheckException when no usable mount is found.
        """
        etcd_mountpath = self._get_etcd_mountpath(get_var(task_vars, "ansible_mounts"))
        etcd_avail_diskspace = etcd_mountpath["size_available"]
        etcd_total_diskspace = etcd_mountpath["size_total"]

        etcd_imagedata_size_limit = get_var(task_vars,
                                            "etcd_max_image_data_size_bytes",
                                            default=int(0.5 * float(etcd_total_diskspace - etcd_avail_diskspace)))

        etcd_is_ssl = get_var(task_vars, "openshift", "master", "etcd_use_ssl", default=False)
        etcd_port = get_var(task_vars, "openshift", "master", "etcd_port", default=2379)
        etcd_hosts = get_var(task_vars, "openshift", "master", "etcd_hosts")

        config_base = get_var(task_vars, "openshift", "common", "config_base")

        cert = task_vars.get("etcd_client_cert", config_base + "/master/master.etcd-client.crt")
        key = task_vars.get("etcd_client_key", config_base + "/master/master.etcd-client.key")
        ca_cert = task_vars.get("etcd_client_ca_cert", config_base + "/master/master.etcd-ca.crt")

        for etcd_host in etcd_hosts:
            args = {
                "size_limit_bytes": etcd_imagedata_size_limit,
                "paths": ["/openshift.io/images", "/openshift.io/imagestreams"],
                "host": etcd_host,
                "port": etcd_port,
                "protocol": "https" if etcd_is_ssl else "http",
                "version_prefix": "/v2",
                "allow_redirect": True,
                "ca_cert": ca_cert,
                "cert": {
                    "cert": cert,
                    "key": key,
                },
            }

            etcdkeysize = self.module_executor("etcdkeysize", args, task_vars)

            if etcdkeysize.get("rc", 0) != 0 or etcdkeysize.get("failed"):
                msg = 'Failed to retrieve stats for etcd host "{host}": {reason}'
                # prefer the raw stderr of a crashed module over its "msg"
                reason = etcdkeysize.get("msg")
                if etcdkeysize.get("module_stderr"):
                    reason = etcdkeysize["module_stderr"]

                msg = msg.format(host=etcd_host, reason=reason)
                return {"failed": True, "changed": False, "msg": msg}

            if etcdkeysize["size_limit_exceeded"]:
                limit = self._to_gigabytes(etcd_imagedata_size_limit)
                msg = ("The size of OpenShift image data stored in etcd host "
                       "\"{host}\" exceeds the maximum recommended limit of {limit:.2f} GB. "
                       "Use the `oadm prune images` command to cleanup unused Docker images.")
                # "changed" is reported here too, like on every other return path
                return {"failed": True, "changed": False, "msg": msg.format(host=etcd_host, limit=limit)}

        return {"changed": False}

    @staticmethod
    def _get_etcd_mountpath(ansible_mounts):
        """Return the entry of ``ansible_mounts`` for the most specific known etcd path.

        Raises OpenShiftCheckException when none of the candidate paths is mounted.
        """
        # ordered from most to least specific; the first one mounted wins
        valid_etcd_mount_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]

        mount_for_path = {mnt.get("mount"): mnt for mnt in ansible_mounts}
        for path in valid_etcd_mount_paths:
            if path in mount_for_path:
                return mount_for_path[path]

        paths = ', '.join(sorted(mount_for_path)) or 'none'
        msg = "Unable to determine a valid etcd mountpath. Paths mounted: {}.".format(paths)
        raise OpenShiftCheckException(msg)

    @staticmethod
    def _to_gigabytes(byte_size):
        """Convert a byte count to decimal gigabytes (10**9 bytes)."""
        return float(byte_size) / 10.0**9


# ---------------------------------------------------------------------------
# new file: roles/openshift_health_checker/test/etcd_imagedata_size_test.py
# ---------------------------------------------------------------------------
import pytest

from collections import namedtuple
from openshift_checks.etcd_imagedata_size import EtcdImageDataSize, OpenShiftCheckException
from etcdkeysize import check_etcd_key_size


def fake_etcd_client(root):
    """Return a fake client whose ``read(key, recursive)`` serves nodes from the ``root`` tree.

    Reading a key that is not in the tree raises KeyError.
    """
    fake_nodes = dict()
    fake_etcd_node(root, fake_nodes)

    clientclass = namedtuple("client", ["read"])
    return clientclass(lambda key, recursive: fake_etcd_result(fake_nodes[key]))


def fake_etcd_result(fake_node):
    """Wrap a fake node in a result exposing ``leaves``.

    A non-directory node is its own only leaf; a directory yields its ``leaves``.
    """
    resultclass = namedtuple("result", ["leaves"])
    if not fake_node.dir:
        return resultclass([fake_node])

    return resultclass(fake_node.leaves)


def fake_etcd_node(node, visited):
    """Convert a node dict (and, recursively, its leaves) into a namedtuple.

    Every converted node is registered in ``visited`` under its key; a later
    node with the same key replaces an earlier one. The ``node`` dict itself
    is left unmodified.

    Raises ValueError if ``node`` lacks the "dir" or "key" field.
    """
    min_req_fields = ["dir", "key"]
    fields = list(node)

    # validate before touching node["dir"], so that a malformed node yields the
    # descriptive ValueError rather than a bare KeyError
    if set(min_req_fields) - set(fields):
        raise ValueError("fake etcd nodes require at least {} fields.".format(min_req_fields))

    leaves = []
    if node["dir"] and node.get("leaves"):
        for leaf in node["leaves"]:
            leaves.append(fake_etcd_node(leaf, visited))

    # work on a copy so the caller's tree definition is not rewritten in place
    attrs = dict(node)
    if attrs.get("leaves"):
        attrs["leaves"] = leaves

    nodeclass = namedtuple("node", fields)
    nodeinst = nodeclass(**attrs)
    visited[nodeinst.key] = nodeinst

    return nodeinst


def root_mount(size_available, size_total):
    """Return an ``ansible_mounts`` list holding a single "/" mount."""
    return [{
        'mount': '/',
        'size_available': size_available,
        'size_total': size_total,
    }]


def nested_tree():
    """Return a fresh fake etcd tree of height 3.

    The values add up to 37 bytes for the whole tree and 21 bytes below "/foo5".
    """
    return {
        "dir": True,
        "key": "/",
        "leaves": [
            {"dir": False, "key": "/foo1", "value": "1234"},
            {"dir": False, "key": "/foo2", "value": "1234"},
            {"dir": False, "key": "/foo3", "value": "1234"},
            {"dir": False, "key": "/foo4", "value": "1234"},
            {
                "dir": True,
                "key": "/foo5",
                "leaves": [
                    {"dir": False, "key": "/foo/bar1", "value": "56789"},
                    {"dir": False, "key": "/foo/bar2", "value": "56789"},
                    {"dir": False, "key": "/foo/bar3", "value": "56789"},
                    {
                        "dir": True,
                        "key": "/foo/bar4",
                        "leaves": [
                            {"dir": False, "key": "/foo/bar/baz1", "value": "123"},
                            {"dir": False, "key": "/foo/bar/baz2", "value": "123"},
                        ]
                    },
                ]
            },
        ]
    }


@pytest.mark.parametrize('ansible_mounts,extra_words', [
    ([], ['none']),  # empty ansible_mounts
    ([{'mount': '/mnt'}], ['/mnt']),  # missing relevant mount paths
])
def test_cannot_determine_available_mountpath(ansible_mounts, extra_words):
    task_vars = dict(
        ansible_mounts=ansible_mounts,
    )
    check = EtcdImageDataSize(execute_module=fake_execute_module)

    with pytest.raises(OpenShiftCheckException) as excinfo:
        check.run(tmp=None, task_vars=task_vars)

    for word in 'determine valid etcd mountpath'.split() + extra_words:
        assert word in str(excinfo.value)


@pytest.mark.parametrize('ansible_mounts,tree,size_limit,should_fail,extra_words', [
    (
        # test that default image size limit evals to 1/2 * (total size in use)
        root_mount(40 * 10**9, 80 * 10**9),
        {"dir": False, "key": "/", "value": "1234"},
        None,
        False,
        [],
    ),
    (
        root_mount(40 * 10**9, 48 * 10**9),
        {"dir": False, "key": "/", "value": "1234"},
        None,
        False,
        [],
    ),
    (
        # set max size limit for image data to be below total node value
        # total node value is defined as the sum of the value field
        # from every node
        root_mount(40 * 10**9, 48 * 10**9),
        {"dir": False, "key": "/", "value": "12345678"},
        7,
        True,
        ["exceeds the maximum recommended limit", "0.00 GB"],
    ),
    (
        # a single byte in use makes the default limit evaluate to 0
        root_mount(48 * 10**9 - 1, 48 * 10**9),
        {"dir": False, "key": "/", "value": "1234"},
        None,
        True,
        ["exceeds the maximum recommended limit", "0.00 GB"],
    )
])
def test_check_etcd_key_size_calculates_correct_limit(ansible_mounts, tree, size_limit, should_fail, extra_words):
    def execute_module(module_name, args, tmp=None, task_vars=None):
        if module_name != "etcdkeysize":
            return {
                "changed": False,
            }

        client = fake_etcd_client(tree)
        _, limit_exceeded = check_etcd_key_size(client, tree["key"], args["size_limit_bytes"])

        return {"size_limit_exceeded": limit_exceeded}

    task_vars = dict(
        etcd_max_image_data_size_bytes=size_limit,
        ansible_mounts=ansible_mounts,
        openshift=dict(
            master=dict(etcd_hosts=["localhost"]),
            common=dict(config_base="/var/lib/origin")
        )
    )
    if size_limit is None:
        task_vars.pop("etcd_max_image_data_size_bytes")

    check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars)

    if should_fail:
        assert check["failed"]

        for word in extra_words:
            assert word in check["msg"]
    else:
        assert not check.get("failed", False)


@pytest.mark.parametrize('ansible_mounts,tree,root_path,expected_size,extra_words', [
    (
        root_mount(40 * 10**9, 80 * 10**9),
        # test recursive size check on tree with height > 1
        nested_tree(),
        "/",
        37,
        [],
    ),
    (
        root_mount(40 * 10**9, 80 * 10**9),
        # test correct sub-tree size calculation
        nested_tree(),
        "/foo5",
        21,
        [],
    ),
    (
        root_mount(40 * 10**9, 80 * 10**9),
        # test that a non-existing key is handled correctly
        {
            "dir": False,
            "key": "/",
            "value": "1234",
        },
        "/missing",
        0,
        [],
    ),
    (
        root_mount(40 * 10**9, 80 * 10**9),
        # test etcd cycle handling
        {
            "dir": True,
            "key": "/",
            "leaves": [
                {"dir": False, "key": "/foo1", "value": "1234"},
                {"dir": False, "key": "/foo2", "value": "1234"},
                {"dir": False, "key": "/foo3", "value": "1234"},
                {"dir": False, "key": "/foo4", "value": "1234"},
                {
                    "dir": True,
                    "key": "/",
                    "leaves": [
                        {"dir": False, "key": "/foo1", "value": "1"},
                    ],
                },
            ]
        },
        "/",
        16,
        [],
    ),
])
def test_etcd_key_size_check_calculates_correct_size(ansible_mounts, tree, root_path, expected_size, extra_words):
    def execute_module(module_name, args, tmp=None, task_vars=None):
        if module_name != "etcdkeysize":
            return {
                "changed": False,
            }

        client = fake_etcd_client(tree)
        size, limit_exceeded = check_etcd_key_size(client, root_path, args["size_limit_bytes"])

        assert size == expected_size
        return {
            "size_limit_exceeded": limit_exceeded,
        }

    task_vars = dict(
        ansible_mounts=ansible_mounts,
        openshift=dict(
            master=dict(etcd_hosts=["localhost"]),
            common=dict(config_base="/var/lib/origin")
        )
    )

    check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars)
    assert not check.get("failed", False)


def test_etcdkeysize_module_failure():
    # same signature as the other stubs, so "args" no longer lands in "tmp"
    def execute_module(module_name, args, tmp=None, task_vars=None):
        if module_name != "etcdkeysize":
            return {
                "changed": False,
            }

        return {
            "rc": 1,
            "module_stderr": "failure",
        }

    task_vars = dict(
        ansible_mounts=root_mount(40 * 10**9, 80 * 10**9),
        openshift=dict(
            master=dict(etcd_hosts=["localhost"]),
            common=dict(config_base="/var/lib/origin")
        )
    )

    check = EtcdImageDataSize(execute_module=execute_module).run(tmp=None, task_vars=task_vars)

    assert check["failed"]
    # split() so that whole words are checked, not single characters
    for word in "Failed to retrieve stats".split():
        assert word in check["msg"]


def fake_execute_module(*args):
    """Stub for checks that must fail before any module is executed."""
    raise AssertionError('this function should not be called')
