diff options
33 files changed, 740 insertions, 133 deletions
| diff --git a/README_GCE.md b/README_GCE.md index f6c5138c1..50f8ade70 100644 --- a/README_GCE.md +++ b/README_GCE.md @@ -39,6 +39,13 @@ Create a gce.ini file for GCE  * gce_service_account_pem_file_path - Full path from previous steps  * gce_project_id - Found in "Projects", it list all the gce projects you are associated with.  The page lists their "Project Name" and "Project ID".  You want the "Project ID" +Mandatory customization variables (check the values according to your tenant): +* zone = europe-west1-d +* network = default +* gce_machine_type = n1-standard-2 +* gce_machine_image = preinstalled-slave-50g-v5 + +  1. vi ~/.gce/gce.ini  1. make the contents look like this:  ``` @@ -46,11 +53,15 @@ Create a gce.ini file for GCE  gce_service_account_email_address = long...@developer.gserviceaccount.com  gce_service_account_pem_file_path = /full/path/to/project_id-gce_key_hash.pem  gce_project_id = project_id +zone = europe-west1-d +network = default +gce_machine_type = n1-standard-2 +gce_machine_image = preinstalled-slave-50g-v5 +  ``` -1. Setup a sym link so that gce.py will pick it up (link must be in same dir as gce.py) +1. Define the environment variable GCE_INI_PATH so gce.py can pick it up and bin/cluster can also read it  ``` -  cd openshift-ansible/inventory/gce -  ln -s ~/.gce/gce.ini gce.ini +export GCE_INI_PATH=~/.gce/gce.ini  ``` diff --git a/README_libvirt.md b/README_libvirt.md index 1a710ff3b..3f8bbb5f0 100644 --- a/README_libvirt.md +++ b/README_libvirt.md @@ -94,7 +94,8 @@ dns=dnsmasq  -	Configure dnsmasq to use the Virtual Network router for example.com:  ```sh -sudo vi /etc/NetworkManager/dnsmasq.d/libvirt_dnsmasq.conf server=/example.com/192.168.55.1 +sudo vi /etc/NetworkManager/dnsmasq.d/libvirt_dnsmasq.conf +server=/example.com/192.168.55.1  ```  Test The Setup diff --git a/bin/cluster b/bin/cluster index 96dcf01e8..59a6755d3 100755 --- a/bin/cluster +++ b/bin/cluster @@ -137,10 +137,14 @@ class Cluster(object):          """          config = ConfigParser.ConfigParser()          if 'gce' == provider: -            config.readfp(open('inventory/gce/hosts/gce.ini')) - -            for key in config.options('gce'): -                os.environ[key] = config.get('gce', key) +            gce_ini_default_path = os.path.join( +                'inventory/gce/hosts/gce.ini') +            gce_ini_path = os.environ.get('GCE_INI_PATH', gce_ini_default_path) +            if os.path.exists(gce_ini_path):  +                config.readfp(open(gce_ini_path)) + +                for key in config.options('gce'): +                    os.environ[key] = config.get('gce', key)              inventory = '-i inventory/gce/hosts'          elif 'aws' == provider: diff --git a/inventory/gce/hosts/gce.py b/inventory/gce/hosts/gce.py index 3403f735e..6ed12e011 100755 --- a/inventory/gce/hosts/gce.py +++ b/inventory/gce/hosts/gce.py @@ -120,6 +120,7 @@ class GceInventory(object):              os.path.dirname(os.path.realpath(__file__)), "gce.ini")          gce_ini_path = os.environ.get('GCE_INI_PATH', gce_ini_default_path) +          # Create a ConfigParser.          # This provides empty defaults to each key, so that environment          # variable configuration (as opposed to INI configuration) is able @@ -173,6 +174,7 @@ class GceInventory(object):          args[1] = os.environ.get('GCE_PEM_FILE_PATH', args[1])          kwargs['project'] = os.environ.get('GCE_PROJECT', kwargs['project']) +                  # Retrieve and return the GCE driver.          gce = get_driver(Provider.GCE)(*args, **kwargs)          gce.connection.user_agent_append( @@ -211,7 +213,8 @@ class GceInventory(object):              'gce_image': inst.image,              'gce_machine_type': inst.size,              'gce_private_ip': inst.private_ips[0], -            'gce_public_ip': inst.public_ips[0], +            # Hosts don't always have a public IP name +            #'gce_public_ip': inst.public_ips[0],              'gce_name': inst.name,              'gce_description': inst.extra['description'],              'gce_status': inst.extra['status'], @@ -219,8 +222,8 @@ class GceInventory(object):              'gce_tags': inst.extra['tags'],              'gce_metadata': md,              'gce_network': net, -            # Hosts don't have a public name, so we add an IP -            'ansible_ssh_host': inst.public_ips[0] +            # Hosts don't always have a public IP name +            #'ansible_ssh_host': inst.public_ips[0]          }      def get_instance(self, instance_name): diff --git a/inventory/openstack/hosts/nova.py b/inventory/openstack/hosts/nova.py index d5bd8d1ee..3197a57bc 100755 --- a/inventory/openstack/hosts/nova.py +++ b/inventory/openstack/hosts/nova.py @@ -34,7 +34,7 @@ except ImportError:  # executed with no parameters, return the list of  # all groups and hosts -NOVA_CONFIG_FILES = [os.getcwd() + "/nova.ini", +NOVA_CONFIG_FILES = [os.path.join(os.path.dirname(os.path.realpath(__file__)), "nova.ini"),                       os.path.expanduser(os.environ.get('ANSIBLE_CONFIG', "~/nova.ini")),                       "/etc/ansible/nova.ini"] diff --git a/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml index c9ae923bb..b6a2d2f26 100644 --- a/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml +++ b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml @@ -27,9 +27,8 @@    gather_facts: no    vars: -    cli_volume_type: io1 +    cli_volume_type: gp2      cli_volume_size: 30 -    cli_volume_iops: "{{ 30 * cli_volume_size }}"    pre_tasks:    - fail: @@ -104,7 +103,6 @@        volume_size: "{{ cli_volume_size | default(30, True)}}"        volume_type: "{{ cli_volume_type }}"        device_name: /dev/xvdb -      iops: "{{ 30 * cli_volume_size }}"      register: vol    - debug: var=vol @@ -142,10 +140,3 @@    - debug: var=dockerstart -  - name: Wait for docker to stabilize -    pause: -      seconds: 30 - -  # leaving off the '-t' for docker exec.  With it, it doesn't work with ansible and tty support -  - name: update zabbix docker items -    command: docker exec -i oso-rhel7-zagg-client /usr/local/bin/cron-send-docker-metrics.py diff --git a/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml b/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml index ef9b45abd..63d473146 100644 --- a/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml +++ b/playbooks/adhoc/grow_docker_vg/grow_docker_vg.yml @@ -172,7 +172,7 @@    - name: pvmove onto new volume      command: "pvmove {{ docker_pv_name.stdout }} /dev/xvdc1" -    async: 3600 +    async: 43200      poll: 10    - name: Remove the old docker drive from the volume group diff --git a/playbooks/common/openshift-cluster/set_infra_launch_facts_tasks.yml b/playbooks/common/openshift-cluster/set_infra_launch_facts_tasks.yml new file mode 100644 index 000000000..01d70a1a6 --- /dev/null +++ b/playbooks/common/openshift-cluster/set_infra_launch_facts_tasks.yml @@ -0,0 +1,15 @@ +--- +- set_fact: k8s_type=infra +- set_fact: sub_host_type="{{ type }}" +- set_fact: number_infra="{{ count }}" + +- name: Generate infra  instance names(s) +  set_fact: +    scratch_name: "{{ cluster_id }}-{{ k8s_type }}-{{ sub_host_type }}-{{ '%05x' | format(1048576 | random) }}" +  register: infra_names_output +  with_sequence: count={{ number_infra }} + +- set_fact: +    infra_names: "{{ infra_names_output.results | default([], true) +                    | oo_collect('ansible_facts') +                    | oo_collect('scratch_name') }}" diff --git a/playbooks/gce/openshift-cluster/config.yml b/playbooks/gce/openshift-cluster/config.yml index fd5dfcc72..7bd3f1a56 100644 --- a/playbooks/gce/openshift-cluster/config.yml +++ b/playbooks/gce/openshift-cluster/config.yml @@ -10,6 +10,8 @@    - set_fact:        g_ssh_user_tmp: "{{ deployment_vars[deployment_type].ssh_user }}"        g_sudo_tmp: "{{ deployment_vars[deployment_type].sudo }}" +      use_sdn: "{{ do_we_use_openshift_sdn }}" +      sdn_plugin: "{{ sdn_network_plugin }}"  - include: ../../common/openshift-cluster/config.yml    vars: @@ -22,3 +24,5 @@      openshift_debug_level: 2      openshift_deployment_type: "{{ deployment_type }}"      openshift_hostname: "{{ gce_private_ip }}" +    openshift_use_openshift_sdn: "{{ hostvars.localhost.use_sdn  }}" +    os_sdn_network_plugin_name: "{{ hostvars.localhost.sdn_plugin }}" diff --git a/playbooks/gce/openshift-cluster/join_node.yml b/playbooks/gce/openshift-cluster/join_node.yml new file mode 100644 index 000000000..0dfa3e9d7 --- /dev/null +++ b/playbooks/gce/openshift-cluster/join_node.yml @@ -0,0 +1,49 @@ +--- +- name: Populate oo_hosts_to_update group +  hosts: localhost +  gather_facts: no +  vars_files: +  - vars.yml +  tasks: +  - name: Evaluate oo_hosts_to_update +    add_host: +      name: "{{ node_ip }}" +      groups: oo_hosts_to_update +      ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user }}" +      ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" + +- include: ../../common/openshift-cluster/update_repos_and_packages.yml + +- name: Populate oo_masters_to_config host group +  hosts: localhost +  gather_facts: no +  vars_files: +  - vars.yml +  tasks: +  - name: Evaluate oo_nodes_to_config +    add_host: +      name: "{{ node_ip }}" +      ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user }}" +      ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" +      groups: oo_nodes_to_config + +  - name: Evaluate oo_first_master +    add_host: +      name: "{{ groups['tag_env-host-type-' ~ cluster_id ~ '-openshift-master'][0] }}" +      ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user }}" +      ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" +      groups: oo_first_master +    when: "'tag_env-host-type-{{ cluster_id }}-openshift-master' in groups" + +#- include: config.yml +- include: ../../common/openshift-node/config.yml +  vars: +    openshift_cluster_id: "{{ cluster_id }}" +    openshift_debug_level: 4 +    openshift_deployment_type: "{{ deployment_type }}" +    openshift_hostname: "{{ ansible_default_ipv4.address }}" +    openshift_use_openshift_sdn: true +    openshift_node_labels: "{{ lookup('oo_option', 'openshift_node_labels') }} " +    os_sdn_network_plugin_name: "redhat/openshift-ovs-subnet" +    osn_cluster_dns_domain: "{{ hostvars[groups.oo_first_master.0].openshift.dns.domain }}" +    osn_cluster_dns_ip: "{{ hostvars[groups.oo_first_master.0].openshift.dns.ip }}" diff --git a/playbooks/gce/openshift-cluster/launch.yml b/playbooks/gce/openshift-cluster/launch.yml index 7a3b80da0..94e57fe4e 100644 --- a/playbooks/gce/openshift-cluster/launch.yml +++ b/playbooks/gce/openshift-cluster/launch.yml @@ -28,33 +28,33 @@        type: "{{ k8s_type }}"        g_sub_host_type: "{{ sub_host_type }}" -  - include: ../../common/openshift-cluster/set_node_launch_facts_tasks.yml -    vars: -      type: "infra" -      count: "{{ num_infra }}" -  - include: tasks/launch_instances.yml -    vars: -      instances: "{{ infra_names }}" -      cluster: "{{ cluster_id }}" -      type: "{{ k8s_type }}" -      g_sub_host_type: "{{ sub_host_type }}" - -  - set_fact: -      a_infra: "{{ infra_names[0] }}" -  - add_host: name={{ a_infra }} groups=service_master - +#  - include: ../../common/openshift-cluster/set_infra_launch_facts_tasks.yml +#    vars: +#      type: "infra" +#      count: "{{ num_infra }}" +#  - include: tasks/launch_instances.yml +#    vars: +#      instances: "{{ infra_names }}" +#      cluster: "{{ cluster_id }}" +#      type: "{{ k8s_type }}" +#      g_sub_host_type: "{{ sub_host_type }}" +# +#  - set_fact: +#      a_infra: "{{ infra_names[0] }}" +#  - add_host: name={{ a_infra }} groups=service_master +#  - include: update.yml - -- name: Deploy OpenShift Services -  hosts: service_master -  connection: ssh -  gather_facts: yes -  roles: -  - openshift_registry -  - openshift_router - -- include: ../../common/openshift-cluster/create_services.yml -  vars: -     g_svc_master: "{{ service_master }}" +# +#- name: Deploy OpenShift Services +#  hosts: service_master +#  connection: ssh +#  gather_facts: yes +#  roles: +#  - openshift_registry +#  - openshift_router +# +#- include: ../../common/openshift-cluster/create_services.yml +#  vars: +#     g_svc_master: "{{ service_master }}"  - include: list.yml diff --git a/playbooks/gce/openshift-cluster/list.yml b/playbooks/gce/openshift-cluster/list.yml index 5ba0f5a48..53b2b9a5e 100644 --- a/playbooks/gce/openshift-cluster/list.yml +++ b/playbooks/gce/openshift-cluster/list.yml @@ -14,11 +14,11 @@        groups: oo_list_hosts        ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user | default(ansible_ssh_user, true) }}"        ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" -    with_items: groups[scratch_group] | default([]) | difference(['localhost']) | difference(groups.status_terminated) +    with_items: groups[scratch_group] | default([], true) | difference(['localhost']) | difference(groups.status_terminated | default([], true))  - name: List instance(s)    hosts: oo_list_hosts    gather_facts: no    tasks:    - debug: -      msg: "public ip:{{ hostvars[inventory_hostname].gce_public_ip }} private ip:{{ hostvars[inventory_hostname].gce_private_ip }}" +      msg: "private ip:{{ hostvars[inventory_hostname].gce_private_ip }}" diff --git a/playbooks/gce/openshift-cluster/tasks/launch_instances.yml b/playbooks/gce/openshift-cluster/tasks/launch_instances.yml index 6307ecc27..e300b5b5a 100644 --- a/playbooks/gce/openshift-cluster/tasks/launch_instances.yml +++ b/playbooks/gce/openshift-cluster/tasks/launch_instances.yml @@ -10,33 +10,38 @@      service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}"      pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}"      project_id: "{{ lookup('env', 'gce_project_id') }}" +    zone: "{{ lookup('env', 'zone') }}" +    network: "{{ lookup('env', 'network') }}" +# unsupported in 1.9.+ +    #service_account_permissions: "datastore,logging-write"      tags:        - created-by-{{ lookup('env', 'LOGNAME') |default(cluster, true) }}        - env-{{ cluster }}        - host-type-{{ type }} -      - sub-host-type-{{ sub_host_type }} +      - sub-host-type-{{ g_sub_host_type }}        - env-host-type-{{ cluster }}-openshift-{{ type }} +  when: instances |length > 0     register: gce  - name: Add new instances to groups and set variables needed    add_host:      hostname: "{{ item.name }}" -    ansible_ssh_host: "{{ item.public_ip }}" +    ansible_ssh_host: "{{ item.name }}"      ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user | default(ansible_ssh_user, true) }}"      ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}"      groups: "{{ item.tags | oo_prepend_strings_in_list('tag_') | join(',') }}"      gce_public_ip: "{{ item.public_ip }}"      gce_private_ip: "{{ item.private_ip }}" -  with_items: gce.instance_data +  with_items: gce.instance_data | default([], true)  - name: Wait for ssh -  wait_for: port=22 host={{ item.public_ip }} -  with_items: gce.instance_data +  wait_for: port=22 host={{ item.name }} +  with_items: gce.instance_data | default([], true)  - name: Wait for user setup    command: "ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o ConnectTimeout=10 -o UserKnownHostsFile=/dev/null {{ hostvars[item.name].ansible_ssh_user }}@{{ item.public_ip }} echo {{ hostvars[item.name].ansible_ssh_user }} user is setup"    register: result    until: result.rc == 0 -  retries: 20 -  delay: 10 -  with_items: gce.instance_data +  retries: 30 +  delay: 5 +  with_items: gce.instance_data | default([], true) diff --git a/playbooks/gce/openshift-cluster/terminate.yml b/playbooks/gce/openshift-cluster/terminate.yml index 098b0df73..e20e0a8bc 100644 --- a/playbooks/gce/openshift-cluster/terminate.yml +++ b/playbooks/gce/openshift-cluster/terminate.yml @@ -1,25 +1,18 @@  ---  - name: Terminate instance(s)    hosts: localhost +  connection: local    gather_facts: no    vars_files:    - vars.yml    tasks: -  - set_fact: scratch_group=tag_env-host-type-{{ cluster_id }}-openshift-node +  - set_fact: scratch_group=tag_env-{{ cluster_id }}    - add_host:        name: "{{ item }}" -      groups: oo_hosts_to_terminate, oo_nodes_to_terminate +      groups: oo_hosts_to_terminate        ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user | default(ansible_ssh_user, true) }}"        ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" -    with_items: groups[scratch_group] | default([]) | difference(['localhost']) | difference(groups.status_terminated) - -  - set_fact: scratch_group=tag_env-host-type-{{ cluster_id }}-openshift-master -  - add_host: -      name: "{{ item }}" -      groups: oo_hosts_to_terminate, oo_masters_to_terminate -      ansible_ssh_user: "{{ deployment_vars[deployment_type].ssh_user | default(ansible_ssh_user, true) }}" -      ansible_sudo: "{{ deployment_vars[deployment_type].sudo }}" -    with_items: groups[scratch_group] | default([]) | difference(['localhost']) | difference(groups.status_terminated) +    with_items: groups[scratch_group] | default([], true) | difference(['localhost']) | difference(groups.status_terminated | default([], true))  - name: Unsubscribe VMs    hosts: oo_hosts_to_terminate @@ -32,14 +25,34 @@            lookup('oo_option', 'rhel_skip_subscription') | default(rhsub_skip, True) |              default('no', True) | lower in ['no', 'false'] -- include: ../openshift-node/terminate.yml -  vars: -    gce_service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" -    gce_pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" -    gce_project_id: "{{ lookup('env', 'gce_project_id') }}" +- name: Terminate instances(s) +  hosts: localhost +  connection: local +  gather_facts: no +  vars_files: +  - vars.yml +  tasks: + +    - name: Terminate instances that were previously launched +      local_action: +        module: gce +        state: 'absent' +        name: "{{ item }}" +        service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" +        pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" +        project_id: "{{ lookup('env', 'gce_project_id') }}" +        zone: "{{ lookup('env', 'zone') }}" +      with_items: groups['oo_hosts_to_terminate'] | default([], true) +      when: item is defined -- include: ../openshift-master/terminate.yml -  vars: -    gce_service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" -    gce_pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" -    gce_project_id: "{{ lookup('env', 'gce_project_id') }}" +#- include: ../openshift-node/terminate.yml +#  vars: +#    gce_service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" +#    gce_pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" +#    gce_project_id: "{{ lookup('env', 'gce_project_id') }}" +# +#- include: ../openshift-master/terminate.yml +#  vars: +#    gce_service_account_email: "{{ lookup('env', 'gce_service_account_email_address') }}" +#    gce_pem_file: "{{ lookup('env', 'gce_service_account_pem_file_path') }}" +#    gce_project_id: "{{ lookup('env', 'gce_project_id') }}" diff --git a/playbooks/gce/openshift-cluster/vars.yml b/playbooks/gce/openshift-cluster/vars.yml index ae33083b9..6de007807 100644 --- a/playbooks/gce/openshift-cluster/vars.yml +++ b/playbooks/gce/openshift-cluster/vars.yml @@ -1,8 +1,11 @@  --- +do_we_use_openshift_sdn: true +sdn_network_plugin: redhat/openshift-ovs-subnet  +# os_sdn_network_plugin_name can be ovssubnet or multitenant, see https://docs.openshift.org/latest/architecture/additional_concepts/sdn.html#ovssubnet-plugin-operation  deployment_vars:    origin: -    image: centos-7 -    ssh_user: +    image: preinstalled-slave-50g-v5 +    ssh_user: root      sudo: yes    online:      image: libra-rhel7 @@ -12,4 +15,3 @@ deployment_vars:      image: rhel-7      ssh_user:      sudo: yes - diff --git a/roles/lib_zabbix/library/zbx_item.py b/roles/lib_zabbix/library/zbx_item.py index 2ccc21292..6faa82dfc 100644 --- a/roles/lib_zabbix/library/zbx_item.py +++ b/roles/lib_zabbix/library/zbx_item.py @@ -53,6 +53,8 @@ def get_value_type(value_type):      vtype = 0      if 'int' in value_type:          vtype = 3 +    elif 'log' in value_type: +        vtype = 2      elif 'char' in value_type:          vtype = 1      elif 'str' in value_type: diff --git a/roles/lib_zabbix/library/zbx_itemprototype.py b/roles/lib_zabbix/library/zbx_itemprototype.py index 4ec1b8e02..e7fd6fa21 100644 --- a/roles/lib_zabbix/library/zbx_itemprototype.py +++ b/roles/lib_zabbix/library/zbx_itemprototype.py @@ -128,12 +128,12 @@ def get_status(status):      return _status -def get_app_ids(zapi, application_names): +def get_app_ids(zapi, application_names, templateid):      ''' get application ids from names      '''      app_ids = []      for app_name in application_names: -        content = zapi.get_content('application', 'get', {'search': {'name': app_name}}) +        content = zapi.get_content('application', 'get', {'filter': {'name': app_name}, 'templateids': templateid})          if content.has_key('result'):              app_ids.append(content['result'][0]['applicationid'])      return app_ids @@ -212,7 +212,7 @@ def main():                    'ruleid': get_rule_id(zapi, module.params['discoveryrule_key'], template['templateid']),                    'type': get_type(module.params['ztype']),                    'value_type': get_value_type(module.params['value_type']), -                  'applications': get_app_ids(zapi, module.params['applications']), +                  'applications': get_app_ids(zapi, module.params['applications'], template['templateid']),                    'description': module.params['description'],                   } diff --git a/roles/lib_zabbix/library/zbx_trigger.py b/roles/lib_zabbix/library/zbx_trigger.py index 21d0fcfd2..ab7731faa 100644 --- a/roles/lib_zabbix/library/zbx_trigger.py +++ b/roles/lib_zabbix/library/zbx_trigger.py @@ -86,6 +86,24 @@ def get_trigger_status(inc_status):      return r_status +def get_template_id(zapi, template_name): +    ''' +    get related templates +    ''' +    template_ids = [] +    app_ids = {} +    # Fetch templates by name +    content = zapi.get_content('template', +                               'get', +                               {'search': {'host': template_name}, +                                'selectApplications': ['applicationid', 'name']}) +    if content.has_key('result'): +        template_ids.append(content['result'][0]['templateid']) +        for app in content['result'][0]['applications']: +            app_ids[app['name']] = app['applicationid'] + +    return template_ids, app_ids +  def main():      '''      Create a trigger in zabbix @@ -117,6 +135,7 @@ def main():              url=dict(default=None, type='str'),              status=dict(default=None, type='str'),              state=dict(default='present', type='str'), +            template_name=dict(default=None, type='str'),          ),          #supports_check_mode=True      ) @@ -132,11 +151,16 @@ def main():      state = module.params['state']      tname = module.params['name'] +    templateid = None +    if module.params['template_name']: +        templateid, _ = get_template_id(zapi, module.params['template_name']) +      content = zapi.get_content(zbx_class_name,                                 'get',                                 {'filter': {'description': tname},                                  'expandExpression': True,                                  'selectDependencies': 'triggerid', +                                'templateids': templateid,                                 })      # Get diff --git a/roles/lib_zabbix/library/zbx_user_media.py b/roles/lib_zabbix/library/zbx_user_media.py index 9ed838f81..8895c78c3 100644 --- a/roles/lib_zabbix/library/zbx_user_media.py +++ b/roles/lib_zabbix/library/zbx_user_media.py @@ -260,6 +260,9 @@ def main():          for user in params['users']:              diff['users']['userid'] = user['userid'] +        # Medias have no real unique key so therefore we need to make it like the incoming user's request +        diff['medias'] = medias +          # We have differences and need to update          content = zapi.get_content(zbx_class_name, 'updatemedia', diff) diff --git a/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json b/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json index 0497e6824..5df36ccc2 100644 --- a/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json +++ b/roles/openshift_examples/files/examples/xpaas-templates/eap6-https-sti.json @@ -6,10 +6,10 @@              "iconClass" : "icon-jboss",              "description": "Application template for EAP 6 applications built using STI."          }, -        "name": "eap6-basic-sti" +        "name": "eap6-https-sti"      },      "labels": { -        "template": "eap6-basic-sti" +        "template": "eap6-https-sti"      },      "parameters": [          { diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index 987f7f7da..69bb49c9b 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -480,11 +480,11 @@ def set_deployment_facts_if_unset(facts):          if role in facts:              deployment_type = facts['common']['deployment_type']              if 'registry_url' not in facts[role]: -                registry_url = 'aos3/aos-${component}:${version}' -                if deployment_type in ['enterprise', 'online']: +                registry_url = 'openshift/origin-${component}:${version}' +                if deployment_type in ['enterprise', 'online', 'openshift-enterprise']:                      registry_url = 'openshift3/ose-${component}:${version}' -                elif deployment_type == 'origin': -                    registry_url = 'openshift/origin-${component}:${version}' +                elif deployment_type == 'atomic-enterprise': +                    registry_url = 'aep3/aep-${component}:${version}'                  facts[role]['registry_url'] = registry_url      return facts diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index fd3d20800..6301d4fc0 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -1,5 +1,5 @@  --- -- name: Verify Ansible version is greater than 1.8.0 and not 1.9.0 +- name: Verify Ansible version is greater than 1.8.0 and not 1.9.0 and not 1.9.0.1    assert:      that:      - ansible_version | version_compare('1.8.0', 'ge') diff --git a/roles/openshift_manage_node/tasks/main.yml b/roles/openshift_manage_node/tasks/main.yml index 7c4f45ce6..637e494ea 100644 --- a/roles/openshift_manage_node/tasks/main.yml +++ b/roles/openshift_manage_node/tasks/main.yml @@ -1,21 +1,21 @@  - name: Wait for Node Registration    command: > -      {{ openshift.common.client_binary }} get node {{ item }} +      {{ openshift.common.client_binary }} get node {{ item | lower }}    register: omd_get_node    until: omd_get_node.rc == 0 -  retries: 10 +  retries: 20    delay: 5    with_items: openshift_nodes  - name: Set node schedulability    command: > -    {{ openshift.common.admin_binary }} manage-node {{ item.openshift.common.hostname }} --schedulable={{ 'true' if item.openshift.node.schedulable | bool else 'false' }} +    {{ openshift.common.admin_binary }} manage-node {{ item.openshift.common.hostname | lower }} --schedulable={{ 'true' if item.openshift.node.schedulable | bool else 'false' }}    with_items:      -  "{{ openshift_node_vars }}"  - name: Label nodes    command: > -    {{ openshift.common.client_binary }} label --overwrite node {{ item.openshift.common.hostname }} {{ item.openshift.node.labels | oo_combine_dict  }} +    {{ openshift.common.client_binary }} label --overwrite node {{ item.openshift.common.hostname | lower }} {{ item.openshift.node.labels | oo_combine_dict  }}    with_items:      -  "{{ openshift_node_vars }}"    when: "'labels' in item.openshift.node and item.openshift.node.labels != {}" diff --git a/roles/openshift_master/templates/scheduler.json.j2 b/roles/openshift_master/templates/scheduler.json.j2 index 835f2383e..cb5f43bb2 100644 --- a/roles/openshift_master/templates/scheduler.json.j2 +++ b/roles/openshift_master/templates/scheduler.json.j2 @@ -1,4 +1,6 @@  { +  "kind": "Policy", +  "apiVersion": "v1",    "predicates": [      {"name": "MatchNodeSelector"},      {"name": "PodFitsResources"}, diff --git a/roles/openshift_master/templates/v1_partials/oauthConfig.j2 b/roles/openshift_master/templates/v1_partials/oauthConfig.j2 index 72889bc29..8a4f5a746 100644 --- a/roles/openshift_master/templates/v1_partials/oauthConfig.j2 +++ b/roles/openshift_master/templates/v1_partials/oauthConfig.j2 @@ -80,6 +80,7 @@ oauthConfig:      provider:  {{ identity_provider_config(identity_provider) }}  {%- endfor %} +  masterCA: ca.crt    masterPublicURL: {{ openshift.master.public_api_url }}    masterURL: {{ openshift.master.api_url }}    sessionConfig: diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index e8cc499c0..d45dd8073 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -22,7 +22,7 @@        deployment_type: "{{ openshift_deployment_type }}"    - role: node      local_facts: -      labels: "{{ openshift_node_labels | default(none) }}" +      labels: "{{ lookup('oo_option', 'openshift_node_labels') | default( openshift_node_labels | default(none), true) }}"        annotations: "{{ openshift_node_annotations | default(none) }}"        registry_url: "{{ oreg_url | default(none) }}"        debug_level: "{{ openshift_node_debug_level | default(openshift.common.debug_level) }}" diff --git a/roles/openshift_node/templates/node.yaml.v1.j2 b/roles/openshift_node/templates/node.yaml.v1.j2 index 07d80f99b..946c0b655 100644 --- a/roles/openshift_node/templates/node.yaml.v1.j2 +++ b/roles/openshift_node/templates/node.yaml.v1.j2 @@ -18,7 +18,7 @@ networkPluginName: {{ openshift.common.sdn_network_plugin_name }}  networkConfig:     mtu: {{ openshift.node.sdn_mtu }}     networkPluginName: {{ openshift.common.sdn_network_plugin_name }} -nodeName: {{ openshift.common.hostname }} +nodeName: {{ openshift.common.hostname | lower }}  podManifestConfig:  servingInfo:    bindAddress: 0.0.0.0:10250 diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml index 28e900255..a503b24d7 100644 --- a/roles/os_zabbix/tasks/main.yml +++ b/roles/os_zabbix/tasks/main.yml @@ -13,6 +13,8 @@  - include_vars: template_openshift_master.yml  - include_vars: template_openshift_node.yml  - include_vars: template_ops_tools.yml +- include_vars: template_app_zabbix_server.yml +- include_vars: template_app_zabbix_agent.yml  - name: Include Template Heartbeat    include: ../../lib_zabbix/tasks/create_template.yml @@ -61,3 +63,19 @@      server: "{{ ozb_server }}"      user: "{{ ozb_user }}"      password: "{{ ozb_password }}" + +- name: Include Template App Zabbix Server +  include: ../../lib_zabbix/tasks/create_template.yml +  vars: +    template: "{{ g_template_app_zabbix_server }}" +    server: "{{ ozb_server }}" +    user: "{{ ozb_user }}" +    password: "{{ ozb_password }}" + +- name: Include Template App Zabbix Agent +  include: ../../lib_zabbix/tasks/create_template.yml +  vars: +    template: "{{ g_template_app_zabbix_agent }}" +    server: "{{ ozb_server }}" +    user: "{{ ozb_user }}" +    password: "{{ ozb_password }}" diff --git a/roles/os_zabbix/vars/template_app_zabbix_agent.yml b/roles/os_zabbix/vars/template_app_zabbix_agent.yml new file mode 100644 index 000000000..06c4eda8b --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_agent.yml @@ -0,0 +1,23 @@ +--- +g_template_app_zabbix_agent: +   name: Template App Zabbix Agent +   zitems: +   - key: agent.hostname +     applications: +     - Zabbix agent +     value_type: character +     zabbix_type: '0' + +   - key: agent.ping +     applications: +     - Zabbix agent +     description: The agent always returns 1 for this item. It could be used in combination with nodata() for availability check. +     value_type: int +     zabbix_type: '0' + +   ztriggers: +   - name: '[Reboot] Zabbix agent on {HOST.NAME} is unreachable for 15 minutes' +     description: Zabbix agent is unreachable for 15 minutes. +     expression: '{Template App Zabbix Agent:agent.ping.nodata(15m)}=1' +     priority: high +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_ping.asciidoc diff --git a/roles/os_zabbix/vars/template_app_zabbix_server.yml b/roles/os_zabbix/vars/template_app_zabbix_server.yml new file mode 100644 index 000000000..dace2aa29 --- /dev/null +++ b/roles/os_zabbix/vars/template_app_zabbix_server.yml @@ -0,0 +1,408 @@ +--- +g_template_app_zabbix_server: +   name: Template App Zabbix Server +   zitems: +   - key: housekeeper_creates +     applications: +     - Zabbix server +     description: A simple count of the number of partition creates output by the housekeeper script. +     units: '' +     value_type: int +     zabbix_type: '2' + +   - key: housekeeper_drops +     applications: +     - Zabbix server +     description: A simple count of the number of partition drops output by the housekeeper script. +     units: '' +     value_type: int +     zabbix_type: '2' + +   - key: housekeeper_errors +     applications: +     - Zabbix server +     description: A simple count of the number of errors output by the housekeeper script. +     units: '' +     value_type: int +     zabbix_type: '2' + +   - key: housekeeper_total +     applications: +     - Zabbix server +     description: A simple count of the total number of lines output by the housekeeper +       script. +     units: '' +     value_type: int +     zabbix_type: '2' + +   - key: zabbix[process,alerter,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,configuration syncer,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,db watchdog,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,discoverer,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,escalator,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,history syncer,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,housekeeper,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,http poller,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,icmp pinger,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,ipmi poller,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,java poller,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,node watcher,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,poller,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,proxy poller,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,self-monitoring,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,snmp trapper,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,timer,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,trapper,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[process,unreachable poller,avg,busy] +     applications: +     - Zabbix server +     description: '' +     units: '%' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[queue,10m] +     applications: +     - Zabbix server +     description: '' +     units: '' +     value_type: int +     zabbix_type: '5' + +   - key: zabbix[queue] +     applications: +     - Zabbix server +     description: '' +     units: '' +     value_type: int +     zabbix_type: '5' + +   - key: zabbix[rcache,buffer,pfree] +     applications: +     - Zabbix server +     description: '' +     units: '' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[wcache,history,pfree] +     applications: +     - Zabbix server +     description: '' +     units: '' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[wcache,text,pfree] +     applications: +     - Zabbix server +     description: '' +     units: '' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[wcache,trend,pfree] +     applications: +     - Zabbix server +     description: '' +     units: '' +     value_type: float +     zabbix_type: '5' + +   - key: zabbix[wcache,values] +     applications: +     - Zabbix server +     description: '' +     units: '' +     value_type: float +     zabbix_type: '5' +   ztriggers: +   - description: "There has been unexpected output while running the housekeeping script\ +       \ on the Zabbix. There are only three kinds of lines we expect to see in the output,\ +       \ and we've gotten something enw.\r\n\r\nCheck the script's output in /var/lib/zabbix/state\ +       \ for more details." +     expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}+{Template App Zabbix Server:housekeeper_creates.last(0)}+{Template App Zabbix Server:housekeeper_drops.last(0)}<>{Template App Zabbix Server:housekeeper_total.last(0)}' +     name: Unexpected output in Zabbix DB Housekeeping +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_DB_Housekeeping.asciidoc + +   - description: An error has occurred during running the housekeeping script on the Zabbix. Check the script's output in /var/lib/zabbix/state for more details. +     expression: '{Template App Zabbix Server:housekeeper_errors.last(0)}>0' +     name: Errors during Zabbix DB Housekeeping +     priority: high +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,alerter,avg,busy].min(600)}>75' +     name: Zabbix alerter processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,configuration syncer,avg,busy].min(600)}>75' +     name: Zabbix configuration syncer processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,db watchdog,avg,busy].min(600)}>75' +     name: Zabbix db watchdog processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,discoverer,avg,busy].min(600)}>75' +     name: Zabbix discoverer processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,escalator,avg,busy].min(600)}>75' +     name: Zabbix escalator processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,history syncer,avg,busy].min(600)}>75' +     name: Zabbix history syncer processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,housekeeper,avg,busy].min(1800)}>75' +     name: Zabbix housekeeper processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,http poller,avg,busy].min(600)}>75' +     name: Zabbix http poller processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,icmp pinger,avg,busy].min(600)}>75' +     name: Zabbix icmp pinger processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,ipmi poller,avg,busy].min(600)}>75' +     name: Zabbix ipmi poller processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,java poller,avg,busy].min(600)}>75' +     name: Zabbix java poller processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,node watcher,avg,busy].min(600)}>75' +     name: Zabbix node watcher processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,poller,avg,busy].min(600)}>75' +     name: Zabbix poller processes more than 75% busy +     priority: high +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,proxy poller,avg,busy].min(600)}>75' +     name: Zabbix proxy poller processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,self-monitoring,avg,busy].min(600)}>75' +     name: Zabbix self-monitoring processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,snmp trapper,avg,busy].min(600)}>75' +     name: Zabbix snmp trapper processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: Timer processes usually are busy because they have to process time +       based trigger functions +     expression: '{Template App Zabbix Server:zabbix[process,timer,avg,busy].min(600)}>75' +     name: Zabbix timer processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,trapper,avg,busy].min(600)}>75' +     name: Zabbix trapper processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[process,unreachable poller,avg,busy].min(600)}>75' +     name: Zabbix unreachable poller processes more than 75% busy +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/Zabbix_state_check.asciidoc + +   - description: "This alert generally indicates a performance problem or a problem\ +       \ with the zabbix-server or proxy.\r\n\r\nThe first place to check for issues\ +       \ is Administration > Queue. Be sure to check the general view and the per-proxy\ +       \ view." +     expression: '{Template App Zabbix Server:zabbix[queue,10m].min(600)}>1000' +     name: More than 1000 items having missing data for more than 10 minutes +     priority: high +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/data_lost_overview_plugin.asciidoc + +   - description: Consider increasing CacheSize in the zabbix_server.conf configuration +       file +     expression: '{Template App Zabbix Server:zabbix[rcache,buffer,pfree].min(600)}<5' +     name: Less than 5% free in the configuration cache +     priority: info +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[wcache,history,pfree].min(600)}<25' +     name: Less than 25% free in the history cache +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[wcache,text,pfree].min(600)}<25' +     name: Less than 25% free in the text history cache +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc + +   - description: '' +     expression: '{Template App Zabbix Server:zabbix[wcache,trend,pfree].min(600)}<25' +     name: Less than 25% free in the trends cache +     priority: avg +     url: https://github.com/openshift/ops-sop/blob/master/Alerts/check_cache.asciidoc diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index c71e07910..4ae918ec6 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -13,6 +13,18 @@ g_template_openshift_master:      applications:      - Openshift Master +  - key: openshift.master.user.count +    description: Shows number of users in a cluster +    type: int +    applications: +    - Openshift Master + +  - key: openshift.master.pod.running.count +    description: Shows number of pods running +    type: int +    applications: +    - Openshift Master +    ztriggers:    - name: 'Application creation has failed on {HOST.NAME}'      expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -28,3 +40,9 @@ g_template_openshift_master:      expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'      priority: high + +  - name: 'Number of users for Openshift Master on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.user.count.last()}=0' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    priority: info + diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml index 36f9cc4a3..ce28b1048 100644 --- a/roles/os_zabbix/vars/template_openshift_node.yml +++ b/roles/os_zabbix/vars/template_openshift_node.yml @@ -8,13 +8,37 @@ g_template_openshift_node:      applications:      - Openshift Node +  - key: openshift.node.ovs.pids.count +    description: Shows number of ovs process ids running +    type: int +    applications: +    - Openshift Node + +  - key: openshift.node.ovs.ports.count +    description: Shows number of OVS ports defined +    type: int +    applications: +    - Openshift Node +    ztriggers:    - name: 'Openshift Node process not running on {HOST.NAME}'      expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc' +    url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'      priority: high    - name: 'Too many Openshift Node processes running on {HOST.NAME}'      expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc' +    url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' +    priority: high + +  - name: 'OVS may not be running on {HOST.NAME}' +    expression: '{Template Openshift Node:openshift.node.ovs.pids.count.last()}<>4' +    url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc'      priority: high + +  - name: 'Number of OVS ports is 0 on {HOST.NAME}' +    expression: '{Template Openshift Node:openshift.node.ovs.ports.count.last()}=0' +    url: 'https://github.com/openshift/ops-sop/blob/node/V3/Alerts/openshift_node.asciidoc' +    priority: high + + diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 70c3809bd..69432273f 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -188,18 +188,6 @@ g_template_os_linux:      multiplier: 1024      units: B -  # Disk items -  - key: filesys.full.xvda2 -    applications: -    - Disk -    value_type: float - -  - key: filesys.full.xvda3 -    applications: -    - Disk -    value_type: float - -    zdiscoveryrules:    - name: disc.filesys      key: disc.filesys @@ -215,38 +203,36 @@ g_template_os_linux:      applications:      - Disk -  ztriggerprototypes: -  - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free on {HOST.NAME}' -    expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' -    priority: warn - -  - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free on {HOST.NAME}' -    expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>95' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc' -    priority: high +  - discoveryrule_key: disc.filesys +    name: "Percentage of used inodes on {#OSO_FILESYS}" +    key: "disc.filesys.inodes.pused[{#OSO_FILESYS}]" +    value_type: float +    description: "PCP derived value of percentage of used inodes on a filesystem." +    applications: +    - Disk -  ztriggers: -  - name: 'Filesystem: / has less than 10% free on {HOST.NAME}' -    expression: '{Template OS Linux:filesys.full.xvda2.last()}>90' +  ztriggerprototypes: +  - name: 'Filesystem: {#OSO_FILESYS} has less than 15% free disk space on {HOST.NAME}' +    expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>85'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'      priority: warn -  - name: 'Filesystem: / has less than 5% free on {HOST.NAME}' -    expression: '{Template OS Linux:filesys.full.xvda2.last()}>95' +  - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free disk space on {HOST.NAME}' +    expression: '{Template OS Linux:disc.filesys.full[{#OSO_FILESYS}].last()}>90'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'      priority: high -  - name: 'Filesystem: /var has less than 10% free on {HOST.NAME}' -    expression: '{Template OS Linux:filesys.full.xvda3.last()}>90' +  - name: 'Filesystem: {#OSO_FILESYS} has less than 10% free inodes on {HOST.NAME}' +    expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>90'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'      priority: warn -  - name: 'Filesystem: /var has less than 5% free on {HOST.NAME}' -    expression: '{Template OS Linux:filesys.full.xvda3.last()}>95' +  - name: 'Filesystem: {#OSO_FILESYS} has less than 5% free inodes on {HOST.NAME}' +    expression: '{Template OS Linux:disc.filesys.inodes.pused[{#OSO_FILESYS}].last()}>95'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_filesys_full.asciidoc'      priority: high +  ztriggers:    - name: 'Too many TOTAL processes on {HOST.NAME}'      expression: '{Template OS Linux:proc.nprocs.last()}>5000'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_proc.asciidoc' | 
