diff options
Diffstat (limited to 'roles/os_zabbix')
| -rw-r--r-- | roles/os_zabbix/vars/template_docker.yml | 10 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 8 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_zagg_server.yml | 16 | 
3 files changed, 31 insertions, 3 deletions
diff --git a/roles/os_zabbix/vars/template_docker.yml b/roles/os_zabbix/vars/template_docker.yml
index bfabf50c5..91a2c400e 100644
--- a/roles/os_zabbix/vars/template_docker.yml
+++ b/roles/os_zabbix/vars/template_docker.yml
@@ -12,6 +12,11 @@ g_template_docker:
     - Docker Daemon
     value_type: int
 
+  - key: docker.container.dns.resolution
+    applications:
+    - Docker Daemon
+    value_type: int
+
   - key: docker.storage.is_loopback
     applications:
     - Docker Storage
@@ -62,6 +67,11 @@ g_template_docker:
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_ping.asciidoc'
     priority: high
 
+  - name: 'docker.container.dns.resolution failed on {HOST.NAME}'
+    expression: '{Template Docker:docker.container.dns.resolution.max(#3)}>0'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_dns.asciidoc'
+    priority: high
+
   - name: 'Docker storage is using LOOPBACK on {HOST.NAME}'
     expression: '{Template Docker:docker.storage.is_loopback.last()}<>0'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_docker_loopback.asciidoc'
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 514d6fd24..a0ba8d104 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -269,6 +269,14 @@ g_template_openshift_master:
     - 'Openshift Master process not running on {HOST.NAME}'
     priority: avg
 
+  - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
+    expression: '{Template Openshift Master:create_app.sum(1h)}>3'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    description: The application create loop has failed 4 or more times in the last hour
+    priority: avg
+
   - name: 'Openshift Master API health check is failing on {HOST.NAME}'
     expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml
index 0e8e53bb7..db5665993 100644
--- a/roles/os_zabbix/vars/template_zagg_server.yml
+++ b/roles/os_zabbix/vars/template_zagg_server.yml
@@ -7,7 +7,12 @@ g_template_zagg_server:
     - Zagg Server
     value_type: int
 
-  - key: zagg.server.processor.errors
+  - key: zagg.server.metrics.errors
+    applications:
+    - Zagg Server
+    value_type: int
+
+  - key: zagg.server.heartbeat.errors
     applications:
     - Zagg Server
     value_type: int
@@ -18,8 +23,13 @@ g_template_zagg_server:
     value_type: int
 
   ztriggers:
-  - name: 'Error sending metrics on {HOST.NAME}'
-    expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0'
+  - name: 'Error processing metrics on {HOST.NAME}'
+    expression: '{Template Zagg Server:zagg.server.metrics.errors.min(#3)}>0'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+    priority: average
+
+  - name: 'Error processing heartbeats on {HOST.NAME}'
+    expression: '{Template Zagg Server:zagg.server.heartbeat.errors.min(#3)}>0'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
     priority: average
 
