diff options
Diffstat (limited to 'roles')
| -rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 63 | 
1 file changed, 37 insertions, 26 deletions
| diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 522e6bbe1..514d6fd24 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -231,26 +231,6 @@ g_template_openshift_master:      - Openshift Master Metrics    ztriggers: -  - name: 'Application creation has failed on {HOST.NAME}' -    expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' -    priority: avg - -  - name: 'Openshift Master API health check is failing on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' -    priority: high - -  - name: 'Openshift Master API PING check is failing on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' -    priority: high - -  - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' -    priority: avg -    - name: 'Openshift Master process not running on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' @@ -261,6 +241,16 @@ g_template_openshift_master:      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'      priority: high +  - name: 'Low number of etcd watchers on {HOST.NAME}' +    expression: '{Template 
Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' +    priority: avg + +  - name: 'Etcd ping failed on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' +    priority: high +    - name: 'Number of users for Openshift Master on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.user.count.last()}=0'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' @@ -271,19 +261,40 @@ g_template_openshift_master:      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'      priority: info -  - name: 'Low number of etcd watchers on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' +  # Put triggers that depend on other triggers here (deps must be created first) +  - name: 'Application creation has failed on {HOST.NAME}' +    expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}'      priority: avg -  - name: 'Etcd ping failed on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' -    url: 
'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' +  - name: 'Openshift Master API health check is failing on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}'      priority: high +  - name: 'Openshift Master API PING check is failing on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}' +    priority: high + +  - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}' +    priority: avg +    - name: 'Docker Registry check failed on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.registry.healthz.max(#2)}<1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}'      priority: high    zgraphs: | 
