diff options
Diffstat (limited to 'roles')
| -rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 82 | 
1 files changed, 82 insertions, 0 deletions
| diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 174486e15..512adad4c 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -13,6 +13,12 @@ g_template_openshift_master:      applications:      - Openshift Master +  - key: openshift.master.api.ping +    description: "Verify that the Openshift API is up" +    type: int +    applications: +    - Openshift Master +    - key: openshift.master.api.healthz      description: "Checks the healthz check of the master's api: https://master_host/healthz"      type: int @@ -44,6 +50,12 @@ g_template_openshift_master:      applications:      - Openshift Master +  - key: openshift.master.node.count +    description: Shows the total number of nodes found in the Openshift Cluster +    type: int +    applications: +    - Openshift Master +    - key: openshift.project.count      description: Shows number of projects on a cluster      type: int @@ -122,6 +134,66 @@ g_template_openshift_master:      applications:      - Openshift Etcd +  - key: openshift.master.metric.ping +    description: "This check verifies that the https://master/metrics check is alive and communicating properly." +    type: int +    applications: +    - Openshift Master Metrics + +  - key: openshift.master.apiserver.latency.summary.pods.quantile.list.5 +    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 50% of the pod operations have taken to complete." +    type: int +    applications: +    - Openshift Master Metrics + +  - key: openshift.master.apiserver.latency.summary.pods.quantile.list.9 +    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 90% of the pod operations have taken to complete." 
+    type: int +    applications: +    - Openshift Master Metrics + +  - key: openshift.master.apiserver.latency.summary.pods.quantile.list.99 +    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 99% of the pod operations have taken to complete." +    type: int +    applications: +    - Openshift Master Metrics + +  - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.5 +    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 50% of the pod operations have taken to complete." +    type: int +    applications: +    - Openshift Master Metrics + +  - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.9 +    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 90% of the pod operations have taken to complete." +    type: int +    applications: +    - Openshift Master Metrics + +  - key: openshift.master.apiserver.latency.summary.pods.quantile.watchlist.99 +    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 99% of the pod operations have taken to complete." +    type: int +    applications: +    - Openshift Master Metrics + +  - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.5 +    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 50% of the end to end scheduling operations have taken to complete." +    type: int +    applications: +    - Openshift Master Metrics + +  - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.9 +    description: "Value from https://master/metrics.  This is the time, in milliseconds, that 90% of the end to end scheduling operations have taken to complete." +    type: int +    applications: +    - Openshift Master Metrics + +  - key: openshift.master.scheduler.e2e.scheduling.latency.quantile.99 +    description: "Value from https://master/metrics.  
This is the time, in milliseconds, that 99% of the end to end scheduling operations have taken to complete." +    type: int +    applications: +    - Openshift Master Metrics +    ztriggers:    - name: 'Application creation has failed on {HOST.NAME}'      expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' @@ -133,6 +205,16 @@ g_template_openshift_master:      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'      priority: high +  - name: 'Openshift Master API PING check is failing on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    priority: high + +  - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    priority: avg +    - name: 'Openshift Master process not running on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' | 
