diff options
Diffstat (limited to 'roles/os_zabbix')
| -rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 93 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_os_linux.yml | 12 | 
2 files changed, 67 insertions, 38 deletions
| diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml index 6972ac877..514d6fd24 100644 --- a/roles/os_zabbix/vars/template_openshift_master.yml +++ b/roles/os_zabbix/vars/template_openshift_master.yml @@ -68,6 +68,36 @@ g_template_openshift_master:      applications:      - Openshift Master +  - key: openshift.master.pv.total.count +    description: Total number of Persistent Volumes in the Openshift Cluster +    type: int +    applications: +    - Openshift Master + +  - key: openshift.master.pv.available.count +    description: Total number of Available Persistent Volumes in the Openshift Cluster +    type: int +    applications: +    - Openshift Master + +  - key: openshift.master.pv.released.count +    description: Total number of Released Persistent Volumes in the Openshift Cluster +    type: int +    applications: +    - Openshift Master + +  - key: openshift.master.pv.bound.count +    description: Total number of Bound Persistent Volumes in the Openshift Cluster +    type: int +    applications: +    - Openshift Master + +  - key: openshift.master.pv.failed.count +    description: Total number of Failed Persistent Volumes in the Openshift Cluster +    type: int +    applications: +    - Openshift Master +    - key: openshift.master.etcd.create.success      description: Show number of successful create actions      type: int @@ -201,26 +231,6 @@ g_template_openshift_master:      - Openshift Master Metrics    ztriggers: -  - name: 'Application creation has failed on {HOST.NAME}' -    expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' -    priority: avg - -  - name: 'Openshift Master API health check is failing on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' -    priority: high - -  - name: 'Openshift Master API PING check is failing on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' -    priority: high - -  - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' -    priority: avg -    - name: 'Openshift Master process not running on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' @@ -231,6 +241,16 @@ g_template_openshift_master:      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'      priority: high +  - name: 'Low number of etcd watchers on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' +    priority: avg + +  - name: 'Etcd ping failed on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' +    priority: high +    - name: 'Number of users for Openshift Master on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.user.count.last()}=0'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' @@ -241,19 +261,40 @@ g_template_openshift_master:      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'      priority: info -  - name: 'Low number of etcd watchers on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.etcd.watchers.last(#1)}<10 and {Template Openshift Master:openshift.master.etcd.watchers.last(#2)}<10' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' +  # Put triggers that depend on other triggers here (deps must be created first) +  - name: 'Application creation has failed on {HOST.NAME}' +    expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}'      priority: avg -  - name: 'Etcd ping failed on {HOST.NAME}' -    expression: '{Template Openshift Master:openshift.master.etcd.ping.last(#1)}=0 and {Template Openshift Master:openshift.master.etcd.ping.last(#2)}=0' -    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_etcd.asciidoc' +  - name: 'Openshift Master API health check is failing on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}' +    priority: high + +  - name: 'Openshift Master API PING check is failing on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.api.ping.max(#3)}<1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}'      priority: high +  - name: 'Openshift Master metric PING check is failing on {HOST.NAME}' +    expression: '{Template Openshift Master:openshift.master.metric.ping.max(#3)}<1' +    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}' +    priority: avg +    - name: 'Docker Registry check failed on {HOST.NAME}'      expression: '{Template Openshift Master:openshift.master.registry.healthz.max(#2)}<1'      url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc' +    dependencies: +    - 'Openshift Master process not running on {HOST.NAME}'      priority: high    zgraphs: diff --git a/roles/os_zabbix/vars/template_os_linux.yml b/roles/os_zabbix/vars/template_os_linux.yml index 79d52ef9b..04665be62 100644 --- a/roles/os_zabbix/vars/template_os_linux.yml +++ b/roles/os_zabbix/vars/template_os_linux.yml @@ -304,15 +304,3 @@ g_template_os_linux:      description: 'CPU is less than 10% idle'      dependencies:      - 'CPU idle less than 5% on {HOST.NAME}' - -  zgraphprototypes: -  - name: Network Interface Usage -    width: 1000 -    height: 400 -    graph_items: -    - item_name: "Bytes per second IN on network interface {#OSO_NET_INTERFACE}" -      item_type: prototype -      color: red -    - item_name: "Bytes per second OUT on network interface {#OSO_NET_INTERFACE}" -      item_type: prototype -      color: blue | 
