diff options
Diffstat (limited to 'roles')
| -rw-r--r-- | roles/os_zabbix/tasks/main.yml | 13 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_openshift_master.yml | 8 | ||||
| -rw-r--r-- | roles/os_zabbix/vars/template_zagg_server.yml | 36 | 
3 files changed, 57 insertions, 0 deletions
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml
index d0b307a3d..7552086d4 100644
--- a/roles/os_zabbix/tasks/main.yml
+++ b/roles/os_zabbix/tasks/main.yml
@@ -37,6 +37,9 @@
 - include_vars: template_aws.yml
   tags:
   - aws
+- include_vars: template_zagg_server.yml
+  tags:
+  - zagg_server
 
 - name: Include Template Heartbeat
   include: ../../lib_zabbix/tasks/create_template.yml
@@ -137,3 +140,13 @@
     password: "{{ ozb_password }}"
   tags:
   - aws
+
+- name: Include Template Zagg Server
+  include: ../../lib_zabbix/tasks/create_template.yml
+  vars:
+    template: "{{ g_template_zagg_server }}"
+    server: "{{ ozb_server }}"
+    user: "{{ ozb_user }}"
+    password: "{{ ozb_password }}"
+  tags:
+  - zagg_server
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 514d6fd24..a0ba8d104 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -269,6 +269,14 @@ g_template_openshift_master:
     - 'Openshift Master process not running on {HOST.NAME}'
     priority: avg
 
+  - name: 'Application creation has failed multiple times in the last hour on {HOST.NAME}'
+    expression: '{Template Openshift Master:create_app.sum(1h)}>3'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
+    dependencies:
+    - 'Openshift Master process not running on {HOST.NAME}'
+    description: The application create loop has failed 4 or more times in the last hour
+    priority: avg
+
   - name: 'Openshift Master API health check is failing on {HOST.NAME}'
     expression: '{Template Openshift Master:openshift.master.api.healthz.max(#3)}<1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
diff --git a/roles/os_zabbix/vars/template_zagg_server.yml b/roles/os_zabbix/vars/template_zagg_server.yml
new file mode 100644
index 000000000..0e8e53bb7
--- /dev/null
+++ b/roles/os_zabbix/vars/template_zagg_server.yml
@@ -0,0 +1,36 @@
+---
+g_template_zagg_server:
+  name: Template Zagg Server
+  zitems:
+  - key: zagg.server.metrics.count
+    applications:
+    - Zagg Server
+    value_type: int
+
+  - key: zagg.server.processor.errors
+    applications:
+    - Zagg Server
+    value_type: int
+
+  - key: zagg.server.heartbeat.count
+    applications:
+    - Zagg Server
+    value_type: int
+
+  ztriggers:
+  - name: 'Error sending metrics on {HOST.NAME}'
+    expression: '{Template Zagg Server:zagg.server.processor.errors.min(#3)}>0'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+    priority: average
+
+  - name: 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+    expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>10000'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+    priority: high
+
+  - name: 'High number of metrics in Zagg queue {HOST.NAME}'
+    expression: '{Template Zagg Server:zagg.server.metrics.count.min(#3)}>5000'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/zagg_server.asciidoc'
+    dependencies:
+    - 'Critically High number of metrics in Zagg queue {HOST.NAME}'
+    priority: average
