Add playbooks and templates for CPUHog

This commit is contained in:
2024-04-17 13:16:41 -04:00
parent 5707153521
commit e5ec521ec4
3 changed files with 54 additions and 5 deletions

View File

@@ -0,0 +1,18 @@
---
# Playbook: snapshot the busiest processes on each host and render them
# through the cpuhog_ticket.j2 template so the result can be pasted into a
# ticket. The registered variables `processes_cpu` and `processes_mem` are
# the ones the template iterates over.
- name: Investigate High CPU
  hosts: all
  become: true
  tasks:
    # `ps` only reads process state, so the task is marked as never changing
    # the host; without this the command module reports "changed" every run.
    - name: Gather information on top CPU consuming processes
      ansible.builtin.command:
        cmd: 'ps -eo pid,ppid,%mem,%cpu,cmd --sort=-%cpu'
      register: processes_cpu
      changed_when: false

    # Same listing, ordered by memory share instead of CPU share.
    - name: Gather information on top Memory consuming processes
      ansible.builtin.command:
        cmd: 'ps -eo pid,ppid,%mem,%cpu,cmd --sort=-%mem'
      register: processes_mem
      changed_when: false

    # Render the report template and print it in the job output.
    - name: Dump CPU details
      ansible.builtin.debug:
        msg: "{{ lookup('template','../templates/cpuhog_ticket.j2') }}"

View File

@@ -10,8 +10,8 @@
- name: Resolve Disk Usage
condition:
all:
- event.alert.labels.org == "OYS" and event.alert.status == "firing" \
and event.alert.labels.alertname == "root filesystem over 80% full"
- event.alert.labels.org == "OYS" and event.alert.status == "firing"
- event.alert.labels.alertname == "root filesystem over 80% full"
actions:
- run_job_template:
name: Demo - Clean Log Directory
@@ -22,17 +22,28 @@
alertmanager_generator_url: "{{ event.alert.generatorURL }}"
event_mountpoint: "{{ event.alert.labels.mountpoint }}"
alertmanager_instance: "{{ event.alert.labels.instance }}"
# Rule: when a firing "ProcessCPUHog" alert arrives for org OYS, print the
# event and launch the "Demo - Investigate High CPU" job template with the
# alert details passed as extra_vars.
- name: Investigate High CPU
  # One boolean expression, folded onto a single line by `>-`. A list entry
  # may not start with `and`, and a trailing `\` in a plain YAML scalar is a
  # literal backslash, not a line continuation.
  # NOTE(review): kept as a single condition so the match stays bound to
  # `event`, which the extra_vars below reference; multi-condition rules
  # expose matches as events.m_N per the ansible-rulebook docs - confirm.
  condition: >-
    event.alert.labels.org == "OYS"
    and event.alert.status == "firing"
    and event.alert.labels.alertname == "ProcessCPUHog"
  actions:
    - print_event:
        pretty: true
    - run_job_template:
        name: Demo - Investigate High CPU
        organization: OYS
        job_args:
          extra_vars:
            alertmanager_annotations: "{{ event.alert.annotations }}"
            alertmanager_generator_url: "{{ event.alert.generatorURL }}"
            event_severity: "{{ event.alert.labels.severity }}"
            alertmanager_instance: "{{ event.alert.labels.instance }}"
            # Bracket notation: `.values` on a dict resolves to the built-in
            # dict method in Jinja rather than the "values" key.
            event_values: "{{ event.alert['values'] }}"
# Rule: print any "TestAlert" event that belongs to org OYS.
- name: Test Contact Point
  # Only one `condition` key is allowed per rule. The `and` form is kept: it
  # was the later of the two duplicate keys, i.e. the one a last-wins YAML
  # parser was already using.
  condition: >-
    event.alert.labels.alertname == "TestAlert"
    and event.alert.labels.org == "OYS"
  actions:
    - print_event:
        pretty: true

View File

@@ -0,0 +1,20 @@
{#
  CPUHog ticket body.
  Expects `processes_cpu` and `processes_mem` (registered command results
  with `stdout_lines`). Alert details are taken from `event` when it is
  defined, otherwise from the flat extra_vars that the "Investigate High CPU"
  rule passes (alertmanager_annotations, alertmanager_generator_url,
  event_severity, alertmanager_instance, event_values).
  `event.alert['values']` uses bracket notation because `.values` on a dict
  resolves to the dict method rather than the "values" key.
-#}
= CPUHog Report =
A high CPU event was triggered from AlertManager.
{% if event is defined %}
Annotations: "{{ event.alert.annotations }}"
Generator URL: "{{ event.alert.generatorURL }}"
Severity: "{{ event.alert.labels.severity }}"
Instance: "{{ event.alert.labels.instance }}"
Values: "{{ event.alert['values'] }}"
{% elif alertmanager_instance is defined %}
Annotations: "{{ alertmanager_annotations | default('n/a') }}"
Generator URL: "{{ alertmanager_generator_url | default('n/a') }}"
Severity: "{{ event_severity | default('n/a') }}"
Instance: "{{ alertmanager_instance }}"
Values: "{{ event_values | default('n/a') }}"
{% endif %}
** Top CPU Consumers **
{% for line in processes_cpu.stdout_lines[0:10] %}
{{ line }}
{% endfor %}
** Top Memory Consumers **
{% for line in processes_mem.stdout_lines[0:10] %}
{{ line }}
{% endfor %}