diff --git a/playbooks/investigate_high_cpu.yml b/playbooks/investigate_high_cpu.yml
new file mode 100644
index 0000000..f4ea349
--- /dev/null
+++ b/playbooks/investigate_high_cpu.yml
@@ -0,0 +1,22 @@
+---
+# Captures process listings sorted by CPU and by memory on the targeted
+# hosts, then prints the report rendered from templates/cpuhog_ticket.j2.
+- name: Investigate High CPU
+  hosts: all
+  become: true
+  tasks:
+    - name: Gather information on top CPU consuming processes
+      ansible.builtin.command:
+        cmd: 'ps -eo pid,ppid,%mem,%cpu,cmd --sort=-%cpu'
+      register: processes_cpu
+      changed_when: false  # read-only query; never report "changed"
+
+    - name: Gather information on top Memory consuming processes
+      ansible.builtin.command:
+        cmd: 'ps -eo pid,ppid,%mem,%cpu,cmd --sort=-%mem'
+      register: processes_mem
+      changed_when: false  # read-only query; never report "changed"
+
+    - name: Dump CPU details
+      ansible.builtin.debug:
+        msg: "{{ lookup('template','../templates/cpuhog_ticket.j2') }}"
diff --git a/rulebooks/alertmanager_listener.yml b/rulebooks/alertmanager_listener.yml
index 0765216..f646338 100644
--- a/rulebooks/alertmanager_listener.yml
+++ b/rulebooks/alertmanager_listener.yml
@@ -10,8 +10,8 @@
     - name: Resolve Disk Usage
       condition:
         all:
-          - event.alert.labels.org == "OYS" and event.alert.status == "firing" \
-            and event.alert.labels.alertname == "root filesystem over 80% full"
+          - event.alert.labels.org == "OYS" and event.alert.status == "firing"
+          - event.alert.labels.alertname == "root filesystem over 80% full"
       actions:
         - run_job_template:
             name: Demo - Clean Log Directory
@@ -22,17 +22,29 @@
                 alertmanager_generator_url: "{{ event.alert.generatorURL }}"
                 event_mountpoint: "{{ event.alert.labels.mountpoint }}"
                 alertmanager_instance: "{{ event.alert.labels.instance }}"
+
     - name: Investigate High CPU
       condition:
         all:
-          - event.alert.labels.org == "OYS" and event.alert.status == "firing" \
-            and event.alert.labels.alertname == "ProcessCPUHog"
+          - event.alert.labels.org == "OYS" and event.alert.status == "firing"
+          - event.alert.labels.alertname == "ProcessCPUHog"
       actions:
         - print_event:
             pretty: true
+        - run_job_template:
+            name: Demo - Investigate High CPU
+            organization: OYS
+            job_args:
+              extra_vars:
+                alertmanager_annotations: "{{ event.alert.annotations }}"
+                alertmanager_generator_url: "{{ event.alert.generatorURL }}"
+                event_severity: "{{ event.alert.labels.severity }}"
+                alertmanager_instance: "{{ event.alert.labels.instance }}"
+                # Subscript on purpose: Jinja's ".values" finds the dict method first.
+                event_values: "{{ event.alert['values'] }}"
 
     - name: Test Contact Point
-      condition: event.alert.labels.alertname == "TestAlert" or event.alert.labels.org == "OYS"
+      condition: event.alert.labels.alertname == "TestAlert" and event.alert.labels.org == "OYS"
       actions:
         - print_event:
             pretty: true
diff --git a/templates/cpuhog_ticket.j2 b/templates/cpuhog_ticket.j2
new file mode 100644
index 0000000..f33f02d
--- /dev/null
+++ b/templates/cpuhog_ticket.j2
@@ -0,0 +1,21 @@
+{# Alert fields are the extra_vars set by the rulebook's run_job_template action. -#}
+= CPUHog Report =
+A high CPU event was triggered from AlertManager.
+
+{% if alertmanager_instance is defined %}
+Annotations: "{{ alertmanager_annotations }}"
+Generator URL: "{{ alertmanager_generator_url }}"
+Severity: "{{ event_severity }}"
+Instance: "{{ alertmanager_instance }}"
+Values: "{{ event_values }}"
+{% endif %}
+
+** Top CPU Consumers **
+{% for line in processes_cpu.stdout_lines[0:10] %}
+{{ line }}
+{% endfor %}
+
+** Top Memory Consumers **
+{% for line in processes_mem.stdout_lines[0:10] %}
+{{ line }}
+{% endfor %}