Prometheus Alert Manager
Put alert message to kafka
via fluent-bit
.
bash
Prometheus Alert Manager
v
Fluent Bit HTTP Webhook
v
Kafka Topic
Fluent Bit
bash
[SERVICE]
Flush 1
Log_Level debug
[INPUT]
Name http
Listen 0.0.0.0
Port 9880
[FILTER]
Name lua
Match *
script /Users/kuga/osc/alertmanager/split_alerts.lua
call split_alerts
[OUTPUT]
Name stdout
Match *
Format json
# Format json_lines
Json_date_key timestamp
Json_date_format iso8601
Lua Script
https://docs.fluentbit.io/manual/3.2/pipeline/filters/lua
lua
--- Fluent Bit Lua filter callback: split one Alertmanager webhook payload
-- into one output record per alert.
--
-- This must remain a global function: the filter configuration refers to it
-- by name through its `call` option.
--
-- @param tag        tag of the incoming record (unused)
-- @param timestamp  timestamp of the incoming record; returned unchanged and
--                   also copied into every emitted record
-- @param record     decoded payload; alerts are read from `record.alerts`
-- @return code      always 2 (per the Fluent Bit Lua filter docs: record
--                   replaced, original timestamp kept)
-- @return timestamp the timestamp that was passed in
-- @return records   array with one flattened table per alert (empty when the
--                   payload carries no usable `alerts` array)
function split_alerts(tag, timestamp, record)
  local new_records = {}

  local alerts = type(record) == "table" and record.alerts or nil
  if type(alerts) ~= "table" then
    -- Nothing to split; same result the original produced for a missing list.
    return 2, timestamp, new_records
  end

  for _, alert in ipairs(alerts) do
    if type(alert) == "table" then
      -- Alerts may legitimately omit annotations (and, in hand-written test
      -- payloads, labels); treat them as empty instead of raising
      -- "attempt to index a nil value".
      local labels = type(alert.labels) == "table" and alert.labels or {}
      local annotations = type(alert.annotations) == "table" and alert.annotations or {}

      -- Resolved alerts report when they ended; fall back to startsAt when
      -- endsAt is absent (same outcome as the original and/or expression).
      local event_time = alert.startsAt
      if alert.status == "resolved" and alert.endsAt then
        event_time = alert.endsAt
      end

      new_records[#new_records + 1] = {
        -- Alertmanager puts the alert name in labels.alertname; a top-level
        -- `name` is still honored for hand-crafted payloads.
        title = alert.name or labels.alertname,
        content = annotations.description,
        host = labels.instance,
        hostname = labels.hostname,
        severity = labels.severity,
        eventTime = event_time,
        -- NOTE(review): key spelling kept as-is; consumers may depend on it.
        bussiness = "foobar",
        status = alert.status,
        timestamp = timestamp,
      }
    end
  end

  return 2, timestamp, new_records
end
prometheus.yml
bash
prometheus --config.file=xxx
yaml
global:
scrape_interval: 5s
evaluation_interval: 5s
rule_files:
- "alert_rules.yml"
alerting:
alertmanagers:
- static_configs:
- targets:
- "localhost:9093"
scrape_configs:
- job_name: "node"
static_configs:
- targets:
- "localhost:9100" # node_exporter
alert_rules.yml
yaml
groups:
- name: example
rules:
- alert: up greater than 0
expr: up > 0
labels:
severity: critical
annotations:
description: "服务器up"
alertmanager.yml
bash
alertmanager --config.file=xxx
yaml
global:
resolve_timeout: 5m
route:
group_by: [ 'alertname' ]
group_wait: 0s
group_interval: 5s
repeat_interval: 5s
receiver: 'fluent-bit-webhook'
receivers:
- name: 'fluent-bit-webhook'
webhook_configs:
- url: 'http://localhost:9880'
send_resolved: true
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: [ 'instance' ]
Curl Test
bash
curl -X POST -H "Content-Type: application/json" -d '{
"alerts": [
{
"name": "CPU使用率过高",
"annotations": {
"description": "服务器CPU使用率超过90%"
},
"labels": {
"instance": "192.168.1.100",
"hostname": "web-server-01",
"severity": "critical"
},
"status": "firing",
"startsAt": "2023-10-01T13:45:22Z"
}
]
}' http://localhost:9880
json
[
{
"host": "localhost:9100",
"bussiness": "foobar",
"timestamp": 1745175831.614667,
"status": "firing",
"severity": "critical",
"content": "服务器up",
"eventTime": "2025-04-20T18:59:26.568Z"
}
]