Prometheus 的告警数据上传到指定 API 接口
# When a project runs two systems of the same type, the two need to exchange
# data. The script below is one such integration, for monitoring alerts.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Prometheus Alertmanager webhook service.

Receives Alertmanager webhook POSTs and forwards the alerts they carry, one
outgoing request per alert, to the API endpoint configured in ``BK_URL``.
"""
import datetime
import http.server
import json
import logging
import os
import time
from typing import Any, Dict

import requests

# --- Configuration: everything is read from environment variables ---
PORT = int(os.getenv("WEBHOOK_PORT", 5000))
BK_URL = os.getenv(
    "BK_URL",
    "http://itom.chinalife.com.hk/t/devops-tools/yunxiao_alarm_data_cleaning/",
)
# NOTE(review): this default looks like a real credential committed to source;
# prefer supplying ALARM_AUTH through the environment and rotating this value.
ALARM_AUTH = os.getenv("ALARM_AUTH", "df7b29a28eec37c8d572c0ebdbbbe27e")
ALARM_CHANNEL = os.getenv("ALARM_CHANNEL", "云效系统")
bk_obj_id = os.getenv("bk_obj_id", "host")

# Log level name, e.g. DEBUG / INFO / WARNING.
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()

# Maps the Prometheus "severity" label to the alarm level sent downstream.
LEVEL_MAP = {"critical": "fatal", "warning": "warning", "info": "remind"}

# --- Logging setup ---
# An unknown LOG_LEVEL falls back to INFO instead of crashing at import time.
_log_level = getattr(logging, LOG_LEVEL, None)
if not isinstance(_log_level, int):
    _log_level = logging.INFO
logging.basicConfig(
    level=_log_level,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger("Webhook")


class WebhookHandler(http.server.BaseHTTPRequestHandler):
    """Handle Alertmanager webhook requests."""

    def do_POST(self):
        """Parse the posted JSON, forward each alert, and reply with a summary.

        Replies 400 for an empty or malformed body, 200 with the
        processed/total counts once every alert has been attempted, and 500
        if an unexpected exception escapes.
        """
        try:
            # A missing, non-numeric or negative Content-Length is treated as
            # an empty body; a negative value would make rfile.read() block.
            try:
                content_length = int(self.headers.get("Content-Length", 0))
            except (TypeError, ValueError):
                content_length = 0
            if content_length <= 0:
                logger.warning("请求体长度为零#################################")
                self._send_response(400, {"error": "Empty request body"})
                return

            post_data = self.rfile.read(content_length)

            try:
                data = json.loads(post_data.decode("utf-8"))
            except (UnicodeDecodeError, json.JSONDecodeError) as e:
                logger.error(f"JSON解析失败:{e}")
                self._send_response(400, {"error": "Invalid JSON"})
                return

            # The code below needs a JSON object whose "alerts" is a list.
            alerts = data.get("alerts") or [] if isinstance(data, dict) else None
            if not isinstance(alerts, list):
                logger.error("JSON解析失败:unexpected payload structure")
                self._send_response(400, {"error": "Invalid JSON"})
                return

            logger.info("收到Alertmanager数据##################################")
            logger.debug(f"原始数据:{json.dumps(data, ensure_ascii=False)}")
            logger.info(f"本次接收{len(alerts)}条告警")

            success_count = 0
            for alert in alerts:
                if self._process_alert(alert):
                    success_count += 1

            logger.info(
                f"推送对端完成:{success_count}/{len(alerts)}成功######################"
            )
            self._send_response(
                200,
                {"status": "ok", "processed": success_count, "total": len(alerts)},
            )
        except Exception as e:
            # Last-resort boundary: log the traceback and answer 500.
            logger.exception(f"处理请求时发生异常:{e}###################")
            self._send_response(500, {"error": "Internal server error"})

    @staticmethod
    def _build_payload(alert) -> Dict[str, Any]:
        """Translate one Alertmanager alert dict into the downstream payload."""
        status = alert.get("status", "firing")
        # "or {}" also covers an explicit JSON null for these keys.
        labels = alert.get("labels") or {}
        annotations = alert.get("annotations") or {}
        return {
            "alarm_action": "resolved" if status == "resolved" else "firing",
            "alarm_level": LEVEL_MAP.get(labels.get("severity"), "remind"),
            "alarm_type": labels.get("alertname", "PrometheusAlert"),
            # Time of forwarding (local clock), not the alert's own start time.
            "alarm_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "alarm_id": alert.get("fingerprint", f"noid-{time.time()}"),
            "alarm_name": labels.get("alertname", "未知告警"),
            "alarm_content": annotations.get("description", "无详细描述"),
            "alarm_channel": ALARM_CHANNEL,
            "alarm_host": labels.get("instance", "127.0.0.1"),
            "alarm_auth": ALARM_AUTH,
            "bk_obj_id": bk_obj_id,
        }

    def _process_alert(self, alert):
        """Forward one alert to ``BK_URL``.

        Returns:
            True only when the endpoint answers HTTP 200; False on any other
            status, on timeout, on connection failure, or on any other error.
        """
        try:
            payload = self._build_payload(alert)

            # Log a copy with the auth token masked so the secret stays out
            # of the log; the real payload is what gets posted.
            loggable = dict(payload, alarm_auth="***")
            logger.info(
                f"数据Payload:{json.dumps(loggable, ensure_ascii=False)}"
                "#################################"
            )

            resp = requests.post(
                BK_URL,
                json=payload,
                timeout=10,
                headers={"Content-Type": "application/json"},
            )

            logger.info(f"开始响应: HTTP{resp.status_code}")
            logger.info(f"响应内容:{resp.text}")
            return resp.status_code == 200
        except requests.exceptions.Timeout:
            logger.error("发送到超时")
            return False
        except requests.exceptions.ConnectionError:
            logger.error("连接失败")
            return False
        except Exception as e:
            # Best effort: one bad alert must not abort the rest of the batch.
            logger.exception(f"处理告警时异常:{e}")
            return False

    def _send_response(self, status_code: int, data: Dict[str, Any]):
        """Send ``data`` as a JSON response with the given status code."""
        body = json.dumps(data).encode()
        self.send_response(status_code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format, *args):
        """Route the HTTP server's access log through the module logger."""
        logger.info(f"HTTP访问:{self.address_string()}-{format % args}")


def run():
    """Start the HTTP server on 0.0.0.0:PORT and serve until interrupted."""
    server_address = ("0.0.0.0", PORT)
    # The context manager closes the listening socket on exit.
    with http.server.HTTPServer(server_address, WebhookHandler) as httpd:
        logger.info("=" * 50)
        logger.info("Webhook服务启动")
        logger.info(f"监听端口:{PORT}")
        logger.info(f"目标BK_URL:{BK_URL}")
        logger.info("=" * 50)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            logger.info("Webhook服务停止")


if __name__ == "__main__":
    run()