2022-07-08 22:37:09 +02:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2022-07-04 01:03:24 +02:00
|
|
|
import logging
|
|
|
|
|
2022-07-11 23:18:57 +02:00
|
|
|
import prometheus_client
|
|
|
|
from aiohttp import ClientError, web, web_request
|
2022-07-09 12:43:18 +02:00
|
|
|
from aiohttp_prometheus_exporter.handler import metrics
|
|
|
|
from aiohttp_prometheus_exporter.middleware import prometheus_middleware_factory
|
2022-07-06 00:54:13 +02:00
|
|
|
from diskcache import Cache
|
2022-08-08 12:38:09 +02:00
|
|
|
from nio import AsyncClient, LocalProtocolError, SendRetryError
|
2022-07-04 01:03:24 +02:00
|
|
|
|
2022-07-28 17:39:47 +02:00
|
|
|
from matrix_alertbot.alert import Alert, AlertRenderer
|
2022-08-08 00:28:36 +02:00
|
|
|
from matrix_alertbot.alertmanager import AlertmanagerClient
|
2022-07-04 01:03:24 +02:00
|
|
|
from matrix_alertbot.chat_functions import send_text_to_room
|
|
|
|
from matrix_alertbot.config import Config
|
2022-08-08 00:28:36 +02:00
|
|
|
from matrix_alertbot.errors import (
|
|
|
|
AlertmanagerError,
|
|
|
|
SilenceExtendError,
|
|
|
|
SilenceNotFoundError,
|
|
|
|
)
|
2022-07-04 01:03:24 +02:00
|
|
|
|
|
|
|
# Module-level logger; it is also handed to the aiohttp application built by Webhook.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
# Route table filled by the @routes.get / @routes.post decorators in this module
# and registered on the application in Webhook.__init__.
routes = web.RouteTableDef()
|
|
|
|
|
|
|
|
|
2022-07-12 18:19:52 +02:00
|
|
|
@routes.get("/health")
async def get_health(request: web_request.Request) -> web.Response:
    """Health-check endpoint: always answer with an empty HTTP 200 response.

    The incoming request is not inspected.
    """
    healthy = web.Response(status=200)
    return healthy
|
|
|
|
|
|
|
|
|
2022-07-28 14:37:23 +02:00
|
|
|
@routes.post("/alerts/{room_id}")
async def create_alerts(request: web_request.Request) -> web.Response:
    """Receive a batch of alerts and dispatch each one to a Matrix room.

    The target room is taken from the ``room_id`` path segment and the alerts
    from the ``alerts`` key of the JSON request body.

    Responses:
        * 400 -- the body is not valid JSON, is not a JSON object, has no
          ``alerts`` key, ``alerts`` is not a list or is empty, or one of the
          alert dicts cannot be parsed.
        * 401 -- ``room_id`` is not listed in ``config.allowed_rooms``.
        * 500 -- handling an alert failed with an Alertmanager error or the
          message could not be sent to the Matrix room.
        * 200 -- every alert was handled.

    Processing stops at the first alert that fails to be handled; alerts
    handled before it are not rolled back.
    """
    try:
        data = await request.json()
    except ValueError as e:
        # json.JSONDecodeError derives from ValueError; a malformed body is a
        # client error and must not surface as an internal server error.
        logger.error(f"Received data that is not valid JSON: {e}")
        return web.Response(status=400, body="Data must be valid JSON.")

    room_id = request.match_info["room_id"]
    config: Config = request.app["config"]

    if room_id not in config.allowed_rooms:
        logger.error(f"Cannot send alerts to room ID {room_id}.")
        return web.Response(
            status=401, body=f"Cannot send alerts to room ID {room_id}."
        )

    # A JSON scalar or array cannot carry an 'alerts' key; reject it with the
    # same answer as a JSON object that lacks the key.
    if not isinstance(data, dict) or "alerts" not in data:
        logger.error("Received data without 'alerts' key")
        return web.Response(status=400, body="Data must contain 'alerts' key.")

    alert_dicts = data["alerts"]

    if not isinstance(alert_dicts, list):
        alerts_type = type(alert_dicts).__name__
        logger.error(f"Received data with invalid alerts type '{alerts_type}'.")
        return web.Response(
            status=400, body=f"Alerts must be a list, got '{alerts_type}'."
        )

    logger.info(f"Received {len(alert_dicts)} alerts for room ID {room_id}: {data}")

    if not alert_dicts:
        return web.Response(status=400, body="Alerts cannot be empty.")

    # Parse the whole batch before sending anything, so that a malformed alert
    # rejects the request before any message reaches the room.
    alerts = []
    for alert_dict in alert_dicts:
        try:
            alert = Alert.from_dict(alert_dict)
        except KeyError as e:
            logger.error(f"Cannot parse alert dict: {e}")
            return web.Response(status=400, body=f"Invalid alert: {alert_dict}.")
        alerts.append(alert)

    for alert in alerts:
        try:
            await create_alert(alert, room_id, request)
        except AlertmanagerError as e:
            logger.error(
                f"An error occured with Alertmanager when handling alert with fingerprint {alert.fingerprint}: {e}"
            )
            return web.Response(
                status=500,
                body=f"An error occured with Alertmanager when handling alert with fingerprint {alert.fingerprint}.",
            )
        except (SendRetryError, LocalProtocolError, ClientError) as e:
            logger.error(
                f"Unable to send alert {alert.fingerprint} to Matrix room: {e}"
            )
            return web.Response(
                status=500,
                body=f"An error occured when sending alert with fingerprint '{alert.fingerprint}' to Matrix room.",
            )

    return web.Response(status=200)
|
|
|
|
|
|
|
|
|
|
|
|
async def create_alert(
    alert: Alert, room_id: str, request: web_request.Request
) -> None:
    """Handle a single alert: extend its silence or post it to the room.

    For a firing alert, an attempt is first made to extend a silence through
    ``alertmanager_client.update_silence``. If that succeeds, nothing is sent
    to the room. If it fails with ``SilenceNotFoundError`` the cache entry
    stored under the alert fingerprint is deleted; with ``SilenceExtendError``
    the failure is only logged. In both failure cases the alert is then
    rendered and sent.

    After sending, a firing alert is recorded in the cache as
    ``event_id -> fingerprint`` (expiring after ``config.cache_expire_time``);
    for an alert that is not firing, the cache entry stored under its
    fingerprint is deleted.

    Args:
        alert: The alert to handle.
        room_id: ID of the room the rendered alert is sent to.
        request: Current request; collaborators are read from ``request.app``.
    """
    alertmanager_client: AlertmanagerClient = request.app["alertmanager_client"]
    alert_renderer: AlertRenderer = request.app["alert_renderer"]
    matrix_client: AsyncClient = request.app["matrix_client"]
    cache: Cache = request.app["cache"]
    config: Config = request.app["config"]

    if alert.firing:
        try:
            silence_id = await alertmanager_client.update_silence(alert.fingerprint)
        except (SilenceNotFoundError, SilenceExtendError) as e:
            logger.debug(
                f"Unable to extend silence for alert with fingerprint {alert.fingerprint}: {e}"
            )
            # Only a missing silence invalidates what is cached for the alert.
            if isinstance(e, SilenceNotFoundError):
                cache.delete(alert.fingerprint)
        else:
            logger.debug(
                f"Extended silence ID {silence_id} for alert with fingerprint {alert.fingerprint}"
            )
            # The silence was extended, so the alert is not posted.
            return

    plaintext = alert_renderer.render(alert, html=False)
    html = alert_renderer.render(alert, html=True)
    event = await send_text_to_room(
        matrix_client, room_id, plaintext, html, notice=False
    )

    if not alert.firing:
        cache.delete(alert.fingerprint)
        return

    cache.set(event.event_id, alert.fingerprint, expire=config.cache_expire_time)
|
2022-07-04 01:03:24 +02:00
|
|
|
|
|
|
|
|
2022-07-08 22:46:04 +02:00
|
|
|
class Webhook:
    """aiohttp application that exposes the alert routes of this module.

    The instance owns the ``web.Application`` and its ``web.AppRunner``.
    Collaborators are stored on the application so that request handlers can
    read them from ``request.app``.
    """

    def __init__(
        self,
        matrix_client: AsyncClient,
        alertmanager_client: AlertmanagerClient,
        cache: Cache,
        config: Config,
    ) -> None:
        """Build the application, register routes and metrics, create the runner.

        Args:
            matrix_client: Stored under ``app["matrix_client"]``.
            alertmanager_client: Stored under ``app["alertmanager_client"]``.
            cache: Stored under ``app["cache"]``.
            config: Stored under ``app["config"]``; also provides
                ``template_dir``, ``address``, ``port`` and ``socket``.
        """
        self.app = web.Application(logger=logger)
        self.app["matrix_client"] = matrix_client
        self.app["alertmanager_client"] = alertmanager_client
        self.app["config"] = config
        self.app["cache"] = cache
        self.app["alert_renderer"] = AlertRenderer(config.template_dir)
        self.app.add_routes(routes)

        # A dedicated registry is handed to the request-metrics middleware.
        prometheus_registry = prometheus_client.CollectorRegistry(auto_describe=True)
        self.app.middlewares.append(
            prometheus_middleware_factory(registry=prometheus_registry)
        )
        # NOTE(review): metrics() is called without the registry given to the
        # middleware above -- confirm that the /metrics handler serves the
        # registry the middleware records into.
        self.app.router.add_get("/metrics", metrics())

        self.runner = web.AppRunner(self.app)

        self.config = config
        self.address = config.address
        self.port = config.port
        self.socket = config.socket

    async def start(self) -> None:
        """Set up the runner and start listening.

        A TCP site is used when both ``address`` and ``port`` are set;
        otherwise a Unix socket site is used when ``socket`` is set.

        Raises:
            RuntimeError: If neither an address/port pair nor a socket is
                configured. The check happens before the runner is set up.
        """
        use_tcp = bool(self.address and self.port)
        if not use_tcp and not self.socket:
            # Without this check no site would be created and starting it
            # would fail on an unbound local variable.
            raise RuntimeError(
                "Cannot start webhook: neither address and port nor socket is configured."
            )

        await self.runner.setup()

        site: web.BaseSite
        if use_tcp:
            site = web.TCPSite(self.runner, self.address, self.port)
            logger.info(f"Listening on {self.address}:{self.port}")
        else:
            site = web.UnixSite(self.runner, self.socket)
            logger.info(f"Listening on unix://{self.socket}")

        await site.start()

    async def close(self) -> None:
        """Clean up the runner."""
        await self.runner.cleanup()
|