allow silence deletion

This commit is contained in:
HgO 2022-07-06 00:54:13 +02:00
parent 17bf73f22b
commit 6687e7a92a
9 changed files with 130 additions and 78 deletions

View file

@ -1,29 +1,40 @@
import datetime import datetime
from typing import Dict, List from typing import Dict, List
import diskcache
import pytimeparse import pytimeparse
import requests import requests
from requests import RequestException
from matrix_alertbot.cache import Cache from matrix_alertbot.config import Config
from matrix_alertbot.errors import AlertNotFoundError from matrix_alertbot.errors import (
AlertmanagerError,
AlertNotFoundError,
SilenceNotFoundError,
)
class AlertmanagerClient: class AlertmanagerClient:
def __init__(self, url: str, cache: Cache) -> None: def __init__(self, config: Config) -> None:
url = config.alertmanager_url
self.api_url = f"{url}/api/v2" self.api_url = f"{url}/api/v2"
self.cache = cache self.cache = diskcache.Cache(config.cache_dir)
def get_alerts(self) -> List[Dict]: def get_alerts(self) -> List[Dict]:
response = requests.get(f"{self.api_url}/alerts") try:
response.raise_for_status() response = requests.get(f"{self.api_url}/alerts")
response.raise_for_status()
except RequestException as e:
raise AlertmanagerError(f"Cannot fetch alerts from Alertmanager") from e
return response.json() return response.json()
def get_alert(self, fingerprint: str) -> Dict:
    """Fetch the current alerts and return the one matching ``fingerprint``.

    The lookup itself is delegated to ``_find_alert``, which receives the
    fingerprint and the list returned by ``get_alerts``.
    """
    return self._find_alert(fingerprint, self.get_alerts())
def get_alert_labels(self, fingerprint: str) -> Dict[str, str]: def get_alert_labels(self, fingerprint: str) -> Dict[str, str]:
if fingerprint not in self.cache: alert = self.get_alert(fingerprint)
alerts = self.get_alerts() return alert["labels"]
alert = self._find_alert(alerts, fingerprint)
self.cache[fingerprint] = alert["labels"]
return self.cache[fingerprint]
def create_silence(self, fingerprint: str, duration: str, user: str) -> str: def create_silence(self, fingerprint: str, duration: str, user: str) -> str:
labels = self.get_alert_labels(fingerprint) labels = self.get_alert_labels(fingerprint)
@ -45,13 +56,37 @@ class AlertmanagerClient:
"createdBy": user, "createdBy": user,
"comment": "Acknowledge alert from Matrix", "comment": "Acknowledge alert from Matrix",
} }
response = requests.post(f"{self.api_url}/silences", json=silence) try:
response.raise_for_status() response = requests.post(f"{self.api_url}/silences", json=silence)
response.raise_for_status()
except RequestException as e:
raise AlertmanagerError(
f"Cannot create silence for alert fingerprint {fingerprint}"
) from e
data = response.json() data = response.json()
return data["silenceID"] return data["silenceID"]
def delete_silence(self, fingerprint: str) -> None:
    """Delete every silence attached to the alert with this fingerprint.

    Args:
        fingerprint: Fingerprint of the alert whose silences are removed.

    Raises:
        SilenceNotFoundError: The alert is not in the "suppressed" state,
            or it is suppressed but lists no silence to delete.
        AlertmanagerError: A delete request to Alertmanager failed.
    """
    alert = self.get_alert(fingerprint)
    status = alert["status"]
    alert_state = status["state"]

    if alert_state != "suppressed":
        raise SilenceNotFoundError(
            f"Cannot find silences for alert fingerprint {fingerprint} in state {alert_state}"
        )

    # "suppressed" alone does not guarantee a silence exists: the list may be
    # empty or absent (NOTE(review): Alertmanager presumably also reports
    # inhibited alerts as suppressed - confirm against the API spec). Report
    # that explicitly instead of returning as if silences had been removed.
    silence_ids = status.get("silencedBy") or []
    if not silence_ids:
        raise SilenceNotFoundError(
            f"Alert fingerprint {fingerprint} is suppressed but has no silences to delete"
        )

    for silence_id in silence_ids:
        try:
            response = requests.delete(f"{self.api_url}/silence/{silence_id}")
            response.raise_for_status()
        except RequestException as e:
            raise AlertmanagerError(
                f"Cannot delete silence with ID {silence_id}"
            ) from e
@staticmethod @staticmethod
def _find_alert(alerts: List[Dict], fingerprint: str) -> Dict: def _find_alert(fingerprint: str, alerts: List[Dict]) -> Dict:
for alert in alerts: for alert in alerts:
if alert["fingerprint"] == fingerprint: if alert["fingerprint"] == fingerprint:
return alert return alert

View file

@ -1,11 +1,12 @@
import logging import logging
from diskcache import Cache
from nio import AsyncClient, MatrixRoom, RoomMessageText from nio import AsyncClient, MatrixRoom, RoomMessageText
from matrix_alertbot.alertmanager import AlertmanagerClient from matrix_alertbot.alertmanager import AlertmanagerClient
from matrix_alertbot.cache import Cache
from matrix_alertbot.chat_functions import react_to_event, send_text_to_room from matrix_alertbot.chat_functions import react_to_event, send_text_to_room
from matrix_alertbot.config import Config from matrix_alertbot.config import Config
from matrix_alertbot.errors import AlertmanagerError, AlertNotFoundError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -49,6 +50,8 @@ class Command:
"""Process the command""" """Process the command"""
if self.command.startswith("ack"): if self.command.startswith("ack"):
await self._ack() await self._ack()
if self.command.startswith("unack") or self.command.startswith("nack"):
await self._unack()
elif self.command.startswith("react"): elif self.command.startswith("react"):
await self._react() await self._react()
elif self.command.startswith("help"): elif self.command.startswith("help"):
@ -63,7 +66,7 @@ class Command:
else: else:
duration = "1d" duration = "1d"
logger.debug( logger.debug(
f"Acknowledging alert with fingerprint {self.room.display_name} for a duration of {duration} | " f"Receiving a command to create a silence for a duration of {duration} | "
f"{self.room.user_name(self.event.sender)}: {self.event.body}" f"{self.room.user_name(self.event.sender)}: {self.event.body}"
) )
@ -75,21 +78,59 @@ class Command:
return return
logger.debug(f"Read alert fingerprints for event {alert_event_id} from cache") logger.debug(f"Read alert fingerprints for event {alert_event_id} from cache")
silence_ids = [] count_created_silences = 0
alert_fingerprints = self.cache[alert_event_id] alert_fingerprints = self.cache[alert_event_id]
for alert_fingerprint in alert_fingerprints: for alert_fingerprint in alert_fingerprints:
logger.debug( logger.debug(
f"Create silence for alert with fingerprint {alert_fingerprint} for a duration of {duration}" f"Create silence for alert with fingerprint {alert_fingerprint} for a duration of {duration}"
) )
silence_id = self.alertmanager.create_silence( try:
alert_fingerprint, duration, self.room.user_name(self.event.sender) silence_id = self.alertmanager.create_silence(
) alert_fingerprint, duration, self.room.user_name(self.event.sender)
silence_ids.append(silence_id) )
silences = ", ".join(silence_ids) except (AlertNotFoundError, AlertmanagerError) as e:
logger.error(f"Unable to create silence: {e}")
continue
count_created_silences += 1
await send_text_to_room( await send_text_to_room(
self.client, self.client,
self.room.room_id, self.room.room_id,
f"Created silences {silences} for a duration of {duration}", f"Created {count_created_silences} silences with a duration of {duration}",
)
async def _unack(self) -> None:
    """Remove the acknowledgement of an alert by deleting its silences in Alertmanager.

    The event ID of the alert message is read from the reply metadata of the
    command event; the alert fingerprints stored under that event ID in the
    cache are then passed one by one to ``AlertmanagerClient.delete_silence``.
    Failures for individual fingerprints are logged and skipped, and the
    number of fingerprints processed successfully is reported to the room.
    """
    # Imported locally so this method does not depend on the module-level
    # import list, which does not bring SilenceNotFoundError into scope.
    from matrix_alertbot.errors import SilenceNotFoundError

    logger.debug(
        f"Receiving a command to delete a silence | "
        f"{self.room.user_name(self.event.sender)}: {self.event.body}"
    )

    source_content = self.event.source["content"]
    try:
        alert_event_id = source_content["m.relates_to"]["m.in_reply_to"]["event_id"]
    except KeyError:
        logger.debug("Unable to find the event ID of the alert")
        return

    logger.debug(f"Read alert fingerprints for event {alert_event_id} from cache")
    try:
        alert_fingerprints = self.cache[alert_event_id]
    except KeyError:
        # The entry may never have been stored or may have expired; without
        # fingerprints there is nothing to delete.
        logger.error(
            f"Unable to find alert fingerprints for event {alert_event_id} in cache"
        )
        return

    count_removed_silences = 0
    for alert_fingerprint in alert_fingerprints:
        logger.debug(
            f"Delete silence for alert with fingerprint {alert_fingerprint}"
        )
        try:
            self.alertmanager.delete_silence(alert_fingerprint)
        except (AlertNotFoundError, SilenceNotFoundError, AlertmanagerError) as e:
            # delete_silence raises SilenceNotFoundError for alerts that are
            # not silenced; treat it like the other per-alert failures.
            logger.error(f"Unable to delete silence: {e}")
            continue
        count_removed_silences += 1

    await send_text_to_room(
        self.client,
        self.room.room_id,
        f"Removed {count_removed_silences} silences",
    )
async def _react(self) -> None: async def _react(self) -> None:

View file

@ -1,18 +0,0 @@
from typing import Any
import diskcache
class Cache:
    """Small mapping-style wrapper around ``diskcache.Cache``.

    Every value written through item assignment is stored with the same
    ``expire`` value, which is forwarded to the underlying cache's ``set``.
    """

    def __init__(self, directory: str, expire: int):
        """Open the disk cache in ``directory`` and remember ``expire``."""
        self.expire = expire
        self.cache = diskcache.Cache(directory)

    def __contains__(self, key: str) -> bool:
        """Return whether ``key`` is present in the underlying cache."""
        return key in self.cache

    def __getitem__(self, key: str) -> Any:
        """Return the value stored under ``key`` in the underlying cache."""
        return self.cache[key]

    def __setitem__(self, key: str, value: Any) -> None:
        """Store ``value`` under ``key`` using the configured ``expire``."""
        self.cache.set(key, value, expire=self.expire)

View file

@ -1,5 +1,6 @@
import logging import logging
from diskcache import Cache
from nio import ( from nio import (
AsyncClient, AsyncClient,
InviteMemberEvent, InviteMemberEvent,
@ -13,13 +14,7 @@ from nio import (
from matrix_alertbot.alertmanager import AlertmanagerClient from matrix_alertbot.alertmanager import AlertmanagerClient
from matrix_alertbot.bot_commands import Command from matrix_alertbot.bot_commands import Command
from matrix_alertbot.cache import Cache from matrix_alertbot.chat_functions import make_pill, send_text_to_room, strip_fallback
from matrix_alertbot.chat_functions import (
make_pill,
react_to_event,
send_text_to_room,
strip_fallback,
)
from matrix_alertbot.config import Config from matrix_alertbot.config import Config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -29,7 +24,6 @@ class Callbacks:
def __init__( def __init__(
self, self,
client: AsyncClient, client: AsyncClient,
cache: Cache,
alertmanager: AlertmanagerClient, alertmanager: AlertmanagerClient,
config: Config, config: Config,
): ):
@ -44,7 +38,7 @@ class Callbacks:
config: Bot configuration parameters. config: Bot configuration parameters.
""" """
self.client = client self.client = client
self.cache = cache self.cache = Cache(config.cache_dir)
self.alertmanager = alertmanager self.alertmanager = alertmanager
self.config = config self.config = config
self.command_prefix = config.command_prefix self.command_prefix = config.command_prefix

View file

@ -1,15 +1,7 @@
import logging import logging
from typing import Optional, Union from typing import Optional, Union
from nio import ( from nio import AsyncClient, ErrorResponse, Response, RoomSendResponse, SendRetryError
AsyncClient,
ErrorResponse,
MatrixRoom,
MegolmEvent,
Response,
RoomSendResponse,
SendRetryError,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View file

@ -19,3 +19,11 @@ class AlertNotFoundError(RuntimeError):
""" """
pass pass
class SilenceNotFoundError(RuntimeError):
    """Raised when no silence can be found for an alert."""
class AlertmanagerError(RuntimeError):
    """Raised when a request to Alertmanager fails."""

View file

@ -5,7 +5,8 @@ import sys
from asyncio import TimeoutError from asyncio import TimeoutError
from time import sleep from time import sleep
from aiohttp import ClientConnectionError, ServerDisconnectedError, web import diskcache
from aiohttp import ClientConnectionError, ServerDisconnectedError
from nio import ( from nio import (
AsyncClient, AsyncClient,
AsyncClientConfig, AsyncClientConfig,
@ -18,7 +19,6 @@ from nio import (
) )
from matrix_alertbot.alertmanager import AlertmanagerClient from matrix_alertbot.alertmanager import AlertmanagerClient
from matrix_alertbot.cache import Cache
from matrix_alertbot.callbacks import Callbacks from matrix_alertbot.callbacks import Callbacks
from matrix_alertbot.config import Config from matrix_alertbot.config import Config
from matrix_alertbot.webhook import Webhook from matrix_alertbot.webhook import Webhook
@ -88,11 +88,8 @@ def main() -> None:
# Read the parsed config file and create a Config object # Read the parsed config file and create a Config object
config = Config(config_path) config = Config(config_path)
# Configure the cache
cache = Cache(config.cache_dir, config.cache_expire_time)
# Configure Alertmanager client # Configure Alertmanager client
alertmanager = AlertmanagerClient(config.alertmanager_url, cache) alertmanager = AlertmanagerClient(config)
# Configuration options for the AsyncClient # Configuration options for the AsyncClient
client_config = AsyncClientConfig( client_config = AsyncClientConfig(
@ -116,7 +113,7 @@ def main() -> None:
client.user_id = config.user_id client.user_id = config.user_id
# Set up event callbacks # Set up event callbacks
callbacks = Callbacks(client, cache, alertmanager, config) callbacks = Callbacks(client, alertmanager, config)
client.add_event_callback(callbacks.message, (RoomMessageText,)) client.add_event_callback(callbacks.message, (RoomMessageText,))
client.add_event_callback( client.add_event_callback(
callbacks.invite_event_filtered_callback, (InviteMemberEvent,) callbacks.invite_event_filtered_callback, (InviteMemberEvent,)
@ -124,7 +121,7 @@ def main() -> None:
client.add_event_callback(callbacks.decryption_failure, (MegolmEvent,)) client.add_event_callback(callbacks.decryption_failure, (MegolmEvent,))
client.add_event_callback(callbacks.unknown, (UnknownEvent,)) client.add_event_callback(callbacks.unknown, (UnknownEvent,))
webhook_server = Webhook(client, cache, config) webhook_server = Webhook(client, config)
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.create_task(webhook_server.start()) loop.create_task(webhook_server.start())

View file

@ -1,9 +1,8 @@
import logging import logging
from typing import List
from aiohttp import web, web_request from aiohttp import web, web_request
from diskcache import Cache
from nio import AsyncClient, SendRetryError from nio import AsyncClient, SendRetryError
from matrix_alertbot.cache import Cache
from matrix_alertbot.chat_functions import send_text_to_room from matrix_alertbot.chat_functions import send_text_to_room
from matrix_alertbot.config import Config from matrix_alertbot.config import Config
@ -18,8 +17,9 @@ routes = web.RouteTableDef()
async def create_alert(request: web_request.Request) -> web.Response: async def create_alert(request: web_request.Request) -> web.Response:
data = await request.json() data = await request.json()
logger.info(f"Received alert: {data}") logger.info(f"Received alert: {data}")
client = request.app["client"] client: AsyncClient = request.app["client"]
cache = request.app["cache"] config: Config = request.app["config"]
cache: Cache = request.app["cache"]
plaintext = "" plaintext = ""
html = "" html = ""
@ -34,21 +34,26 @@ async def create_alert(request: web_request.Request) -> web.Response:
try: try:
event = await send_text_to_room( event = await send_text_to_room(
client, request.app["room_id"], plaintext, html, notice=False client, config.room, plaintext, html, notice=False
) )
except SendRetryError as e: except SendRetryError as e:
logger.error(e) logger.error(e)
return web.Response(status=500) return web.Response(status=500)
cache[event.event_id] = tuple(alert["fingerprint"] for alert in data["alerts"]) fingerprints = tuple(alert["fingerprint"] for alert in data["alerts"])
cache.set(
event.event_id, fingerprints, expire=config.cache_expire_time, tag="event"
)
return web.Response(status=200) return web.Response(status=200)
class Webhook: class Webhook:
def __init__(self, client: AsyncClient, cache: Cache, config: Config) -> None: def __init__(self, client: AsyncClient, config: Config) -> None:
cache = Cache(config.cache_dir)
self.app = web.Application(logger=logger) self.app = web.Application(logger=logger)
self.app["client"] = client self.app["client"] = client
self.app["room_id"] = config.room self.app["config"] = config
self.app["cache"] = cache self.app["cache"] = cache
self.app.add_routes(routes) self.app.add_routes(routes)
self.runner = web.AppRunner(self.app) self.runner = web.AppRunner(self.app)

View file

@ -4,7 +4,6 @@ from unittest.mock import Mock
import nio import nio
from matrix_alertbot.alertmanager import AlertmanagerClient from matrix_alertbot.alertmanager import AlertmanagerClient
from matrix_alertbot.cache import Cache
from matrix_alertbot.callbacks import Callbacks from matrix_alertbot.callbacks import Callbacks
from tests.utils import make_awaitable, run_coroutine from tests.utils import make_awaitable, run_coroutine
@ -16,14 +15,13 @@ class CallbacksTestCase(unittest.TestCase):
self.fake_client = Mock(spec=nio.AsyncClient) self.fake_client = Mock(spec=nio.AsyncClient)
self.fake_client.user = "@fake_user:example.com" self.fake_client.user = "@fake_user:example.com"
self.fake_cache = Mock(spec=Cache)
self.fake_alertmanager = Mock(spec=AlertmanagerClient) self.fake_alertmanager = Mock(spec=AlertmanagerClient)
# We don't spec config, as it doesn't currently have well defined attributes # We don't spec config, as it doesn't currently have well defined attributes
self.fake_config = Mock() self.fake_config = Mock()
self.callbacks = Callbacks( self.callbacks = Callbacks(
self.fake_client, self.fake_cache, self.fake_alertmanager, self.fake_config self.fake_client, self.fake_alertmanager, self.fake_config
) )
def test_invite(self) -> None: def test_invite(self) -> None: