create silence with specific matchers

This commit is contained in:
HgO 2022-07-10 02:40:04 +02:00
parent e0c5ea386f
commit 3ca45fccb1
9 changed files with 349 additions and 59 deletions

View file

@ -1,7 +1,8 @@
from __future__ import annotations
import logging
from typing import Dict
import re
from typing import Any, Dict
logger = logging.getLogger(__name__)

View file

@ -1,19 +1,21 @@
from __future__ import annotations
import datetime
from datetime import datetime, timedelta
from typing import Dict, List
import aiohttp
import pytimeparse
import pytimeparse2
from aiohttp import ClientError
from aiohttp_prometheus_exporter.trace import PrometheusTraceConfig
from diskcache import Cache
from matrix_alertbot.errors import (
AlertmanagerServerError,
AlertMismatchError,
AlertNotFoundError,
SilenceNotFoundError,
)
from matrix_alertbot.matcher import AbstractAlertMatcher
class AlertmanagerClient:
@ -39,23 +41,25 @@ class AlertmanagerClient:
alerts = await self.get_alerts()
return self._find_alert(fingerprint, alerts)
async def create_silence(self, fingerprint: str, duration: str, user: str) -> str:
async def create_silence(
self,
fingerprint: str,
duration: str,
user: str,
matchers: List[AbstractAlertMatcher],
) -> str:
alert = await self.get_alert(fingerprint)
labels = alert["labels"]
matchers = []
for label_name, label_value in labels.items():
matchers.append(
{"name": label_name, "value": label_value, "isRegex": False}
)
self._match_alert(alert, matchers)
matchers_json = {m.label: m.value for m in matchers}
start_time = datetime.datetime.now()
duration_seconds = pytimeparse.parse(duration)
duration_delta = datetime.timedelta(seconds=duration_seconds)
start_time = datetime.now()
duration_seconds = pytimeparse2.parse(duration)
duration_delta = timedelta(seconds=duration_seconds)
end_time = start_time + duration_delta
silence = {
"matchers": matchers,
"matchers": matchers_json,
"startsAt": start_time.isoformat(),
"endsAt": end_time.isoformat(),
"createdBy": user,
@ -106,3 +110,18 @@ class AlertmanagerClient:
if alert["fingerprint"] == fingerprint:
return alert
raise AlertNotFoundError(f"Cannot find alert with fingerprint {fingerprint}")
@staticmethod
def _match_alert(alert: Dict, matchers: List[AbstractAlertMatcher]) -> None:
    """Verify that every matcher applies to the labels of ``alert``.

    Returns nothing when all matchers are satisfied (including when
    ``matchers`` is empty).

    Raises:
        AlertMismatchError: if a matcher refers to a label the alert does not
            have, or if a matcher's ``match`` rejects the alert's labels.
    """
    alert_labels = alert["labels"]
    for candidate in matchers:
        name = candidate.label
        if name in alert_labels:
            if candidate.match(alert_labels):
                continue
            raise AlertMismatchError(
                f"Alert with label {candidate} does not match {alert_labels[name]}"
            )
        # The label is absent: report the labels the alert actually carries.
        labels_text = ", ".join(alert_labels)
        raise AlertMismatchError(
            f"Cannot find label {name} in alert labels: {labels_text}"
        )

View file

@ -1,4 +1,5 @@
import logging
from typing import List
from diskcache import Cache
from nio import AsyncClient, MatrixRoom, RoomMessageText
@ -7,6 +8,11 @@ from matrix_alertbot.alertmanager import AlertmanagerClient
from matrix_alertbot.chat_functions import react_to_event, send_text_to_room
from matrix_alertbot.config import Config
from matrix_alertbot.errors import AlertmanagerError
from matrix_alertbot.matcher import (
AbstractAlertMatcher,
AlertMatcher,
AlertRegexMatcher,
)
logger = logging.getLogger(__name__)
@ -61,10 +67,25 @@ class Command:
async def _ack(self) -> None:
"""Acknowledge an alert and silence it for a certain duration in Alertmanager"""
if len(self.args) > 0:
duration = " ".join(self.args)
matchers: List[AbstractAlertMatcher] = []
durations = []
for arg in self.args:
if "=~" in arg:
label, regex = arg.split("=~")
regex_matcher = AlertRegexMatcher(label, regex)
matchers.append(regex_matcher)
elif "=" in arg:
label, value = arg.split("=")
matcher = AlertMatcher(label, value)
matchers.append(matcher)
else:
durations.append(arg)
if len(durations) > 0:
duration = " ".join(durations)
else:
duration = "1d"
logger.debug(
f"Receiving a command to create a silence for a duration of {duration} | "
f"{self.room.user_name(self.event.sender)}: {self.event.body}"
@ -86,7 +107,10 @@ class Command:
)
try:
await self.alertmanager.create_silence(
alert_fingerprint, duration, self.room.user_name(self.event.sender)
alert_fingerprint,
duration,
self.room.user_name(self.event.sender),
matchers,
)
count_created_silences += 1
except AlertmanagerError as e:

View file

@ -37,6 +37,12 @@ class AlertNotFoundError(AlertmanagerError):
pass
class AlertMismatchError(AlertmanagerError):
    """Raised when an alert's labels do not match what was requested."""
class SilenceNotFoundError(AlertmanagerError):
"""An error encountered when a silence cannot be found in Alertmanager."""

View file

@ -0,0 +1,35 @@
import re
from typing import Any, Dict
class AbstractAlertMatcher:
    """Base class for a predicate over a single alert label.

    Instances store the label name (``label``), the value or pattern to
    compare against (``value``) and the operator text (``_op``), which is only
    used to render the matcher with ``str()``. Subclasses implement ``match``.
    """

    def __init__(self, label: str, value: str, op: str) -> None:
        self.label = label
        self.value = value
        self._op = op

    def match(self, labels: Dict[str, str]) -> bool:
        """Return whether ``labels`` satisfies this matcher (abstract here)."""
        raise NotImplementedError

    def __str__(self) -> str:
        """Render the matcher as ``<label><op><value>``."""
        return f"{self.label}{self._op}{self.value}"

    def __eq__(self, matcher: Any) -> bool:
        """Compare two matchers by label and value.

        The operator is deliberately not part of the comparison, as before.
        Objects that are not matchers yield ``NotImplemented`` so Python falls
        back to its default handling instead of raising ``AttributeError``.
        """
        if not isinstance(matcher, AbstractAlertMatcher):
            return NotImplemented
        return self.label == matcher.label and self.value == matcher.value
class AlertMatcher(AbstractAlertMatcher):
    """Matcher requiring a label to be present with an exact value."""

    def __init__(self, label: str, value: str) -> None:
        super().__init__(label, value, "=")

    def match(self, labels: Dict[str, str]) -> bool:
        """Return True only if ``labels`` has this label set to this value."""
        if self.label not in labels:
            return False
        return self.value == labels[self.label]
class AlertRegexMatcher(AbstractAlertMatcher):
    """Matcher requiring a label's value to match a regular expression."""

    def __init__(self, label: str, regex: str) -> None:
        super().__init__(label, regex, "=~")
        # Compiled once so repeated match() calls do not re-parse the pattern.
        self.regex = re.compile(regex)

    def match(self, labels: Dict[str, str]) -> bool:
        """Return True only if the label exists and its whole value matches.

        The entire value must match, not just a prefix: Alertmanager anchors
        regex matchers at both ends, so a prefix-only check here could accept
        an alert that the resulting silence would not actually cover.
        """
        if self.label not in labels:
            return False
        return self.regex.fullmatch(labels[self.label]) is not None

View file

@ -11,7 +11,7 @@ from nio import AsyncClient, SendRetryError
from matrix_alertbot.chat_functions import send_text_to_room
from matrix_alertbot.config import Config
from matrix_alertbot.storage import Alert
from matrix_alertbot.alert import Alert
logger = logging.getLogger(__name__)

View file

@ -31,7 +31,7 @@ setup(
"diskcache>=5.4.0",
"matrix-nio>=0.19.0",
"Markdown>=3.3.7",
"pytimeparse>=1.1.8",
"pytimeparse2>=1.4.0",
"PyYAML>=6.0",
"typing-extensions>=4.3.0",
],

View file

@ -2,8 +2,9 @@ from __future__ import annotations
import json
import unittest
from typing import Any
from unittest.mock import MagicMock, Mock
from datetime import datetime
from typing import Any, List
from unittest.mock import MagicMock, Mock, patch
import aiohttp
import aiohttp.test_utils
@ -14,9 +15,23 @@ from diskcache import Cache
from matrix_alertbot.alertmanager import AlertmanagerClient
from matrix_alertbot.errors import (
AlertmanagerServerError,
AlertMismatchError,
AlertNotFoundError,
SilenceNotFoundError,
)
from matrix_alertbot.matcher import (
AbstractAlertMatcher,
AlertMatcher,
AlertRegexMatcher,
)
class FakeTimeDelta:
    """Stand-in for ``timedelta`` that makes ``<anything> + delta`` deterministic.

    Adding an instance on the right-hand side of ``+`` ignores the left
    operand and returns the naive UTC datetime that lies ``seconds`` after
    the Unix epoch.
    """

    def __init__(self, seconds: int) -> None:
        self.seconds = seconds

    def __radd__(self, other: Any) -> datetime:
        # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12; build
        # the same naive UTC value from an aware datetime instead. ``timezone``
        # is imported locally so the block needs no new module-level import.
        from datetime import timezone

        aware = datetime.fromtimestamp(self.seconds, tz=timezone.utc)
        return aware.replace(tzinfo=None)
class AbstractFakeAlertmanagerServer:
@ -31,6 +46,7 @@ class AbstractFakeAlertmanagerServer:
)
self.runner = web.AppRunner(self.app)
self.response = None
async def __aenter__(self) -> AbstractFakeAlertmanagerServer:
await self.start()
@ -125,7 +141,7 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
alerts = await alertmanager.get_alerts()
self.assertEqual(
[
@ -152,7 +168,7 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
alerts = await alertmanager.get_alerts()
self.assertEqual([], alerts)
@ -162,7 +178,7 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
with self.assertRaises(AlertmanagerServerError):
await alertmanager.get_alerts()
@ -172,7 +188,7 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
alert = await alertmanager.get_alert("fingerprint1")
self.assertEqual(
{
@ -189,7 +205,7 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
with self.assertRaises(AlertNotFoundError):
await alertmanager.get_alert("fingerprint1")
@ -199,21 +215,145 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
with self.assertRaises(AlertmanagerServerError):
await alertmanager.get_alert("fingerprint1")
async def test_create_silence_happy(self) -> None:
@patch("matrix_alertbot.alertmanager.timedelta", side_effect=FakeTimeDelta)
async def test_create_silence_without_matchers(self, fake_timedelta: Mock) -> None:
async with FakeAlertmanagerServer() as fake_alertmanager_server:
port = fake_alertmanager_server.port
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
silence = await alertmanager.create_silence(
"fingerprint1", "1d", "user"
"fingerprint1", "1d", "user", []
)
self.assertEqual("silence1", silence)
self.assertEqual("silence1", silence)
fake_timedelta.assert_called_once_with(seconds=86400)
# Checks that a multi-unit duration string ("1w 3d") is parsed into a single
# number of seconds before the silence is created.
# `timedelta` inside matrix_alertbot.alertmanager is patched with FakeTimeDelta
# so the call arguments can be inspected.
@patch("matrix_alertbot.alertmanager.timedelta", side_effect=FakeTimeDelta)
async def test_create_silence_with_complex_duration(
self, fake_timedelta: Mock
) -> None:
async with FakeAlertmanagerServer() as fake_alertmanager_server:
port = fake_alertmanager_server.port
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager):
# No matchers are given (empty list).
silence = await alertmanager.create_silence(
"fingerprint1", "1w 3d", "user", []
)
self.assertEqual("silence1", silence)
# 1 week + 3 days = 10 days = 864000 seconds.
fake_timedelta.assert_called_once_with(seconds=864000)
# Checks that a silence is created when an exact-value matcher is supplied
# and the call succeeds (no AlertMismatchError is raised).
# NOTE(review): presumably the fake server's "fingerprint1" alert has
# alertname=alert1 -- confirm against FakeAlertmanagerServer.
@patch("matrix_alertbot.alertmanager.timedelta", side_effect=FakeTimeDelta)
async def test_create_silence_with_matchers(self, fake_timedelta: Mock) -> None:
matchers: List[AbstractAlertMatcher] = [
AlertMatcher(label="alertname", value="alert1")
]
async with FakeAlertmanagerServer() as fake_alertmanager_server:
port = fake_alertmanager_server.port
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager):
silence = await alertmanager.create_silence(
"fingerprint1",
"1d",
"user",
matchers,
)
self.assertEqual("silence1", silence)
# "1d" must have been converted to 86400 seconds.
fake_timedelta.assert_called_once_with(seconds=86400)
# Checks that a silence is created when a regex matcher is supplied and the
# call succeeds (no AlertMismatchError is raised).
# NOTE(review): presumably the fake alert's alertname is "alert1", which the
# pattern alert\d+ accepts -- confirm against FakeAlertmanagerServer.
@patch("matrix_alertbot.alertmanager.timedelta", side_effect=FakeTimeDelta)
async def test_create_silence_with_regex_matchers(
self, fake_timedelta: Mock
) -> None:
matchers: List[AbstractAlertMatcher] = [
AlertRegexMatcher(label="alertname", regex=r"alert\d+")
]
async with FakeAlertmanagerServer() as fake_alertmanager_server:
port = fake_alertmanager_server.port
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager):
silence = await alertmanager.create_silence(
"fingerprint1",
"1d",
"user",
matchers,
)
self.assertEqual("silence1", silence)
# "1d" must have been converted to 86400 seconds.
fake_timedelta.assert_called_once_with(seconds=86400)
# Checks that create_silence raises AlertMismatchError for this pair of
# matchers (alertname=alert1 and severity=critical).
# NOTE(review): going by the test name, the fake "fingerprint1" alert
# presumably has no "severity" label -- confirm against FakeAlertmanagerServer.
async def test_create_silence_raise_missing_label(self) -> None:
matchers: List[AbstractAlertMatcher] = [
AlertMatcher(label="alertname", value="alert1"),
AlertMatcher(label="severity", value="critical"),
]
async with FakeAlertmanagerServer() as fake_alertmanager_server:
port = fake_alertmanager_server.port
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager):
with self.assertRaises(AlertMismatchError):
await alertmanager.create_silence(
"fingerprint1",
"1d",
"user",
matchers,
)
# Checks that create_silence raises AlertMismatchError for the exact-value
# matcher alertname=alert2.
# NOTE(review): presumably the fake "fingerprint1" alert has a different
# alertname value -- confirm against FakeAlertmanagerServer.
async def test_create_silence_raise_mismatch_label(self) -> None:
matchers: List[AbstractAlertMatcher] = [
AlertMatcher(label="alertname", value="alert2")
]
async with FakeAlertmanagerServer() as fake_alertmanager_server:
port = fake_alertmanager_server.port
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager):
with self.assertRaises(AlertMismatchError):
await alertmanager.create_silence(
"fingerprint1",
"1d",
"user",
matchers,
)
# Checks that create_silence raises AlertMismatchError for the regex matcher
# alert[^\d]+, which requires at least one non-digit character after "alert".
# NOTE(review): presumably the fake alert's alertname is "alert1" -- confirm
# against FakeAlertmanagerServer.
async def test_create_silence_raise_mismatch_regex_label(self) -> None:
matchers: List[AbstractAlertMatcher] = [
AlertRegexMatcher(label="alertname", regex=r"alert[^\d]+")
]
async with FakeAlertmanagerServer() as fake_alertmanager_server:
port = fake_alertmanager_server.port
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager):
with self.assertRaises(AlertMismatchError):
await alertmanager.create_silence(
"fingerprint1",
"1d",
"user",
matchers,
)
async def test_create_silence_raise_alert_not_found(self) -> None:
async with FakeAlertmanagerServerWithoutAlert() as fake_alertmanager_server:
@ -221,9 +361,9 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
with self.assertRaises(AlertNotFoundError):
await alertmanager.create_silence("fingerprint1", "1d", "user")
await alertmanager.create_silence("fingerprint1", "1d", "user", [])
async def test_create_silence_raise_alertmanager_error(self) -> None:
async with FakeAlertmanagerServerWithErrorCreateSilence() as fake_alertmanager_server:
@ -231,11 +371,11 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
await alertmanager.get_alert("fingerprint1")
with self.assertRaises(AlertmanagerServerError):
await alertmanager.create_silence("fingerprint1", "1d", "user")
await alertmanager.create_silence("fingerprint1", "1d", "user", [])
async def test_delete_silences_happy(self) -> None:
async with FakeAlertmanagerServer() as fake_alertmanager_server:
@ -243,7 +383,7 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
silences = await alertmanager.delete_silences("fingerprint2")
self.assertEqual(["silence1", "silence2"], silences)
@ -253,7 +393,7 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
with self.assertRaises(SilenceNotFoundError):
await alertmanager.delete_silences("fingerprint1")
@ -263,7 +403,7 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
with self.assertRaises(AlertNotFoundError):
await alertmanager.delete_silences("fingerprint2")
@ -273,7 +413,7 @@ class AlertmanagerClientTestCase(unittest.IsolatedAsyncioTestCase):
alertmanager = AlertmanagerClient(
f"http://localhost:{port}", self.fake_cache
)
async with aiotools.closing_async(alertmanager) as alertmanager:
async with aiotools.closing_async(alertmanager):
await alertmanager.get_alert("fingerprint1")
with self.assertRaises(AlertmanagerServerError):

View file

@ -9,26 +9,19 @@ import matrix_alertbot.callback
from matrix_alertbot.alertmanager import AlertmanagerClient
from matrix_alertbot.command import Command
from matrix_alertbot.errors import AlertmanagerError
from matrix_alertbot.matcher import AbstractAlertMatcher, AlertMatcher
from tests.utils import make_awaitable
async def create_silence_raise_alert_manager_error(
fingerprint: str, duration: str, user: str
async def create_silence_raise_alertmanager_error(
fingerprint: str, duration: str, user: str, matchers: List[AbstractAlertMatcher]
) -> str:
if fingerprint == "fingerprint1":
raise AlertmanagerError
return "silence1"
async def create_silence_raise_alertmanager_error(
fingerprint: str, duration: str, user: str
) -> str:
if fingerprint == "fingerprint1":
raise AlertmanagerError
return "silence2"
async def delete_silence_raise_alertmanager_error(fingerprint: str) -> List[str]:
if fingerprint == "fingerprint1":
raise AlertmanagerError
@ -194,7 +187,7 @@ class CommandTestCase(unittest.IsolatedAsyncioTestCase):
self.fake_client.room_send.assert_not_called()
@patch.object(matrix_alertbot.command, "send_text_to_room")
async def test_ack_in_reply_without_duration(
async def test_ack_in_reply_without_duration_nor_matchers(
self, fake_send_text_to_room: Mock
) -> None:
"""Tests the callback for InviteMemberEvents"""
@ -213,6 +206,43 @@ class CommandTestCase(unittest.IsolatedAsyncioTestCase):
)
await command._ack()
# Check that we attempted to create silences
self.fake_alertmanager.create_silence.assert_has_calls(
[
call(fingerprint, "1d", self.fake_message_event.sender, [])
for fingerprint in self.fake_fingerprints
]
)
fake_send_text_to_room.assert_called_once_with(
self.fake_client,
self.fake_room.room_id,
"Created 2 silences with a duration of 1d.",
)
@patch.object(matrix_alertbot.command, "send_text_to_room")
async def test_ack_in_reply_without_duration_and_with_matchers(
self, fake_send_text_to_room: Mock
) -> None:
"""Tests the callback for InviteMemberEvents"""
# Tests that an ack sent in reply with matchers but no duration creates silences with the default duration
matchers: List[AbstractAlertMatcher] = [
AlertMatcher(label="alertname", value="alert1"),
AlertMatcher(label="severity", value="critical"),
]
self.fake_message_event.source = self.fake_source_in_reply
command = Command(
self.fake_client,
self.fake_cache,
self.fake_alertmanager,
self.fake_config,
"ack alertname=alert1 severity=critical",
self.fake_room,
self.fake_message_event,
)
await command._ack()
# Check that we attempted to create silences
self.fake_alertmanager.create_silence.assert_has_calls(
[
@ -220,6 +250,7 @@ class CommandTestCase(unittest.IsolatedAsyncioTestCase):
fingerprint,
"1d",
self.fake_message_event.sender,
matchers,
)
for fingerprint in self.fake_fingerprints
]
@ -231,7 +262,7 @@ class CommandTestCase(unittest.IsolatedAsyncioTestCase):
)
@patch.object(matrix_alertbot.command, "send_text_to_room")
async def test_ack_in_reply_with_duration(
async def test_ack_in_reply_with_duration_and_without_matchers(
self, fake_send_text_to_room: Mock
) -> None:
"""Tests the callback for InviteMemberEvents"""
@ -244,7 +275,44 @@ class CommandTestCase(unittest.IsolatedAsyncioTestCase):
self.fake_cache,
self.fake_alertmanager,
self.fake_config,
"ack 2d",
"ack 1w 2d",
self.fake_room,
self.fake_message_event,
)
await command._ack()
# Check that we attempted to create silences
self.fake_alertmanager.create_silence.assert_has_calls(
[
call(fingerprint, "1w 2d", self.fake_message_event.sender, [])
for fingerprint in self.fake_fingerprints
]
)
fake_send_text_to_room.assert_called_once_with(
self.fake_client,
self.fake_room.room_id,
"Created 2 silences with a duration of 1w 2d.",
)
@patch.object(matrix_alertbot.command, "send_text_to_room")
async def test_ack_in_reply_with_duration_and_matchers(
self, fake_send_text_to_room: Mock
) -> None:
"""Tests the callback for InviteMemberEvents"""
# Tests that an ack sent in reply with both a duration and matchers creates silences using them
matchers: List[AbstractAlertMatcher] = [
AlertMatcher(label="alertname", value="alert1"),
AlertMatcher(label="severity", value="critical"),
]
self.fake_message_event.source = self.fake_source_in_reply
command = Command(
self.fake_client,
self.fake_cache,
self.fake_alertmanager,
self.fake_config,
"ack 1w 2d alertname=alert1 severity=critical",
self.fake_room,
self.fake_message_event,
)
@ -255,8 +323,9 @@ class CommandTestCase(unittest.IsolatedAsyncioTestCase):
[
call(
fingerprint,
"2d",
"1w 2d",
self.fake_message_event.sender,
matchers,
)
for fingerprint in self.fake_fingerprints
]
@ -264,7 +333,7 @@ class CommandTestCase(unittest.IsolatedAsyncioTestCase):
fake_send_text_to_room.assert_called_once_with(
self.fake_client,
self.fake_room.room_id,
"Created 2 silences with a duration of 2d.",
"Created 2 silences with a duration of 1w 2d.",
)
@patch.object(matrix_alertbot.command, "send_text_to_room")
@ -294,11 +363,7 @@ class CommandTestCase(unittest.IsolatedAsyncioTestCase):
# Check that we attempted to create silences
self.fake_alertmanager.create_silence.assert_has_calls(
[
call(
fingerprint,
"1d",
self.fake_message_event.sender,
)
call(fingerprint, "1d", self.fake_message_event.sender, [])
for fingerprint in self.fake_fingerprints
]
)