Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
fed139b
Add draft test for composite alarm
tiurin Oct 31, 2024
11b6a7a
Comment out further test cases for now
tiurin Nov 6, 2024
5bbbd9f
Inline metric alarm variables
tiurin Nov 6, 2024
f9937d2
Add missing comma
tiurin Nov 6, 2024
09e83e8
Use unique alarm names
tiurin Nov 6, 2024
21d48c0
Use more descriptive topic names
tiurin Nov 7, 2024
7685da5
Subscribe SQS queue to alarm SNS topic
tiurin Nov 7, 2024
c50889d
Check that composite alarm is saved
tiurin Nov 7, 2024
0d47210
Add composite alarm message check
tiurin Nov 7, 2024
7735a5d
Record a snapshot of composite alarm
tiurin Nov 7, 2024
d056955
Use metric alarms ARN in composite alarm rule
tiurin Nov 7, 2024
c4151c3
Use only necessary transformers
tiurin Nov 8, 2024
97ca92c
Add composite alarms store
tiurin Nov 8, 2024
ae871e7
Revert "Add composite alarms store"
tiurin Nov 8, 2024
432202d
Set default attributes for a composite alarm
tiurin Nov 8, 2024
6a9b0cf
Remove composite-to-metric alarm conversion
tiurin Nov 8, 2024
0ee5919
Add evaluation of composite alarm state
tiurin Nov 9, 2024
39cc786
Invoke composite alarm actions
tiurin Nov 9, 2024
9edf934
Create SNS message response for composite alarm
tiurin Nov 9, 2024
c59f723
Add state reason data for composite alarm
tiurin Nov 9, 2024
3614be7
Remove scheduler stub for a composite alarm
tiurin Nov 9, 2024
235462a
Format state reason for composite alarm
tiurin Nov 9, 2024
5c5a71b
Move transformers setup before test cases run
tiurin Nov 11, 2024
fce2ec1
Add composite alarm back to OK test case
tiurin Nov 11, 2024
e1fb6a2
Add composite alarm in ALARM because of alarm-2 test case
tiurin Nov 11, 2024
505d10d
Add composite alarm not changed by second trigger test case
tiurin Nov 11, 2024
20f97d3
Remove SQS message logging in test
tiurin Nov 11, 2024
e9a83cb
Format code
tiurin Nov 11, 2024
168514b
Remove implemented TODO
tiurin Nov 11, 2024
b5af73b
Extract method for evaluating single composite alarm
tiurin Nov 12, 2024
b15caba
Extract method for running composite alarm actions
tiurin Nov 12, 2024
91d09da
Apply format and lint
tiurin Nov 12, 2024
c12a188
Validate if metric alarms from alarm rule exist
tiurin Nov 12, 2024
f0afeb0
Validate if each rule operand starts with ALARM
tiurin Nov 12, 2024
b4a4ccf
Add clarifying comment
tiurin Nov 12, 2024
51f07f4
Update limitations in provider class description
tiurin Nov 13, 2024
23b2185
Add typing to create_message_response_update_state_sns signature
tiurin Nov 13, 2024
deea700
Add typing to create_message_response_update_composite_alarm_state_sn…
tiurin Nov 13, 2024
2898beb
Fix test_put_composite_alarm_describe_alarms
tiurin Nov 13, 2024
f717ef1
Verify put_composite_alarm response with snapshot
tiurin Nov 13, 2024
4cf2275
Add test fixture TODOs
tiurin Nov 13, 2024
525fb73
Apply format
tiurin Nov 13, 2024
e47fba9
Add TODO for store lock management improvements
tiurin Nov 13, 2024
6fd4f9d
Log a warning when alarm rule has unsupported expressions
tiurin Nov 14, 2024
6038241
Ignore alarm rule when it has unsupported expressions
tiurin Nov 14, 2024
8cc0848
Add helper inner methods to check alarm action result
tiurin Nov 14, 2024
346cf75
Add test when 2nd alarm from the rule is back to OK
tiurin Nov 14, 2024
384979c
Add snapshot for both alarms from rule are triggered test case
tiurin Nov 14, 2024
5080f12
Add TODO for describe-composite-alarm snapshot
tiurin Nov 14, 2024
cf51bbe
Skip rule evaluation on missing alarms
tiurin Nov 15, 2024
afe714a
Revert "Fix test_put_composite_alarm_describe_alarms"
tiurin Nov 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions localstack-core/localstack/services/cloudwatch/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
from datetime import timezone
from typing import Dict, List

from localstack.aws.api.cloudwatch import CompositeAlarm, DashboardBody, MetricAlarm, StateValue
Expand All @@ -24,7 +25,7 @@ def __init__(self, account_id: str, region: str, alarm: MetricAlarm):
self.set_default_attributes()

def set_default_attributes(self):
current_time = datetime.datetime.utcnow()
current_time = datetime.datetime.now(timezone.utc)
Comment on lines -27 to +28
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for correcting this!

self.alarm["AlarmArn"] = arns.cloudwatch_alarm_arn(
self.alarm["AlarmName"], account_id=self.account_id, region_name=self.region
)
Expand Down Expand Up @@ -52,8 +53,19 @@ def __init__(self, account_id: str, region: str, alarm: CompositeAlarm):
self.set_default_attributes()

def set_default_attributes(self):
    """Populate the attributes a newly created composite alarm starts out with.

    Sets the alarm ARN and the configuration/state timestamps, fills in
    ``ActionsEnabled`` and the three action lists when they are missing, and
    puts the alarm into its initial ``INSUFFICIENT_DATA`` state.
    """
    # a single timezone-aware "now" is shared by every timestamp written below
    created_at = datetime.datetime.now(timezone.utc)
    definition = self.alarm

    definition["AlarmArn"] = arns.cloudwatch_alarm_arn(
        definition["AlarmName"], account_id=self.account_id, region_name=self.region
    )
    definition["AlarmConfigurationUpdatedTimestamp"] = created_at

    # values already present in the alarm win; only missing keys get a default
    definition.setdefault("ActionsEnabled", True)
    for actions_key in ("OKActions", "AlarmActions", "InsufficientDataActions"):
        definition.setdefault(actions_key, [])

    definition["StateValue"] = StateValue.INSUFFICIENT_DATA
    definition["StateReason"] = "Unchecked: Initial alarm creation"
    for timestamp_key in ("StateUpdatedTimestamp", "StateTransitionedTimestamp"):
        definition[timestamp_key] = created_at


class LocalStackDashboard:
Expand Down
191 changes: 173 additions & 18 deletions localstack-core/localstack/services/cloudwatch/provider_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import re
import threading
import uuid
from datetime import timezone
from typing import List

from localstack.aws.api import CommonServiceException, RequestContext, handler
Expand Down Expand Up @@ -79,6 +80,7 @@
from localstack.services.cloudwatch.models import (
CloudWatchStore,
LocalStackAlarm,
LocalStackCompositeAlarm,
LocalStackDashboard,
LocalStackMetricAlarm,
cloudwatch_stores,
Expand Down Expand Up @@ -145,7 +147,10 @@ class CloudwatchProvider(CloudwatchApi, ServiceLifecycleHook):
Cloudwatch provider.

LIMITATIONS:
- no alarm rule evaluation
- simplified composite alarm rule evaluation:
- only OR operator is supported
- only ALARM expression is supported
- only metric alarms can be included in the rule and they should be referenced by ARN only
"""

def __init__(self):
Expand Down Expand Up @@ -339,7 +344,7 @@ def set_alarm_state(
if old_state == state_value:
return

alarm.alarm["StateTransitionedTimestamp"] = datetime.datetime.now()
alarm.alarm["StateTransitionedTimestamp"] = datetime.datetime.now(timezone.utc)
# update startDate (=last ALARM date) - should only update when a new alarm is triggered
# the date is only updated if we have a reason-data, which is set by an alarm
if state_reason_data:
Expand All @@ -353,6 +358,8 @@ def set_alarm_state(
state_reason_data,
)

self._evaluate_composite_alarms(context, alarm)

if not alarm.alarm["ActionsEnabled"]:
return
if state_value == "OK":
Expand Down Expand Up @@ -454,21 +461,18 @@ def put_metric_alarm(self, context: RequestContext, request: PutMetricAlarmInput

@handler("PutCompositeAlarm", expand=False)
def put_composite_alarm(self, context: RequestContext, request: PutCompositeAlarmInput) -> None:
    """Create or replace a composite alarm in the store of the calling account and region.

    The request is wrapped in a ``LocalStackCompositeAlarm`` and stored under the
    alarm's ARN. Parts of the alarm rule that cannot be evaluated are reported as
    warnings only; the alarm is stored regardless.

    :param context: request context providing the account id and region
    :param request: the ``PutCompositeAlarm`` input; ``AlarmRule`` is read from it
    """
    with _STORE_LOCK:
        store = self.get_store(context.account_id, context.region)
        # the alarm wrapper receives a shallow copy of the request dict
        composite_alarm = LocalStackCompositeAlarm(
            context.account_id, context.region, {**request}
        )

        alarm_rule = composite_alarm.alarm["AlarmRule"]
        # plain loop: the warnings are emitted for their side effect, no list is needed
        for validation_warning in self._validate_alarm_rule_expression(alarm_rule):
            LOG.warning(validation_warning)

        store.alarms[composite_alarm.alarm["AlarmArn"]] = composite_alarm

def describe_alarms(
self,
Expand Down Expand Up @@ -766,7 +770,8 @@ def _update_state(
old_state_reason = alarm.alarm["StateReason"]
store = self.get_store(context.account_id, context.region)
current_time = datetime.datetime.now()
if state_reason_data:
# version is not present in state reason data for composite alarm, hence the check
if state_reason_data and isinstance(alarm, LocalStackMetricAlarm):
state_reason_data["version"] = HISTORY_VERSION
history_data = {
"version": HISTORY_VERSION,
Expand Down Expand Up @@ -844,6 +849,117 @@ def _get_timestamp(input: dict):
history = [h for h in history if (date := _get_timestamp(h)) and date <= end_date]
return DescribeAlarmHistoryOutput(AlarmHistoryItems=history)

def _evaluate_composite_alarms(self, context: RequestContext, triggering_alarm):
    """Re-evaluate every composite alarm of the store after an alarm changed state.

    :param context: request context used to look up the store (account id and region)
    :param triggering_alarm: the alarm whose state change caused the evaluation
    """
    # TODO either pass store as a parameter or acquire RLock (with _STORE_LOCK:)
    # everything works ok now but better ensure protection of critical section in front of future changes
    store = self.get_store(context.account_id, context.region)
    # iterate over a snapshot of the alarms that are currently stored
    for candidate in list(store.alarms.values()):
        if isinstance(candidate, LocalStackCompositeAlarm):
            self._evaluate_composite_alarm(context, candidate, triggering_alarm)

def _evaluate_composite_alarm(self, context, composite_alarm, triggering_alarm):
    """Re-evaluate one composite alarm and apply the resulting state change, if any.

    The rule is treated as an OR over ALARM conditions: the composite alarm is in
    ALARM as soon as one referenced alarm is in ALARM, and OK otherwise. Nothing
    happens when the rule contains unsupported expressions, when a referenced
    alarm does not exist, or when the computed state equals the current state.

    :param context: request context used to look up the store
    :param composite_alarm: the composite alarm to evaluate
    :param triggering_alarm: the alarm whose state change caused this evaluation;
        it is reported as the trigger unless a referenced alarm in ALARM is found
    """
    store = self.get_store(context.account_id, context.region)
    rule = composite_alarm.alarm["AlarmRule"]

    unsupported_expressions = self._validate_alarm_rule_expression(rule)
    if unsupported_expressions:
        LOG.warning(
            "Alarm rule contains unsupported expressions and will not be evaluated: %s",
            unsupported_expressions,
        )
        return

    # assuming that a rule consists only of ALARM evaluations of metric alarms, with OR logic applied
    computed_state = StateValue.OK
    for referenced_arn in self._get_alarm_arns(rule):
        referenced_alarm = store.alarms.get(referenced_arn)
        if not referenced_alarm:
            LOG.warning(
                "Alarm rule won't be evaluated as there is no alarm with ARN %s",
                referenced_arn,
            )
            return
        if referenced_alarm.alarm["StateValue"] != StateValue.ALARM:
            continue
        # the first referenced alarm found in ALARM decides the outcome and becomes the trigger
        triggering_alarm, computed_state = referenced_alarm, StateValue.ALARM
        break

    previous_state = composite_alarm.alarm["StateValue"]
    if previous_state == computed_state:
        return

    trigger = triggering_alarm.alarm
    trigger_arn = trigger.get("AlarmArn")
    trigger_state = trigger.get("StateValue")
    transitioned_at = trigger.get("StateTransitionedTimestamp")
    transitioned_at_text = transitioned_at.strftime("%A %d %B, %Y %H:%M:%S %Z")

    reason = f"{trigger_arn} transitioned to {trigger_state} at {transitioned_at_text}"
    reason_data = {
        "triggeringAlarms": [
            {
                "arn": trigger_arn,
                "state": {
                    "value": trigger_state,
                    "timestamp": timestamp_millis(transitioned_at),
                },
            }
        ]
    }
    self._update_state(context, composite_alarm, computed_state, reason, reason_data)

    if not composite_alarm.alarm["ActionsEnabled"]:
        return
    self._run_composite_alarm_actions(
        context, composite_alarm, previous_state, triggering_alarm
    )

def _validate_alarm_rule_expression(self, alarm_rule):
    """Collect warnings for the parts of a composite alarm rule that cannot be evaluated.

    A supported rule is a sequence of ``ALARM(...)`` function calls joined by the
    ``OR`` operator. The rule is split at every ``OR`` operator and each resulting
    condition has to be exactly one ``ALARM(...)`` call; any other condition
    (a different function, another operator, nesting) yields a warning.

    :param alarm_rule: the ``AlarmRule`` string of a composite alarm
    :return: list of warning messages; empty when the whole rule is supported
    """
    # "OR" is an operator only as a standalone word outside of a quoted alarm name/ARN,
    # i.e. when an even number of double quotes follows it. A plain split("OR") would
    # also cut through names such as "MONITOR-cpu".
    or_operator = r'\bOR\b(?=[^"]*(?:"[^"]*"[^"]*)*$)'
    # exactly one ALARM call with a quoted or an unquoted operand - nothing before or after
    single_alarm_call = r'ALARM\s*\(\s*(?:"[^"]*"|[^\s()"]+)\s*\)'

    validation_result = []
    for alarm_condition in re.split(or_operator, alarm_rule):
        alarm_condition = alarm_condition.strip()
        if not re.fullmatch(single_alarm_call, alarm_condition):
            validation_result.append(
                f"Unsupported expression in alarm rule condition {alarm_condition}: Only ALARM expression is supported by Localstack as of now"
            )
    return validation_result

def _get_alarm_arns(self, composite_alarm_rule):
    """Return the quoted operands of all function calls in a composite alarm rule.

    :param composite_alarm_rule: the ``AlarmRule`` string of a composite alarm
    :return: list with the text found between ``("`` and ``")``, in rule order;
        empty when the rule contains no quoted operand
    """
    # regexp for everything within (" "); whitespace between the parentheses and the
    # quotes is tolerated so that operands written as ( "arn" ) are not silently dropped
    return re.findall(r'\(\s*"([^"]*)"\s*\)', composite_alarm_rule)

def _run_composite_alarm_actions(
    self, context, composite_alarm, old_state_value, triggering_alarm
):
    """Trigger the actions configured for the state the composite alarm is currently in.

    SNS actions publish a state-change message to the topic; actions that target
    any other service are only logged as not implemented.

    :param context: request context; its region is used in the message subject
    :param composite_alarm: the composite alarm whose state has just changed
    :param old_state_value: the state of the composite alarm before the change
    :param triggering_alarm: the alarm reported as the trigger in the message
    """
    alarm_definition = composite_alarm.alarm
    current_state = alarm_definition["StateValue"]
    # every state other than OK and ALARM falls back to the insufficient-data actions
    actions_key = {
        StateValue.OK: "OKActions",
        StateValue.ALARM: "AlarmActions",
    }.get(current_state, "InsufficientDataActions")

    for action_arn in alarm_definition[actions_key]:
        target = arns.parse_arn(action_arn)
        target_service = target["service"]
        if target_service != "sns":
            # TODO: support other actions
            LOG.warning(
                "Action for service %s not implemented, action '%s' will not be triggered.",
                target_service,
                action_arn,
            )
            continue

        sns_client = connect_to(
            region_name=target["region"], aws_access_key_id=target["account"]
        ).sns
        alarm_name = alarm_definition["AlarmName"]
        subject = f'{current_state}: "{alarm_name}" in {context.region}'
        message = create_message_response_update_composite_alarm_state_sns(
            composite_alarm, triggering_alarm, old_state_value
        )
        sns_client.publish(TopicArn=action_arn, Subject=subject, Message=message)


def create_metric_data_query_from_alarm(alarm: LocalStackMetricAlarm):
# TODO may need to be adapted for other use cases
Expand Down Expand Up @@ -898,7 +1014,7 @@ def create_message_response_update_state_lambda(
return json.dumps(response, cls=JSONEncoder)


def create_message_response_update_state_sns(alarm, old_state):
def create_message_response_update_state_sns(alarm: LocalStackMetricAlarm, old_state: StateValue):
_alarm = alarm.alarm
response = {
"AWSAccountId": alarm.account_id,
Expand Down Expand Up @@ -952,3 +1068,42 @@ def create_message_response_update_state_sns(alarm, old_state):
response["Trigger"] = details

return json.dumps(response, cls=JSONEncoder)


def create_message_response_update_composite_alarm_state_sns(
    composite_alarm: LocalStackCompositeAlarm,
    triggering_alarm: LocalStackMetricAlarm,
    old_state: StateValue,
):
    """Build the JSON body of the SNS message sent when a composite alarm changes state.

    :param composite_alarm: the composite alarm, already carrying its new state
    :param triggering_alarm: the alarm listed under ``TriggeringChildren``
    :param old_state: the state of the composite alarm before the change
    :return: the message serialized to a JSON string
    """
    definition = composite_alarm.alarm
    trigger = triggering_alarm.alarm

    # the long-name for 'region' should be used - as we don't have it, we use the short name
    # which needs to be slightly changed to make snapshot tests work
    region_label = composite_alarm.region.replace("-", " ").capitalize()

    payload = {
        "AWSAccountId": composite_alarm.account_id,
        "AlarmName": definition["AlarmName"],
        "AlarmDescription": definition.get("AlarmDescription"),
        "AlarmRule": definition.get("AlarmRule"),
        "OldStateValue": old_state,
        "NewStateValue": definition["StateValue"],
        "NewStateReason": definition["StateReason"],
        "StateChangeTime": definition["StateUpdatedTimestamp"],
        "Region": region_label,
        "AlarmArn": definition["AlarmArn"],
        "OKActions": definition.get("OKActions", []),
        "AlarmActions": definition.get("AlarmActions", []),
        "InsufficientDataActions": definition.get("InsufficientDataActions", []),
    }
    # exactly one child is reported: the alarm passed in as the trigger
    payload["TriggeringChildren"] = [
        {
            "Arn": trigger.get("AlarmArn"),
            "State": {
                "Value": trigger["StateValue"],
                "Timestamp": trigger["StateUpdatedTimestamp"],
            },
        }
    ]

    return json.dumps(payload, cls=JSONEncoder)
Loading
Loading