Skip to content

Commit

Permalink
fix(opsgenie): retry for delete and silent failure (#2891)
Browse files Browse the repository at this point in the history
  • Loading branch information
talboren authored Dec 24, 2024
1 parent 130023a commit de742bd
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
28 changes: 25 additions & 3 deletions keep/providers/opsgenie_provider/opsgenie_provider.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import dataclasses
import time
import typing

import opsgenie_sdk
Expand Down Expand Up @@ -32,6 +33,9 @@ class OpsGenieRecipient(pydantic.BaseModel):
class OpsgenieProvider(BaseProvider):
"""Create incidents in OpsGenie."""

MAX_RETIRES = 3
RETRY_DELAY = 1 # seconds

PROVIDER_DISPLAY_NAME = "OpsGenie"
PROVIDER_CATEGORY = ["Incident Management"]

Expand Down Expand Up @@ -77,7 +81,11 @@ def validate_scopes(self):
note="Simple alert",
message="Simple alert showing context with name: John Doe",
)
self._delete_alert(alert["id"])
deleted = self._delete_alert(alert["id"])
if not deleted:
self.logger.warning(
"Failed to delete OpsGenie alert in scope validation"
)
scopes["opsgenie:create"] = True
except ApiException as e:
self.logger.exception("Failed to create OpsGenie alert")
Expand All @@ -92,9 +100,23 @@ def validate_config(self):
**self.config.authentication
)

def _delete_alert(self, alert_id: str) -> bool:
    """Delete an OpsGenie alert, retrying on failure instead of raising.

    The delete call is attempted up to ``OpsgenieProvider.MAX_RETIRES``
    times, sleeping ``OpsgenieProvider.RETRY_DELAY`` seconds between
    attempts. Failures are never propagated to the caller: after the last
    attempt fails a warning is logged and ``False`` is returned.

    Args:
        alert_id: Identifier of the alert to delete.

    Returns:
        ``True`` if a delete call completed without raising, ``False`` if
        every attempt raised.
    """
    api_instance = opsgenie_sdk.AlertApi(opsgenie_sdk.ApiClient(self.configuration))
    for attempt in range(OpsgenieProvider.MAX_RETIRES):
        try:
            api_instance.delete_alert(alert_id)
            return True
        except Exception as e:
            # Compare against the retry limit; the previous code referenced a
            # non-existent attribute here, which raised AttributeError inside
            # the handler and defeated both the retry and the silent failure.
            if attempt < OpsgenieProvider.MAX_RETIRES - 1:
                time.sleep(OpsgenieProvider.RETRY_DELAY)
                continue
            # Log the error but don't raise it
            self.logger.warning(
                f"Failed to delete alert {alert_id} after {attempt + 1} attempts: {str(e)}",
                extra={"alert_id": alert_id, "error_message": str(e)},
            )
    # If we reach here, the alert was not deleted
    return False

# https://github.com/opsgenie/opsgenie-python-sdk/blob/master/docs/CreateAlertPayload.md
def _create_alert(
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "keep"
version = "0.32.7"
version = "0.32.8"
description = "Alerting. for developers, by developers."
authors = ["Keep Alerting LTD"]
packages = [{include = "keep"}]
Expand Down

0 comments on commit de742bd

Please sign in to comment.