Skip to content

Commit

Permalink
fix: yaml.safe_load is slow as hell (#3056)
Browse files Browse the repository at this point in the history
Co-authored-by: Tal <tal@keephq.dev>
  • Loading branch information
VladimirFilonov and talboren authored Jan 19, 2025
1 parent 0b36eef commit 2a453f0
Show file tree
Hide file tree
Showing 11 changed files with 65 additions and 50 deletions.
10 changes: 5 additions & 5 deletions keep/actions/actions_factory.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import time
import logging
import yaml
from io import StringIO
from uuid import uuid4
from typing import List, Union
Expand All @@ -9,6 +8,7 @@
from keep.api.models.db.action import Action
from keep.api.core.db import get_all_actions, create_actions, delete_action, get_action, update_action
from keep.actions.actions_exception import ActionsCRUDException
from keep.functions import cyaml

logger = logging.getLogger(__name__)

Expand All @@ -25,7 +25,7 @@ def _convert_models_to_dtos(models: List[Action]) -> List[ActionDTO]:
results: List[ActionDTO] = []
for model in models:
try:
dto = ActionDTO(id=model.id, use=model.use, name=model.name, details=yaml.safe_load(StringIO(model.action_raw)))
dto = ActionDTO(id=model.id, use=model.use, name=model.name, details=cyaml.safe_load(StringIO(model.action_raw)))
results.append(dto)
except ValidationError:
logger.warning("Unmatched Action model and the coresponding DTO", exc_info=True, extra={
Expand All @@ -45,7 +45,7 @@ def add_actions(tenant_id: str, installed_by: str, action_dtos: List[dict]):
installation_time=time.time(),
name=action_dto.get("name"),
use=action_dto.get("use") or action_dto.get("name"), # if there is no `use` tag, use `name` instead
action_raw=yaml.dump(action_dto)
action_raw=cyaml.dump(action_dto)
)
actions.append(action)
create_actions(actions)
Expand Down Expand Up @@ -76,12 +76,12 @@ def update_action(tenant_id: str, action_id: str, payload: dict) -> Union[Action
action_payload = Action(
name=payload.get("name"),
use=payload.get("use") or payload.get("name"),
action_raw=yaml.dump(payload)
action_raw=cyaml.dump(payload)
)
updated_action = update_action(tenant_id, action_id, action_payload)
if updated_action:
return update_action
raise ActionsCRUDException(status_code=422, detail="No action matched to be updated")
except Exception:
logger.exception("Uknown exception when update an action on database")
raise ActionsCRUDException(status_code=400, detail="Unable to update an action")
raise ActionsCRUDException(status_code=400, detail="Unable to update an action")
11 changes: 6 additions & 5 deletions keep/api/models/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
from datetime import datetime
from typing import List, Literal, Optional

import yaml
from pydantic import BaseModel, validator

from keep.functions import cyaml


def represent_ordered_dict(dumper, data):
    """Represent an ordered mapping as a plain YAML map, omitting ``None`` values.

    Args:
        dumper: Object exposing ``represent_mapping(tag, items)``.
        data: Mapping whose entries are emitted in iteration order.

    Returns:
        Whatever ``dumper.represent_mapping`` returns for the kept entries.
    """
    kept = {}
    for key, value in data.items():
        # Entries explicitly set to None are left out of the output.
        if value is None:
            continue
        kept[key] = value
    return dumper.represent_mapping("tag:yaml.org,2002:map", kept.items())


yaml.add_representer(OrderedDict, represent_ordered_dict)
cyaml.add_representer(OrderedDict, represent_ordered_dict)


class ProviderDTO(BaseModel):
Expand Down Expand Up @@ -44,7 +45,7 @@ class WorkflowDTO(BaseModel):

@property
def workflow_raw_id(self):
id = yaml.safe_load(self.workflow_raw).get("id")
id = cyaml.safe_load(self.workflow_raw).get("id")
return id

@validator("workflow_raw", pre=False, always=True)
Expand All @@ -64,7 +65,7 @@ def manipulate_raw(cls, raw, values):
_type_: _description_
"""
ordered_raw = OrderedDict()
d = yaml.safe_load(raw)
d = cyaml.safe_load(raw)
# id desc and triggers
ordered_raw["id"] = d.get("id")
values["workflow_raw_id"] = d.get("id")
Expand All @@ -85,7 +86,7 @@ def manipulate_raw(cls, raw, values):
ordered_raw["steps"] = d.get("steps")
# last, actions
ordered_raw["actions"] = d.get("actions")
return yaml.dump(ordered_raw, width=99999)
return cyaml.dump(ordered_raw, width=99999)


class WorkflowExecutionLogsDTO(BaseModel):
Expand Down
6 changes: 3 additions & 3 deletions keep/api/routes/actions.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging

import yaml
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, status
from fastapi.responses import JSONResponse

from keep.actions.actions_factory import ActionsCRUD
from keep.functions import cyaml
from keep.identitymanager.authenticatedentity import AuthenticatedEntity
from keep.identitymanager.identitymanagerfactory import IdentityManagerFactory

Expand Down Expand Up @@ -39,8 +39,8 @@ async def _get_action_info(request: Request, file: UploadFile) -> dict:
action_inforaw = await file.read()
else:
action_inforaw = await request.body()
action_info = yaml.safe_load(action_inforaw)
except yaml.YAMLError:
action_info = cyaml.safe_load(action_inforaw)
except cyaml.YAMLError:
logger.exception("Invalid YAML format when parsing actions file")
raise HTTPException(status_code=400, detail="Invalid yaml format")
return action_info
Expand Down
18 changes: 9 additions & 9 deletions keep/api/routes/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Any, Dict, List, Optional

import validators
import yaml
from fastapi import (
APIRouter,
Body,
Expand Down Expand Up @@ -38,6 +37,7 @@
)
from keep.api.utils.enrichment_helpers import convert_db_alerts_to_dto_alerts
from keep.api.utils.pagination import WorkflowExecutionsPaginatedResultsDto
from keep.functions import cyaml
from keep.identitymanager.authenticatedentity import AuthenticatedEntity
from keep.identitymanager.identitymanagerfactory import IdentityManagerFactory
from keep.parser.parser import Parser
Expand Down Expand Up @@ -118,9 +118,9 @@ def get_workflows(

# create the workflow DTO
try:
workflow_raw = yaml.safe_load(workflow.workflow_raw)
workflow_raw = cyaml.safe_load(workflow.workflow_raw)
# very big width to avoid line breaks
workflow_raw = yaml.dump(workflow_raw, width=99999)
workflow_raw = cyaml.dump(workflow_raw, width=99999)
workflow_dto = WorkflowDTO(
id=workflow.id,
name=workflow.name,
Expand Down Expand Up @@ -313,15 +313,15 @@ async def __get_workflow_raw_data(request: Request, file: UploadFile | None) ->
workflow_raw_data = await file.read()
else:
workflow_raw_data = await request.body()
workflow_data = yaml.safe_load(workflow_raw_data)
workflow_data = cyaml.safe_load(workflow_raw_data)
# backward compatibility
if "alert" in workflow_data:
workflow_data = workflow_data.pop("alert")
#
elif "workflow" in workflow_data:
workflow_data = workflow_data.pop("workflow")

except yaml.YAMLError:
except cyaml.YAMLError:
logger.exception("Invalid YAML format")
raise HTTPException(status_code=400, detail="Invalid YAML format")
return workflow_data
Expand Down Expand Up @@ -518,7 +518,7 @@ async def update_workflow_by_id(
workflow["name"] = workflow_from_db.name
workflow_from_db.description = workflow.get("description")
workflow_from_db.interval = workflow_interval
workflow_from_db.workflow_raw = yaml.dump(workflow, width=99999)
workflow_from_db.workflow_raw = cyaml.dump(workflow, width=99999)
workflow_from_db.last_updated = datetime.datetime.now()
session.add(workflow_from_db)
session.commit()
Expand Down Expand Up @@ -587,9 +587,9 @@ def get_workflow_by_id(
providers_dto, triggers = [], [] # Default in case of failure

try:
workflow_yaml = yaml.safe_load(workflow.workflow_raw)
workflow_yaml = cyaml.safe_load(workflow.workflow_raw)
valid_workflow_yaml = {"workflow": workflow_yaml}
final_workflow_raw = yaml.dump(valid_workflow_yaml, width=99999)
final_workflow_raw = cyaml.dump(valid_workflow_yaml, width=99999)
workflow_dto = WorkflowDTO(
id=workflow.id,
name=workflow.name,
Expand All @@ -604,7 +604,7 @@ def get_workflow_by_id(
disabled=workflow.is_disabled,
)
return workflow_dto
except yaml.YAMLError:
except cyaml.YAMLError:
logger.exception("Invalid YAML format")
raise HTTPException(status_code=500, detail="Error fetching workflow meta data")

Expand Down
6 changes: 3 additions & 3 deletions keep/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@

import click
import requests
import yaml
from dotenv import find_dotenv, load_dotenv
from prettytable import PrettyTable

from keep.api.core.posthog import posthog_client
from keep.functions import cyaml
from keep.providers.models.provider_config import ProviderScope
from keep.providers.providers_factory import ProvidersFactory

Expand Down Expand Up @@ -103,7 +103,7 @@ def set_config(self, keep_config: str):
try:
with open(file=keep_config, mode="r") as f:
self.logger.debug("Loading configuration file.")
self.config = yaml.safe_load(f) or {}
self.config = cyaml.safe_load(f) or {}
self.logger.debug("Configuration file loaded.")

except FileNotFoundError:
Expand All @@ -123,7 +123,7 @@ def set_config(self, keep_config: str):
self.random_user_id = str(uuid.uuid4())
self.config["random_user_id"] = self.random_user_id
with open(file=keep_config, mode="w") as f:
yaml.dump(self.config, f)
cyaml.dump(self.config, f)

arguments = sys.argv

Expand Down
14 changes: 14 additions & 0 deletions keep/functions/cyaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import yaml
from yaml import *


def safe_load(stream):
    """Parse YAML from ``stream`` with a safe loader, using the C loader if present.

    Args:
        stream: YAML input accepted by ``yaml.load`` (string, bytes or file object).

    Returns:
        The Python object produced by ``yaml.load``.
    """
    # CSafeLoader is only exported when PyYAML was built against LibYAML;
    # fall back to the pure-Python SafeLoader instead of raising AttributeError.
    loader = getattr(yaml, "CSafeLoader", yaml.SafeLoader)
    return yaml.load(stream, Loader=loader)

def dump(data, stream=None, Dumper=None, **kwds):
    """Serialize ``data`` to YAML, defaulting to the C dumper if present.

    Args:
        data: Object to serialize.
        stream: Optional output stream, forwarded to ``yaml.dump``.
        Dumper: Dumper class to use; when omitted a default is chosen here.
        **kwds: Extra keyword arguments forwarded unchanged to ``yaml.dump``.

    Returns:
        Whatever ``yaml.dump`` returns for the given arguments.
    """
    if Dumper is None:
        # CDumper is only exported when PyYAML was built against LibYAML;
        # fall back to the pure-Python Dumper instead of raising AttributeError.
        Dumper = getattr(yaml, "CDumper", yaml.Dumper)
    return yaml.dump(data, stream, Dumper=Dumper, **kwds)

def add_representer(data_type, representer, Dumper=None):
    """Register ``representer`` for ``data_type`` on the given or default dumper.

    Args:
        data_type: Python type the representer handles.
        representer: Callable taking ``(dumper, data)`` and returning a node.
        Dumper: Dumper class to register on; when omitted a default is chosen here.
    """
    if Dumper is None:
        # CDumper is only exported when PyYAML was built against LibYAML;
        # fall back to the pure-Python Dumper instead of raising AttributeError.
        Dumper = getattr(yaml, "CDumper", yaml.Dumper)
    Dumper.add_representer(data_type, representer)
15 changes: 7 additions & 8 deletions keep/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
import re
import typing

import yaml

from keep.actions.actions_factory import ActionsCRUD
from keep.api.core.db import get_workflow_id
from keep.contextmanager.contextmanager import ContextManager
from keep.functions import cyaml
from keep.providers.providers_factory import ProvidersFactory
from keep.step.step import Step, StepType
from keep.step.step_provider_parameter import StepProviderParameter
Expand Down Expand Up @@ -332,8 +331,8 @@ def _parse_providers_from_file(
):
with open(providers_file, "r") as file:
try:
providers = yaml.safe_load(file)
except yaml.YAMLError:
providers = cyaml.safe_load(file)
except cyaml.YAMLError:
self.logger.exception(f"Error parsing providers file {providers_file}")
raise
context_manager.providers_context.update(providers)
Expand Down Expand Up @@ -465,8 +464,8 @@ def _parse_actions_from_file(
if actions_file and os.path.isfile(actions_file):
with open(actions_file, "r") as file:
try:
actions_content = yaml.safe_load(file)
except yaml.YAMLError:
actions_content = cyaml.safe_load(file)
except cyaml.YAMLError:
self.logger.exception(f"Error parsing actions file {actions_file}")
raise
# create a hashmap -> action
Expand Down Expand Up @@ -553,8 +552,8 @@ def _load_actions_from_file(
actions = []
with open(actions_file, "r") as file:
try:
actions = yaml.safe_load(file)
except yaml.YAMLError:
actions = cyaml.safe_load(file)
except cyaml.YAMLError:
self.logger.exception(f"Error parsing actions file {actions_file}")
raise
# convert actions into dictionary of unique object by id
Expand Down
4 changes: 2 additions & 2 deletions keep/providers/aks_provider/aks_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
import logging

import pydantic
import yaml
from azure.identity import ClientSecretCredential
from azure.mgmt.containerservice import ContainerServiceClient
from kubernetes import client, config

from keep.contextmanager.contextmanager import ContextManager
from keep.exceptions.provider_exception import ProviderException
from keep.functions import cyaml
from keep.providers.base.base_provider import BaseProvider
from keep.providers.models.provider_config import ProviderConfig
from keep.providers.providers_factory import ProvidersFactory
Expand Down Expand Up @@ -114,7 +114,7 @@ def __generate_client(self):
)

# parse the kubeconfig (parsed as yml string)
kubeconfig = yaml.safe_load(
kubeconfig = cyaml.safe_load(
cluster_creds.kubeconfigs[0].value.decode("utf-8")
)

Expand Down
4 changes: 2 additions & 2 deletions keep/providers/sendgrid_provider/sendgrid_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from sendgrid.helpers.mail import Mail

from keep.contextmanager.contextmanager import ContextManager
from keep.functions import cyaml
from keep.providers.base.base_provider import BaseProvider
from keep.providers.models.provider_config import ProviderConfig, ProviderScope
from keep.providers.providers_factory import ProvidersFactory
Expand Down Expand Up @@ -209,9 +210,8 @@ def dispose(self):
)
scopes = provider.validate_scopes()
print(scopes)
import yaml

mail = yaml.safe_load(
mail = cyaml.safe_load(
"""to:
- "youremail@gmail.com"
- "youranotheremail@gmail.com"
Expand Down
Loading

0 comments on commit 2a453f0

Please sign in to comment.