Skip to content

Commit

Permalink
cerberus changes
Browse files Browse the repository at this point in the history
  • Loading branch information
paigerube14 committed Oct 7, 2024
1 parent 0aac611 commit 61dce1f
Show file tree
Hide file tree
Showing 22 changed files with 65 additions and 512 deletions.
76 changes: 44 additions & 32 deletions krkn/cerberus/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,33 @@
import requests
import sys
import json
from krkn_lib.utils.functions import get_yaml_item_value

check_application_routes = ""
cerberus_url = None
exit_on_failure = False
cerberus_enabled = False

def get_status(config, start_time, end_time):
# Copy the cerberus-related settings out of the parsed config into this
# module's globals (exit_on_failure, cerberus_enabled, cerberus_url,
# check_application_routes).
#   config: mapping subscripted here with "kraken" and "cerberus"; a missing
#           section raises KeyError from the subscript itself.
# NOTE(review): indentation is not visible in this view, so which of the
# statements after `if cerberus_enabled:` sit inside its body cannot be
# confirmed from here -- check against the real file before relying on it.
def set_url(config):
global exit_on_failure
# Last argument (False) is passed as the fallback -- presumably what
# get_yaml_item_value returns when the key is absent; confirm in krkn_lib.
exit_on_failure = get_yaml_item_value(config["kraken"], "exit_on_failure", False)
global cerberus_enabled
cerberus_enabled = get_yaml_item_value(config["cerberus"],"cerberus_enabled", False)
if cerberus_enabled:
global cerberus_url
# Fallback is the empty string, which is falsy.
cerberus_url = get_yaml_item_value(config["cerberus"],"cerberus_url", "")
global check_application_routes
# The key is spelled "check_applicaton_routes" (sic). It is a runtime lookup
# key, so the spelling must match the config file; do not correct it here alone.
check_application_routes = \
get_yaml_item_value(config["cerberus"],"check_applicaton_routes","")

def get_status(start_time, end_time):
"""
Get cerberus status
"""
cerberus_status = True
check_application_routes = False
application_routes_status = True
if config["cerberus"]["cerberus_enabled"]:
cerberus_url = config["cerberus"]["cerberus_url"]
check_application_routes = \
config["cerberus"]["check_applicaton_routes"]
if cerberus_enabled:
if not cerberus_url:
logging.error(
"url where Cerberus publishes True/False signal "
Expand Down Expand Up @@ -61,40 +75,38 @@ def get_status(config, start_time, end_time):
return cerberus_status


def publish_kraken_status(config, failed_post_scenarios, start_time, end_time):
def publish_kraken_status( start_time, end_time):
"""
Publish kraken status to cerberus
"""
cerberus_status = get_status(config, start_time, end_time)
cerberus_status = get_status(start_time, end_time)
if not cerberus_status:
if failed_post_scenarios:
if config["kraken"]["exit_on_failure"]:
logging.info(
"Cerberus status is not healthy and post action scenarios "
"are still failing, exiting kraken run"
)
sys.exit(1)
else:
logging.info(
"Cerberus status is not healthy and post action scenarios "
"are still failing"
)
if exit_on_failure:
logging.info(
"Cerberus status is not healthy and post action scenarios "
"are still failing, exiting kraken run"
)
sys.exit(1)
else:
logging.info(
"Cerberus status is not healthy and post action scenarios "
"are still failing"
)
else:
if failed_post_scenarios:
if config["kraken"]["exit_on_failure"]:
logging.info(
"Cerberus status is healthy but post action scenarios "
"are still failing, exiting kraken run"
)
sys.exit(1)
else:
logging.info(
"Cerberus status is healthy but post action scenarios "
"are still failing"
)
if exit_on_failure:
logging.info(
"Cerberus status is healthy but post action scenarios "
"are still failing, exiting kraken run"
)
sys.exit(1)
else:
logging.info(
"Cerberus status is healthy but post action scenarios "
"are still failing"
)


def application_status(cerberus_url, start_time, end_time):
def application_status( start_time, end_time):
"""
Check application availability
"""
Expand Down
8 changes: 5 additions & 3 deletions krkn/scenario_plugins/abstract_scenario_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from krkn_lib.models.telemetry import ScenarioTelemetry
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift

from krkn import utils
from krkn import utils, cerberus


class AbstractScenarioPlugin(ABC):
Expand All @@ -13,7 +13,6 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
Expand Down Expand Up @@ -76,10 +75,10 @@ def run_scenarios(
logging.info(
f"Running {self.__class__.__name__}: {self.get_scenario_types()} -> {scenario_config}"
)
start_time = int(time.time())
return_value = self.run(
run_uuid,
scenario_config,
krkn_config,
telemetry,
scenario_telemetry,
)
Expand Down Expand Up @@ -110,6 +109,9 @@ def run_scenarios(
if scenario_telemetry.exit_status != 0:
failed_scenarios.append(scenario_config)
scenario_telemetries.append(scenario_telemetry)
end_time = int(time.time())
cerberus.publish_kraken_status(start_time, end_time)
logging.info(f"wating {wait_duration} before running the next scenario")
time.sleep(wait_duration)

return failed_scenarios, scenario_telemetries
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
wait_duration = krkn_config["tunings"]["wait_duration"]
try:
with open(scenario, "r") as f:
app_outage_config_yaml = yaml.full_load(f)
Expand Down Expand Up @@ -68,14 +66,8 @@ def run(
"kraken-deny", namespace
)

logging.info(
"End of scenario. Waiting for the specified duration: %s"
% wait_duration
)
time.sleep(wait_duration)

end_time = int(time.time())
cerberus.publish_kraken_status(krkn_config, [], start_time, end_time)

except Exception as e:
logging.error(
"ApplicationOutageScenarioPlugin exiting due to Exception %s" % e
Expand Down
1 change: 0 additions & 1 deletion krkn/scenario_plugins/arcaflow/arcaflow_scenario_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
Expand Down
10 changes: 0 additions & 10 deletions krkn/scenario_plugins/container/container_scenario_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,10 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
start_time = int(time.time())
pool = PodsMonitorPool(lib_telemetry.get_lib_kubernetes())
wait_duration = krkn_config["tunings"]["wait_duration"]
try:
with open(scenario, "r") as f:
cont_scenario_config = yaml.full_load(f)
Expand All @@ -44,14 +41,7 @@ def run(
)
return 1
scenario_telemetry.affected_pods = result
logging.info("Waiting for the specified duration: %s" % (wait_duration))
time.sleep(wait_duration)

# capture end time
end_time = int(time.time())

# publish cerberus status
cerberus.publish_kraken_status(krkn_config, [], start_time, end_time)
except (RuntimeError, Exception):
logging.error("ContainerScenarioPlugin exiting due to Exception %s" % e)
return 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from krkn_lib.telemetry.ocp import KrknTelemetryOpenshift
from krkn_lib.utils import get_yaml_item_value

from krkn import cerberus, utils
from krkn.scenario_plugins.abstract_scenario_plugin import AbstractScenarioPlugin
from krkn.scenario_plugins.managed_cluster.common_functions import get_managedcluster
from krkn.scenario_plugins.managed_cluster.scenarios import Scenarios
Expand All @@ -18,7 +17,6 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
Expand All @@ -38,8 +36,6 @@ def run(
managedcluster_scenario_object,
lib_telemetry.get_lib_kubernetes(),
)
end_time = int(time.time())
cerberus.get_status(krkn_config, start_time, end_time)
except Exception as e:
logging.error(
"ManagedClusterScenarioPlugin exiting due to Exception %s"
Expand Down
4 changes: 1 addition & 3 deletions krkn/scenario_plugins/native/native_scenario_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def run(
self,
run_uuid: str,
scenario: str,
krkn_config: dict[str, any],
lib_telemetry: KrknTelemetryOpenshift,
scenario_telemetry: ScenarioTelemetry,
) -> int:
Expand All @@ -29,7 +28,6 @@ def run(
PLUGINS.run(
scenario,
lib_telemetry.get_lib_kubernetes().get_kubeconfig_path(),
krkn_config,
run_uuid,
)
result = pool.join()
Expand All @@ -48,7 +46,7 @@ def run(
def get_scenario_types(self) -> list[str]:
return [
"pod_disruption_scenarios",
"pod_network_scenario",
"pod_network_scenarios",
"vmware_node_scenarios",
"ibmcloud_node_scenarios",
]
Expand Down
Loading

0 comments on commit 61dce1f

Please sign in to comment.