diff --git a/Tests/kaas/k8s-node-distribution/check_nodes_test.py b/Tests/kaas/k8s-node-distribution/check_nodes_test.py
new file mode 100644
index 000000000..d32edccfb
--- /dev/null
+++ b/Tests/kaas/k8s-node-distribution/check_nodes_test.py
@@ -0,0 +1,63 @@
+"""
+Unit tests for node distribution check functions.
+
+(c) Martin Morgenstern , 4/2024
+(c) Hannes Baum , 5/2024
+SPDX-License-Identifier: CC-BY-SA-4.0
+"""
+
+from pathlib import Path
+import yaml
+
+import pytest
+
+from k8s_node_distribution_check import check_nodes
+
+
+HERE = Path(__file__).parent
+
+
+@pytest.fixture
+def load_testdata():
+    with open(Path(HERE, "testdata", "scenarios.yaml")) as stream:
+        return yaml.safe_load(stream)
+
+
+@pytest.mark.parametrize("yaml_key", ["success-1", "success-2"])
+def test_success_single_region_warning(yaml_key, caplog, load_testdata):
+    data = load_testdata[yaml_key]
+    assert check_nodes(data.values()) == 0
+    assert len(caplog.records) == 2
+    for record in caplog.records:
+        assert "no distribution across multiple regions" in record.message
+        assert record.levelname == "WARNING"
+
+
+def test_not_enough_nodes(caplog, load_testdata):
+    data = load_testdata["not-enough-nodes"]
+    assert check_nodes(data.values()) == 2
+    assert len(caplog.records) == 1
+    assert "cluster only contains a single node" in caplog.records[0].message
+    assert caplog.records[0].levelname == "ERROR"
+
+
+@pytest.mark.parametrize("yaml_key", ["no-distribution-1", "no-distribution-2"])
+def test_no_distribution(yaml_key, caplog, load_testdata):
+    data = load_testdata[yaml_key]
+    with caplog.at_level("ERROR"):
+        assert check_nodes(data.values()) == 2
+    assert len(caplog.records) == 1
+    record = caplog.records[0]
+    assert "distribution of nodes described in the standard couldn't be detected" in record.message
+    assert record.levelname == "ERROR"
+
+
+def test_missing_label(caplog, load_testdata):
+    data = load_testdata["missing-labels"]
+    assert check_nodes(data.values()) == 2
+    hostid_missing_records = [
+        record for record in caplog.records
+        if "label for host-ids" in record.message
+    ]
+    assert len(hostid_missing_records) == 1
+    assert hostid_missing_records[0].levelname == "ERROR"
diff --git a/Tests/kaas/k8s-node-distribution/config.yaml.template b/Tests/kaas/k8s-node-distribution/config.yaml.template
deleted file mode 100644
index 0f96da24d..000000000
--- a/Tests/kaas/k8s-node-distribution/config.yaml.template
+++ /dev/null
@@ -1,24 +0,0 @@
-## Configuration file for the K8s Version Recency Test
-
-logging:
-  level: INFO
-  version: 1
-  disable_existing_loggers: False
-  formatters:
-    k8s-node-distribution-check:
-      format: "%(levelname)s: %(message)s"
-  handlers:
-    console:
-      class: logging.StreamHandler
-      formatter: k8s-node-distribution-check
-      stream: ext://sys.stdout
-    file:
-      class: logging.handlers.WatchedFileHandler
-      formatter: k8s-node-distribution-check
-      filename: MY-LOG-FILE-NAME.log
-  root: # Configuring the default (root) logger is highly recommended
-    handlers: [console]
-  loggers:
-    k8s-node-distribution-check:
-      handlers: [console, file]
-      propagate: no
\ No newline at end of file
diff --git a/Tests/kaas/k8s-node-distribution/k8s-node-distribution-check.py b/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py
similarity index 62%
rename from Tests/kaas/k8s-node-distribution/k8s-node-distribution-check.py
rename to Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py
index a0c167ff9..8bc6fb7dd 100755
--- a/Tests/kaas/k8s-node-distribution/k8s-node-distribution-check.py
+++ b/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py
@@ -28,6 +28,7 @@ node-role.kubernetes.io/control-plane
 
 
 (c) Hannes Baum , 6/2023
+(c) Martin Morgenstern , 4/2024
 License: CC-BY-SA 4.0
 """
 
@@ -37,29 +38,17 @@
 import logging
 import logging.config
 import sys
-import yaml
-
-
-logging_config = {
-    "level": "INFO",
-    "version": 1,
-    "disable_existing_loggers": False,
-    "formatters": {
-        "k8s-node-distribution-check": {
-            "format": "%(levelname)s: %(message)s"
-        }
-    },
-    "handlers": {
-        "console": {
-            "class": "logging.StreamHandler",
-            "formatter": "k8s-node-distribution-check",
-            "stream": "ext://sys.stdout"
-        }
-    },
-    "root": {
-        "handlers": ["console"]
-    }
-}
+
+# It is important to note, that the order of these labels matters for this test.
+# Since we want to check if nodes are distributed, we want to do this from bigger
+# infrastructure parts to smaller ones. So we first look if nodes are distributed
+# across regions, then zones and then hosts. If one of these requirements is fulfilled,
+# we don't need to check anymore, since a distribution was already detected.
+LABELS = (
+    "topology.kubernetes.io/region",
+    "topology.kubernetes.io/zone",
+    "topology.scs.community/host-id",
+)
 
 
 logger = logging.getLogger(__name__)
@@ -76,10 +65,12 @@ class DistributionException(BaseException):
     """Exception raised if the distribution seems to be not enough"""
 
 
+class LabelException(BaseException):
+    """Exception raised if a label isn't set"""
+
+
 class Config:
-    config_path = "./config.yaml"
     kubeconfig = None
-    logging = None
 
 
 def print_usage():
@@ -97,7 +88,6 @@
     2 - No distribution according to the standard could be detected for the nodes available.
 
     The following arguments can be set:
-    -c/--config PATH/TO/CONFIG         - Path to the config file of the test script
     -k/--kubeconfig PATH/TO/KUBECONFIG - Path to the kubeconfig of the server we want to check
     -h                                 - Output help
 """)
@@ -108,51 +98,27 @@
     config = Config()
 
     try:
-        opts, args = getopt.gnu_getopt(argv, "c:k:h", ["config", "kubeconfig", "help"])
+        opts, args = getopt.gnu_getopt(argv, "k:t:h", ["kubeconfig=", "test=", "help"])
     except getopt.GetoptError:
         raise ConfigException
 
     for opt in opts:
         if opt[0] == "-h" or opt[0] == "--help":
             raise HelpException
-        if opt[0] == "-c" or opt[0] == "--config":
-            config.config_path = opt[1]
         if opt[0] == "-k" or opt[0] == "--kubeconfig":
             config.kubeconfig = opt[1]
 
     return config
 
 
-def setup_logging(config_log):
-
-    logging.config.dictConfig(config_log)
-    loggers = [
-        logging.getLogger(name)
-        for name in logging.root.manager.loggerDict
-        if not logging.getLogger(name).level
-    ]
-
-    for log in loggers:
-        log.setLevel(config_log['level'])
-
-
 def initialize_config(config):
     """Initialize the configuration for the test script"""
 
-    try:
-        with open(config.config_path, "r") as f:
-            config.logging = yaml.safe_load(f)['logging']
-    except OSError:
-        logger.warning(f"The config file under {config.config_path} couldn't be found, "
-                       f"falling back to the default config.")
-    finally:
-        # Setup logging if the config file with the relevant information could be loaded before
-        # Otherwise, we initialize logging with the included literal
-        setup_logging(config.logging or logging_config)
-
     if config.kubeconfig is None:
         raise ConfigException("A kubeconfig needs to be set in order to test a k8s cluster version.")
 
+    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
+
     return config
 
 
@@ -176,28 +142,29 @@ async def get_k8s_cluster_labelled_nodes(kubeconfig, interesting_labels):
     return nodes
 
 
-def compare_labels(node_list, labels, node_type="master"):
+def compare_labels(node_list, node_type="control"):
 
-    label_data = {key: list() for key in labels}
+    label_data = {key: list() for key in LABELS}
 
     for node in node_list:
-        for key in labels:
+        for key in LABELS:
             try:
                 label_data[key].append(node[key])
             except KeyError:
-                logger.warning(f"The label for {key.split('/')[1]}s don't seem to be set for all nodes.")
+                raise LabelException(f"The label for {key.split('/')[1]}s doesn't seem to be set for all nodes.")
 
-    for label in labels:
-        if len(label_data[label]) < len(node_list):
-            logger.warning(f"The label for {label.split('/')[1]}s doesn't seem to be set for all nodes.")
+    for label in LABELS:
         if len(set(label_data[label])) <= 1:
             logger.warning(f"There seems to be no distribution across multiple {label.split('/')[1]}s "
                            "or labels aren't set correctly across nodes.")
         else:
-            logger.info(f"The nodes are distributed across {str(len(set(label_data[label])))} {label.split('/')[1]}s.")
+            logger.info(
+                f"The {node_type} nodes are distributed across "
+                f"{str(len(set(label_data[label])))} {label.split('/')[1]}s."
+            )
             return
 
-    if node_type == "master":
+    if node_type == "control":
         raise DistributionException("The distribution of nodes described in the standard couldn't be detected.")
     elif node_type == "worker":
         logger.warning("No node distribution could be detected for the worker nodes. "
@@ -205,48 +172,44 @@ def compare_labels(node_list, labels, node_type="master"):
     return
 
 
-async def main(argv):
-    try:
-        config = initialize_config(parse_arguments(argv))
-    except (OSError, ConfigException, HelpException) as e:
-        if hasattr(e, 'message'):
-            logger.error(e.message)
-        print_usage()
-        return 1
-
-    # It is important to note, that the order of these labels matters for this test.
-    # Since we want to check if nodes are distributed, we want to do this from bigger
-    # infrastructure parts to smaller ones. So we first look if nodes are distributed
-    # across regions, then zones and then hosts. If one of these requirements is fulfilled,
-    # we don't need to check anymore, since a distribution was already detected.
-    labels = (
-        "topology.kubernetes.io/region",
-        "topology.kubernetes.io/zone",
-        "topology.scs.community/host-id",
-    )
-
-    nodes = await get_k8s_cluster_labelled_nodes(config.kubeconfig, labels + ("node-role.kubernetes.io/control-plane", ))
-
+def check_nodes(nodes):
     if len(nodes) < 2:
         logger.error("The tested cluster only contains a single node, which can't comply with the standard.")
         return 2
 
-    labelled_master_nodes = [node for node in nodes if "node-role.kubernetes.io/control-plane" in node]
+    labelled_control_nodes = [node for node in nodes if "node-role.kubernetes.io/control-plane" in node]
     try:
-        if len(labelled_master_nodes) >= 1:
+        if len(labelled_control_nodes) >= 1:
             worker_nodes = [node for node in nodes if "node-role.kubernetes.io/control-plane" not in node]
             # Compare the labels of both types, since we have enough of them with labels
-            compare_labels(labelled_master_nodes, labels, "master")
-            compare_labels(worker_nodes, labels, "worker")
+            compare_labels(labelled_control_nodes, "control")
+            compare_labels(worker_nodes, "worker")
         else:
-            compare_labels(nodes, labels)
-    except DistributionException as e:
+            compare_labels(nodes)
+    except (DistributionException, LabelException) as e:
         logger.error(str(e))
         return 2
 
     return 0
 
 
+async def main(argv):
+    try:
+        config = initialize_config(parse_arguments(argv))
+    except (OSError, ConfigException, HelpException) as e:
+        if hasattr(e, 'message'):
+            logger.error(e.message)
+        print_usage()
+        return 1
+
+    nodes = await get_k8s_cluster_labelled_nodes(
+        config.kubeconfig,
+        LABELS + ("node-role.kubernetes.io/control-plane", )
+    )
+
+    return check_nodes(nodes)
+
+
 if __name__ == "__main__":
     return_code = asyncio.run(main(sys.argv[1:]))
     sys.exit(return_code)
diff --git a/Tests/kaas/k8s-node-distribution/testdata/scenarios.yaml b/Tests/kaas/k8s-node-distribution/testdata/scenarios.yaml
new file mode 100644
index 000000000..5cec0118d
--- /dev/null
+++ b/Tests/kaas/k8s-node-distribution/testdata/scenarios.yaml
@@ -0,0 +1,128 @@
+# Success Scenario 1:
+# All nodes have distinct host-ids and zones, but share the region.
+success-1:
+  control-0:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm0"
+  control-1:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone2"
+    topology.scs.community/host-id: "vm1"
+  control-2:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone3"
+    topology.scs.community/host-id: "vm2"
+  worker-0:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm3"
+  worker-1:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone2"
+    topology.scs.community/host-id: "vm4"
+  worker-2:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone3"
+    topology.scs.community/host-id: "vm5"
+
+# Success Scenario 2:
+# Nodes share the host-id and region, but are in different zones.
+success-2:
+  control-0:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm0"
+  control-1:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone2"
+    topology.scs.community/host-id: "vm0"
+  control-2:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone3"
+    topology.scs.community/host-id: "vm0"
+  worker-0:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm1"
+  worker-1:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone2"
+    topology.scs.community/host-id: "vm1"
+  worker-2:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone3"
+    topology.scs.community/host-id: "vm1"
+
+# Failure Scenario:
+# No distribution detectable because of too few nodes
+no-distribution-1:
+  control-0:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm0"
+  worker-0:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm0"
+
+# Failure Scenario:
+# No distribution detectable because all nodes are in the same zone
+no-distribution-2:
+  control-0:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm0"
+  control-1:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm0"
+  control-2:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm0"
+  worker-0:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm1"
+  worker-1:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm1"
+  worker-2:
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm1"
+
+# Failure Scenario:
+# A host-id label is missing on a control node
+missing-labels:
+  control-0:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm0"
+  control-1:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    # host-id missing
+
+# Failure Scenario:
+# Not enough nodes available, so no distribution is detectable
+not-enough-nodes:
+  node-0:
+    node-role.kubernetes.io/control-plane: ""
+    topology.kubernetes.io/region: "region"
+    topology.kubernetes.io/zone: "zone1"
+    topology.scs.community/host-id: "vm0"
diff --git a/Tests/scs-compatible-kaas.yaml b/Tests/scs-compatible-kaas.yaml
index 81098b7c3..5ad86dcbf 100644
--- a/Tests/scs-compatible-kaas.yaml
+++ b/Tests/scs-compatible-kaas.yaml
@@ -15,7 +15,7 @@ versions:
       - name: Kubernetes node distribution and availability
         url: https://raw.githubusercontent.com/SovereignCloudStack/standards/main/Standards/scs-0214-v1-k8s-node-distribution.md
        checks:
-          - executable: ./kaas/k8s-node-distribution/k8s-node-distribution-check.py
+          - executable: ./kaas/k8s-node-distribution/k8s_node_distribution_check.py
            args: -k {kubeconfig}
            id: node-distribution-check
      - name: CNCF Kubernetes conformance
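
Reviewer's note (not part of the patch): with the check logic factored out into check_nodes(), the function can be exercised directly against the shipped testdata, without a cluster or kubeconfig. A minimal sketch, assuming it is run from within Tests/kaas/k8s-node-distribution/; the expected return values 0 and 2 are taken from the new unit tests above:

    # Sketch only: drive check_nodes() with the new testdata/scenarios.yaml,
    # mirroring what check_nodes_test.py does via its load_testdata fixture.
    import yaml

    from k8s_node_distribution_check import check_nodes

    with open("testdata/scenarios.yaml") as stream:
        scenarios = yaml.safe_load(stream)

    # "success-1" spreads control and worker nodes across three zones -> 0;
    # "not-enough-nodes" contains only a single node -> 2 (standard violated).
    assert check_nodes(scenarios["success-1"].values()) == 0
    assert check_nodes(scenarios["not-enough-nodes"].values()) == 2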