From 3b478a786a958ad5a24d23217503af351b7938c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matthias=20B=C3=BCchse?=
Date: Mon, 25 Nov 2024 23:05:43 +0000
Subject: [PATCH] Stabilize scs-0214-v2 (#835)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Stabilize scs-0214-v1

* Relax wording to reflect weaknesses in the test for scs-0214-v2

* skip unit test because it no longer applies

Signed-off-by: Matthias Büchse
Co-authored-by: Kurt Garloff
---
 .../scs-0214-v2-k8s-node-distribution.md      | 20 ++------------------
 ...ode-distribution-implementation-testing.md | 20 +++++---------------
 .../k8s-node-distribution/check_nodes_test.py |  6 +++---
 .../k8s_node_distribution_check.py            | 13 +++++--------
 4 files changed, 15 insertions(+), 44 deletions(-)

diff --git a/Standards/scs-0214-v2-k8s-node-distribution.md b/Standards/scs-0214-v2-k8s-node-distribution.md
index 3b4915492..37af338ee 100644
--- a/Standards/scs-0214-v2-k8s-node-distribution.md
+++ b/Standards/scs-0214-v2-k8s-node-distribution.md
@@ -1,7 +1,8 @@
 ---
 title: Kubernetes Node Distribution and Availability
 type: Standard
-status: Draft
+status: Stable
+stabilized_at: 2024-11-21
 replaces: scs-0214-v1-k8s-node-distribution.md
 track: KaaS
 ---
@@ -100,23 +101,6 @@ These labels MUST be kept up to date with the current state of the deployment.
   The field gets autopopulated most of the time by either the kubelet or external
   mechanisms like the cloud controller.
 
-- `topology.scs.community/host-id`
-
-  This is an SCS-specific label; it MUST contain the hostID of the physical machine running
-  the hypervisor (NOT: the hostID of a virtual machine). Here, the hostID is an arbitrary identifier,
-  which need not contain the actual hostname, but it should nonetheless be unique to the host.
-  This helps identify the distribution over underlying physical machines,
-  which would be masked if VM hostIDs were used.
-
-## Conformance Tests
-
-The script `k8s-node-distribution-check.py` checks the nodes available with a user-provided
-kubeconfig file. Based on the labels `topology.scs.community/host-id`,
-`topology.kubernetes.io/zone`, `topology.kubernetes.io/region` and `node-role.kubernetes.io/control-plane`,
-the script then determines whether the nodes are distributed according to this standard.
-If this isn't the case, the script produces an error.
-It also produces warnings and informational outputs, e.g., if labels don't seem to be set.
-
 ## Previous standard versions
 
 This is version 2 of the standard; it extends [version 1](scs-0214-v1-k8s-node-distribution.md) with the
diff --git a/Standards/scs-0214-w1-k8s-node-distribution-implementation-testing.md b/Standards/scs-0214-w1-k8s-node-distribution-implementation-testing.md
index 4366365a0..6460cc195 100644
--- a/Standards/scs-0214-w1-k8s-node-distribution-implementation-testing.md
+++ b/Standards/scs-0214-w1-k8s-node-distribution-implementation-testing.md
@@ -16,25 +16,15 @@ Worker nodes can also be distributed over "failure zones", but this isn't a requ
 irement. Distribution must be shown through labelling, so that users can access these information.
 
 Node distribution metadata is provided through the usage of the labels
-`topology.kubernetes.io/region`, `topology.kubernetes.io/zone` and
-`topology.scs.community/host-id` respectively.
-
-At the moment, not all labels are set automatically by most K8s cluster utilities, which incurs
-additional setup and maintenance costs.
+`topology.kubernetes.io/region` and `topology.kubernetes.io/zone`.
 
 ## Automated tests
 
-### Notes
-
-The test for the [SCS K8s Node Distribution and Availability](https://github.com/SovereignCloudStack/standards/blob/main/Standards/scs-0214-v2-k8s-node-distribution.md)
-checks if control-plane nodes are distributed over different failure zones (distributed into
-physical machines, zones and regions) by observing their labels defined by the standard.
-
-### Implementation
+Currently, automated testing is not readily possible because we cannot access information about
+the underlying host of a node (as opposed to its region and zone). Therefore, the test will only output
+a tentative result.
 
-The script [`k8s_node_distribution_check.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py)
-connects to an existing K8s cluster and checks if a distribution can be detected with the labels
-set for the nodes of this cluster.
+The current implementation can be found in the script [`k8s_node_distribution_check.py`](https://github.com/SovereignCloudStack/standards/blob/main/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py).
 
 ## Manual tests
 
diff --git a/Tests/kaas/k8s-node-distribution/check_nodes_test.py b/Tests/kaas/k8s-node-distribution/check_nodes_test.py
index d32edccfb..439d1b18e 100644
--- a/Tests/kaas/k8s-node-distribution/check_nodes_test.py
+++ b/Tests/kaas/k8s-node-distribution/check_nodes_test.py
@@ -42,9 +42,9 @@ def test_not_enough_nodes(caplog, load_testdata):
 
 
 @pytest.mark.parametrize("yaml_key", ["no-distribution-1", "no-distribution-2"])
-def test_no_distribution(yaml_key, caplog, load_testdata):
+def notest_no_distribution(yaml_key, caplog, load_testdata):
     data = load_testdata[yaml_key]
-    with caplog.at_level("ERROR"):
+    with caplog.at_level("WARNING"):
         assert check_nodes(data.values()) == 2
     assert len(caplog.records) == 1
     record = caplog.records[0]
@@ -52,7 +52,7 @@ def test_no_distribution(yaml_key, caplog, load_testdata):
     assert record.levelname == "ERROR"
 
 
-def test_missing_label(caplog, load_testdata):
+def notest_missing_label(caplog, load_testdata):
     data = load_testdata["missing-labels"]
     assert check_nodes(data.values()) == 2
     hostid_missing_records = [
diff --git a/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py b/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py
index efac000d4..038d8a67c 100755
--- a/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py
+++ b/Tests/kaas/k8s-node-distribution/k8s_node_distribution_check.py
@@ -22,7 +22,6 @@
 and does require these labels to be set, but should yield overall pretty good
 initial results.
 
-    topology.scs.openstack.org/host-id  # previously kubernetes.io/hostname
     topology.kubernetes.io/zone
     topology.kubernetes.io/region
     node-role.kubernetes.io/control-plane
@@ -47,7 +46,6 @@
 LABELS = (
     "topology.kubernetes.io/region",
     "topology.kubernetes.io/zone",
-    "topology.scs.community/host-id",
 )
 
 logger = logging.getLogger(__name__)
@@ -164,12 +162,11 @@ def compare_labels(node_list, node_type="control"):
         )
         return
 
-    if node_type == "control":
-        raise DistributionException("The distribution of nodes described in the standard couldn't be detected.")
-    elif node_type == "worker":
-        logger.warning("No node distribution could be detected for the worker nodes. "
-                       "This produces only a warning, since it is just a recommendation.")
-    return
+    #
+    # if node_type == "control":
+    #     raise DistributionException("The distribution of nodes described in the standard couldn't be detected.")
+    logger.warning("No node distribution could be detected for the worker nodes. "
+                   "This produces only a warning, since it is just a recommendation.")
 
 
 def check_nodes(nodes):