From 65f297b4080a29e883d1d8d9866a585529b6a250 Mon Sep 17 00:00:00 2001 From: Josselin Date: Thu, 19 Dec 2024 16:49:33 -0800 Subject: [PATCH 1/5] CDP-8328: Emit normandie and knox statuses inside pings --- .gitignore | 6 +- deploy-agent/README.md | 2 + deploy-agent/deployd/client/client.py | 98 ++++++++++++++++++- deploy-agent/deployd/types/ping_request.py | 13 ++- .../tests/unit/deploy/client/test_client.py | 38 +++++++ 5 files changed, 146 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 43bf1857bf..283192a180 100644 --- a/.gitignore +++ b/.gitignore @@ -82,9 +82,5 @@ MANIFEST docs/docs_generator/bin/ # Bazel -bazel-bin -bazel-out -bazel-testlogs -bazel-weave -bazel-deploy-agent +bazel-* MODULE.bazel* diff --git a/deploy-agent/README.md b/deploy-agent/README.md index 75cf171f0a..d99705e957 100644 --- a/deploy-agent/README.md +++ b/deploy-agent/README.md @@ -5,6 +5,7 @@ See https://github.com/pinterest/teletraan/wiki for more details. 1. Install [pre-commit](https://pre-commit.com/#install) ```bash +cd teletraan pip install pre-commit pre-commit install ``` @@ -15,6 +16,7 @@ Ensure that your python version is at least python3.8. ## Building ```bash +cd teletraan/deploy-agent/ sudo bazel build //deployd:deploy-agent ``` diff --git a/deploy-agent/deployd/client/client.py b/deploy-agent/deployd/client/client.py index 549833608c..a26b0b86dc 100644 --- a/deploy-agent/deployd/client/client.py +++ b/deploy-agent/deployd/client/client.py @@ -19,6 +19,9 @@ import socket import traceback import json +from pathlib import Path +import re +import subprocess from deployd.client.base_client import BaseClient from deployd.client.restfulclient import RestfulClient @@ -32,6 +35,12 @@ log = logging.getLogger(__name__) +NORMANDIE_CERT_FILEPATH = "/var/lib/normandie/fuse/cert/generic" +SAN_URI_PATTERN = r"URI:(\S+),?" +STATUSERRNO_PATTERN = r"StatusErrno=(\d+)" +ACTIVESTATE_PATTERN = r"ActiveState=(\S+)" +SUBSTATE_PATTERN = r"SubState=(\S+)" + class Client(BaseClient): def __init__(self, config=None, hostname=None, ip=None, hostgroup=None, @@ -51,6 +60,8 @@ def __init__(self, config=None, hostname=None, ip=None, hostgroup=None, # keep trying to fetch it from facter every time self._stage_type_fetched = False self._account_id = None + self._normandie_status = None + self._knox_status = None def _read_host_info(self) -> bool: if self._use_facter: @@ -196,10 +207,24 @@ def _read_host_info(self) -> bool: info = json.loads(ec2_metadata) self._account_id = info.get('AccountId', None) + # Retrieve Normandie Status, swallowing exceptions if any: Ping should always be sent. + try: + self._normandie_status = self.get_normandie_status() + except Exception as e: + log.exception(f"Failed to get normandie status.: {e}") + self._normandie_status = 'ERROR' + + # Retrieve Knox Status, swallowing exceptions if any: Ping should always be sent. + try: + self._knox_status = self.get_knox_status() + except Exception as e: + log.exception(f"Failed to get knox status.: {e}") + self._knox_status = 'ERROR' + log.info("Host information is loaded. " "Host name: {}, IP: {}, host id: {}, agent_version={}, autoscaling_group: {}, " - "availability_zone: {}, ec2_tags: {}, stage_type: {}, group: {}, account id: {}".format(self._hostname, self._ip, self._id, - self._agent_version, self._autoscaling_group, self._availability_zone, self._ec2_tags, self._stage_type, self._hostgroup, self._account_id)) + "availability_zone: {}, ec2_tags: {}, stage_type: {}, group: {}, account id: {}, normandie_status: {}, knox_status: {}".format(self._hostname, self._ip, self._id, + self._agent_version, self._autoscaling_group, self._availability_zone, self._ec2_tags, self._stage_type, self._hostgroup, self._account_id, self._normandie_status, self._knox_status)) if not self._availability_zone: log.error("Fail to read host info: availablity zone") @@ -209,6 +234,71 @@ def _read_host_info(self) -> bool: return True + def get_normandie_status(self) -> Optional[str]: + path = Path(NORMANDIE_CERT_FILEPATH) + cmd = [ + "openssl", + "x509", + "-in", + path.as_posix(), + "-noout", + "-text", + "-certopt", + "no_subject,no_header,no_version,no_serial,no_signame,no_validity,no_issuer,no_pubkey,no_sigdump,no_aux", + ] + try: + cert = subprocess.check_output(cmd).decode("utf-8") + except subprocess.CalledProcessError as e: + log.exception(f"failed to get spiffe id from normandie: {e}") + return 'ERROR' + + matcher = re.search(SAN_URI_PATTERN, cert) + if matcher is None: + return 'ERROR' + spiff_id = matcher.group(1) + + if spiff_id: + return 'OK' + else: + return 'ERROR' + + def get_knox_status(self) -> Optional[str]: + cmd = [ + "systemctl", + "show", + "knox", + "--property=Result", + "--property=StatusErrno", + "--property=ActiveState", + "--property=SubState" + ] + try: + status = subprocess.check_output(cmd).decode("utf-8") + except subprocess.CalledProcessError as e: + log.exception(f"failed to get knox service status from systemctl: {e}") + return 'ERROR' + + # Use three different matchers and pattern to not make assumptions on the order of the properties + matcher = re.search(STATUSERRNO_PATTERN, status) + if matcher is None: + return 'ERROR' + statusErrNo = matcher.group(1) + + matcher = re.search(ACTIVESTATE_PATTERN, status) + if matcher is None: + return 'ERROR' + activeState = matcher.group(1) + + matcher = re.search(SUBSTATE_PATTERN, status) + if matcher is None: + return 'ERROR' + subState = matcher.group(1) + + if statusErrNo == "0" and activeState == "active" and subState == "running": + return "OK" + else: + return "ERROR" + def send_reports(self, env_reports=None) -> Optional[PingResponse]: try: if self._read_host_info(): @@ -229,7 +319,9 @@ def send_reports(self, env_reports=None) -> Optional[PingResponse]: availabilityZone=self._availability_zone, ec2Tags=self._ec2_tags, stageType=self._stage_type, - accountId=self._account_id) + accountId=self._account_id, + normandieStatus=self._normandie_status, + knoxStatus=self._knox_status) with create_stats_timer('deploy.agent.request.latency', tags={'host': self._hostname}): diff --git a/deploy-agent/deployd/types/ping_request.py b/deploy-agent/deployd/types/ping_request.py index 0e6affbf7a..76206cdaae 100644 --- a/deploy-agent/deployd/types/ping_request.py +++ b/deploy-agent/deployd/types/ping_request.py @@ -20,7 +20,8 @@ class PingRequest(object): def __init__(self, hostId=None, hostName=None, hostIp=None, groups=None, reports=None, - agentVersion=None, autoscalingGroup=None, availabilityZone=None, ec2Tags=None, stageType=None, accountId=None): + agentVersion=None, autoscalingGroup=None, availabilityZone=None, ec2Tags=None, stageType=None, + accountId=None, normandieStatus=None, knoxStatus=None): self.hostId = hostId self.hostName = hostName self.hostIp = hostIp @@ -32,6 +33,8 @@ def __init__(self, hostId=None, hostName=None, hostIp=None, groups=None, reports self.ec2Tags = ec2Tags self.stageType = stageType self.accountId = accountId + self.normandieStatus = normandieStatus + self.knoxStatus = knoxStatus def to_json(self): ping_requests = {} @@ -52,6 +55,10 @@ def to_json(self): ping_requests["accountId"] = self.accountId if self.ec2Tags: ping_requests["ec2Tags"] = self.ec2Tags + if self.normandieStatus: + ping_requests["normandieStatus"] = self.normandieStatus + if self.knoxStatus: + ping_requests["knoxStatus"] = self.knoxStatus ping_requests["reports"] = [] for report in self.reports: @@ -86,6 +93,6 @@ def to_json(self): def __str__(self): return "PingRequest(hostId={}, hostName={}, hostIp={}, agentVersion={}, autoscalingGroup={}, " \ - "availabilityZone={}, ec2Tags={}, stageType={}, groups={}, accountId={}, reports={})".format(self.hostId, self.hostName, + "availabilityZone={}, ec2Tags={}, stageType={}, groups={}, accountId={}, normandieStatus={}, knoxStatus={}, reports={})".format(self.hostId, self.hostName, self.hostIp, self.agentVersion, self.autoscalingGroup, self.availabilityZone, self.ec2Tags, self.stageType, - self.groups, self.accountId, ",".join(str(v) for v in self.reports)) + self.groups, self.accountId, self.normandieStatus, self.knoxStatus, ",".join(str(v) for v in self.reports)) diff --git a/deploy-agent/tests/unit/deploy/client/test_client.py b/deploy-agent/tests/unit/deploy/client/test_client.py index 7dd5052723..22e24775e0 100644 --- a/deploy-agent/tests/unit/deploy/client/test_client.py +++ b/deploy-agent/tests/unit/deploy/client/test_client.py @@ -1,4 +1,5 @@ import unittest +from unittest import mock from tests import TestCase from deployd.client.client import Client @@ -23,6 +24,43 @@ def test_read_host_info(self): self.assertIsNotNone(client._ip) self.assertTrue(return_value) + def test_read_host_info_normandie(self): + client = Client(config=Config()) + client._ec2_tags = {} + client._availability_zone = "us-east-1" + return_value: bool = client._read_host_info() + self.assertTrue(return_value) + + # On a host with normandie, the normandie status should be set to OK + # On a host without, such as build agents, the normandie status should be ERROR + self.assertIsNotNone(client._normandie_status) + self.assertTrue(client._normandie_status == "OK" or client._normandie_status == "ERROR") + + # Normandie status should be ERROR even when the subprocess call returns a non-parseable output + @mock.patch("subprocess.check_output") + def test_read_host_info_normandie_error(self, mock_check_output): + mock_check_output.return_value = b"not a parseable SAN URL" + client = Client(config=Config()) + client._ec2_tags = {} + client._availability_zone = "us-east-1" + return_value: bool = client._read_host_info() + self.assertTrue(return_value) + + self.assertIsNotNone(client._normandie_status) + self.assertEqual(client._normandie_status, "ERROR") + + def test_read_host_info_knox(self): + client = Client(config=Config()) + client._ec2_tags = {} + client._availability_zone = "us-east-1" + return_value: bool = client._read_host_info() + self.assertTrue(return_value) + + # On a host with knox, the knox status should be set to OK + # On a host without, such as build agents, the knox status should be ERROR + self.assertIsNotNone(client._knox_status) + self.assertTrue(client._knox_status == "OK" or client._knox_status == "ERROR") + def test_read_host_info_no_ec2_tags_provided(self): client = Client(config=Config()) with self.assertRaises(AttributeError): From 3462568654559f768ed37e48fe9c4b8e8d8878a8 Mon Sep 17 00:00:00 2001 From: Josselin PIERRE Date: Thu, 9 Jan 2025 14:15:21 -0800 Subject: [PATCH 2/5] Update deploy-agent/deployd/client/client.py Co-authored-by: Omar --- deploy-agent/deployd/client/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy-agent/deployd/client/client.py b/deploy-agent/deployd/client/client.py index a26b0b86dc..6880b35cb2 100644 --- a/deploy-agent/deployd/client/client.py +++ b/deploy-agent/deployd/client/client.py @@ -211,7 +211,7 @@ def _read_host_info(self) -> bool: try: self._normandie_status = self.get_normandie_status() except Exception as e: - log.exception(f"Failed to get normandie status.: {e}") + log.exception(f"Failed to get normandie status: {e}") self._normandie_status = 'ERROR' # Retrieve Knox Status, swallowing exceptions if any: Ping should always be sent. From f08a123ad992141c721d2d33f3fc30f9165f1d41 Mon Sep 17 00:00:00 2001 From: Josselin PIERRE Date: Thu, 9 Jan 2025 14:15:27 -0800 Subject: [PATCH 3/5] Update deploy-agent/deployd/client/client.py Co-authored-by: Omar --- deploy-agent/deployd/client/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy-agent/deployd/client/client.py b/deploy-agent/deployd/client/client.py index 6880b35cb2..f97c099f47 100644 --- a/deploy-agent/deployd/client/client.py +++ b/deploy-agent/deployd/client/client.py @@ -218,7 +218,7 @@ def _read_host_info(self) -> bool: try: self._knox_status = self.get_knox_status() except Exception as e: - log.exception(f"Failed to get knox status.: {e}") + log.exception(f"Failed to get knox status: {e}") self._knox_status = 'ERROR' log.info("Host information is loaded. " From d3590e87d706cf76b43a407b7f6d3412c651b78c Mon Sep 17 00:00:00 2001 From: Josselin Date: Thu, 9 Jan 2025 14:18:49 -0800 Subject: [PATCH 4/5] Fix indentation --- deploy-agent/deployd/types/ping_request.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy-agent/deployd/types/ping_request.py b/deploy-agent/deployd/types/ping_request.py index 76206cdaae..3e58710adc 100644 --- a/deploy-agent/deployd/types/ping_request.py +++ b/deploy-agent/deployd/types/ping_request.py @@ -20,7 +20,7 @@ class PingRequest(object): def __init__(self, hostId=None, hostName=None, hostIp=None, groups=None, reports=None, - agentVersion=None, autoscalingGroup=None, availabilityZone=None, ec2Tags=None, stageType=None, + agentVersion=None, autoscalingGroup=None, availabilityZone=None, ec2Tags=None, stageType=None, accountId=None, normandieStatus=None, knoxStatus=None): self.hostId = hostId self.hostName = hostName From 0a62f0698b9c51baa3d956e38738e0ab0f4f618f Mon Sep 17 00:00:00 2001 From: Josselin Date: Tue, 11 Feb 2025 14:58:14 -0800 Subject: [PATCH 5/5] remove extra whitespaces --- deploy-agent/deployd/client/client.py | 4 ++-- deploy-agent/deployd/types/ping_request.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy-agent/deployd/client/client.py b/deploy-agent/deployd/client/client.py index 0a1bc893ff..0413ecb6a5 100644 --- a/deploy-agent/deployd/client/client.py +++ b/deploy-agent/deployd/client/client.py @@ -240,7 +240,7 @@ def _read_host_info(self) -> bool: log.info( "Host information is loaded. " "Host name: {}, IP: {}, host id: {}, agent_version={}, autoscaling_group: {}, " - "availability_zone: {}, ec2_tags: {}, stage_type: {}, group: {}, account id: {}," + "availability_zone: {}, ec2_tags: {}, stage_type: {}, group: {}, account id: {}," "normandie_status: {}, knox_status: {}".format( self._hostname, self._ip, @@ -252,7 +252,7 @@ def _read_host_info(self) -> bool: self._stage_type, self._hostgroup, self._account_id, - self._normandie_status, + self._normandie_status, self._knox_status ) ) diff --git a/deploy-agent/deployd/types/ping_request.py b/deploy-agent/deployd/types/ping_request.py index ac3eac1363..b6fb9b27d7 100644 --- a/deploy-agent/deployd/types/ping_request.py +++ b/deploy-agent/deployd/types/ping_request.py @@ -31,7 +31,7 @@ def __init__( ec2Tags=None, stageType=None, accountId=None, - normandieStatus=None, + normandieStatus=None, knoxStatus=None ): self.hostId = hostId @@ -120,7 +120,7 @@ def __str__(self): self.stageType, self.groups, self.accountId, - self.normandieStatus, + self.normandieStatus, self.knoxStatus, ",".join(str(v) for v in self.reports), )