forked from bn222/cluster-deployment-automation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextraConfigDpuTenant.py
106 lines (90 loc) · 4.2 KB
/
extraConfigDpuTenant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from k8sClient import K8sClient
import os
import host
import time
from extraConfigSriov import ExtraConfigSriov
from extraConfigSriov import ExtraConfigSriovOvSHWOL
class ExtraConfigDpuTenant:
    """Post-config step that sets up the tenant cluster workers as DPU hosts.

    ``run`` drives two clusters through their kubeconfigs
    (``/root/kubeconfig.tenantcluster`` and ``/root/kubeconfig.infracluster``):
    it applies the tenant manifests under ``manifests/tenant/``, labels the
    worker nodes, deploys the SR-IOV operator, and finally wires the
    infrastructure cluster to the tenant cluster.
    """

    def __init__(self, cc):
        """Remember the cluster configuration.

        Args:
            cc: Cluster configuration. ``run`` reads ``cc["workers"]`` (each
                entry providing ``"name"``), ``cc["api_ip"]`` and
                ``cc["name"]``; it is also handed to the SR-IOV helpers.
        """
        self._cc = cc

    @staticmethod
    def _read_text(path):
        """Return the full text of ``path``, closing the file afterwards."""
        with open(path) as f:
            return f.read()

    @staticmethod
    def _write_text(path, contents):
        """Write ``contents`` to ``path``, flushing and closing before returning.

        The generated files are consumed by ``oc create -f`` immediately
        afterwards, so the data must be on disk when this returns.
        """
        with open(path, "w") as f:
            f.write(contents)

    @staticmethod
    def _append_node_env(contents, node_envs):
        """Append one ``<node>: |`` block of ``KEY=VALUE`` lines per node.

        Args:
            contents: Text of the base manifest the blocks are appended to.
            node_envs: Iterable of ``(node_name, env)`` pairs, where ``env``
                maps variable names to values. Order is preserved and
                duplicate node names are emitted as-is.

        Returns:
            ``contents`` followed by the rendered per-node blocks.
        """
        # NOTE(review): the node line and its KEY=VALUE lines carry the same
        # one-space prefix here; a YAML block scalar body normally has to be
        # indented deeper than its key - confirm the literal whitespace
        # against the manifests before relying on this output.
        parts = [contents]
        for node, env in node_envs:
            parts.append(f" {node}: |\n")
            parts.extend(f" {k}={v}\n" for k, v in env.items())
        return "".join(parts)

    def run(self, cfg):
        """Execute the DPU tenant configuration end to end.

        Args:
            cfg: Step configuration. ``cfg["mapping"]`` is iterated; each
                entry supplies a ``"worker"`` (tenant node name) and a
                ``"bf"`` (infrastructure cluster node name). ``cfg`` is also
                passed through to ``ExtraConfigSriov.run``.
        """
        print("Running post config step")
        tclient = K8sClient("/root/kubeconfig.tenantcluster")
        print("Apply DPU tenant mc")
        tclient.oc("create -f manifests/tenant/dputenantmachineconfig.yaml")
        # Fixed pause before waiting on the pool (presumably so the change is
        # picked up before the "updated" condition is checked).
        time.sleep(60)
        print("Waiting for mcp to be updated")
        tclient.oc("wait mcp dpu-host --for condition=updated")

        print("Labeling nodes")
        for e in self._cc["workers"]:
            cmd = f"label node {e['name']} node-role.kubernetes.io/dpu-host="
            print(tclient.oc(cmd))

        print("Deploying sriov network operator")
        ec = ExtraConfigSriov(self._cc)
        ec.run(cfg)
        print("Waiting for mcp dpu-host to become ready")
        tclient.oc("wait mcp dpu-host --for condition=updated --timeout=50m")

        print("Creating sriov pool config")
        tclient.oc("create -f manifests/tenant/sriov-pool-config.yaml")
        tclient.oc("create -f manifests/tenant/SriovNetworkNodePolicy.yaml")
        print("Waiting for mcp to be updated")
        time.sleep(60)
        tclient.oc("wait mcp dpu-host --for condition=updated --timeout=50m")

        print("creating config map to put ovn-k into dpu host mode")
        tclient.oc("create -f manifests/tenant/sriovdpuconfigmap.yaml")
        print("creating mc to disable ovs")
        tclient.oc("create -f manifests/tenant/disable-ovs.yaml")
        print("Waiting for mcp")
        time.sleep(60)
        tclient.oc("wait mcp dpu-host --for condition=updated --timeout=50m")

        print("setting ovn kube node env-override to set management port")
        print(os.getcwd())
        # Every mapped worker gets the same management-port override.
        worker_envs = [
            (e['worker'], {"OVNKUBE_NODE_MGMT_PORT_NETDEV": "ens1f0v0"})
            for e in cfg["mapping"]
        ]
        contents = self._append_node_env(
            self._read_text("manifests/tenant/setenvovnkube.yaml"), worker_envs
        )
        self._write_text("/tmp/1.yaml", contents)

        print("Running create")
        print(tclient.oc("create -f /tmp/1.yaml"))

        for e in self._cc["workers"]:
            cmd = f"label node {e['name']} network.operator.openshift.io/dpu-host="
            print(tclient.oc(cmd))
            rh = host.RemoteHost(tclient.get_ip(e['name']))
            rh.ssh_connect("core")
            # workaround for https://issues.redhat.com/browse/NHE-335
            print(rh.run("sudo ovs-vsctl del-port br-int ovn-k8s-mp0"))

        print("Final infrastructure cluster configuration")
        iclient = K8sClient("/root/kubeconfig.infracluster")

        # https://issues.redhat.com/browse/NHE-334
        # Append an /etc/hosts entry for the tenant API on every infra node.
        for e in iclient.get_nodes():
            ip = iclient.get_ip(e)
            rh = host.RemoteHost(ip)
            rh.ssh_connect("core")
            cmd = f"echo '{self._cc['api_ip']} api.{self._cc['name']}.redhat.com' | sudo tee -a /etc/hosts"
            print(rh.run(cmd))

        iclient.oc("project tenantcluster-dpu")
        # Publish the tenant kubeconfig file to the infra cluster as a secret.
        print(iclient.oc(f"create secret generic tenant-cluster-1-kubeconf --from-file=config={tclient._kc}"))

        # One override block per infra ("bf") node, pointing it at its worker.
        bf_envs = [
            (
                e['bf'],
                {
                    "TENANT_K8S_NODE": e['worker'],
                    "DPU_IP": iclient.get_ip(e['bf']),
                    "MGMT_IFNAME": "eth1",
                },
            )
            for e in cfg["mapping"]
        ]
        contents = self._append_node_env(
            self._read_text("manifests/tenant/envoverrides.yaml"), bf_envs
        )
        self._write_text("/tmp/envoverrides.yaml", contents)
        iclient.oc("create -f /tmp/envoverrides.yaml")

        r = iclient.oc('patch --type merge -p {"spec":{"kubeConfigFile":"tenant-cluster-1-kubeconf"}} OVNKubeConfig ovnkubeconfig-sample -n tenantcluster-dpu')
        print(r)

        print("Creating network attachement definition")
        tclient.oc("create -f manifests/tenant/nad.yaml")

        ec = ExtraConfigSriovOvSHWOL(self._cc)
        ec.ensure_pci_realloc(tclient, "dpu-host")
def main():
    """Placeholder script entry point; performs no work and returns None."""
    return None
# Call main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()