forked from bn222/cluster-deployment-automation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextraConfigSriov.py
129 lines (107 loc) · 4.66 KB
/
extraConfigSriov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import host
from k8sClient import K8sClient
import os
from git import Repo
import time
from clustersConfig import ClustersConfig
from arguments import parse_args
import shutil
import jinja2
import sys
class ExtraConfigSriov:
    """Deploy the SR-IOV network operator onto a cluster from a fresh git clone."""

    def __init__(self, cc):
        """Remember the cluster configuration.

        Args:
            cc: Cluster configuration; ``run`` reads its ``"kubeconfig"`` entry.
        """
        self._cc = cc

    def run(self, _):
        """Clone the operator repository and (re)deploy it through its Makefile.

        Any existing checkout at ``/root/sriov-network-operator`` is deleted
        and replaced with a fresh clone of the ``master`` branch.  The make
        targets are executed from inside the checkout with ``KUBECONFIG``
        pointing at the cluster's kubeconfig.

        Args:
            _: Unused; accepted so all extra-config steps share a signature.
        """
        client = K8sClient(self._cc["kubeconfig"])
        lh = host.LocalHost()
        repo_dir = "/root/sriov-network-operator"
        url = "https://github.com/bn222/sriov-network-operator"

        # Always start from a pristine checkout.
        if os.path.exists(repo_dir):
            shutil.rmtree(repo_dir)

        print(f"Cloning repo to {repo_dir}")
        Repo.clone_from(url, repo_dir, branch='master')

        cur_dir = os.getcwd()
        os.chdir(repo_dir)
        try:
            env = os.environ.copy()
            env["KUBECONFIG"] = client._kc

            # cleanup first, to make this script idempotent
            print("running make undeploy")
            print(lh.run("make undeploy", env))

            # Workaround PSA issues. https://issues.redhat.com/browse/OCPBUGS-1005
            client.oc("create namespace openshift-sriov-network-operator")
            client.oc("label ns --overwrite openshift-sriov-network-operator "
                      "pod-security.kubernetes.io/enforce=privileged "
                      "pod-security.kubernetes.io/enforce-version=v1.24 "
                      "security.openshift.io/scc.podSecurityLabelSync=false")

            print("running make deploy-setup")
            print(lh.run("make deploy-setup", env))
            # NOTE(review): presumably gives the operator time to come up
            # before later steps run - confirm the required delay.
            time.sleep(60)
        finally:
            # Restore the caller's working directory even when a step above
            # raises, so later code does not silently run inside the checkout.
            os.chdir(cur_dir)
class ExtraConfigSriovOvSHWOL:
    """Configure SR-IOV based OVS hardware offload on the cluster's workers."""

    def __init__(self, cc):
        """Remember the cluster configuration.

        Args:
            cc: Cluster configuration; its ``"kubeconfig"`` and ``"workers"``
                entries are read by the methods of this class.
        """
        self._cc = cc

    @staticmethod
    def _connect_to_node(client: K8sClient, name: str) -> "host.RemoteHost":
        """Return an SSH session (user ``core``) to the node called *name*.

        Exits the process with status -1 when the node's IP is unknown.
        """
        ip = client.get_ip(name)
        if ip is None:
            print(f"Failed to get the IP address of node {name}", file=sys.stderr)
            sys.exit(-1)
        rh = host.RemoteHost(ip)
        rh.ssh_connect("core")
        return rh

    @staticmethod
    def _render_template(template_path: str, out_path: str, **variables) -> None:
        """Render the Jinja2 template at *template_path* into *out_path*, echoing the result."""
        with open(template_path) as f:
            rendered = jinja2.Template(f.read()).render(**variables)
        print(rendered)
        with open(out_path, "w") as out_file:
            out_file.write(rendered)

    def need_pci_realloc(self, client: K8sClient) -> bool:
        """Report whether any worker has a failed switchdev configuration unit.

        Args:
            client: Client used to look up each worker's IP address.

        Returns:
            True as soon as one worker lists
            ``switchdev-configuration-before-nm.service`` among its failed
            systemd units, False when none does.
        """
        for e in self._cc["workers"]:
            rh = self._connect_to_node(client, e['name'])
            failed_units = rh.run("systemctl list-units --state=failed --plain --no-legend").out
            if "switchdev-configuration-before-nm.service" in failed_units:
                print(f"switchdev-configuration is failing in {e['name']}, additional machine configuration is required")
                return True
        return False

    def enable_pci_realloc(self, client: K8sClient, mcp_name: str) -> None:
        """Apply the pci-realloc machine config and wait for the pool to update.

        Args:
            client: Client used to run ``oc`` commands.
            mcp_name: Machine config pool name, passed to the template as
                ``MCPName`` and used in the ``oc wait`` call.
        """
        print("Applying pci-realloc machine config")
        self._render_template('./manifests/nicmode/pci-realloc.yaml.j2',
                              "/tmp/pci-realloc.yaml",
                              MCPName=mcp_name)
        client.oc("create -f /tmp/pci-realloc.yaml")
        print("Waiting for mcp")
        # NOTE(review): presumably lets the pool start updating before we
        # wait on the "updated" condition - confirm.
        time.sleep(60)
        client.oc(f"wait mcp {mcp_name} --for condition=updated --timeout=50m")

    def ensure_pci_realloc(self, client: K8sClient, mcp_name: str) -> None:
        """Apply the pci-realloc machine config only when a worker needs it."""
        if self.need_pci_realloc(client):
            self.enable_pci_realloc(client, mcp_name)

    def run(self, _) -> None:
        """Label the workers, create the SR-IOV resources and wait for the pool.

        Args:
            _: Unused; accepted so all extra-config steps share a signature.
        """
        client = K8sClient(self._cc["kubeconfig"])
        client.oc("create -f manifests/nicmode/pool.yaml")

        pf_names = []
        for e in self._cc["workers"]:
            name = e["name"]
            print(client.oc(f'label node {name} --overwrite=true feature.node.kubernetes.io/network-sriov.capable=true'))

            # Find out what the PF attached to br-ex is (uplink port). We only do HWOL on uplink ports.
            rh = self._connect_to_node(client, name)
            pf_name = rh.run("cat /var/lib/ovnk/iface_default_hint").out.strip()
            if not pf_name:
                # An empty name would end up in the node policy and would
                # also defeat the fallback below, so skip it.
                print(f"No PF name found on node {name}", file=sys.stderr)
                continue
            print(f"Found PF Name {pf_name} on node {name}")
            if pf_name not in pf_names:
                pf_names.append(pf_name)

        # Just in case we don't get any PFs
        if not pf_names:
            pf_names.append("ens1f0")

        self._render_template('./manifests/nicmode/sriov-node-policy.yaml.j2',
                              "/tmp/sriov-node-policy.yaml",
                              pfNamesAll=pf_names)

        print(client.oc("create -f manifests/nicmode/sriov-pool-config.yaml"))
        print(client.oc("create -f /tmp/sriov-node-policy.yaml"))
        print(client.oc("create -f manifests/nicmode/nad.yaml"))
        time.sleep(60)
        print(client.oc("wait mcp sriov --for condition=updated --timeout=50m"))
        self.ensure_pci_realloc(client, "sriov")
def main():
    """Load the cluster configuration named on the command line and deploy SR-IOV."""
    config = ClustersConfig(parse_args().config)
    ExtraConfigSriov(config).run(None)


if __name__ == "__main__":
    main()