-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsetup_wizard.py
executable file
·198 lines (160 loc) · 6.3 KB
/
setup_wizard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python3
# This file should not depend on any repo python files outside of the top-level directory.
from pull_docker_image import docker_pull
from setup_common import get_env_json, update_env_json, LATEST_DOCKER_HUB_IMAGE
import os
import subprocess
def print_green(text):
    """Print ``text`` to stdout wrapped in the ANSI escape codes for green foreground."""
    # \033[32m switches the foreground to green; \033[0m resets all attributes afterwards.
    print(f"\033[32m{text}\033[0m")
def print_red(text):
    """Print ``text`` to stdout wrapped in the ANSI escape codes for red foreground."""
    # \033[31m switches the foreground to red; \033[0m resets all attributes afterwards.
    print(f"\033[31m{text}\033[0m")
def run(cmd: str, print_cmd=False, print_output=False):
    """
    Runs the given cmd through the shell and waits for it to finish.

    Args:
        cmd: Command line to execute (passed to the shell, i.e. shell=True).
        print_cmd: If true, echo cmd to stdout before running it.
        print_output: If true, the child inherits this process's stdout/stderr, so its
            output appears live and is not captured. Otherwise both streams are captured.

    Returns:
        None if the command exits with status 0. Else, the proc object. When the output
        was captured, proc.stdout and proc.stderr are in-memory byte streams holding
        everything the command wrote, so they can be read with .read().
    """
    import io  # local import: only needed to re-expose the captured output below

    if print_cmd:
        print(cmd)

    pipe = None if print_output else subprocess.PIPE
    p = subprocess.Popen(cmd, shell=True, stdout=pipe, stderr=pipe)

    # communicate() drains both pipes while waiting for the child. A bare wait() can
    # deadlock once the child has written enough output to fill the OS pipe buffer.
    out, err = p.communicate()
    if not p.returncode:
        return None

    if not print_output:
        # communicate() has consumed and closed the real pipes; hand the captured bytes
        # back through readable streams so callers can still read stdout/stderr.
        p.stdout = io.BytesIO(out)
        p.stderr = io.BytesIO(err)
    return p
class SetupException(Exception):
    """Setup failure that is reported by printing its arguments, without a full traceback."""
class VerboseSetupException(Exception):
    """Setup failure that, when caught, is re-raised so the full traceback is printed."""
def validate_nvidia_driver():
    """
    Validate that the NVIDIA driver is installed and working by running nvidia-smi.

    Raises:
        SetupException: if nvidia-smi cannot be launched or exits with a nonzero status.
    """
    print('Validating NVIDIA driver installation...')
    try:
        result = subprocess.run(["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as e:
        # nvidia-smi is missing from PATH or not executable. That is a validation failure,
        # not an unexpected crash, so report it through the same path as a nonzero exit.
        failure_details = f"Could not run nvidia-smi: {e}"
    else:
        if result.returncode == 0:
            print("✅ NVIDIA driver is installed and working.")
            return
        # errors='replace' keeps undecodable bytes from masking the real failure.
        failure_details = result.stderr.decode(errors='replace')

    print("❌ NVIDIA driver validation failed.")
    print(failure_details)
    print('')
    print("Please check NVIDIA website for driver installation instructions.")
    raise SetupException()
def validate_nvidia_installation(image):
    """
    Validate that the GPU is usable from inside a Docker container.

    Runs nvidia-smi inside a throwaway container of the given image with all GPUs
    requested. If that fails, the host driver is validated first (which raises on its own
    failure); if the driver is fine, the container toolkit is reported as the culprit.

    Raises:
        SetupException: if the in-container nvidia-smi run exits with a nonzero status.
    """
    print('Validating NVIDIA installation...')

    probe = subprocess.run(
        ["docker", "run", "--rm", "--gpus", "all", image, "nvidia-smi"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if probe.returncode == 0:
        print("✅ NVIDIA Container Toolkit is installed and GPU is accessible in Docker.")
        return

    # first check NVIDIA driver
    validate_nvidia_driver()

    # if we got here, the driver is installed but the container toolkit is not working
    failure_report = (
        "❌ NVIDIA Container Toolkit validation failed.",
        probe.stderr,
        "Please read here for Container Toolkit installation instructions:",
        '',
        'https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html',
        '',
        'Likely applicable sections:',
        ' - Installing with Apt',
        ' - Configuring Docker',
    )
    for line in failure_report:
        print(line)
    raise SetupException()
def setup_output_dir():
    """
    Request user to set the output directory.

    Prompts for a directory, defaulting to the OUTPUT_DIR value returned by
    get_env_json() or, if there is none, to an "output" directory under the current
    working directory. The directory is created if needed and its absolute path is
    registered via update_env_json().

    Raises:
        SetupException: if the directory cannot be created.
    """
    print('AlphaZeroArcade runs write (a lot of) data to disk. Please specify a directory')
    print('where that data will be written. This directory will be mounted into the docker')
    print('container. If you have a fast SSD, it is recommended to use that for the data')
    print('directory.')
    print('')

    env = get_env_json()
    default_output_dir = env.get('OUTPUT_DIR', os.path.join(os.getcwd(), 'output'))

    prompt = f'Please enter the location of your output directory [{default_output_dir}]: '
    output_dir = input(prompt).strip() or default_output_dir

    # Expand "~" and anchor relative paths to the current directory now, so that the
    # registered value means the same location regardless of where it is read later.
    resolved_output_dir = os.path.abspath(os.path.expanduser(output_dir))

    try:
        os.makedirs(resolved_output_dir, exist_ok=True)
    except (OSError, ValueError) as e:
        # OSError: permissions, read-only filesystem, path component is a file, etc.
        # ValueError: the entered path contains an embedded null byte.
        print(f"❌ Failed to create output directory: {resolved_output_dir}")
        print(f"Error: {e}")
        raise SetupException()

    update_env_json({'OUTPUT_DIR': resolved_output_dir})
    # Report the path that was actually registered, not the raw text the user typed.
    print(f"✅ Successfully registered output directory: {resolved_output_dir}")
def check_docker_permissions():
    """
    Check if the user can run Docker commands without sudo.

    Runs `docker ps` and inspects the outcome.

    Raises:
        SetupException: if the docker executable cannot be found, or if `docker ps` exits
            with a nonzero status (permission problem or any other failure).
    """
    print('Checking if you have permission to run Docker commands without sudo...')
    try:
        result = subprocess.run(['docker', 'ps'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        # Without this, a machine with no Docker installed surfaces as an unexpected
        # crash with a traceback instead of an actionable setup message.
        print("❌ The docker command was not found.")
        print("Please install Docker, make sure `docker` is on your PATH, and retry.")
        raise SetupException()

    if result.returncode == 0:
        print("✅ You have permission to run Docker commands without sudo.")
        return

    # Check for permission-related errors
    stderr = result.stderr.decode(errors='replace')
    if "permission denied" in stderr.lower():
        print("❌ You do not have permission to run Docker commands without sudo.")
        print("To fix this, add your user to the Docker group by running:")
        print(" sudo usermod -aG docker $USER")
        print("Then, log out and log back in.")
    else:
        print("❌ Docker command failed for an unknown reason.")
        print("Error details:")
        print(stderr)
    raise SetupException()
def verify_python_modules():
    """
    Check that the `packaging` module can be imported.

    Raises:
        SetupException: if importing `packaging` fails with ImportError.
    """
    print('Checking that required python modules are installed...')
    try:
        import packaging  # noqa: F401 -- imported only to prove it is available
    except ImportError:
        print("❌ packaging module is not installed.")
        print("Please run `pip install packaging` and retry.")
        raise SetupException()
    else:
        print("✅ packaging module is installed.")
def main():
    """
    Run the interactive setup wizard.

    Changes into the directory containing this file, then runs the setup steps in order:
    output directory registration, Docker permission check, Docker image pull, NVIDIA
    validation, and python module verification.

    A KeyboardInterrupt or SetupException ends the wizard with a printed message and a
    normal return. A VerboseSetupException or any other exception is re-raised after a
    banner so that the full traceback is shown.
    """
    banner = '*' * 80

    print(banner)
    print('Running AlphaZeroArcade setup wizard...')
    print(banner)

    # abspath first: __file__ can be a bare relative filename (e.g. `python setup_wizard.py`
    # on Python < 3.9), in which case dirname() is '' and os.chdir('') raises.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    try:
        setup_output_dir()
        print(banner)
        check_docker_permissions()
        print(banner)
        docker_pull(LATEST_DOCKER_HUB_IMAGE)
        print(banner)
        validate_nvidia_installation(LATEST_DOCKER_HUB_IMAGE)
        print(banner)
        verify_python_modules()
    except KeyboardInterrupt:
        print('')
        print('Setup wizard was interrupted. Please try again.')
        return
    except SetupException as e:
        for arg in e.args:
            print(banner)
            print(arg)
        # Call site should print further details
        return
    except VerboseSetupException:
        print(banner)
        raise
    except Exception:
        # Exception rather than a bare except, so SystemExit is not reported as a failure.
        print(banner)
        print('Setup wizard failed unexpectedly! See below for details.')
        print(banner)
        raise
# Run the wizard only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()