From nobody Wed Nov 18 09:59:29 2015
Comments:
#
# Generic Cloud Init user_data template file to create CernVM4 (CentOS7) VMs
# which automatically join a cross-site VPN
#
# [email protected] July 2018
#
Content-Type: multipart/mixed; boundary="===============3141592653589793238=="
MIME-Version: 1.0
--===============3141592653589793238==
MIME-Version: 1.0
Content-Type: text/cloud-config; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="cloud-config-file"
# cloud-config
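# The ##...## placeholders throughout this file are substituted by the
# VM lifecycle manager before the user_data reaches the VM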
cvmfs:
  local:
    CVMFS_REPOSITORIES: grid
    CVMFS_HTTP_PROXY: ##user_data_option_cvmfs_proxy##
--===============3141592653589793238==
MIME-Version: 1.0
Content-Type: text/ucernvm; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="ucernvm-file"
[ucernvm-begin]
resize_rootfs=off
cvmfs_http_proxy='##user_data_option_cvmfs_proxy##'
[ucernvm-end]
--===============3141592653589793238==
MIME-Version: 1.0
Content-Type: text/x-shellscript; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="user_data_script"
#!/bin/sh
mkdir -p /var/spool/joboutputs
(
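# All output from this subshell is captured in
# /var/spool/joboutputs/user_data_script.log via the redirection at the
# closing parenthesis at the end of this script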
# Set the hostname if a value was substituted; with an empty substitution, hostname just displays the current one
hostname ##user_data_machine_hostname##
date --utc +"%Y-%m-%d %H:%M:%S %Z user_data_script Start user_data on `hostname`"
echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6
date --utc +"%Y-%m-%d %H:%M:%S %Z Disable IPv6"
# Cloud Init should do this automatically, but something changed between CernVM 3 and CernVM 4
ls -l /root/.ssh/authorized_keys
curl http://169.254.169.254/2009-04-04/meta-data/public-keys/0/openssh-key > /root/.ssh/authorized_keys
echo >> /root/.ssh/authorized_keys
ls -l /root/.ssh/authorized_keys
# Record the MJF (Machine/Job Features) and job outputs URLs if substituted here by the VM lifecycle manager
export MACHINEFEATURES='##user_data_machinefeatures_url##'
export JOBFEATURES='##user_data_jobfeatures_url##'
export JOBOUTPUTS='##user_data_joboutputs_url##'
# Save these values for use by other scripts
/bin/echo -e "export MACHINEFEATURES=$MACHINEFEATURES\nexport JOBFEATURES=$JOBFEATURES\nexport JOBOUTPUTS=$JOBOUTPUTS" > /etc/profile.d/mjf.sh
/bin/echo -e "setenv MACHINEFEATURES $MACHINEFEATURES\nsetenv JOBFEATURES $JOBFEATURES\nsetenv JOBOUTPUTS $JOBOUTPUTS" > /etc/profile.d/mjf.csh
# Create a shutdown_message if ACPI shutdown signal received
/bin/echo -e 'echo "100 VM received ACPI shutdown signal from hypervisor" > /var/spool/joboutputs/shutdown_message\n/sbin/shutdown -h now' >/etc/acpi/actions/power.sh
chmod +x /etc/acpi/actions/power.sh
# Disable TCP offloading etc - disabled by default unless the option is set to "on"
if [ "##user_data_option_network_offload##" != "on" ] ; then
ethtool -K eth0 tso off gso off gro off
fi
# We never let VMs send emails (likely to be annoying errors from root)
/sbin/iptables -A OUTPUT -p tcp --dport 25 -j DROP
# Once we have finished with the metadata, stop any user process reading it later
/sbin/iptables -A OUTPUT -d 169.254.169.254 -p tcp --dport 80 -j DROP
# Get the big 40GB+ logical partition as /scratch
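# Probe the possible block device layouts in order: CERN OpenStack cvm*
# flavors, GCE, CERN hep* flavors, plain virtio, leftover space on vda,
# virtual SCSI, virtual IDE, and Xen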
mkdir -p /scratch
if [ -b /dev/vdb1 -a -b /dev/vdb2 ] ; then
# Openstack at CERN with cvm* flavor?
# vda1 is boot image, vdb1 is root partition, vdb2 is unformatted
mkfs -q -t ext4 /dev/vdb2
mount /dev/vdb2 /scratch
elif [ -b /dev/sda1 -a -b /dev/sda2 -a -b /dev/sda3 ] ; then
# GCE: sda1 is boot image, sda2 is root partition, sda3 is unformatted
mkfs -q -t ext4 /dev/sda3
mount /dev/sda3 /scratch
elif [ -b /dev/vdb1 ] ; then
# Openstack at CERN with hep* flavor?
# vda1 is boot image, vdb1 is root partition, and no vdb2
# Since boot image is small, can use rest of vda for /scratch
echo -e 'n\np\n2\n\n\nw\n'| fdisk /dev/vda
mkfs -q -t ext4 /dev/vda2
mount /dev/vda2 /scratch
elif [ -b /dev/vdb ] ; then
# Efficient virtio device
mkfs -q -t ext4 /dev/vdb
mount /dev/vdb /scratch
elif [ -b /dev/vda1 -a -b /dev/vda2 ] ; then
# We just have a big vda with unused space in vda2
mkfs -q -t ext4 /dev/vda2
mount /dev/vda2 /scratch
elif [ -b /dev/sdb ] ; then
# Virtual SCSI
mkfs -q -t ext4 /dev/sdb
mount /dev/sdb /scratch
elif [ -b /dev/hdb ] ; then
# Virtual IDE
mkfs -q -t ext4 /dev/hdb
mount /dev/hdb /scratch
elif [ -b /dev/xvdb ] ; then
# Xen virtual disk device
mkfs -q -t ext4 /dev/xvdb
mount /dev/xvdb /scratch
else
date --utc +'%Y-%m-%d %H:%M:%S %Z user_data_script Missing vdb/hdb/sdb/xvdb block device for /scratch'
echo "500 Missing vdb/hdb/sdb block device for /scratch" > /var/spool/joboutputs/shutdown_message
/sbin/shutdown -h now
sleep 1234567890
fi
# We rely on the hypervisor's disk I/O scheduling
if [ -b /dev/vda ] ; then
echo 'noop' > /sys/block/vda/queue/scheduler
fi
if [ -b /dev/vdb ] ; then
echo 'noop' > /sys/block/vdb/queue/scheduler
fi
# Sticky bit set so anyone can create files and directories there
chmod ugo+rwxt /scratch
# Get CA certs from cvmfs
rm -Rf /etc/grid-security/*
ln -sf /cvmfs/grid.cern.ch/etc/grid-security/* /etc/grid-security
export X509_CERT_DIR=/etc/grid-security/certificates
# These will be picked up by login shells etc
cat <<EOF > /etc/profile.d/grid-paths.sh
export X509_CERT_DIR=/etc/grid-security/certificates
export X509_VOMS_DIR=/etc/grid-security/vomsdir
EOF
#
# Set up the EGI config repo to get /cvmfs/euclid.in2p3.fr
#
cat <<EOF >/etc/cvmfs/default.d/60-egi.conf
#CVMFS_SEND_INFO_HEADER=yes
CVMFS_CONFIG_REPOSITORY=config-egi.egi.eu
#CVMFS_CONFIG_REPO_REQUIRED=yes
EOF
#cat <<EOF >/etc/cvmfs/config.d/config-egi.egi.eu.conf
#CVMFS_SERVER_URL="http://cvmfs-egi.gridpp.rl.ac.uk:8000/cvmfs/@fqrn@;http://klei.nikhef.nl:8000/cvmfs/@fqrn@;http://cvmfsrepo.lcg.triumf.ca:8000/cvmfs/@fqrn@;http://cvmfsrep.grid.sinica.edu.tw:8000/cvmfs/@fqrn@;http://cvmfs-stratum-one.ihep.ac.cn:8000/cvmfs/@fqrn@"
#CVMFS_KEYS_DIR=/etc/cvmfs/keys/egi.eu
#CVMFS_FALLBACK_PROXY="http://cvmfsbproxy.cern.ch:3126;http://cvmfsbproxy.fnal.gov:3126"
#EOF
# Apply the CernVM-FS configuration changes
cvmfs_config reload
if [ ! -z "##user_data_option_hostkey##" -a ! -z "##user_data_option_hostcert##" ] ; then
# Given full host cert/key pair
cat <<X5_EOF > /etc/grid-security/hostkey.pem
##user_data_option_hostkey##
X5_EOF
cat <<X5_EOF > /etc/grid-security/hostcert.pem
##user_data_option_hostcert##
X5_EOF
cat /etc/grid-security/hostcert.pem /etc/grid-security/hostkey.pem > /etc/grid-security/hostcertkey.pem
else
date --utc +"%Y-%m-%d %H:%M:%S %Z user_data_option_hostkey/_hostcert defined!"
fi
chmod 0400 /etc/grid-security/*.pem
# Make a first heartbeat
echo 0.0 0.0 0.0 0.0 0.0 > /var/spool/joboutputs/heartbeat
/usr/bin/curl --capath $X509_CERT_DIR --cert /etc/grid-security/hostcertkey.pem --cacert /etc/grid-security/hostcertkey.pem --location --upload-file /var/spool/joboutputs/heartbeat "$JOBOUTPUTS/heartbeat"
# put heartbeat on MJF server every 5 minutes with a random offset
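# ($RANDOM is 0-32767, assuming /bin/sh is bash as on CernVM, so the sleep
# offset is roughly 0-297 seconds, spreading uploads across VMs)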
echo -e "*/5 * * * * root sleep `expr $RANDOM / 110` ; echo \`cut -f1-3 -d' ' /proc/loadavg\` \`cat /proc/uptime\` >/var/spool/joboutputs/heartbeat ; /usr/bin/curl --capath $X509_CERT_DIR --cert /etc/grid-security/hostcertkey.pem --cacert /etc/grid-security/hostcertkey.pem --location --upload-file /var/spool/joboutputs/heartbeat $JOBOUTPUTS/vm-heartbeat ; /usr/bin/curl --capath $X509_CERT_DIR --cert /etc/grid-security/hostcertkey.pem --cacert /etc/grid-security/hostcertkey.pem --location --upload-file /var/spool/joboutputs/heartbeat $JOBOUTPUTS/heartbeat >/var/log/heartbeat.log 2>&1" >/etc/cron.d/heartbeat
# We put the swap file on the logical partition (swapping on the CernVM aufs root filesystem is not possible)
# Since /scratch is ext4 we can use fallocate:
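# (fallocate preallocates the whole 4 GB instantly; if swapon ever rejects
# the file, writing it out with dd if=/dev/zero would be the fallback)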
fallocate -l 4g /scratch/swapfile
chmod 0600 /scratch/swapfile
mkswap /scratch/swapfile
swapon /scratch/swapfile
# Swap as little as possible
sysctl vm.swappiness=1
# Don't want to be doing this at 4 or 5am every day!
rm -f /etc/cron.daily/mlocate.cron
# Log proxies used for cvmfs
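# (CernVM-FS exposes the proxy in use as the "proxy" extended attribute on
# each mounted repository, which attr -g reads)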
attr -g proxy /mnt/.ro
for i in /cvmfs/*
do
attr -g proxy $i
done
### Create the client side OpenVPN configuration and start OpenVPN
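# The client authenticates with the host certificate/key written above and
# verifies the server certificate name against the substituted VPN hostname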
cat <<EOF >/etc/openvpn/client/slurm-client.conf
client
dev tun
proto tcp
remote-random
remote ##user_data_option_slurm_vpn_hostname## ##user_data_option_slurm_vpn_port##
verify-x509-name ##user_data_option_slurm_vpn_hostname## name
resolv-retry infinite
nobind
verb 3
capath /etc/grid-security/certificates
cert /etc/grid-security/hostcert.pem
key /etc/grid-security/hostkey.pem
EOF
systemctl start openvpn-client@slurm-client
### Optionally OpenVPN for the NFS VPN
if [ '##user_data_option_nfs_vpn_hostname##' != '' ] ; then
cat <<EOF >/etc/openvpn/client/nfs-client-tcp.conf
client
dev tun
proto tcp
remote-random
remote ##user_data_option_nfs_vpn_hostname## ##user_data_option_nfs_vpn_port##
verify-x509-name ##user_data_option_nfs_vpn_hostname## name
resolv-retry infinite
nobind
verb 3
capath /etc/grid-security/certificates
cert /etc/grid-security/hostcert.pem
key /etc/grid-security/hostkey.pem
EOF
systemctl start openvpn-client@nfs-client-tcp
sleep 5
fi
if [ '##user_data_option_nfs_addr##' != '' ] ; then
mkdir -p ##user_data_option_nfs_local_path##
echo '##user_data_option_nfs_addr##:##user_data_option_nfs_remote_path## ##user_data_option_nfs_local_path## nfs rw 0 0' >>/etc/fstab
mount ##user_data_option_nfs_local_path##
fi
#
# UDP vs TCP testing
#
cat <<EOF >/etc/openvpn/client/nfs-client-udp.conf
client
dev tun
proto udp
remote-random
remote ##user_data_option_nfs_vpn_hostname## ##user_data_option_nfs_vpn_port##
verify-x509-name ##user_data_option_nfs_vpn_hostname## name
resolv-retry infinite
nobind
verb 3
capath /etc/grid-security/certificates
cert /etc/grid-security/hostcert.pem
key /etc/grid-security/hostkey.pem
EOF
systemctl start openvpn-client@nfs-client-udp
#mkdir /data_udp
#mount ##user_data_option_nfs_addr##:##user_data_option_nfs_remote_path## /data_udp
#
# End of UDP vs TCP testing
#
#(
# Tell yum to use the current Squid proxy used by cvmfs
export http_proxy=`attr -qg proxy /mnt/.ro`
#
## Install nfs-client, networking utils, ntp
yum install -y nfs-utils net-tools bind-utils ntp
#
## Disable chronyd
systemctl disable chronyd
#
## Disable SELinux to allow for ceph mounted home dir
setenforce 0
#
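# (the heredoc below is unquoted, so yum's $releasever and $basearch must be
# backslash-escaped to appear literally in the repo file)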
cat <<EOF > /etc/yum.repos.d/puias-7.repo
[PUIAS_7_computational]
name=PUIAS computational Base \$releasever - \$basearch
mirrorlist=http://puias.math.ias.edu/data/puias/computational/\$releasever/\$basearch/mirrorlist
baseurl=http://puias.math.ias.edu/data/puias/computational/\$releasever/\$basearch
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-puias
EOF
#
## Mount ceph shared fs for user home dirs and a number of config files
mkdir /ceph
rpm -Uhv http://download.ceph.com/rpm-jewel/el7/noarch/ceph-release-1-1.el7.noarch.rpm
yum install -y ceph-common
# TODO: Copy ceph.conf, ceph.client.euclid.secret (the secretfile is needed by the ceph mount below)
echo 'vm37.blackett.manchester.ac.uk,euclid-cam-proxy-0,euclid-ral-proxy-0,euclid-edi-ctrl-0:6789:/ /ceph ceph _netdev,noatime,name=euclid,secretfile=/etc/ceph/ceph.client.euclid.secret 0 0' >> /etc/fstab
mount -a
# Copy cluster /etc/hosts file
cp /ceph/home/manch-config/etc/hosts /etc/hosts
## Create users and groups
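# euclid (2000) is the primary group and iris (8069) supplementary; the
# fixed uids presumably match file ownership on the shared /ceph home
# directories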
groupadd euclid -g 2000
groupadd iris -g 8069
useradd --no-create-home -u 16291 -g 2000 -G 8069 -d /ceph/home/hpcgill1 hpcgill1
useradd --no-create-home -u 16287 -g 2000 -G 8069 -d /ceph/home/hpcholl1 hpcholl1
useradd --no-create-home -u 15238 -g 2000 -G 8069 -d /ceph/home/sclt100 sclt100
useradd --no-create-home -u 15239 -g 2000 -G 8069 -d /ceph/home/mcnab mcnab
## Install NHC
yum install -y /ceph/home/manch-config/lbnl-nhc-1.4.2-1.el7.noarch.rpm
cp /ceph/home/manch-config/nhc.conf /etc/nhc/nhc.conf
## Install Slurm and Munge from OpenHPC
rpm -Uhv https://github.com/openhpc/ohpc/releases/download/v1.3.GA/ohpc-release-1.3-1.el7.x86_64.rpm
yum -y install slurm-ohpc slurm-slurmd-ohpc slurm-contribs-ohpc slurm-example-configs-ohpc slurm-pam_slurm-ohpc slurm-perlapi-ohpc munge-ohpc
cp /ceph/home/manch-config/slurm/slurm.conf /etc/slurm/slurm.conf
cp /ceph/home/manch-config/munge/munge.key /etc/munge/munge.key
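# munge must be enabled and started before slurmd, since slurmd uses the
# shared munge key to authenticate to the Slurm controller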
systemctl enable munge
systemctl enable slurmd
systemctl start munge
systemctl start slurmd
# Example commands for the script to run
# Add a global ssh key to allow access from the headnode
echo 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCt+KRis+jk5YXHoEmUeZgfIT4GvJFrNHTWXgOa+vwG/qrM1V0NSLUfqDgqfrAkEVH+L7yV9Y3BB+e7VvTk80OSrWMUmMKQbD6iun0rbCsDKrSnoy37YGxiH6vroTuZylMDlHkKjYvf3R9q6RpZMGAKEIn8yqXmxURtJDpbVHhNWM2wWcxdvni1g3sXuuljT7C9WwcWhUC7NCze1wxzNknFNnwNmpuXqtWtII5CLXagDyzMVq8GpwGiPZwcnm8cGwlXGCGnJWz8bl1UMnJI7JPBYlsUa1AbyLJSoi3jFtwBqxP1YDxw0ZSp9FZzOsirSOLQSAHv0Yg+sI8H6KmD7ydn [email protected]' >>/root/.ssh/authorized_keys
echo 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDVVH/ie1PVwq4KIpkA2g/SSKLqL+K31SJaDq1muROm4cPBIe9/GMXTcDr0kiNPzRLHjHPEOYa9+6C/AqnHtnJP4mSHuFa1vRIwDE/abe7M+rE41TXINz98iyCyk63SblwBhkReFLGhMLrknq3w/1pX0N1VXJ1aZcJbi+Ur7EOabVjKGYYPr7oqfpec2afeOcujfxfN/HrVtG8YLcZcrv8nv6GfctZ39bUsPlxc/po3NqUL5681lan51oqsyl0xhihI8tgQjGrKfFC0ygwGE+/Phh52Q+5AXqSV7c43grfq/Qi35MEEKbohiRNppxu4Ef4CPF6vS9j8hEB3AmxoRsin Generated-by-Nova' >>/root/.ssh/authorized_keys
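# Keep the VM up for 20 minutes (presumably a placeholder for the real
# workload) before declaring success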
sleep 1200
echo '200 Success' > /var/spool/joboutputs/shutdown_message
# Upload logs
cp -f /var/log/boot.log /var/log/dmesg /var/log/secure /var/log/messages* /var/log/cloud-init* /etc/cvmfs/default.* /root/.ssh/authorized_keys /etc/ssh/sshd_config \
/var/spool/joboutputs/
(
cd /var/spool/joboutputs
for i in *
do
if [ -f $i ] ; then
curl --capath $X509_CERT_DIR --cert /etc/grid-security/hostcertkey.pem --cacert /etc/grid-security/hostcertkey.pem --location --upload-file "$i" \
"$JOBOUTPUTS/"
# if [ "$DEPO_BASE_URL" != "" ] ; then
# # This will be replaced by extended pilot logging??
# curl --capath $X509_CERT_DIR --cert /etc/grid-security/hostcertkey.pem --cacert /etc/grid-security/hostcertkey.pem --location --upload-file "$i" \
# "$DEPO_BASE_URL/##user_data_space##/##user_data_machinetype##/##user_data_machine_hostname##/##user_data_uuid##/"
# fi
fi
done
)
# Try conventional shutdown
date --utc +'%Y-%m-%d %H:%M:%S %Z user_data_script Run /sbin/shutdown -h now'
/sbin/shutdown -h now
sleep 60
# If that fails, force an immediate poweroff via sysrq ('o' is poweroff)
date --utc +'%Y-%m-%d %H:%M:%S %Z user_data_script Run echo o > /proc/sysrq-trigger'
echo o > /proc/sysrq-trigger
) >/var/spool/joboutputs/user_data_script.log 2>&1 &
--===============3141592653589793238==--