-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdelete_fsxn_filesystem
executable file
·360 lines (346 loc) · 13.9 KB
/
delete_fsxn_filesystem
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
#!/bin/bash
################################################################################
# THIS SOFTWARE IS PROVIDED BY NETAPP "AS IS" AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL NETAPP BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR'
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
################################################################################
#
# This script is used to delete a FSxN filesystem.
################################################################################
################################################################################
# Print the command line synopsis on standard error and terminate the script.
#
# Arguments: none
# Outputs:   the usage text, written to stderr
# Returns:   does not return; exits the calling shell with status 1
################################################################################
usage () {
  # ${0##*/} yields the script's base name without forking 'basename' and is
  # safe when the script path contains white space. The synopsis shows -f and
  # -i as alternatives because the script requires exactly one of them.
  cat 1>&2 <<EOF
Usage: ${0##*/} {-f fileSystemName | -i fileSystemID} [-r region] [-b]
Where:
fileSystemName: Is the name of the file system you want to delete. This option is mutually exclusive to the -i option.
fileSystemID: Is the file system id of the file system you want to delete. This option is mutually exclusive to the -f option.
region: Is the AWS region where the FSxN filesystem resides.
-b: enable final backup. Otherwise, it is skipped.
EOF
  exit 1
}
################################################################################
# Display the contents of a FSxN file system: every SVM that belongs to it,
# each followed by the volumes that belong to that SVM.
#
# Globals:   svmsFile    (read) - JSON file with a top level
#                                 "StorageVirtualMachines" array
#            volumesFile (read) - JSON file with a top level "Volumes" array
#            Both files must already have been created by the caller.
# Arguments: $1 - ID of the file system to display
# Outputs:   one line per SVM and one tab indented line per volume, on stdout
################################################################################
displayFileSystemContents() {
  local fsId=$1
  local svmId svmName volumeId volumeName

  # The IDs are handed to jq with --arg so they are treated as data and never
  # become part of the jq program text.
  jq -r --arg fs "$fsId" \
    '.StorageVirtualMachines[] | select(.FileSystemId == $fs) | .StorageVirtualMachineId + " " + .Name' \
    "$svmsFile" |
  while read -r svmId svmName; do
    # Names are passed as printf arguments, not as part of the format, so a
    # '%' or '\' in a name is printed literally.
    printf "Storage Virtual Machine: %s - '%s' with the following volumes:\n" "$svmId" "$svmName"
    jq -r --arg fs "$fsId" --arg svm "$svmId" \
      '.Volumes[] | select(.FileSystemId == $fs and .OntapConfiguration.StorageVirtualMachineId == $svm) | .VolumeId + " " + .Name' \
      "$volumesFile" |
    while read -r volumeId volumeName; do
      printf "\t%s - '%s'\n" "$volumeId" "$volumeName"
    done
  done
}
################################################################################
# Delete one FSxN volume and wait for it to disappear.
#
# The volume is assumed to be gone as soon as the "describe-volumes" call for
# it returns an error.
#
# Globals:   region        (read) - AWS region passed to every aws call
#            MaxIterations (read) - maximum number of status checks
#            SleepTime     (read) - seconds to sleep between status checks
# Arguments: $1 - ID of the volume to delete
#            $2 - "true" to skip the final backup, "false" to take one.
#                 Optional; defaults to "true" so that the JSON handed to the
#                 AWS CLI is always well formed.
# Outputs:   diagnostics on stderr
# Returns:   0 once the volume no longer exists
#            1 if the delete request fails, the volume reports a lifecycle
#              other than DELETING or PENDING, or it is still present after
#              MaxIterations checks
################################################################################
delete_volume () {
  local volumeId=$1
  local skipBackup=${2:-true}
  local i status
  local tmpout

  # mktemp gives an unpredictable, exclusively created file. The RETURN trap
  # removes it on every exit path and then uninstalls itself so that it does
  # not stay active in the calling shell.
  tmpout=$(mktemp "${TMPDIR:-/tmp}/delete_fsxn_delete_vol.XXXXXX") || return 1
  trap 'rm -f -- "$tmpout"; trap - RETURN' RETURN

  if ! aws fsx delete-volume --volume-id "$volumeId" --region="$region" --output=json \
        --ontap-configuration "{\"SkipFinalBackup\": ${skipBackup}}" > "$tmpout" 2>&1; then
    printf "\nError, failed to delete a volume with volumeId: '%s'.\n" "$volumeId" 1>&2
    cat "$tmpout" 1>&2
    return 1
  fi
  #
  # Wait for the volume to be deleted.
  for (( i = 0; i < MaxIterations; i++ )); do
    if ! aws fsx describe-volumes --volume-ids "$volumeId" --output=json --region="$region" > "$tmpout" 2>&1; then
      # Assume if it failed, it is because the volume was deleted and doesn't exist anymore.
      return 0
    fi
    # The filter is quoted so the shell cannot treat "[0]" as a glob pattern.
    status=$(jq -r '.Volumes[0].Lifecycle' "$tmpout" 2> /dev/null)
    if [[ "$status" != "DELETING" && "$status" != "PENDING" ]]; then
      printf "\nError, failed to delete volume with volume ID '%s'. Status = %s.\n" "$volumeId" "$status" 1>&2
      cat "$tmpout" 1>&2
      return 1
    fi
    sleep "$SleepTime"
  done
  #
  # Every check still found the volume.
  echo "Failed to delete volume with volume ID of '$volumeId'. Taking too long." 1>&2
  return 1
}
################################################################################
# Delete one FSxN SVM and wait for it to disappear.
#
# The SVM is assumed to be gone as soon as the
# "describe-storage-virtual-machines" call for it returns an error.
#
# Globals:   region        (read) - AWS region passed to every aws call
#            MaxIterations (read) - maximum number of status checks
#            SleepTime     (read) - seconds to sleep between status checks
# Arguments: $1 - ID of the SVM to delete
# Outputs:   diagnostics on stderr
# Returns:   0 once the SVM no longer exists
#            1 if the delete request fails, the SVM reports a lifecycle other
#              than DELETING or PENDING, or it is still present after
#              MaxIterations checks
################################################################################
delete_svm() {
  local svmId=$1
  local i status
  local tmpout

  # mktemp gives an unpredictable, exclusively created file. The RETURN trap
  # removes it on every exit path and then uninstalls itself so that it does
  # not stay active in the calling shell.
  tmpout=$(mktemp "${TMPDIR:-/tmp}/delete_fsxn_delete_svm.XXXXXX") || return 1
  trap 'rm -f -- "$tmpout"; trap - RETURN' RETURN

  if ! aws fsx delete-storage-virtual-machine --region="$region" --output=json \
        --storage-virtual-machine-id "$svmId" > "$tmpout" 2>&1; then
    printf "\nError, failed to delete a SVM with svmID: '%s'.\n" "$svmId" 1>&2
    cat "$tmpout" 1>&2
    return 1
  fi
  #
  # Wait for the svm to be deleted.
  for (( i = 0; i < MaxIterations; i++ )); do
    if ! aws fsx describe-storage-virtual-machines --storage-virtual-machine-ids "$svmId" \
          --output=json --region="$region" > "$tmpout" 2>&1; then
      # Assume if it failed, it is because the SVM was deleted and therefore doesn't exist anymore.
      return 0
    fi
    status=$(jq -r '.StorageVirtualMachines[0].Lifecycle' "$tmpout" 2> /dev/null)
    if [[ "$status" != "DELETING" && "$status" != "PENDING" ]]; then
      printf "\nError, failed to delete SVM with SVM ID '%s'. Status = %s\n" "$svmId" "$status" 1>&2
      cat "$tmpout" 1>&2
      return 1
    fi
    sleep "$SleepTime"
  done
  #
  # Every check still found the SVM. (The original message referenced the
  # undefined variable svmID, so the ID was never shown.)
  printf "\nFailed to delete SVM with SVM ID of '%s'. Taking too long.\n" "$svmId" 1>&2
  return 1
}
################################################################################
# Main logic starts here.
################################################################################
#
# Create the scratch files with mktemp so their names cannot be predicted.
# The EXIT trap is installed first so that whatever did get created is removed
# even when a later mktemp fails.
tmpout=
svmsFile=
volumesFile=
trap 'rm -f -- "$tmpout" "$svmsFile" "$volumesFile"' EXIT
if ! tmpout=$(mktemp "${TMPDIR:-/tmp}/fsx_fs_delete.XXXXXX") ||
   ! svmsFile=$(mktemp "${TMPDIR:-/tmp}/fsx_fs_delete_svms.XXXXXX") ||
   ! volumesFile=$(mktemp "${TMPDIR:-/tmp}/fsx_fs_delete_volumes.XXXXXX"); then
  echo "Error, failed to create the temporary files." 1>&2
  exit 1
fi
#
# Set the maximum number of times to check that a volume and/or SVM has been
# deleted. Multiply it by SleepTime (set below) to get the total amount of
# time allowed. Note, it takes at least 4 minutes to delete a volume.
MaxIterations=120
#
# Set the number of seconds to wait between checks that a volume and/or SVM has been deleted.
SleepTime=5
#
# Set the maximum number of "volume deletes" that can be running at the same time.
MaxDeletesRunning=20
#
# Check that the required commands are available. 'command -v' is a shell
# builtin, so the check does not depend on 'which' being installed.
if ! command -v jq > /dev/null 2>&1 || ! command -v aws > /dev/null 2>&1; then
  echo "Error, both the 'aws' and 'jq' commands are required to run this script." 1>&2
  exit 1
fi
#
# Get the default region: the second field of the "region" line printed by
# 'aws configure list'. awk does the matching itself, with the same pattern
# that used to be handed to the deprecated 'egrep'.
region=$(aws configure list | awk '/^.*egion / {print $2}')
#
# The final backup is skipped unless -b is given.
skipBackup=true
#
# Process command line arguments.
while getopts "hbr:f:i:" option; do
  case "$option" in
    f) fileSystemName=$OPTARG
      ;;
    r) region=$OPTARG
      ;;
    i) fsid=$OPTARG
      ;;
    b) skipBackup=false
      ;;
    *) usage # Covers -h as well as any unknown option; implied exit.
      ;;
  esac
done
if [[ -n "$fsid" && -n "$fileSystemName" ]]; then
  echo "Error, you can only specify the -i OR the -f option, not both." 1>&2
  usage # implied exit
fi
#
# Ensure all the required parameters have been provided.
if [[ -z "$fileSystemName" && -z "$fsid" ]]; then
  echo "Error, missing required arguments." 1>&2
  usage # implied exit
fi
#
# Every AWS call below needs a region. The default is taken from the output of
# 'aws configure list'; when nothing is configured that output is understood to
# hold a "<not set>" placeholder (TODO confirm against the installed CLI), so a
# value starting with '<' is treated the same as an empty one.
if [[ -z "$region" || "$region" == "<"* ]]; then
  echo "Error, no AWS region is configured. Use the -r option to specify one." 1>&2
  usage # implied exit
fi
#
# Get the file system id based on the name.
if [[ -z "$fsid" ]]; then
  #
  # The name is handed to jq with --arg so that quotes or backslashes in it
  # cannot alter the filter, and ".Tags[]?" keeps a file system without any
  # tags from aborting the scan of the remaining file systems.
  mapfile -t fsid < <(
    aws fsx describe-file-systems --region="$region" --output=json 2> "$tmpout" |
      jq -r --arg name "$fileSystemName" \
        '.FileSystems[] | select(any(.Tags[]?; .Key == "Name" and .Value == $name)) | .FileSystemId' 2> /dev/null
  )
  if (( ${#fsid[@]} > 1 )); then
    echo "Error, more than one file system matched the file system name '$fileSystemName'." 1>&2
    echo "Please use the -i option to specify the exact file system you want to delete." 1>&2
    exit 1
  fi
  if [[ -z "${fsid[0]}" ]]; then
    echo "Error, could not find the file system with name '$fileSystemName'." 1>&2
    cat "$tmpout" 1>&2
    exit 1
  fi
else
  #
  # Get the file system name based on the fsid. The AWS error output is kept
  # in $tmpout so it can be shown if the lookup fails, as is done for the
  # lookup by name above.
  fileSystemName=$(aws fsx describe-file-systems --file-system-ids "$fsid" --region="$region" --output=json 2> "$tmpout" |
    jq -r '.FileSystems[0].Tags[]? | select(.Key == "Name") | .Value' 2> /dev/null)
  if [[ -z "$fileSystemName" ]]; then
    echo "Error, failed to get the file system name based on the ID ($fsid)." 1>&2
    cat "$tmpout" 1>&2
    exit 1
  fi
fi
#
# Create a JSON file with all the FSxN SVMs in the region. Only stdout goes
# into the JSON file; stderr is kept apart in $tmpout so that a warning from
# the AWS CLI cannot corrupt the JSON that jq parses later.
if ! aws fsx describe-storage-virtual-machines --region="$region" --output=json > "$svmsFile" 2> "$tmpout"; then
  echo "Error, failed to get the list of SVMs." 1>&2
  cat "$tmpout" 1>&2
  exit 1
fi
#
# Create a JSON file with all the FSxN volumes in the region.
if ! aws fsx describe-volumes --region="$region" --output=json > "$volumesFile" 2> "$tmpout"; then
  echo "Error, failed to get the list of volumes." 1>&2
  cat "$tmpout" 1>&2
  exit 1
fi
#
# Make sure the user really wants to delete the file system. Only the exact
# answer "yes" continues.
echo "Here are the current contents of the '$fileSystemName'($fsid) file system you have indicated you want to delete:"
displayFileSystemContents "$fsid"
read -r -p "Are you sure you want to delete this file system, with all the above volumes (yes/no)? " response
if [[ "$response" != "yes" ]]; then
  echo "Aborted."
  exit 1
fi
#
# Before you can delete a file system, you have to first delete all the volumes,
# and then all the SVMs. So, first get the list of SVMs:
declare -a svms
declare -a volumes
mapfile -t svms < <(jq -r --arg fs "$fsid" \
  '.StorageVirtualMachines[] | select(.FileSystemId == $fs) | .StorageVirtualMachineId' "$svmsFile")
#
# Now delete all the volumes for each SVM. All the volumes associated with the
# fsid could simply be deleted, but going SVM by SVM adds the extra check that
# each volumeId belongs to one of the SVMs that is associated with the fsid.
for svmId in "${svms[@]}"; do
  #
  # Create an array with all the non-root volume IDs for this SVM.
  mapfile -t volumes < <(jq -r --arg fs "$fsid" --arg svm "$svmId" \
    '.Volumes[] | select(.OntapConfiguration.StorageVirtualMachineId == $svm and (.OntapConfiguration.StorageVirtualMachineRoot | not) and .FileSystemId == $fs) | .VolumeId' \
    "$volumesFile")
  numVolumes=${#volumes[@]}
  if (( numVolumes == 0 )); then
    continue
  fi
  #
  # Since it can take a while for a single volume to be deleted (e.g. 4 minutes
  # for a small empty volume) and you can do multiple deletes in parallel,
  # spawn them in the background and wait for them to finish. Although, since
  # we don't want to overwhelm either AWS or ONTAP, only allow a certain
  # number at a time.
  i=0
  numRunning=0
  maxNumRunning=1 # Only do one initially, if it completes successfully, then do the rest concurrently.
  printf "\nDeleting all the volumes associated with %s.\n" "$svmId"
  while (( i < numVolumes )); do
    delete_volume "${volumes[i]}" "$skipBackup" &
    (( i += 1, numRunning += 1 ))
    printf "\rTotal number of volumes to delete: %s. Number of deletes currently running: %s. Number waiting to be started: %s. " \
      "$numVolumes" "$numRunning" "$(( numVolumes - i ))"
    if (( numRunning >= maxNumRunning )); then
      #
      # Wait for a job to complete.
      wait -n
      rc=$?
      if (( rc == 127 )); then
        #
        # 127 means there were no background jobs. Since we just deployed one, that shouldn't happen.
        printf "\nError, got an unexpected response from 'wait'. Aborting.\n" 1>&2
        exit 1
      fi
      if (( rc != 0 )); then
        printf "\nError, one of the volume deletes failed. Aborting!\n" 1>&2
        exit 1
      fi
      (( numRunning -= 1 ))
      if (( i == 1 )); then
        # The first one succeeded, open up the flood gates.
        maxNumRunning=$MaxDeletesRunning
      fi
    fi
  done
  #
  # Now that we have queued them all up, wait for them to finish. The loop is
  # driven by the number of jobs still outstanding, so the counter never goes
  # negative; a 127 from 'wait' still means that no jobs are left.
  while (( numRunning > 0 )); do
    wait -n
    rc=$?
    if (( rc == 127 )); then
      break
    fi
    if (( rc != 0 )); then
      printf "\nError, one of the volume deletes failed. Aborting!\n" 1>&2
      exit 1
    fi
    (( numRunning -= 1 ))
    printf "\rTotal number of volumes to delete: %s. Number of deletes currently running: %s. Number waiting to be started: %s. " \
      "$numVolumes" "$numRunning" "$(( numVolumes - i ))"
  done
done # for svmId in "${svms[@]}"; do
#
# With the volumes out of the way the SVMs can be removed. All of the deletes
# are started at once: since there can only be 24 SVMs there is no need to
# limit how many run at the same time.
if [[ ${#svms[*]} -gt 0 ]]; then
  printf "\nDeleting SVMs.\n"
fi
for svmId in ${svms[*]}; do
  delete_svm $svmId &
done
#
# Reap the background deletes one by one. 'wait -n' answers with 127 once no
# jobs are left, which ends the loop; any other non-zero status aborts.
if [[ -n "$svms" ]]; then
  numRunning=${#svms[*]}
  while true; do
    printf "\rTotal number of SVMs to delete: ${#svms[*]}. Number of deletes currently running: ${numRunning}. Number waiting to be started: 0. "
    wait -n
    rs=$?
    (( numRunning -= 1 ))
    if [[ "$rs" == 127 ]]; then
      break
    fi
    if [[ "$rs" != 0 ]]; then
      printf "\nError, one of the SVM deletes failed. Aborting!\n" 1>&2
      exit 1
    fi
  done
fi
#
# Now that all the volumes and all the SVMs have been deleted, we can delete the filesystem.
if ! aws fsx delete-file-system --file-system-id "$fsid" --output=json --region="$region" > "$tmpout" 2>&1; then
  printf "\nError, failed to delete file system.\n" 1>&2
  cat "$tmpout" 1>&2
  exit 1
fi
#
# The request was accepted; report what the service says it is doing. The name
# and status are passed as printf arguments so that a '%' or '\' in them is
# printed literally.
status=$(jq -r '.Lifecycle' "$tmpout")
case "$status" in
  DELETING | PENDING)
    printf "\nFile system '%s' is being deleted.\n" "$fileSystemName"
    exit 0
    ;;
  *)
    printf "\nUnknown status '%s'. Complete output returned from the AWS api:\n" "$status" 1>&2
    cat "$tmpout" 1>&2
    exit 1
    ;;
esac