#!/bin/bash
#
# Copyright (C) 2022 David Valin [email protected]
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
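#
# Pull benchmark result files (mostly csv) from a pbench results web server
# and organize them by test and system for later comparison.
#
# Usage (inferred from the argument handling at the bottom of this script):
#   ./pull_data <results_directory> <results_server>
# where <results_directory> is the run directory of interest under the
# server's results tree and <results_server> is the host serving the pbench
# results over http.
#
# Example (hypothetical names):
#   ./pull_data perf_runs_2022 results.example.com
#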
pbench_results_location=""
retrieve_csv_files()
{
if [[ $1 == "" ]]; then
echo Need to designate a results area.
exit
fi
#
# Grab the top directories
#
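    # Note: the recursive --spider pass below is used to recreate the remote
    # directory layout locally without downloading the file contents; the
    # pushd/ls walking that follows depends on that local tree.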
    wget -r -np --spider -l1 -P . http://${pbench_results_location}/${1}/ 2> /dev/null
    pushd ${pbench_results_location}/${1} > /dev/null
    systems=`ls`
    popd > /dev/null
    top_dir=`pwd`
    #
    # Walk through the systems present.
    #
    for sys in $systems; do
        wget -r -np --spider -l1 -P . http://${pbench_results_location}/${1}/$sys/ 2> /dev/null
        pushd ${pbench_results_location}/${1}/${sys} > /dev/null
        #
        # Process the directories with the test names.
        #
        for tests in `ls`; do
            pushd $tests > /dev/null
            #
            # Determine the proper test and pull the results file. Any post
            # processing of the data will be done here.
            #
if [[ $tests == *"linpack_test"* ]]; then
wget http://${pbench_results_location}/${1}/$sys/$tests/results_linpack.csv 2> /dev/null
wget http://${pbench_results_location}/${1}/$sys/$tests/results.txt 2> /dev/null
fi
if [[ $tests == *"pig_test"* ]]; then
wget http://${pbench_results_location}/${1}/$sys/$tests/results_pig.csv 2> /dev/null
fi
if [[ $tests == *"streams_test"* ]]; then
pushd $top_dir > /dev/null
wget -r -np --spider -l1 -P . http://${pbench_results_location}/${1}/$sys/$tests 2> /dev/null
popd > /dev/null
for sdir in `ls -d results_streams_tuned_*`; do
wget http://${pbench_results_location}/${1}/$sys/$tests/$sdir/streams_results/results_streams.csv 2> /dev/null
done
fi
if [[ $tests == "uperf_tuned"* ]]; then
#
# Because uperf is a mess.
#
wget http://${pbench_results_location}/${1}/$sys/$tests/result.csv 2> /dev/null
wget http://${pbench_results_location}/$sys/$tests/result.txt 2> /dev/null
#
# We need to post process this file.
#
mv result.csv results_uperf.csv
fi
if [[ $tests == *"fio_run"* ]]; then
pushd $top_dir > /dev/null
wget -r -np --spider -l1 -P . http://${pbench_results_location}/${1}/$sys/$tests 2> /dev/null
popd > /dev/null
for sdir in `ls -d fio_bs*`; do
pushd ${sdir} > /dev/null
wget http://${pbench_results_location}/${1}/$sys/$tests/$sdir/result.txt 2> /dev/null
wget http://${pbench_results_location}/${1}/$sys/$tests/$sdir/result.csv 2> /dev/null
mv result.csv result_fio.csv
popd > /dev/null
done
fi
if [[ $tests == *"specjbb"* ]]; then
mkdir -p results/wrapper_results
cd results/wrapper_results
wget http://${pbench_results_location}//${1}/$sys/$tests/results/wrapper_results 2> /dev/null
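                # The wget above saved the server's directory listing as a local
                # file named wrapper_results; grep for folder.gif (the icon the
                # index pages use for subdirectories) to pick out the run
                # subdirectory name.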
                next_dir=`grep folder.gif wrapper_results | cut -d'=' -f 5 | cut -d'"' -f 2 | cut -d'/' -f 1`
                mkdir $next_dir
                cd $next_dir
                wget http://${pbench_results_location}/${1}/$sys/$tests/results/wrapper_results/${next_dir} 2> /dev/null
                dir2=`grep folder.gif ${next_dir} | cut -d'=' -f 5 | cut -d'"' -f 2 | sed "s/\.\///g" | sed "s/\///g"`
                for subdir in $dir2; do
                    mkdir $subdir
                    cd $subdir
                    wget http://${pbench_results_location}/${1}/$sys/$tests/results/wrapper_results/${next_dir}/$subdir/results_specjbb.csv 2> /dev/null
                    cd ..
                done
            fi
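            #
            # hammerdb: probe for each database's subdirectory by fetching its
            # directory listing; only pull the corresponding csv when the
            # listing fetch succeeds.
            #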
if [[ $tests == *"hammerdb"* ]]; then
wget http://${pbench_results_location}/${1}/$sys/$tests/mariadb 2> /dev/null
if [ $? -eq 0 ]; then
wget http://${pbench_results_location}/${1}/$sys/$tests/mariadb/results_hammerdb_maria.csv 2> /dev/null
fi
wget http://${pbench_results_location}/${1}/$sys/$tests/postgres 2> /dev/null
if [ $? -eq 0 ]; then
wget http://${pbench_results_location}/${1}/$sys/$tests/postgres/results_hammerdb_pg.csv 2> /dev/null
fi
wget http://${pbench_results_location}/${1}/$sys/$tests/mssql 2> /dev/null
if [ $? -eq 0 ]; then
wget http://${pbench_results_location}/${1}/$sys/$tests/mssql/results_hammerdb_mssql.csv 2> /dev/null
fi
fi
if [[ $tests == *"auto_hpl"* ]]; then
pushd $top_dir > /dev/null
wget -r -np --spider -l1 http://${pbench_results_location}/${1}/$sys/$tests/ 2> /dev/null
popd > /dev/null
for sdir in `ls -d results_auto_hpl_*`; do
pushd $sdir > /dev/null
#
# Pulls the index.html file.
#
wget http://${pbench_results_location}/${1}/${sys}/${tests}/${sdir} 2> /dev/null
file_check=""
if [ -f "index.html" ]; then
file_check="index.html"
fi
if [ -f "${sdir}" ]; then
file_check=$sdir
fi
if [[ $file_check != "" ]]; then
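                        # Parse the saved directory listing for the csv link,
                        # pull just that file, and give it a results_ name.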
                        file_to_pull=`grep csv $file_check | cut -d'<' -f 7 | cut -d'"' -f 2`
                        if [[ $file_to_pull != "" ]]; then
                            wget http://${pbench_results_location}/${1}/${sys}/${tests}/$sdir/$file_to_pull 2> /dev/null
                            file_to=`echo $file_to_pull | cut -d'-' -f 1,2`
                            mv $file_to_pull results_${file_to}.csv
                        fi
                        rm $file_check
                    fi
                    popd > /dev/null
                done
            fi
if [[ $tests == *"etcd_run"* ]]; then
pushd $top_dir > /dev/null
wget -r -np --spider -l1 -P . http://${pbench_results_location}/${1}/$sys/$tests 2> /dev/null
popd > /dev/null
for sdir in `ls -d fio_bs*`; do
pushd ${sdir} > /dev/null
wget http://${pbench_results_location}/${1}/$sys/$tests/$sdir/result.txt 2> /dev/null
wget http://${pbench_results_location}/${1}/$sys/$tests/$sdir/result.csv 2> /dev/null
mv result.csv result_etcd.csv
popd > /dev/null
done
fi
if [[ $tests == *"speccpu2017"* ]]; then
pushd $top_dir > /dev/null
wget -r -np --spider -l1 -P . http://${pbench_results_location}/${1}/$sys/$tests 2> /dev/null
popd > /dev/null
for sdir in `ls -d results_speccpu*`; do
#
# Pulls the index.html file.
#
mkdir -p $sdir/run_results
pushd $sdir/run_results
wget http://${pbench_results_location}/${1}/${sys}/${tests}/${sdir}/run_results/ 2> /dev/null
file_check=""
if [ -f "index.html" ]; then
file_check="index.html"
fi
if [ -f "${sdir}" ]; then
file_check=$sdir
fi
if [[ $file_check != "" ]]; then
file_to_pull=`grep csv $file_check | cut -d'<' -f 7 | cut -d'"' -f 2`
echo file_to_pull $file_to_pull
if [[ $file_to_pull != "" ]]; then
for get_file in $file_to_pull; do
wget http://${pbench_results_location}/${1}/${sys}/${tests}/$sdir/run_results/$get_file 2> /dev/null
done
fi
rm $file_check
fi
popd
done
fi
popd > /dev/null
done
popd > /dev/null
done
}

#
# Now organize the data based on test, then system class. Also sort them
# (at least for aws).
#
organize_results()
{
    #
    # List of tests we know about.
    #
    test_names="results_uperf.csv results_specjbb results_pig results_streams results_linpack results_hammerdb_maria results_hammerdb_pg results_hammerdb_mssql fio_run results_hpl etcd speccpu"
    #
    # Get all the csv files present.
    #
    find ${pbench_results_location} -print | grep csv > results_files
    #
    # Get the system prefixes. This is generic.
    #
    systems=`cut -d'/' -f 4 results_files | sort -u`
    series_list=""
    separ=""
    #
    # If xlarge is in the system name, it is AWS. We may have to deal with
    # other AWS names later, but worry about it then.
    #
    if [[ $systems == *"xlarge"* ]]; then
        #
        # AWS systems
        #
        sys_type="aws"
        #
        # Build the series list (m5, m5a, i3en...).
        #
        type=`cut -d'/' -f 4 results_files | cut -d'.' -f 1 | sort -u`
        for i in $type; do
            grep ${i}"\." results_files > ${i}_series
            series_list=${series_list}${separ}${i}_series
            separ=" "
        done
    else
        echo "$systems is unknown."
        exit 1
    fi
    rm results_locations 2> /dev/null
    rm results_missing_tests 2> /dev/null
    rm results_found_tests 2> /dev/null
    #
    # Walk through the tests and systems and build the appropriate results file.
    #
    series_ordered=""
    for tn in $test_names; do
        echo "test ${tn}" >> results_locations
        echo "test ${tn}" >> results_found_tests
        for series in $series_list; do
            if [[ $sys_type == "aws" ]]; then
                #
                # Obtain the system names, and sort them based on the xlarge
                # value: 2xlarge, 4xlarge, ...
                #
                series_ordered=`cut -d/ -f 4 $series | sort -u | sed "s/\./ /g" | sort -n -k2 | sed "s/ /./g"`
            fi
            if [[ $sys_type == "azure" ]]; then
                echo "Need to handle azure"
                exit 1
            fi
            #
            # Report the results.
            #
            echo " new_series: $series" >> results_locations
            for sys_checking in $series_ordered; do
                res_file=`grep --no-filename $tn $series_list | grep $sys_checking`
                if [[ $res_file != "" ]]; then
                    echo " ${res_file},${sys_checking}" >> results_locations
                    echo " ${sys_checking}" >> results_found_tests
                else
                    #
                    # Test is missing, report it.
                    #
                    echo "Missing $tn for $sys_checking"
                    echo "Missing $tn for $sys_checking" >> results_missing_tests
                fi
            done
        done
    done
}
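
#
# Main: $1 is the results directory to pull (handed to retrieve_csv_files),
# and $2 is the pbench results server the files are pulled from.
#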
pbench_results_location=$2
retrieve_csv_files $1
organize_results