forked from CFSAN-Biostatistics/C-WAP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerateReport.sh
executable file
·484 lines (382 loc) · 18.9 KB
/
generateReport.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
#! /bin/bash -e
# Prepare an html report. The html file header and standard page banner are copied from a stored file for simplicity.
sampleName=$1
headerFile=$2
isPairedEnd=$3
primerBedFile=$4
projectDir=$5
reportFile=report.html
cp $headerFile $reportFile
IFS='.' read -ra username <<< "$USER"
fullName="${username[0]} ${username[1]}"
timestamp=$(date "+%Y-%m-%d, %T %Z")
echo "<table>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Sample name:</td><td>$sampleName</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Date generated:</td><td>$timestamp</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Timestamp of C-WAP version used:</td><td>$(git --git-dir $projectDir/.git log -n 1 | grep Date: | sed 's/Date: //g')</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Executed by:</td><td>$fullName (<a href=\\"mailto:[email protected]?subject=Wastewater report generated on $timestamp\\">[email protected]</a>)</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Executed on:</td><td>$(hostname -I | awk '{print $1}') (aka $(hostname))</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "</table>" >> $reportFile
#######################################################
echo Compiling read statistics
numAligned=$(cat sorted.stats | grep "reads mapped:" | awk '{ print $NF }')
avgQuality=$(cat sorted.stats | grep "average quality" | awk '{ print $NF }')
avgLength=$(cat sorted.stats | grep "average length" | awk '{ print $NF }')
numPassedQuality=$(cat resorted.stats | grep "raw total sequences" | awk '{ print $4 }')
avgQualityPassed=$(cat resorted.stats | grep "average quality" | awk '{ print $NF }')
avgLengthPassed=$(cat resorted.stats | grep "average length" | awk '{ print $NF }')
let "avgCoveragePassed = $avgLengthPassed * $numPassedQuality / 29903" || true
# Deduce the total number of reads from the kraken2 output
# Use taxid column, i.e. column 5
numUnclassified=$(head k2-std.out | awk '$5==0' | awk '{ print $2 }')
if [[ -z $numUnclassified ]]; then
numUnclassified=0
fi
numClassified=$(head k2-std.out | awk '$5==1' | awk '{ print $2 }')
if [[ -z $numClassified ]]; then
numClassified=0
fi
# Calculation of total number of reads
let "numReads = $numUnclassified + $numClassified" || true
if $isPairedEnd; then
# Correction for undercounting by kraken2 in the paired mode
let "numReads = 2 * $numReads" || true
fi
####################################################################
echo >> $reportFile
echo "<br>" >> $reportFile
echo "<h2>Sequencing summary</h2>" >> $reportFile
echo '<table>' >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Sequencing chemistry:</td><td>$libraryProtocol with $seqInstrument</td>" >> $reportFile
echo "</tr>" >> $reportFile
# Catchment site the sample was collected from
echo "<tr>" >> $reportFile
echo " <td>Source site:</td><td><a href=\"https://www.openstreetmap.org/#map=3/$sampleLatitude/$sampleLongitude\">$sampleLocation\
($sampleLatitude,$sampleLongitude)</a></td>" >> $reportFile
echo "</tr>" >> $reportFile
# Date the WW sample was collected from the sanitary system
echo "<tr>" >> $reportFile
echo " <td>Sampling date:</td><td>$collectionDate</a></td>" >> $reportFile
echo "</tr>" >> $reportFile
# Agency collecting the WW sample
echo "<tr>" >> $reportFile
echo " <td>Collected by:</td><td>$collectedBy</td>" >> $reportFile
echo "</tr>" >> $reportFile
# Lab that sequenced the sample
echo "<tr>" >> $reportFile
echo " <td>Sequenced by:</td><td>$sequencedBy</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Total number of reads:</td><td>$numReads</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Reads aligned:</td><td>$numAligned ($(expr 100 \* $numAligned / $numReads)%)</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Average read quality:</td><td>$avgQuality</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Average read length:</td><td>$avgLength</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Reads passing filter:</td><td>$numPassedQuality ($(expr 100 \* $numPassedQuality / $numReads)%)</td>" \
>> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Average read quality passing filter:</td><td>$avgQualityPassed</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Average read length passing filter:</td><td>$avgLengthPassed</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Average coverage passing filter:</td><td>${avgCoveragePassed}X</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "</table>" >> $reportFile
echo "<br>" >> $reportFile
echo "A read passes filter if the read length after adaptor trimming ≥ 30 and minimum \
read quality ≥ 20 within a sliding window of width 4." >> $reportFile
#######################################################
echo >> $reportFile
echo "<br><br>" >> $reportFile
echo "<h2>Overall sequence characteristics</h2>" >> $reportFile
echo "<img src=\"./coverage.png\" alt=\"Coverage vs. genome coordinate plot\" width=\"49%\" class=\"center\">" >> $reportFile
echo "<img src=\"./quality.png\" alt=\"Quality vs. genome coordinate plot\" width=\"49%\" class=\"center\">" >> $reportFile
echo "<p> NOTE: The red shaded areas marked with a (<font color="red">*</font>) are not covered by the design of the library preparation kit and hence excluded from analyses. <font color="magenta">Magenta</font> curves represent moving average with a window width of 1kb." >> $reportFile
echo "<br><br><br><br>" >> $reportFile
echo "<img src=\"./depthHistogram.png\" alt=\"Coverage histogram\" width=\"49%\" class=\"center\">" >> $reportFile
echo "<img src=\"./qualityHistogram.png\" alt=\"Quality histogram\" width=\"49%\" class=\"center\">" >> $reportFile
echo "<br><br><br><br>" >> $reportFile
echo "<img src=\"./readLengthHist.png\" alt=\"Read length histogram\" width=\"49%\" class=\"center\">" >> $reportFile
echo "<img src=\"./breadthVSdepth.png\" alt=\"Depth vs breadth plot\" width=\"49%\" class=\"center\">" >> $reportFile
echo "<img src=\"./discontinuitySignal.png\" alt=\"Coverage discontinuity signal\" width=\"49%\" class=\"center\">" >> $reportFile
if [[ -s timeVSreadcounts.png ]]; then
echo "<img src=\"./timeVSreadcounts.png\" alt=\"Passage time histogram of ONT reads\" width=\"49%\" class=\"center\">" >> $reportFile
else
rm timeVSreadcounts.png
fi
echo >> $reportFile
echo "<br>" >> $reportFile
# Check for coverage data. If the coverage is too low, any variant calling will be inaccurate
if [ $avgCoveragePassed -lt 10 ]; then
echo "<p> <font color=\"red\"> WARNING: The sequence coverage is very low (${avgCoveragePassed}X) </font>" >> $reportFile
fi
#######################################################
# Check the coverage depth across reference sequence
# Multiple metrics are reported
# Evaluation of number of genomic coordinates that could not be amplified due to being
# out of range of the amplicon kit.
echo Assessing completeness of coverage...
numUncoveredLociByDesign=0
while read line; do
gapBegin=$(echo $line | awk '{print $1}')
gapEnd=$(echo $line | awk '{print $2}')
let "numUncoveredLociByDesign += $gapEnd - $gapBegin + 1"
done <<< "$(python3 $projectDir/findUncoveredCoordinates.py $primerBedFile 1)"
poorlyCoveredLoci=($(cat pos-coverage-quality.tsv | awk '$2 < 10 { print $1 }'))
uncoveredLoci=($(cat pos-coverage-quality.tsv | awk '$2 == 0 { print $1 }'))
# Coverage analysis for SNP loci currently in circulation
# Substitutions present >=90% of GISAID submissions
commonSNPuncov=()
commonSNPpoorlycov=()
numCommonSNP=$(cat $projectDir/recentVariants/commonSNPpos | wc -l)
for locus in $(cat $projectDir/recentVariants/commonSNPpos); do
if echo ${poorlyCoveredLoci[@]} | grep -q -w $locus; then
commonSNPpoorlycov+=(locus)
fi
if echo ${uncoveredLoci[@]} | grep -q -w $locus; then
commonSNPuncov+=(locus)
fi
done
# Substitutions present (10%,90%) of GISAID submissions
rareSNPuncov=()
rareSNPpoorlycov=()
numDiverseSNP=$(cat $projectDir/recentVariants/diverseSNPpos | wc -l)
for locus in $(cat $projectDir/recentVariants/diverseSNPpos); do
if echo ${poorlyCoveredLoci[@]} | grep -q -w $locus; then
diverseSNPpoorlycov+=(locus)
fi
if echo ${uncoveredLoci[@]} | grep -q -w $locus; then
diverseSNPuncov+=(locus)
fi
done
# Substitutions present <=10% of GISAID submissions
rareSNPuncov=()
rareSNPpoorlycov=()
numRareSNP=$(cat $projectDir/recentVariants/rareSNPpos | wc -l)
for locus in $(cat $projectDir/recentVariants/rareSNPpos); do
if echo ${poorlyCoveredLoci[@]} | grep -q -w $locus; then
rareSNPpoorlycov+=(locus)
fi
if echo ${uncoveredLoci[@]} | grep -q -w $locus; then
rareSNPuncov+=(locus)
fi
done
echo '<table>' >> $reportFile
echo "<tr>" >> $reportFile
echo " <th></th><th>Uncovered coordinates (0X)</th><th>Poorly covered coordinates (<10X)</th>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td># Inaccessible genomic coordinates by kit design:</td><td>${numUncoveredLociByDesign}nt ($(expr 100 \* $numUncoveredLociByDesign / 29903)%)</td><td>${numUncoveredLociByDesign}nt ($(expr 100 \* $numUncoveredLociByDesign / 29903)%)</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>All genomic coordinates:</td><td>${#uncoveredLoci[@]}nt ($(expr 100 \* ${#uncoveredLoci[@]} / 29903)%)</td><td>${#poorlyCoveredLoci[@]}nt ($(expr 100 \* ${#poorlyCoveredLoci[@]} / 29903)%)</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Common SNPs:</td><td>${#commonSNPuncov[@]}nt ($(expr 100 \* ${#commonSNPuncov[@]} / $numCommonSNP)%)</td>" \
"<td>${#commonSNPpoorlycov[@]}nt ($(expr 100 \* ${#commonSNPpoorlycov[@]} / $numCommonSNP)%)</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Diverse SNPs:</td><td>${#diverseSNPuncov[@]}nt ($(expr 100 \* ${#diverseSNPuncov[@]} / $numDiverseSNP)%)</td>" \
"<td>${#diverseSNPpoorlycov[@]}nt ($(expr 100 \* ${#diverseSNPpoorlycov[@]} / $numDiverseSNP)%)</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Rare SNPs:</td><td>${#rareSNPuncov[@]}nt ($(expr 100 \* ${#rareSNPuncov[@]} / $numRareSNP)%)</td>" \
"<td>${#rareSNPpoorlycov[@]}nt ($(expr 100 \* ${#rareSNPpoorlycov[@]} / $numRareSNP)%)</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo '</table>' >> $reportFile
echo "SNPs refer to the polymorphic sites currently in circulation that were detected out of recent GISAID entries. The sites that differ from the SC2 reference sequence are denoted as \"common\" if [90%, 100%] of the submissions carry this mutation, whereas those that are prevalent in [0%,10%] of the submissions are grouped under the \"rare\" category. The population is still diverse at the mutation sites that are observed in (10%,90%) of the entries and these coordinates are grouped under the \"diverse\" category." >> $reportFile
# Plots of missing spot counts, grouped w.r.t. SC2 genes.
echo "<br><br><br><br>" >> $reportFile
echo "<img src=\"./genesVSuncovered_abscounts.png\" alt=\"Uncovered genome coordinates per gene, absolute counts\" width=\"49%\" class=\"center\">" >> $reportFile
echo "<img src=\"./genesVSuncovered_scaled.png\" alt=\"Uncovered genome coordinates per gene, counts per kb\" width=\"49%\" class=\"center\">" >> $reportFile
echo "<br><br><br><br>" >> $reportFile
#######################################################
# Taxonomic classification results of the reads
numCovid=$(cat k2-std.out | grep Orthocoronavirinae | grep -v unclass | awk '{ print $2 }')
numHuman=$(cat k2-std.out | grep sapiens | awk '{ print $2 }')
numSynthetic=$(cat k2-std.out | grep other | grep 28384 | awk '{ print $2 }')
if [[ -z $numCovid ]]; then
pctCovid=0.00
numCovid=0
else
pctCovid=$(cat k2-std.out | grep Orthocoronavirinae | grep -v unclass | awk '{ print $1 }')
fi
if [[ -z $numHuman ]]; then
pctHuman=0.00
numHuman=0
else
pctHuman=$(cat k2-std.out | grep sapiens | awk '{ print $1 }')
fi
if [[ -z $numSynthetic ]]; then
pctSynthetic=0.00
numSynthetic=0
else
pctSynthetic=$(cat k2-std.out | grep other | grep 28384 | awk '{ print $1 }')
fi
k2hits=$(cat k2-std.out | sort -k 1 -rn | awk '$4=="F" {print $6 " (" $1 "%)<br>"}' | head -n 5)
echo '<table>' >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Hits to SARS-Cov2 genome (kraken2):</td><td>$numCovid reads (${pctCovid}%) </td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Hits to human genome (kraken2):</td><td>$numHuman reads (${pctHuman}%)</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Hits to synthetic sequences (kraken2, taxid 28384):</td><td>$numSynthetic reads (${pctSynthetic}%)</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <td>Most abundant organisms (kraken2, family level):</td><td>$k2hits</td>" >> $reportFile
echo "</tr>" >> $reportFile
echo "</table>" >> $reportFile
#########################################################
# QC classification
# VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV
qc_flags=./qc-flags.txt
touch $qc_flags
dominantOrganism=$(echo $k2hits | head -n 1 | awk '{print $1}')
if [[ $dominantOrganism != "Coronaviridae" ]]; then
echo 'unspecific_amplification' >> $qc_flags
fi
if [[ $numReads -le 100 ]]; then
echo 'low_sequencing_depth' >> $qc_flags
fi
# Avg. coverage < 2X -> error, F
# Avg. coverage < 1000X -> warning, B
# Avg. coverage < 100X -> warning, C
if [[ $avgCoveragePassed -lt 2 ]]; then
echo 'insufficient_average_coverage' >> $qc_flags
else
if [[ $avgCoveragePassed -lt 100 ]]; then
echo 'low_average_coverage' >> $qc_flags
else
if [[ $avgCoveragePassed -lt 1000 ]]; then
echo 'suboptimal_average_coverage' >> $qc_flags
fi
fi
fi
if [[ $avgLengthPassed -le 70 ]]; then
echo 'truncated_reads' >> $qc_flags
fi
# 0x coordinates > 1%
if [[ ${#uncoveredLoci[@]} -gt 300 ]]; then
echo 'low_coverage_breadth' >> $qc_flags
fi
# 10X coordinates > 5%
if [[ ${#uncoveredLoci[@]} -gt 1500 ]]; then
echo 'uneven_sequence_coverage' >> $qc_flags
fi
# Adjust for PB/Nanopore
if [[ $avgQualityPassed -lt 30 ]]; then
echo 'sequence_quality_low' >> $qc_flags
fi
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#######################################################
echo "<br>" >> $reportFile
echo "<h2>Detected variants (Experimental)</h2>" >> $reportFile
echo "<div>" >> $reportFile
echo " <div id=\"figdiv\">" >> $reportFile
echo " <img src=\"./pieChart_deconvolution.png\" alt=\"Abundance of variants by deconvolution\" width=\"100%\">" >> $reportFile
echo " </div>" >> $reportFile
echo " <div id=\"figdiv\">" >> $reportFile
echo " <img src=\"./pieChart_kallisto.png\" alt=\"Abundance of variants by kallisto\" width=\"100%\">" >> $reportFile
echo " </div>" >> $reportFile
echo "</div>" >> $reportFile
echo "<div id=\"caption\">" >> $reportFile
echo " <p> Based on deconvolution, <a href=\"https://outbreak.info/situation-reports?pango=$mostAbundantVariantName\">$mostAbundantVariantName</a>
is estimated to constitute $mostAbundantVariantPct% of the viral\
particles and hence is the most abundant variant in the sample. The R<sup>2</sup> for the linear regression was $linRegressionR2.\
Variants that were detected less than 5% were grouped under \"Other\"" >> $reportFile
echo " <p> Based on the consensus sequence of the observed reads, the \"ensemble-averaged sequence\" most closely resembles \
the <a href=\"https://outbreak.info/situation-reports?pango=$consensusLineage\">$consensusLineage</a> lineage. \
If this is a sample consisting of a single source of pathogens or an overwhelming majority of the different sources \
are infected with the same variant, the sample is dominated by this variant." >> $reportFile
echo " <p> Based on mapping individual reads to the variant consensus sequences in the reference database, \
kallisto predicts that the sample is dominated by <a href=\"https://outbreak.info/situation-reports?pango=$kallistoTopName\"\
>$kallistoTopName</a> lineage. Accuracy of this measure is expected to improve if the \
input data consists of long reads as opposed to convolution." >> $reportFile
echo "</div>" >> $reportFile
echo "<br>" >> $reportFile
echo "<br>" >> $reportFile
echo "<div>" >> $reportFile
echo " <div id=\"figdiv\">" >> $reportFile
echo " <img src=\"./pieChart_k2_allCovid.png\" alt=\"Abundance of variants by kraken2+bracken\"\
width=\"100%\">" >> $reportFile
echo " </div>" >> $reportFile
echo " <div id=\"figdiv\">" >> $reportFile
echo " <img src=\"./pieChart_k2_majorCovid.png\" alt=\"Abundance of variants by kraken2+bracken\"\
width=\"100%\">" >> $reportFile
echo " </div>" >> $reportFile
echo "</div>" >> $reportFile
echo "<br>" >> $reportFile
echo "<br>" >> $reportFile
echo "<div>" >> $reportFile
echo " <div id=\"figdiv\">" >> $reportFile
echo " <img src=\"./pieChart_freyja.png\" alt=\"Abundance of variants by Freyja\"\
width=\"100%\">" >> $reportFile
echo " </div>" >> $reportFile
echo " <div id=\"figdiv\">" >> $reportFile
echo " <img src=\"./pieChart_lcs.png\" alt=\"Abundance of variants by LCS\"\
width=\"100%\">" >> $reportFile
echo " </div>" >> $reportFile
echo "</div>" >> $reportFile
echo "<br>" >> $reportFile
echo "<br>" >> $reportFile
echo "<div>" >> $reportFile
echo " <div id=\"figdiv\">" >> $reportFile
echo " <img src=\"./freyja_bootstrap.png\" alt=\"Freyja bootstrapping\"\
width=\"100%\">" >> $reportFile
echo " </div>" >> $reportFile
echo " <div id=\"figdiv\">" >> $reportFile
echo " " >> $reportFile
echo " </div>" >> $reportFile
echo "</div>" >> $reportFile
echo "<br>" >> $reportFile
# Append the VOC - VOI support table generated by the above Python script to the report
cat VOC-VOIsupportTable.html >> $reportFile
rm VOC-VOIsupportTable.html
#######################################################
echo Appending a detailed list of all detected mutations...
echo >> $reportFile
echo "<h2>Detected mutations</h2>" >> $reportFile
echo "Only genomic coordinates with at least 10X coverage were considered." >> $reportFile
echo "<br>" >> $reportFile
echo "<table>" >> $reportFile
echo "<tr>" >> $reportFile
echo " <th>Position</th> <th>Ref. base</th> <th>Alt. base</th> <th>Alt. freq</th> <th>p-value</th> <th>Mutation name</th> <th>Compatible lineages</th>" >> $reportFile
echo "</tr>" >> $reportFile
# Copy the list of mutations from the temporary file generated by python
cat mutationTable.html >> $reportFile
rm mutationTable.html
echo "</table>" >> $reportFile
echo "<br>" >> $reportFile
echo >> $reportFile
echo "</body>" >> $reportFile
echo "</html>" >> $reportFile
# Re-organise the folder so that it is ready to export.
mkdir report
mv report.* report/
mv *.png report/
mkdir outfolder
shopt -s extglob
mv ./!(outfolder) ./outfolder/