-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathq2pipe_tools_denoise_evaluation.sh
executable file
·156 lines (130 loc) · 5.54 KB
/
q2pipe_tools_denoise_evaluation.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/bin/bash
#################################
# #
# Qiime 2 Pipeline #
# By: Patrick Gagne (NRCan) #
# Step 1.5 - Denoise Evaluation #
# October 5, 2021 #
# #
#################################
exit_on_error(){
echo "Qiime2 command error detected"
echo "Exiting program"
exit 1
}
# This Qiime2 step will help to evaluate optimals parameters for the denoising step
# It will generate a serie of denoising on a random subsample (size defined by the user) extracted from the original manifest file
optionfile=$1
if [ ! $optionfile ] || [ ! -e $optionfile ] || [ ! -r $optionfile ]
then
echo "ERROR: you must specify a valid, accessible qiime2 optionfile"
exit 1
fi
source $optionfile
if [ $TEMPORARY_DIRECTORY ]
then
echo "Overriding default temporary directory to $TEMPORARY_DIRECTORY"
if [ ! -d $TEMPORARY_DIRECTORY ] || [ ! -w $TEMPORARY_DIRECTORY ]
then
echo "ERROR: $TEMPORARY_DIRECTORY does not exist or is read only"
exit 2
fi
export TMPDIR=$TEMPORARY_DIRECTORY
fi
if [ ! $TESTFILE_PATH ] || [ ! -e $TESTFILE_PATH ] || [ ! -r $TESTFILE_PATH ]
then
echo "ERROR: you must specify a valid, accessible testfile in the optionfile"
exit 1
fi
if [ $DENOISE_EVALUATION_SAMPLE_SIZE -eq 0 ]
then
echo "This step cannot be completed with a sample size of 0, you can skip it"
exit 1
fi
if [ "$FORCE_RESAMPLING" == "true" ] || [ ! -e $ANALYSIS_NAME.eval_manifest.temp ]
then
# Make temporary manifest file using defined user sample size
echo "Random Sampling of the manifest file"
head -n 1 $EVAL_MANIFEST_FILE_PATH > $ANALYSIS_NAME.eval_manifest.temp
if [ "$DATA_TYPE" == "paired" ]
then
sublist=$( awk 'NR>1' $EVAL_MANIFEST_FILE_PATH | grep ",forward" | shuf -n $DENOISE_EVALUATION_SAMPLE_SIZE )
for i in $sublist
do
sample_name=$( echo $i | awk -F ',' '{ print $1 }' )
echo $sample_name
grep $sample_name $EVAL_MANIFEST_FILE_PATH >> $ANALYSIS_NAME.eval_manifest.temp
done
else
awk 'NR>1' $EVAL_MANIFEST_FILE_PATH | shuf -n $DENOISE_EVALUATION_SAMPLE_SIZE >> $ANALYSIS_NAME.eval_manifest.temp
fi
else
echo "Random manifest already present, skipping sampling"
fi
if [ "$DRY_RUN" == "true" ]
then
echo "Dry run mode detected, ending program"
exit 0
fi
echo "Importing subsample into qiime2 artifact file"
$APPTAINER_COMMAND qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path $ANALYSIS_NAME.eval_manifest.temp \
--output-path $ANALYSIS_NAME.denoise_eval_import.qza \
--input-format PairedEndFastqManifestPhred33 || exit_on_error
echo "Summarizing data import into visualisation file"
$APPTAINER_COMMAND qiime demux summarize \
--i-data $ANALYSIS_NAME.denoise_eval_import.qza \
--o-visualization $ANALYSIS_NAME.denoise_eval_import.qzv --verbose
ca_flag=""
if [ "$RUN_CUTADAPT" == "true" ]
then
ca_flag="_CA"
untrimmed_flag=""
if [ "$p_discard_untrimmed" == "true" ]
then
untrimmed_flag="--p-discard-untrimmed"
fi
echo "Removing Adapters/Primers from reads with CutAdapt"
$APPTAINER_COMMAND qiime cutadapt trim-paired \
--i-demultiplexed-sequences $ANALYSIS_NAME.denoise_eval_import.qza \
--o-trimmed-sequences $ANALYSIS_NAME.denoise_eval_import$ca_flag.qza \
--p-match-adapter-wildcards \
$forward_trim_param $forward_primer \
$reverse_trim_param $reverse_primer \
$untrimmed_flag --p-cores $NB_THREADS --verbose || exit_on_error
echo "Summarizing Cutadapt trimming into visualisation file"
$APPTAINER_COMMAND qiime demux summarize \
--i-data $ANALYSIS_NAME.denoise_eval_import$ca_flag.qza \
--o-visualization $ANALYSIS_NAME.denoise_eval_import$ca_flag.qzv --verbose
fi
if [ ! -d "$ANALYSIS_NAME"_DenoiseTest_Results ]
then
mkdir "$ANALYSIS_NAME"_DenoiseTest_Results
fi
echo "Preparing to launch tests"
while read line
do
if [ "${line::1}" == "#" ] || [ -z "$line" ]
then
continue
fi
jobn=$( echo $line | awk -F ':' '{ print $1 }' )
params=$( echo $line | awk -F ':' '{ print $2 }' | sed 's/\r$//' )
echo "Launching $jobn parameters set"
$APPTAINER_COMMAND qiime dada2 denoise-paired --i-demultiplexed-seqs $ANALYSIS_NAME.denoise_eval_import$ca_flag.qza $params --p-n-threads $NB_THREADS --o-table $ANALYSIS_NAME.feature-table.$jobn.qza --o-representative-sequences $ANALYSIS_NAME.rep-seqs.$jobn.qza --o-denoising-stats $ANALYSIS_NAME.stats.$jobn.qza --verbose
if [ $? -ne 0 ]
then
echo "Command error detected during test denoising, $jobn will be skipped"
continue
fi
$APPTAINER_COMMAND qiime feature-table summarize --i-table $ANALYSIS_NAME.feature-table.$jobn.qza --o-visualization $ANALYSIS_NAME.feature-table.$jobn.qzv
$APPTAINER_COMMAND qiime metadata tabulate --m-input-file $ANALYSIS_NAME.stats.$jobn.qza --o-visualization $ANALYSIS_NAME.stats.$jobn.qzv --verbose
$APPTAINER_COMMAND qiime feature-table tabulate-seqs --i-data $ANALYSIS_NAME.rep-seqs.$jobn.qza --o-visualization $ANALYSIS_NAME.rep-seqs.$jobn.qzv
# Export results in TSV format (easier for downstream analysis)
$APPTAINER_COMMAND qiime tools export --input-path $ANALYSIS_NAME.stats.$jobn.qza --output-path $ANALYSIS_NAME.stats.$jobn
mv $ANALYSIS_NAME.stats.$jobn/stats.tsv ./$ANALYSIS_NAME.stats.$jobn.tsv
rm -rf $ANALYSIS_NAME.stats.$jobn
echo "Moving $jobn results to "$ANALYSIS_NAME"_DenoiseTest_Results"
mv $ANALYSIS_NAME.*.$jobn.* "$ANALYSIS_NAME"_DenoiseTest_Results/
done<$TESTFILE_PATH