#############################
# #
# Q2Pipe Option file #
# Designed for Q2Pipe V0.95 #
# #
#############################
################################
# Common Analysis Parameters #
################################
ANALYSIS_NAME=BASIC_TEST
NB_THREADS=8
# Threads fine-tuning
CLASSIFIER_NB_THREADS=2
# Leave empty if you don't use an apptainer image to launch Qiime2
# To define a different temporary folder, add -B /path/to/your/folder:/tmp
# TMPDIR inside the container MUST point to /tmp at all times. DO NOT CHANGE THIS
# Also make sure that the TEMPORARY_DIRECTORY variable points to the same folder
# Q2Pipe can also use the TMPDIR environment variable if set
APPTAINER_COMMAND=
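# Illustrative example only (the image path and bind path are hypothetical; check the
# Q2Pipe documentation for the exact command form expected on your system):
# APPTAINER_COMMAND=apptainer exec -B /scratch/q2pipe_tmp:/tmp /path/to/qiime2_image.sif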
# Metadata must be in TSV format
# Manifest must be in CSV format
# For multiple runs, separate each run manifest with a comma (,)
METADATA_FILE_PATH=
MANIFEST_FILE_PATH=
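# Illustrative example for two runs (hypothetical paths):
# MANIFEST_FILE_PATH=/data/run1_manifest.csv,/data/run2_manifest.csv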
# Leave empty to use the system's TMPDIR variable (falls back to /tmp if not set)
# If you are using apptainer and want to define a different temp folder
# you must also add '-B /path/to/temp:/tmp' to the apptainer command definition
TEMPORARY_DIRECTORY=
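# Illustrative example, matching the hypothetical bind path shown for APPTAINER_COMMAND above:
# TEMPORARY_DIRECTORY=/scratch/q2pipe_tmp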
##########################
# Step 1 : Data Import #
##########################
# PLACEHOLDER for single-end support. DO NOT CHANGE THIS, ONLY PAIRED IS SUPPORTED
DATA_TYPE=paired
# Number of sequences to use to generate quality plots (default: 10000)
p_n=10000
### FALCO ###
# Launch Falco analysis on samples (similar to FastQC but with a faster implementation)
# This program will NEVER affect your reads, it works on copies of the reads
RUN_FALCO=true
# Trim the end of the reads to reduce false positives in Falco FAIL detection
# Use 0 to disable trimming and analyse the complete sequence (not recommended, as many
# of your samples will fail screening because of the low-quality ends with Illumina data)
falco_right_trim=280
# Create a combined R1 file and a combined R2 file and run them through Falco
# Should give similar results to the Qiime2 import, but can help flag run-wide problems
# This will take more time to run and use more disk space
FALCO_COMBINED_RUN=true
# If true, will delete the exportation file to save disk space (results will be kept)
CLEAN_FALCO_OUTPUT=true
### FIGARO ###
# Launch the Figaro program to predict correct trimming positions
# This program will NEVER affect your reads, it works on copies of the reads.
RUN_FIGARO=true
# Will subtract this number from the longest sequence length
# This makes sure every read is the same length
# NOTE: This will NOT affect your reads, only the copies
figaro_trim_offset=2
# Size must EXCLUDE the primers; if amplicon length is variable, use the longest expected length
# If you want to test different amplicon sizes, you can input multiple values (separated by commas)
# NOTE: each amplicon size takes approximately 40s to compute
f_amplicon_size=368,370,372,374,376,378,380
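# Illustrative arithmetic (hypothetical numbers): for a ~412 bp amplicon that still includes
# a 20 bp forward and an 18 bp reverse primer, the size to enter here is 412 - 20 - 18 = 374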
f_forward_primer_len=20
f_reverse_primer_len=18
# 12 is the default overlap value in Qiime2
f_min_overlap=12
# If true, will delete the exportation file to save disk space (results will be kept)
CLEAN_FIGARO_OUTPUT=true
#########################
# Cutadapt (Optional) #
#########################
# This step is done during step 1.5 and step 2
RUN_CUTADAPT=false
forward_trim_param=--p-front-f
reverse_trim_param=--p-front-r
forward_primer=GTGYCAGCMGCCGCGGTAA
reverse_primer=CCGYCAATTYMTTTRAGTTT
p_discard_untrimmed=true
# Error rate allowed in adapter recognition
# The actual error rate is computed as the number of errors in the match
# divided by the length of the matching part of the adapter
# Ex: an adapter match of length 8 containing 1 error has an error rate of 1/8=0.125
p_error_rate=0.1
# if true, will force the step to stop just after cutadapt
# Useful for debugging and accessing CutAdapt qzv files
CA_FORCE_INTERRUPTION=false
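# For reference, a sketch of the Qiime2 call these options feed into, assuming Q2Pipe
# passes them straight to the cutadapt plugin (artifact names are placeholders):
# qiime cutadapt trim-paired \
#   --i-demultiplexed-sequences demux.qza \
#   --p-front-f GTGYCAGCMGCCGCGGTAA \
#   --p-front-r CCGYCAATTYMTTTRAGTTT \
#   --p-error-rate 0.1 \
#   --p-discard-untrimmed \
#   --o-trimmed-sequences demux_trimmed.qza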
######################################################
# Step 1.5 : Dada2 Denoising Parameters Evaluation #
######################################################
# This step is designed to work with a single manifest file
# Manifest must be in CSV format
EVAL_MANIFEST_FILE_PATH=
# if your data is single-end, only p_trim_left_f, p_trunc_len_f and p_max_ee_f will be used in trimming
# Use 0 to bypass evaluation (will just denoise all samples)
DENOISE_EVALUATION_SAMPLE_SIZE=5
TESTFILE_PATH=
# If true, will generate a new random manifest, even if one already exists
# If false, will use the existing $ANALYSIS_NAME.eval_manifest.temp as the subset
FORCE_RESAMPLING=false
# If true, will stop the step after generating $ANALYSIS_NAME.eval_manifest.temp
# Nothing else will be generated until DRY_RUN is switched to false again
DRY_RUN=false
################################
# Step 2 : Dada2 Denoising #
################################
# NOT FOR STEP 1.5 (YOU MUST USE TESTFILE FOR STEP 1.5)
# Number of denoising jobs to launch simultaneously
# Each job will use NB_THREADS / job count threads
# For best results, NB_THREADS should be a multiple of your number of jobs
# Ex: 3 jobs should have 9, 12, 15, etc. NB_THREADS so each job has the same number of threads
# If 1 is used, all jobs will run sequentially (which will be slower than running multiple jobs)
CONCURRENT_JOBS=1
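# Illustrative arithmetic: with NB_THREADS=8 and CONCURRENT_JOBS=2, each job would get 8 / 2 = 4 threads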
# Use 0 to deactivate parameters (should be 0 if you used CutAdapt)
p_trim_left_f=19
p_trim_left_r=20
# Use 0 to deactivate parameters
# If you have multiple runs, you must input a value for each of them,
# separated by commas (ex: 234,235,223)
# They must be in the same order as your manifest list
p_trunc_len_f=237
p_trunc_len_r=209
p_max_ee_f=2
p_max_ee_r=2
p_trunc_q=2
p_n_reads_learn=1000000
p_chimera_method=consensus
p_min_fold_parent_over_abundance=1.0
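# For reference, a sketch of how these options map onto the Qiime2 dada2 plugin, assuming
# Q2Pipe passes them through for each run (artifact names are placeholders):
# qiime dada2 denoise-paired \
#   --i-demultiplexed-seqs demux_run1.qza \
#   --p-trim-left-f 19 --p-trim-left-r 20 \
#   --p-trunc-len-f 237 --p-trunc-len-r 209 \
#   --p-max-ee-f 2 --p-max-ee-r 2 --p-trunc-q 2 \
#   --p-n-reads-learn 1000000 \
#   --p-chimera-method consensus --p-min-fold-parent-over-abundance 1.0 \
#   --o-table table_run1.qza \
#   --o-representative-sequences rep_seqs_run1.qza \
#   --o-denoising-stats stats_run1.qza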
###########################
# Step 2.5 : Run Merging #
###########################
# If true, will proceed with merging even if runs are incompatible (different denoising parameters)
# NOT RECOMMENDED
IGNORE_INCOMPATIBILITY=false
#####################################
# Step 3 : Feature Table filtering #
#####################################
# FUTURE DEV: REPLACE OR ADD AUTO CALCULATION ACCORDING TO VISUALIZATION DATA
# Use 0 to disable
#freq_threshold=0.0005
p_min_frequency=16
# IDEM ^^^^
# Use 0 to disable
#sample_threshold=0
p_min_samples=0
###############################
# Step 4 : Denovo Clustering #
###############################
# Must be between ]0 , 1.0]
# Use NA to skip denovo clustering (you must still launch step 4)
p_perc_identity=NA
##################################
# Step 5 : Classifier Training #
##################################
# Only specify these if you want to train the classifier with your database
# It's recommended to use the same name (different extension) for the fasta and the tax file
# The classifier output name must have the .qza extension
#FASTA_DATABASE_PATH=
#TAXO_DATABASE_PATH=
SEQS_QZA_PATH=
TAXO_QZA_PATH=
CLASSIFIER_OUTPUT_NAME=
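# For reference, a sketch of the training command these options correspond to, assuming
# Q2Pipe uses the standard Qiime2 naive Bayes trainer (file names are hypothetical):
# qiime feature-classifier fit-classifier-naive-bayes \
#   --i-reference-reads reference_seqs.qza \
#   --i-reference-taxonomy reference_taxonomy.qza \
#   --o-classifier my_classifier.qza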
#################################
# Step 6 : Taxonomy Assignment #
#################################
# Only specify this if you skipped step 5
CLASSIFIER_DATABASE_PATH=
# Possible values: [0, 1]
p_confidence=0.7
############################
# Step 7 : Taxa Filtering #
############################
# Filter samples according to specific metadata
# ex "[subject]='subject-1' AND NOT [body-site]='gut'"
# ex "[body-site]='gut' OR [reported-antibiotic-usage]='Yes'"
# ex "[body-site] IN ('left palm', 'right palm')"
# Leave "" to ignore
p_where=""
# Never use spaces in the list, only commas
# p_exclude removes unwanted taxa and p_include keeps only the specified taxa
# You can use both options to exclude parts of the inclusion
# If both are empty, taxa filtering will be skipped
p_include=bacteria,archaea
p_exclude=eukaryota,mitochondria,chloroplast
# Must be exact or contains (default)
p_mode=contains
# Will also be used in steps 8 and 10
GENERATE_PHYLOGENY=false
# RAREFACTION OVERRIDE
# true, false or both
# If you activate this option, you can ignore the next sections and skip steps 8 and 9
# Use "both" without the quotes to run both with and without rarefaction in a single run
SKIP_RAREFACTION=both
##########################################
# Step 8 : Alpha rarefaction plotting #
##########################################
p_max_depth=8000
p_steps=10
p_iterations=10
# Use this format: observed_features,shannon
# You can also leave this blank to use Qiime2 default metrics (observed_features,shannon)
# IMPORTANT: This is NOT the metrics for step 10, this is only for the rarefaction curve
p_metrics=observed_features,shannon
#############################
# Step 9 : Data Rarefaction #
#############################
p_sampling_depth=10000
########################
# Step 10 : Metrics #
########################
alpha_metrics=shannon,simpson,observed_features,chao1,pielou_e
beta_metrics=braycurtis,jaccard
# Relevant only if GENERATE_PHYLOGENY is activated
alpha_metrics_phylo=faith_pd
beta_metrics_phylo=weighted_unifrac,weighted_normalized_unifrac,generalized_unifrac,unweighted_unifrac
######################
# Step 11 : Export #
######################
## FUNGuild specific options (Fungal ITS Only)
# To use FUNGuild, Guilds_v1.1.py must be available in your path
# Recommended FUNGuild version : https://github.com/Patg13/FUNGuild
GENERATE_FUNGUILD=false
# Enter remote_fungi to use the remote fungi database (http://stbates.org/funguild_db.php)
# You can also specify a local database path
# Database must be downloaded with the provided bash script in https://github.com/Patg13/FUNGuild
FUNGUILD_DATABASE_PATH=
# Path to the result extraction form (in tsv format)
EXTRACTION_FORM_PATH=
## ANCOM specific options
GENERATE_ANCOM=false
# Collapse table at genus level (6)
p_level=6
# Depends on your metadata
# Specify each target column, separated by commas
# Ex: Col1,Col2,Col3,Col4
# There will be one ANCOM per column
m_metadata_column=