---
# template.yml
# Path to the GTF file.
gtf_path: path/to/gtf_file.gtf
# Path to the .fa file (presumably the sequence FASTA matching the GTF; confirm).
fa_path: path/to/fa_file.fa
########################################################
## add entries when using ribosome profiling data.
## format: 'id : ribosome profiling paths'
## leave empty for sequence input models (TIS transformer)
## DO NOT change id after data is parsed to h5 file
########################################################
# Mapping of sample id -> path of the mapped ribosome profiling reads.
# Entries must be indented under `ribo_paths`; at column 0 they would be
# parsed as separate top-level keys and `ribo_paths` itself would be null.
ribo_paths:
  SRR000001: path/to/mapped/riboseq.sam
  SRR000002: path/to/mapped/riboseq.sam
  SRR000003: path/to/mapped/riboseq.sam
########################################################
## Data is parsed and stored in an HDF5-format file.
########################################################
# Location of the hdf5 file.
h5_path: path/to/hdf5_file.h5
########################################################
## Path prefix used for the prediction output files.
## Defaults to the hdf5 path.
########################################################
# out_prefix : riboformer/template_
########################################################
#
####################
## ADVANCED SETUP ##
####################
#
########################################################
## A custom set of riboseq data selected for training.
## Use ids applied in ribo_paths, leave commented if NA.
## Replicates can be merged, in which case the numbers of
## mapped reads of the grouped experiments are summed.
########################################################
## example: only use SRR000001 and SRR000003
#ribo:
#  - SRR000001
#  - SRR000003
#
## example: SRR000001 and SRR000002 are merged (replicates)
#ribo:
#  - - SRR000001
#    - SRR000002
#  - - SRR000003
#
########################################################
## It is possible to set offsets per read length.
## NOT RECOMMENDED: loses read length information.
## Functionality exists merely for benchmarking
########################################################
#offsets:
#  SRR000001:
#    28 : 7
#    29 : 10
#    30 : 11
#
########################################################
## Training times can be sped up by removing transcripts
## with few reads. This does not affect samples within
## the test set. Filtering is performed based
## on the number of reads on a transcript.
########################################################
## example: omit readless transcripts during training/validation
#cond :
#  ribo:
#    num_reads : "x > 0"
#
## example: custom rules per data set
#cond :
#  ribo:
#    num_reads :
#      SRR000001 : "x > 10"
#      SRR000002 : "x > 0"
#      SRR000003 : "x > 0"
# SRR000003 : "x > 0"