forked from LangilleLab/microbiome_helper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_dada2_out.R
executable file
·147 lines (107 loc) · 4.45 KB
/
convert_dada2_out.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env Rscript
# Read in package to read in command-line options.
suppressMessages(library("optparse"))
version <- "1.0"
option_list <- list(
make_option(c("-i", "--input"), type="character", default=NULL,
help="Input RDS file of dada2 sequence table (required)." ,
metavar="path"),
make_option(c("-b", "--biom"), type="character", default="seqtab.biom.tsv",
help="Path to output BIOM file in TSV format (required).",
metavar="path"),
make_option(c("-f", "--fasta"), type="character", default="seqtab.fasta",
help="Path to output FASTA file (required).",
metavar="path"),
make_option(c("--taxa_in"), type="character", default=NULL,
help="Optional path to RDS containing table of taxa labels for dada2 variants",
metavar="path"),
make_option(c("--taxa_out"), type="character", default="taxa_metadata.txt",
help="Path for output of taxonomy metadata (default: taxa_metadata.txt).",
metavar="path"),
make_option(c("--verbose"), action = "store_true", type="logical", default=FALSE,
help="Print out more detailed information.", metavar = "boolean"),
make_option(c("--version"), action = "store_true", type="logical", default=FALSE,
help="Print out version number and exit.", metavar = "boolean")
)
opt_parser <- OptionParser(
option_list=option_list,
usage = "%prog [options] -i seqtab_final.rds -b seqtab.biom -f seqtab.fasta",
description = paste(
"Script to convert dada2 sequence table to a BIOM (in legacy TSV) and FASTA file.\n",
"You can also specify an RDS file containing the taxa labels to create",
"a BIOM observation metadata table for taxonomy", sep="")
)
opt <- parse_args(opt_parser)
# Print out version if --version flag set.
if (opt$version) {
cat("Wrapper version:", version, "\n")
options_tmp <- options(show.error.messages=FALSE)
on.exit(options(options_tmp))
stop()
}
if(is.null(opt$input)) {
stop("path to input sequence table RDS needs to be set.")
}
if(is.null(opt$biom)) {
stop("path to output biom table needs to be set.")
}
if(is.null(opt$fasta)) {
stop("path to output fasta file needs to be set.")
}
# Read in other required packages.
if(opt$verbose){
library("ShortRead")
} else {
suppressMessages(library("ShortRead"))
}
in_seqtab <- readRDS(opt$input)
seqs <- colnames(in_seqtab)
ids_study <- paste("seq", 1:ncol(in_seqtab), sep = "_")
colnames(in_seqtab) <- ids_study
if(opt$verbose){
cat("\nWriting sequences to", opt$fasta, "\n\n")
}
# Write out fasta file.
writeFasta(ShortRead(sread = DNAStringSet(seqs), id = BStringSet(ids_study)),
file = opt$fasta)
if(opt$verbose){
cat("Writing output biom table to", opt$biom, "\n\n")
}
# Transpose sequence table and add ids as new column.
in_seqtab_t <- as.data.frame(t(in_seqtab))
orig_col <- colnames(in_seqtab_t)
in_seqtab_t$ids <- rownames(in_seqtab_t)
in_seqtab_t <- in_seqtab_t[, c("ids", orig_col)]
col_df <- data.frame(matrix(c("#OTU ID", orig_col), nrow=1, ncol=ncol(in_seqtab_t)),
stringsAsFactors = FALSE)
colnames(col_df) <- colnames(in_seqtab_t)
# Write out BIOM file in legacy TSV format.
write.table(x = rbind(col_df, in_seqtab_t),
file = opt$biom,
quote = FALSE,
sep = "\t",
col.names = FALSE,
row.names = FALSE)
# If taxa file specified then also read that in.
if(!is.null(opt$taxa_in)){
in_taxa <- readRDS(opt$taxa_in)
# Check that dada2 variants are in same order as sequence table.
if(!identical(rownames(in_taxa), seqs)){
stop("sequences in taxa and sequence table are not ordered the same.")
}
rownames(in_taxa) <- ids_study
# Replace all NA values with "Unclassified".
in_taxa[is.na(in_taxa)] <- "Unclassified"
taxa_combined <- apply(in_taxa, 1, function(x) paste(x, collapse=";"))
taxa_out <- data.frame(names(taxa_combined), taxa_combined)
colnames(taxa_out) <- c("#OTU ID", "taxonomy")
if(opt$verbose){
cat("Writing taxonomy observation metadata to", opt$taxa_out, "\n\n")
}
write.table(x = taxa_out, file = opt$taxa_out, quote = FALSE, sep="\t",
col.names = TRUE, row.names = FALSE)
} else {
if(opt$verbose){
cat("No taxonomy file specified so not making taxonomy observation file.\n")
}
}