-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmutSigReport.Rmd
158 lines (139 loc) · 5.77 KB
/
mutSigReport.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
```{r setup, include=F}
library("optparse");
library("hwriter");
library("VariantAnnotation");
library("BSgenome.Hsapiens.UCSC.hg19");
library("reshape")
library("boot")
optList <- list(
make_option("--name", default = '', type = "character", action = "store", help = "report name"),
make_option("--alexandrovData", default = '~/share/reference/Alexandrov_NMF_signatures.txt', type = "character", action = "store", help = "alexandrov nmf signatures"),
make_option("--targetBed", default = NULL, type = "character", action = "store", help = "target intervals in bed format"))
parser <- OptionParser(usage = "%prog [options] [vcf file(s)]", option_list = optList);
arguments <- parse_args(parser, positional_arguments = T, args = args);
opt <- arguments$options;
if (length(arguments$args) < 1) {
cat("Need vcf file(s)\n");
print_help(parser);
stop();
}
vcfFiles <- arguments$args
outFile <- opt$outFile
genome <- BSgenome.Hsapiens.UCSC.hg19;
seqlevels(genome) <- sub('chr', '', seqlevels(genome))
bases <- c("A", "C", "G", "T")
```
# `r opt$name` Mutational Signature Report
---
### Raymond Lim
```{r loadAlexandrov, cache=T}
alexandrov <- read.table(opt$alexandrovData, sep = '\t', header = T)
sigs <- alexandrov[, grepl('Signature', colnames(alexandrov))]
```
```{r trinucleotideFreqs, cache=T}
if (!is.null(opt$targetBed)) {
bed <- import(opt$targetBed)
genSeq <- getSeq(genome, bed)
} else {
genSeq <- getSeq(genome)
}
trintFq <- trinucleotideFrequency(genSeq)
trintFq <- colSums(trintFq) / sum(as.numeric(trintFq))
```
```{r loadvcf, include=F, cache=T}
vcfs <- list()
for (vcfFile in vcfFiles) {
s <- sub('\\..*', '', vcfFile)
s <- sub('.*/', '', s)
vcfs[[s]] <- readVcf(vcfFile, 'hg19')
seqlevels(vcfs[[s]]) <- sub('chr', '', seqlevels(vcfs[[s]]))
vcfs[[s]] <- vcfs[[s]][sapply(rowData(vcfs[[s]])$ALT, length) == 1]
rowData(vcfs[[s]])$MUT <- paste(rowData(vcfs[[s]])$REF, unlist(rowData(vcfs[[s]])$ALT), sep = ">")
rowData(vcfs[[s]])$MUT[rowData(vcfs[[s]])$MUT == "G>T"] <- "C>A"
rowData(vcfs[[s]])$MUT[rowData(vcfs[[s]])$MUT == "G>C"] <- "C>G"
rowData(vcfs[[s]])$MUT[rowData(vcfs[[s]])$MUT == "G>A"] <- "C>T"
rowData(vcfs[[s]])$MUT[rowData(vcfs[[s]])$MUT == "A>T"] <- "T>A"
rowData(vcfs[[s]])$MUT[rowData(vcfs[[s]])$MUT == "A>G"] <- "T>C"
rowData(vcfs[[s]])$MUT[rowData(vcfs[[s]])$MUT == "A>C"] <- "T>G"
rowData(vcfs[[s]])$MUT <- factor(rowData(vcfs[[s]])$MUT, levels = c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G"))
}
```
```{r pie}
for (n in names(vcfs)) {
vcf <- vcfs[[n]]
cols <- c("C>A" = "lightblue", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "lightgreen", "T>G" = "pink")
main <- paste(n, " (n=", length(rowData(vcf)), ")", sep = '')
pie(table(rowData(vcf)$MUT), col = cols, main = main)
}
```
```{r mutCounts}
plotMutBarplot <- function(tabs, tit) {
cols <- c("C>A" = "lightblue", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "lightgreen", "T>G" = "pink")
names(cols) <- names(tabs)
yl <- c(0, max(unlist(tabs)))
par(mfrow = c(1, length(tabs)), mar = c(5,0,5,0), oma = c(2,2,2,2))
mut <- names(tabs)[1]
for (mut in names(tabs)) {
barplot(tabs[[mut]], ylim = yl, las = 2, yaxt = 'n', col = cols[[mut]])
}
axis(2, outer = T)
mtext(tit, outer = T, side = 3, line = -1)
}
mutTabs <- list()
for (s in names(vcfs)) {
svcf <- split(rowData(vcfs[[s]]), rowData(vcfs[[s]])$MUT)
tabs <- list()
for (mut in names(svcf)) {
if (length(svcf[[mut]]) > 0) {
seqs <- getSeq(genome, resize(flank(svcf[[mut]], width = 1, start = T, both = T), width = 3))
seqs[subseq(seqs,2,2) == "G"] <- reverseComplement(seqs[subseq(seqs,2,2) == "G"])
seqs[subseq(seqs, 2, 2) == "A"] <- reverseComplement(seqs[subseq(seqs,2,2) == "A"])
x <- as.character(subseq(seqs[1], 2, 2))
lvls <- paste(rep(bases, each = 4), x, bases, sep = '')
tabs[[mut]] <- table(factor(as.character(seqs), level = lvls))
}
}
mutTabs[[s]] <- tabs
}
```
```{r mutCountPlots, fig.width=12}
for (s in names(vcfs)) {
tabs <- mutTabs[[s]]
plotMutBarplot(tabs, s)
normTabs <- lapply(tabs, function(x) x * trintFq[names(x)])
normTabs <- lapply(normTabs, function(x) x / sum(unlist(normTabs)))
plotMutBarplot(normTabs, paste('normalized', s))
}
```
```{r bootPlot, fig.width=12}
bootFun <- function(x) {
nval <- x$value * trintFq[x$Trinucleotide]
nval <- nval / sum(nval)
sigs <- x[, grepl("Signature", colnames(x))]
apply(sigs, 2, function(x) cor(nval, x))
}
ranFun <- function(p, d) {
s <- sample.int(nrow(p), size = sum(p$value), replace = T, prob = p$value / sum(p$value))
y <- melt(table(p[s,c(1,2)]))
m <- match(paste(y$Substitution.Type, y$Trinucleotide), paste(p$Substitution.Type, p$Trinucleotide))
p[m[!is.na(m)], "value"] <- y$value[!is.na(m)]
p
}
for (s in names(vcfs)) {
tabs <- mutTabs[[s]]
tab <- melt(tabs)
X <- cbind(alexandrov, value = 0)
m <- match(paste(tab$L1, tab$Var.1), paste(X$Substitution.Type, X$Trinucleotide))
X$value[m] <- tab$value
boots <- boot(X, bootFun, R = 1000, ran.gen = ranFun, sim = 'parametric')
boots.sd <- apply(boots$t, 2, sd)
ci <- norm.ci(boots, index = 1:ncol(sigs))
cols <- ifelse(boots$t0 > ci[,2] & boots$t0 < ci[,3], 'grey', 'red')
n <- sub('Signature.', '', colnames(sigs))
par(mfrow = c(2,1), mar = c(3,5,3,3))
barCenters <- barplot(boots$t0, ylim = c(min(boots$t - boots.sd), max(boots$t + boots.sd)), names.arg = n, col = cols, main = s, ylab = 'Correlation')
segments(barCenters, boots$t0 - boots.sd, barCenters, boots$t0+boots.sd, lwd = 1)
# vote barplot
barplot(table(factor(n[apply(boots$t, 1, which.max)], levels = n)), ylab = '# Votes')
}
```