-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathintersectVcf.R
43 lines (32 loc) · 1.29 KB
/
intersectVcf.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env Rscript
# Read a variant table and extract uniprot accession ids
suppressPackageStartupMessages(library("optparse"));
suppressPackageStartupMessages(library("VariantAnnotation"));
#options(warn = -1, error = quote({ traceback(2); q('no', status = 1) }))
#options(error = recover)
options(error = quote(dump.frames("testdump", TRUE)))
optList <- list(
make_option("--genome", default = 'hg19', help = "genome build [default %default]"),
make_option("--outFile", default = NULL, help = "output file [default %default]"))
parser <- OptionParser(usage = "%prog vcf.files", option_list = optList);
arguments <- parse_args(parser, positional_arguments = T);
opt <- arguments$options;
if (is.null(opt$outFile)) {
cat("Need output file\n");
print_help(parser);
stop();
} else if (length(arguments$args) <= 1) {
cat("Need vcf files\n");
print_help(parser);
stop();
}
files <- arguments$args;
vcfs <- list()
for (f in files) {
vcfs <- append(vcfs, readVcf(f, genome = opt$genome))
}
all <- do.call('rbind', lapply(vcfs, function(x) as.data.frame(rowData(x))))
all <- all[, c("seqnames", "start", "end")]
cnt <- ddply(all, .(seqnames, start, end), nrow)
cnt <- subset(cnt, V1 > 1)
write.table(cnt, file = opt$outFile, sep = '\t', quote = F)