Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
shajoezhu committed Dec 8, 2024
1 parent 95d2803 commit 25adfdf
Show file tree
Hide file tree
Showing 10 changed files with 109 additions and 125 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,6 @@ rsconnect/
*o
*bak
.Rproj.user

*png
Rplots.pdf
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ export(computeObsWSAF)
export(extractCoverageFromTxt)
export(extractCoverageFromVcf)
export(extractPLAF)
export(extractVcf)
export(haplotypePainter)
export(histWSAF)
export(plotAltVsRef)
Expand Down
6 changes: 3 additions & 3 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
#'
#' @examples
#' vcfFile = system.file("extdata", "PG0390-C.test.vcf.gz", package = "DEploid.utils")
#' vcf = extractVcf(vcfFile, "PG0390-C")
#' vcf = extractCoverageFromVcf(vcfFile, "PG0390-C")
#'
extractVcf <- function(filename, samplename) {
.Call(`_DEploid_utils_extractVcf`, filename, samplename)
extractCoverageFromVcf <- function(filename, samplename) {
.Call(`_DEploid_utils_extractCoverageFromVcf`, filename, samplename)
}

65 changes: 0 additions & 65 deletions R/all_tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,71 +29,6 @@ extractCoverageFromTxt <- function(refFileName, altFileName) {
}


#' @title Extract read counts from VCF
#'
#' @description Extract read counts from VCF file of a single sample.
#'
#' @note The VCF file should only contain one sample. If more samples present in the VCF, it only returns coverage for of the first sample.
#'
#' @param vcfFileName Path of the VCF file.
#'
#' @param ADFieldIndex Index of the AD field of the sample field. For example, if the format is "GT:AD:DP:GQ:PL", the AD index is 2 (by default).
#'
#' @return A data.frame contains four columns: chromosomes, positions, reference allele count, alternative allele count.
#'
#' @export
#'
#' @examples
#' vcfFile <- system.file("extdata", "PG0390-C.test.vcf.gz", package = "DEploid.utils")
#' PG0390 <- extractCoverageFromVcf(vcfFile)
#'
extractCoverageFromVcf <- function(vcfFileName, ADFieldIndex = 2) {
# Assume that AD is the second field
h <- function(w) {
if (any(grepl("gzfile connection", w))) {
invokeRestart("muffleWarning")
}
}

gzf <- gzfile(vcfFileName, open = "rb")
numberOfHeaderLines <- 0
line <- withCallingHandlers(readLines(gzf, n = 1), warning = h)
while (length(line) > 0) {
if (grepl("##", line)) {
numberOfHeaderLines <- numberOfHeaderLines + 1
} else {
break
}
line <- withCallingHandlers(readLines(gzf, n = 1), warning = h)
}
close(gzf)

vcf <- read.table(gzfile(vcfFileName),
skip = numberOfHeaderLines,
header = T, comment.char = "", stringsAsFactors = FALSE,
check.names = FALSE
)

sampleName <- names(vcf)[10]

tmp <- vcf[[sampleName]]
field <- strsplit(as.character(tmp), ":")

tmpCovStr <- unlist(lapply(field, `[[`, ADFieldIndex))
tmpCov <- strsplit(as.character(tmpCovStr), ",")

refCount <- as.numeric(unlist(lapply(tmpCov, `[[`, 1)))
altCount <- as.numeric(unlist(lapply(tmpCov, `[[`, 2)))

return(data.frame(
CHROM = vcf[, 1],
POS = vcf[, 2],
refCount = refCount,
altCount = altCount
))
}


#' @title Extract PLAF
#'
#' @description Extract population level allele frequency (PLAF) from text file.
Expand Down
33 changes: 21 additions & 12 deletions man/extractCoverageFromVcf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 0 additions & 36 deletions man/extractVcf.Rd

This file was deleted.

10 changes: 5 additions & 5 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,21 @@ Rcpp::Rostream<true>& Rcpp::Rcout = Rcpp::Rcpp_cout_get();
Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif

// extractVcf
Rcpp::DataFrame extractVcf(std::string filename, std::string samplename);
RcppExport SEXP _DEploid_utils_extractVcf(SEXP filenameSEXP, SEXP samplenameSEXP) {
// extractCoverageFromVcf
Rcpp::DataFrame extractCoverageFromVcf(std::string filename, std::string samplename);
RcppExport SEXP _DEploid_utils_extractCoverageFromVcf(SEXP filenameSEXP, SEXP samplenameSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP);
Rcpp::traits::input_parameter< std::string >::type samplename(samplenameSEXP);
rcpp_result_gen = Rcpp::wrap(extractVcf(filename, samplename));
rcpp_result_gen = Rcpp::wrap(extractCoverageFromVcf(filename, samplename));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_DEploid_utils_extractVcf", (DL_FUNC) &_DEploid_utils_extractVcf, 2},
{"_DEploid_utils_extractCoverageFromVcf", (DL_FUNC) &_DEploid_utils_extractCoverageFromVcf, 2},
{NULL, NULL, 0}
};

Expand Down
4 changes: 2 additions & 2 deletions src/rvcf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ class Rvcf : public VcfReader{
//'
//' @examples
//' vcfFile = system.file("extdata", "PG0390-C.test.vcf.gz", package = "DEploid.utils")
//' vcf = extractVcf(vcfFile, "PG0390-C")
//' vcf = extractCoverageFromVcf(vcfFile, "PG0390-C")
//'
// [[Rcpp::export]]
Rcpp::DataFrame extractVcf(std::string filename, std::string samplename) {
Rcpp::DataFrame extractCoverageFromVcf(std::string filename, std::string samplename) {
Rvcf vcf(filename, samplename);
return vcf.info();
}
74 changes: 74 additions & 0 deletions tests/testthat/test-extractFromVCF.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@

#' @title Extract read counts from VCF
#'
#' @description Extract read counts from VCF file of a single sample.
#'
#' @note The VCF file should only contain one sample. If more samples present in
#' the VCF, it only returns coverage for of the first sample.
#'
#' @param vcfFileName Path of the VCF file.
#'
#' @param ADFieldIndex Index of the AD field of the sample field. For example,
#' if the format is "GT:AD:DP:GQ:PL", the AD index is 2 (by default).
#'
#' @return A data.frame contains four columns: chromosomes, positions, reference
#' allele count, alternative allele count.
#'
#' @examples
#' vcfFile = system.file("extdata", "PG0390-C.test.vcf.gz", package = "DEploid")
#' PG0390 = extractCoverageFromVcf(vcfFile)
#'
old_extractCoverageFromVcf <- function(vcfFileName, ADFieldIndex = 2) {
# Assume that AD is the second field
h <- function(w) {
if (any(grepl("gzfile connection", w)))
invokeRestart("muffleWarning")
}

gzf <- gzfile(vcfFileName, open = "rb")
numberOfHeaderLines <- 0
line <- withCallingHandlers(readLines(gzf, n = 1), warning = h)
while (length(line) > 0) {
if (grepl("##", line)) {
numberOfHeaderLines <- numberOfHeaderLines + 1
} else {
break
}
line <- withCallingHandlers(readLines(gzf, n = 1), warning = h)
}
close(gzf)

vcf <- read.table(gzfile(vcfFileName), skip = numberOfHeaderLines,
header = T, comment.char = "", stringsAsFactors = FALSE,
check.names = FALSE)

sampleName <- names(vcf)[10]

tmp <- vcf[[sampleName]]
field <- strsplit(as.character(tmp), ":")

tmpCovStr <- unlist(lapply(field, `[[`, ADFieldIndex))
tmpCov <- strsplit(as.character(tmpCovStr), ",")

refCount <- as.numeric(unlist(lapply(tmpCov, `[[`, 1)))
altCount <- as.numeric(unlist(lapply(tmpCov, `[[`, 2)))

return(data.frame(CHROM = vcf[, 1],
POS = vcf[, 2],
refCount = refCount,
altCount = altCount))
}


test_that("test R vs cpp vcf extract",
{
vcfFile = system.file("extdata", "PG0390-C.test.vcf.gz", package = "DEploid.utils")
vcf = extractCoverageFromVcf(vcfFile, "PG0390-C")
PG0390 = old_extractCoverageFromVcf(vcfFile)
testthat::expect_equal(names(vcf), names(PG0390))
testthat::expect_equal(vcf$CHROM, PG0390$CHROM)
testthat::expect_equal(vcf$POS, PG0390$POS)
testthat::expect_equal(vcf$refCount, PG0390$refCount)
testthat::expect_equal(vcf$altCount, PG0390$altCount)
}
)
2 changes: 1 addition & 1 deletion tests/testthat/test-vcfreader.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ panelFileName <- system.file("extdata", "labStrains.test.panel.txt",
refFileName <- system.file("extdata", "PG0390-C.test.ref", package = "DEploid.utils")
altFileName <- system.file("extdata", "PG0390-C.test.alt", package = "DEploid.utils")

PG0390CoverageVcf <- extractCoverageFromVcf(vcfFileName)
PG0390CoverageVcf <- extractCoverageFromVcf(vcfFileName, "PG0390-C")
plaf <- extractPLAF(plafFileName)


Expand Down

0 comments on commit 25adfdf

Please sign in to comment.