Skip to content

Commit

Permalink
add updateSeqlevelsStyle & tests
Browse files Browse the repository at this point in the history
  • Loading branch information
AtaJadidAhari committed Jan 13, 2025
1 parent ee6f279 commit 2c5cdf9
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 1 deletion.
10 changes: 9 additions & 1 deletion R/countRNAseqData.R
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,15 @@ countSplitReads <- function(sampleID, fds, NcpuPerSample=1, genome=NULL,
if(is.character(genome)){
genome <- getBSgenome(genome)
}
seqlevelsStyle(genome) <- seqlevelsStyle(chromosomes)[1]
tryCatch(
{
seqlevelsStyle(genome) <- seqlevelsStyle(chromosomes)[1]
}
error = function(e) {
warning("Could not update genome's seqlevelsStyle using GenomeInfoDb package. Updating manually now...")
genome <- updateSeqlevelsStyle(genome, metadata(genome)$genome, seqlevelsStyle(chromosomes)[1], metadata(genome)$provider, fds@workingDir)
}
)
chrLengths <- extractChromosomeLengths(bamfile)
mismatchChrs <- which(
seqlengths(genome)[chromosomes] != chrLengths[chromosomes])
Expand Down
62 changes: 62 additions & 0 deletions R/helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -700,3 +700,65 @@ checkForAndCreateDir <- function(object, dir){
}
return(TRUE)
}

updateSeqlevelsStyle <- function(bsgenome, genome_assembly, new_style, old_style, conversion_dict_path){
if (genome_assembly == "hg19" || genome_assembly == "hs37d5") {
conversion_dict_path <- file.path(conversion_dict_path, "hg19_NCBI2UCSC.txt")
if (!file.exists(conversion_dict_path)) {
file_url <- "https://www.cmm.in.tum.de/public/paper/FRASER/hg19_NCBI2UCSC.txt"
message(paste0("Downloading seqlevelStyle conversion dict: "), file_url)

response <- GET(file_url, write_disk(conversion_dict_path, overwrite = TRUE))

if (response$status_code == 200) {
assembly_report <- fread(conversion_dict_path)
}
else {
stop(paste0("Failed to download the file. Status code:", response$status_code,
".\nPlease download the file manually from ", file_url, " to the following directory: ", conversion_dict_path))
}
}
assembly_report <- fread(conversion_dict_path)
}
else if (genome_assembly == "hg38" || genome_assembly == "GRCh38") {
conversion_dict_path <- file.path(conversion_dict_path, "hg38_NCBI2UCSC.txt")
if (!file.exists(conversion_dict_path)) {
file_url <- "https://www.cmm.in.tum.de/public/paper/FRASER/hg38_NCBI2UCSC.txt"
message(paste0("Downloading seqlevelStyle conversion dict: "), file_url)

response <- GET(file_url, write_disk(conversion_dict_path, overwrite = TRUE))

if (response$status_code == 200) {
assembly_report <- fread(conversion_dict_path)
} else {
stop(paste0("Failed to download the file. Status code:", response$status_code,
".\nPlease download the file manually from ", file_url, " to the following directory: ", conversion_dict_path))
}
}
assembly_report <- fread(conversion_dict_path)
}

if (new_style == "UCSC") {
assembly_report <- assembly_report[NCBI_Genome_Name != ""]
mapping <- setNames(assembly_report$`UCSC.style.name`, assembly_report$`Sequence.Name`)
}
else { # new_style == "NCBI"
mapping <- setNames(assembly_report$`Sequence.Name`, assembly_report$`UCSC.style.name`)
}

mapping <- mapping[names(mapping) != "na" & mapping != "na"]
new_seqnames <- unname(mapping[seqnames(bsgenome)])

seqnames(bsgenome) <- new_seqnames

if (new_style == "UCSC" & old_style == "NCBI"){
seqinfo(bsgenome)@genome <- assembly_report[!(is.na(NCBI_Genome_Name) | NCBI_Genome_Name == ""), NCBI_Genome_Name]
}
else if (new_style == "NCBI" & old_style=="UCSC"){ # new_style == "NCBI"
seqinfo(bsgenome)@genome <- assembly_report[!(is.na(UCSC_Genome_Name) | UCSC_Genome_Name == ""), UCSC_Genome_Name]
}

seqlevelsStyle(bsgenome) <- new_style

return (bsgenome)
}
45 changes: 45 additions & 0 deletions tests/testthat/test_updateSeqlevelsStyle.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
context("Test updateSeqlevelsStyle function")

test_that("hg38, UCSC to NCBI", {
genome <- getBSgenome("hg38")
bsgenome <- genome

seqlevelsStyle(bsgenome) <- "NCBI"
genome <- updateSeqlevelsStyle(genome, metadata(genome)$genome, "NCBI", metadata(genome)$provider, "./")

expect_equal(seqnames(genome), seqnames(bsgenome))

})

test_that("hg38, NCBI to UCSC", {
genome <- getBSgenome("GRCh38")
bsgenome <- genome

seqlevelsStyle(bsgenome) <- "UCSC"
genome <- updateSeqlevelsStyle(genome, metadata(genome)$genome, "UCSC", metadata(genome)$provider, "./")

expect_equal(seqnames(genome), seqnames(bsgenome))

})

test_that("hg19, NCBI to UCSC", {
genome <- getBSgenome("hs37d5")
bsgenome <- genome

seqlevelsStyle(bsgenome) <- "UCSC"
genome <- updateSeqlevelsStyle(genome, metadata(genome)$genome, "UCSC", metadata(genome)$provider, "./")

expect_equal(seqnames(genome), seqnames(bsgenome))

})

test_that("hg19, UCSC to NCBI", {
genome <- getBSgenome("hg19")
bsgenome <- genome

seqlevelsStyle(bsgenome) <- "NCBI"
genome <- updateSeqlevelsStyle(genome, metadata(genome)$genome, "NCBI", metadata(genome)$provider, "./")

expect_equal(seqnames(genome), seqnames(bsgenome))

})

0 comments on commit 2c5cdf9

Please sign in to comment.