diff --git a/harpy/reports/_quarto.yml b/harpy/reports/_quarto.yml index a7714e59..e65086d7 100644 --- a/harpy/reports/_quarto.yml +++ b/harpy/reports/_quarto.yml @@ -1,20 +1,26 @@ brand: - logo: - medium: https://raw.githubusercontent.com/pdimens/harpy/docs/static/logo_report.png + logo: + medium: https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg execute: echo: false warning: false embed-resources: true -format: +format: dashboard: - theme: materia + theme: + - lumen + - harpy.scss scrolling: true expandable: false - logo: https://raw.githubusercontent.com/pdimens/harpy/docs/static/logo_report.png + logo: https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg nav-buttons: - icon: book-half href: https://pdimens.github.io/harpy - icon: bug href: https://github.com/pdimens/harpy/issues/new/choose - icon: code-slash - href: https://github.com/pdimens/harpy \ No newline at end of file + href: https://github.com/pdimens/harpy + include-in-header: + text: | + + \ No newline at end of file diff --git a/harpy/reports/align_bxstats.qmd b/harpy/reports/align_bxstats.qmd index d4151577..61655b6c 100644 --- a/harpy/reports/align_bxstats.qmd +++ b/harpy/reports/align_bxstats.qmd @@ -32,81 +32,58 @@ std_error <- function(x){ process_input <- function(infile){ samplename <- gsub(".bxstats.gz", "", basename(infile)) tb <- read.table(infile, header = T, sep = "\t") %>% select(-4, -5) - if(nrow(tb) == 0){ - return( - data.frame( - sample = samplename, - totalreads = 0, - totaluniquemol = 0, - singletons = 0, - nonsingletons = 0, - totaluniquebx = 0, - molecule2bxratio = 0, - totalvalidreads = 0, - totalinvalidreads = 0, - totalvalidpercent = 0, - averagereadspermol = 0, - sereadspermol = 0, - averagemolcov = 0, - semolcov = 0, - averagemolcovbp = 0, - semolcovbp = 0, - N50 = 0, - N75 = 0, - N90 = 0 - ) - ) - } else { - tb$valid <- tb$molecule - tb[tb$valid != "invalidBX", "valid"] <- "validBX" - tb$valid <- gsub("BX", " BX", tb$valid) - # isolate non-singletons b/c molecules with 1 read pair aren't linked reads - multiplex_df <- filter(tb, valid == "valid BX", reads >= 2) - singletons <- sum(tb$reads < 2 & tb$valid == "valid BX") - tot_uniq_bx <- read.table(infile, header = F, sep = "\n", as.is = T, skip = nrow(tb) + 1, comment.char = "+") - tot_uniq_bx <- gsub("#total unique barcodes: ", "", tot_uniq_bx$V1[1]) |> as.integer() - tot_mol <- sum(tb$valid == "valid BX") - tot_valid_reads <- sum(tb[tb$valid == "valid BX", "reads"]) - avg_reads_per_mol <- round(mean(multiplex_df$reads),1) - sem_reads_per_mol <- round(std_error(multiplex_df$reads), 2) - tot_invalid_reads <- sum(tb[tb$valid == "invalid BX", "reads"]) - avg_mol_cov <- round(mean(multiplex_df$coverage_inserts), 2) - sem_mol_cov <- round(std_error(multiplex_df$coverage_inserts), 4) - avg_mol_cov_bp <- round(mean(multiplex_df$coverage_bp), 2) - sem_mol_cov_bp<- round(std_error(multiplex_df$coverage_bp), 4) - n50 <- NX(multiplex_df$length_inferred, 50) - n75 <- NX(multiplex_df$length_inferred, 75) - n90 <- NX(multiplex_df$length_inferred, 90) - return( - data.frame( - sample = samplename, - totalreads = tot_valid_reads + tot_invalid_reads, - totaluniquemol = tot_mol, - singletons = singletons, - nonsingletons = nrow(multiplex_df), - totaluniquebx = tot_uniq_bx, - molecule2bxratio = round(tot_mol / tot_uniq_bx,2), - totalvalidreads = tot_valid_reads, - totalinvalidreads = tot_invalid_reads, - totalvalidpercent = round(tot_valid_reads/(tot_valid_reads + tot_invalid_reads) * 100,2), - averagereadspermol = avg_reads_per_mol, - sereadspermol = sem_reads_per_mol, - averagemolcov = avg_mol_cov, - semolcov = sem_mol_cov, - averagemolcovbp = avg_mol_cov_bp, - semolcovbp = sem_mol_cov_bp, - N50 = n50, - N75 = n75, - N90 = n90 - ) - ) - } + tb$valid <- tb$molecule + tb[tb$valid != "invalidBX", "valid"] <- "validBX" + tb$valid <- gsub("BX", " BX", tb$valid) + # isolate non-singletons b/c molecules with 1 read pair aren't linked reads + multiplex_df <- filter(tb, valid == "valid BX", reads >= 2) + singletons <- sum(tb$reads < 2 & tb$valid == "valid BX") + tot_uniq_bx <- read.table(infile, header = F, sep = "\n", as.is = T, skip = nrow(tb) + 1, comment.char = "+") + tot_uniq_bx <- gsub("#total unique barcodes: ", "", tot_uniq_bx$V1[1]) |> as.integer() + tot_mol <- sum(tb$valid == "valid BX") + tot_valid_reads <- sum(tb[tb$valid == "valid BX", "reads"]) + avg_reads_per_mol <- round(mean(multiplex_df$reads),1) + sem_reads_per_mol <- round(std_error(multiplex_df$reads), 2) + tot_invalid_reads <- sum(tb[tb$valid == "invalid BX", "reads"]) + avg_mol_cov <- round(mean(multiplex_df$coverage_inserts), 2) + sem_mol_cov <- round(std_error(multiplex_df$coverage_inserts), 4) + avg_mol_cov_bp <- round(mean(multiplex_df$coverage_bp), 2) + sem_mol_cov_bp<- round(std_error(multiplex_df$coverage_bp), 4) + n50 <- NX(multiplex_df$length_inferred, 50) + n75 <- NX(multiplex_df$length_inferred, 75) + n90 <- NX(multiplex_df$length_inferred, 90) + datarow <- data.frame( + sample = samplename, + totalreads = tot_valid_reads + tot_invalid_reads, + totaluniquemol = tot_mol, + singletons = singletons, + nonsingletons = nrow(multiplex_df), + totaluniquebx = tot_uniq_bx, + molecule2bxratio = round(tot_mol / tot_uniq_bx,2), + totalvalidreads = tot_valid_reads, + totalinvalidreads = tot_invalid_reads, + totalvalidpercent = round(tot_valid_reads/(tot_valid_reads + tot_invalid_reads) * 100,2), + averagereadspermol = avg_reads_per_mol, + sereadspermol = sem_reads_per_mol, + averagemolcov = avg_mol_cov, + semolcov = sem_mol_cov, + averagemolcovbp = avg_mol_cov_bp, + semolcovbp = sem_mol_cov_bp, + N50 = n50, + N75 = n75, + N90 = n90 + ) + #datarow[is.na(datarow)] <- 0 + #datarow[is.nan(datarow)] <- 0 + return(datarow) } + ``` ```{r setup_df} infiles <- list.files(params$indir, "bxstats.gz", full.names = TRUE) aggregate_df <- Reduce(rbind, Map(process_input, infiles)) +aggregate_df[is.na(aggregate_df)] <- 0 if(nrow(aggregate_df) == 0){ print("All input files were empty") diff --git a/harpy/reports/align_stats.qmd b/harpy/reports/align_stats.qmd index ef52c05a..137f4a10 100644 --- a/harpy/reports/align_stats.qmd +++ b/harpy/reports/align_stats.qmd @@ -26,9 +26,6 @@ using("dplyr","highcharter","DT","BioCircos") ``` ```{r import_file, results = F} -#infile <- "~/test.gz" -#samplename <- "test sample" -#sm_moldist <- 50000 infile <- params$bxstats samplename <- params$sample sm_moldist <- params$mol_dist diff --git a/harpy/reports/harpy.scss b/harpy/reports/harpy.scss new file mode 100644 index 00000000..4399fe28 --- /dev/null +++ b/harpy/reports/harpy.scss @@ -0,0 +1,17 @@ +/*-- scss:defaults --*/ +$navbar-bg: #f4f4f4; +$navbar-fg: #3d3d3d; + +// Fonts + +$font-family-sans-serif: Roboto, -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Arial, sans-serif !default; +$font-size-root: 1rem !default; +$font-weight-base: 400 !default; + +/*-- scss:rules --*/ +// Variables + +$web-font-path: "https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" !default; +@if $web-font-path { + @import url($web-font-path); +} \ No newline at end of file diff --git a/harpy/scripts/demultiplex_gen1.py b/harpy/scripts/demultiplex_gen1.py index a644041c..0d63d25d 100755 --- a/harpy/scripts/demultiplex_gen1.py +++ b/harpy/scripts/demultiplex_gen1.py @@ -109,7 +109,6 @@ def get_read_codes(index_read, left_segment, right_segment): R1_output = {sample: open(f"{outdir}/{sample}.{part}.R1.fq", 'w') for sample in samples} R2_output = {sample: open(f"{outdir}/{sample}.{part}.R2.fq", 'w') for sample in samples} segments = {'A':'', 'B':'', 'C':'', 'D':''} -unclear_read_map={} clear_read_map={} with ( pysam.FastxFile(r1) as R1, @@ -136,22 +135,18 @@ def get_read_codes(index_read, left_segment, right_segment): R2_output[sample_name].write(f"{r2_rec}\n") if "unclear" in statuses: - if BX_code in unclear_read_map: - unclear_read_map[BX_code] += 1 + continue + if "corrected" in statuses: + if BX_code in clear_read_map: + clear_read_map[BX_code][1] += 1 else: - unclear_read_map[BX_code] = 1 - else: - if "corrected" in statuses: + clear_read_map[BX_code] = [0,1] + else: + if all(status == "found" for status in statuses): if BX_code in clear_read_map: - clear_read_map[BX_code][1] += 1 + clear_read_map[BX_code][0] += 1 else: - clear_read_map[BX_code] = [0,1] - else: - if all(status == "found" for status in statuses): - if BX_code in clear_read_map: - clear_read_map[BX_code][0] += 1 - else: - clear_read_map[BX_code] = [1,0] + clear_read_map[BX_code] = [1,0] for sample_name in samples: R1_output[sample_name].close() @@ -160,6 +155,4 @@ def get_read_codes(index_read, left_segment, right_segment): BC_log.write("Barcode\tTotal_Reads\tCorrect_Reads\tCorrected_Reads\n") for code in clear_read_map: BC_log.write(f"{code}\t{sum(clear_read_map[code])}\t{clear_read_map[code][0]}\t{clear_read_map[code][1]}\n") - for code in unclear_read_map: - BC_log.write(f"{code}\t{unclear_read_map[code]}\t0\t0\n") f.close() \ No newline at end of file