diff --git a/harpy/reports/_quarto.yml b/harpy/reports/_quarto.yml
index a7714e59..e65086d7 100644
--- a/harpy/reports/_quarto.yml
+++ b/harpy/reports/_quarto.yml
@@ -1,20 +1,26 @@
brand:
- logo:
- medium: https://raw.githubusercontent.com/pdimens/harpy/docs/static/logo_report.png
+ logo:
+ medium: https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg
execute:
echo: false
warning: false
embed-resources: true
-format:
+format:
dashboard:
- theme: materia
+ theme:
+ - lumen
+ - harpy.scss
scrolling: true
expandable: false
- logo: https://raw.githubusercontent.com/pdimens/harpy/docs/static/logo_report.png
+ logo: https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg
nav-buttons:
- icon: book-half
href: https://pdimens.github.io/harpy
- icon: bug
href: https://github.com/pdimens/harpy/issues/new/choose
- icon: code-slash
- href: https://github.com/pdimens/harpy
\ No newline at end of file
+ href: https://github.com/pdimens/harpy
+ include-in-header:
+ text: |
+
+
\ No newline at end of file
diff --git a/harpy/reports/align_bxstats.qmd b/harpy/reports/align_bxstats.qmd
index d4151577..61655b6c 100644
--- a/harpy/reports/align_bxstats.qmd
+++ b/harpy/reports/align_bxstats.qmd
@@ -32,81 +32,58 @@ std_error <- function(x){
process_input <- function(infile){
samplename <- gsub(".bxstats.gz", "", basename(infile))
tb <- read.table(infile, header = T, sep = "\t") %>% select(-4, -5)
- if(nrow(tb) == 0){
- return(
- data.frame(
- sample = samplename,
- totalreads = 0,
- totaluniquemol = 0,
- singletons = 0,
- nonsingletons = 0,
- totaluniquebx = 0,
- molecule2bxratio = 0,
- totalvalidreads = 0,
- totalinvalidreads = 0,
- totalvalidpercent = 0,
- averagereadspermol = 0,
- sereadspermol = 0,
- averagemolcov = 0,
- semolcov = 0,
- averagemolcovbp = 0,
- semolcovbp = 0,
- N50 = 0,
- N75 = 0,
- N90 = 0
- )
- )
- } else {
- tb$valid <- tb$molecule
- tb[tb$valid != "invalidBX", "valid"] <- "validBX"
- tb$valid <- gsub("BX", " BX", tb$valid)
- # isolate non-singletons b/c molecules with 1 read pair aren't linked reads
- multiplex_df <- filter(tb, valid == "valid BX", reads >= 2)
- singletons <- sum(tb$reads < 2 & tb$valid == "valid BX")
- tot_uniq_bx <- read.table(infile, header = F, sep = "\n", as.is = T, skip = nrow(tb) + 1, comment.char = "+")
- tot_uniq_bx <- gsub("#total unique barcodes: ", "", tot_uniq_bx$V1[1]) |> as.integer()
- tot_mol <- sum(tb$valid == "valid BX")
- tot_valid_reads <- sum(tb[tb$valid == "valid BX", "reads"])
- avg_reads_per_mol <- round(mean(multiplex_df$reads),1)
- sem_reads_per_mol <- round(std_error(multiplex_df$reads), 2)
- tot_invalid_reads <- sum(tb[tb$valid == "invalid BX", "reads"])
- avg_mol_cov <- round(mean(multiplex_df$coverage_inserts), 2)
- sem_mol_cov <- round(std_error(multiplex_df$coverage_inserts), 4)
- avg_mol_cov_bp <- round(mean(multiplex_df$coverage_bp), 2)
- sem_mol_cov_bp<- round(std_error(multiplex_df$coverage_bp), 4)
- n50 <- NX(multiplex_df$length_inferred, 50)
- n75 <- NX(multiplex_df$length_inferred, 75)
- n90 <- NX(multiplex_df$length_inferred, 90)
- return(
- data.frame(
- sample = samplename,
- totalreads = tot_valid_reads + tot_invalid_reads,
- totaluniquemol = tot_mol,
- singletons = singletons,
- nonsingletons = nrow(multiplex_df),
- totaluniquebx = tot_uniq_bx,
- molecule2bxratio = round(tot_mol / tot_uniq_bx,2),
- totalvalidreads = tot_valid_reads,
- totalinvalidreads = tot_invalid_reads,
- totalvalidpercent = round(tot_valid_reads/(tot_valid_reads + tot_invalid_reads) * 100,2),
- averagereadspermol = avg_reads_per_mol,
- sereadspermol = sem_reads_per_mol,
- averagemolcov = avg_mol_cov,
- semolcov = sem_mol_cov,
- averagemolcovbp = avg_mol_cov_bp,
- semolcovbp = sem_mol_cov_bp,
- N50 = n50,
- N75 = n75,
- N90 = n90
- )
- )
- }
+ tb$valid <- tb$molecule
+ tb[tb$valid != "invalidBX", "valid"] <- "validBX"
+ tb$valid <- gsub("BX", " BX", tb$valid)
+ # isolate non-singletons b/c molecules with 1 read pair aren't linked reads
+ multiplex_df <- filter(tb, valid == "valid BX", reads >= 2)
+ singletons <- sum(tb$reads < 2 & tb$valid == "valid BX")
+ tot_uniq_bx <- read.table(infile, header = F, sep = "\n", as.is = T, skip = nrow(tb) + 1, comment.char = "+")
+ tot_uniq_bx <- gsub("#total unique barcodes: ", "", tot_uniq_bx$V1[1]) |> as.integer()
+ tot_mol <- sum(tb$valid == "valid BX")
+ tot_valid_reads <- sum(tb[tb$valid == "valid BX", "reads"])
+ avg_reads_per_mol <- round(mean(multiplex_df$reads),1)
+ sem_reads_per_mol <- round(std_error(multiplex_df$reads), 2)
+ tot_invalid_reads <- sum(tb[tb$valid == "invalid BX", "reads"])
+ avg_mol_cov <- round(mean(multiplex_df$coverage_inserts), 2)
+ sem_mol_cov <- round(std_error(multiplex_df$coverage_inserts), 4)
+ avg_mol_cov_bp <- round(mean(multiplex_df$coverage_bp), 2)
+ sem_mol_cov_bp<- round(std_error(multiplex_df$coverage_bp), 4)
+ n50 <- NX(multiplex_df$length_inferred, 50)
+ n75 <- NX(multiplex_df$length_inferred, 75)
+ n90 <- NX(multiplex_df$length_inferred, 90)
+ datarow <- data.frame(
+ sample = samplename,
+ totalreads = tot_valid_reads + tot_invalid_reads,
+ totaluniquemol = tot_mol,
+ singletons = singletons,
+ nonsingletons = nrow(multiplex_df),
+ totaluniquebx = tot_uniq_bx,
+ molecule2bxratio = round(tot_mol / tot_uniq_bx,2),
+ totalvalidreads = tot_valid_reads,
+ totalinvalidreads = tot_invalid_reads,
+ totalvalidpercent = round(tot_valid_reads/(tot_valid_reads + tot_invalid_reads) * 100,2),
+ averagereadspermol = avg_reads_per_mol,
+ sereadspermol = sem_reads_per_mol,
+ averagemolcov = avg_mol_cov,
+ semolcov = sem_mol_cov,
+ averagemolcovbp = avg_mol_cov_bp,
+ semolcovbp = sem_mol_cov_bp,
+ N50 = n50,
+ N75 = n75,
+ N90 = n90
+ )
+ #datarow[is.na(datarow)] <- 0
+ #datarow[is.nan(datarow)] <- 0
+ return(datarow)
}
+
```
```{r setup_df}
infiles <- list.files(params$indir, "bxstats.gz", full.names = TRUE)
aggregate_df <- Reduce(rbind, Map(process_input, infiles))
+aggregate_df[is.na(aggregate_df)] <- 0
if(nrow(aggregate_df) == 0){
print("All input files were empty")
diff --git a/harpy/reports/align_stats.qmd b/harpy/reports/align_stats.qmd
index ef52c05a..137f4a10 100644
--- a/harpy/reports/align_stats.qmd
+++ b/harpy/reports/align_stats.qmd
@@ -26,9 +26,6 @@ using("dplyr","highcharter","DT","BioCircos")
```
```{r import_file, results = F}
-#infile <- "~/test.gz"
-#samplename <- "test sample"
-#sm_moldist <- 50000
infile <- params$bxstats
samplename <- params$sample
sm_moldist <- params$mol_dist
diff --git a/harpy/reports/harpy.scss b/harpy/reports/harpy.scss
new file mode 100644
index 00000000..4399fe28
--- /dev/null
+++ b/harpy/reports/harpy.scss
@@ -0,0 +1,17 @@
+/*-- scss:defaults --*/
+$navbar-bg: #f4f4f4;
+$navbar-fg: #3d3d3d;
+
+// Fonts
+
+$font-family-sans-serif: Roboto, -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Arial, sans-serif !default;
+$font-size-root: 1rem !default;
+$font-weight-base: 400 !default;
+
+/*-- scss:rules --*/
+// Variables
+
+$web-font-path: "https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" !default;
+@if $web-font-path {
+ @import url($web-font-path);
+}
\ No newline at end of file
diff --git a/harpy/scripts/demultiplex_gen1.py b/harpy/scripts/demultiplex_gen1.py
index a644041c..0d63d25d 100755
--- a/harpy/scripts/demultiplex_gen1.py
+++ b/harpy/scripts/demultiplex_gen1.py
@@ -109,7 +109,6 @@ def get_read_codes(index_read, left_segment, right_segment):
R1_output = {sample: open(f"{outdir}/{sample}.{part}.R1.fq", 'w') for sample in samples}
R2_output = {sample: open(f"{outdir}/{sample}.{part}.R2.fq", 'w') for sample in samples}
segments = {'A':'', 'B':'', 'C':'', 'D':''}
-unclear_read_map={}
clear_read_map={}
with (
pysam.FastxFile(r1) as R1,
@@ -136,22 +135,18 @@ def get_read_codes(index_read, left_segment, right_segment):
R2_output[sample_name].write(f"{r2_rec}\n")
if "unclear" in statuses:
- if BX_code in unclear_read_map:
- unclear_read_map[BX_code] += 1
+ continue
+ if "corrected" in statuses:
+ if BX_code in clear_read_map:
+ clear_read_map[BX_code][1] += 1
else:
- unclear_read_map[BX_code] = 1
- else:
- if "corrected" in statuses:
+ clear_read_map[BX_code] = [0,1]
+ else:
+ if all(status == "found" for status in statuses):
if BX_code in clear_read_map:
- clear_read_map[BX_code][1] += 1
+ clear_read_map[BX_code][0] += 1
else:
- clear_read_map[BX_code] = [0,1]
- else:
- if all(status == "found" for status in statuses):
- if BX_code in clear_read_map:
- clear_read_map[BX_code][0] += 1
- else:
- clear_read_map[BX_code] = [1,0]
+ clear_read_map[BX_code] = [1,0]
for sample_name in samples:
R1_output[sample_name].close()
@@ -160,6 +155,4 @@ def get_read_codes(index_read, left_segment, right_segment):
BC_log.write("Barcode\tTotal_Reads\tCorrect_Reads\tCorrected_Reads\n")
for code in clear_read_map:
BC_log.write(f"{code}\t{sum(clear_read_map[code])}\t{clear_read_map[code][0]}\t{clear_read_map[code][1]}\n")
- for code in unclear_read_map:
- BC_log.write(f"{code}\t{unclear_read_map[code]}\t0\t0\n")
f.close()
\ No newline at end of file