Skip to content

Commit

Permalink
tweaks to the alignstats reports and lumen theme (#203)
Browse files Browse the repository at this point in the history
* rm unclear

* deal with na and nan

* rm comments

* add lumen style back, with flair

* rm the is.nan() call b/c is.na() apparently treats NaN as NA. shrug
  • Loading branch information
pdimens authored Feb 13, 2025
1 parent 94b395e commit ae8b913
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 94 deletions.
18 changes: 12 additions & 6 deletions harpy/reports/_quarto.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
brand:
logo:
medium: https://raw.githubusercontent.com/pdimens/harpy/docs/static/logo_report.png
logo:
medium: https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg
execute:
echo: false
warning: false
embed-resources: true
format:
format:
dashboard:
theme: materia
theme:
- lumen
- harpy.scss
scrolling: true
expandable: false
logo: https://raw.githubusercontent.com/pdimens/harpy/docs/static/logo_report.png
logo: https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg
nav-buttons:
- icon: book-half
href: https://pdimens.github.io/harpy
- icon: bug
href: https://github.com/pdimens/harpy/issues/new/choose
- icon: code-slash
href: https://github.com/pdimens/harpy
href: https://github.com/pdimens/harpy
include-in-header:
text: |
<link rel="shortcut icon" href="https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg" />
<link rel="icon" type="image/x-icon" href="https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg">
115 changes: 46 additions & 69 deletions harpy/reports/align_bxstats.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -32,81 +32,58 @@ std_error <- function(x){
process_input <- function(infile){
samplename <- gsub(".bxstats.gz", "", basename(infile))
tb <- read.table(infile, header = T, sep = "\t") %>% select(-4, -5)
if(nrow(tb) == 0){
return(
data.frame(
sample = samplename,
totalreads = 0,
totaluniquemol = 0,
singletons = 0,
nonsingletons = 0,
totaluniquebx = 0,
molecule2bxratio = 0,
totalvalidreads = 0,
totalinvalidreads = 0,
totalvalidpercent = 0,
averagereadspermol = 0,
sereadspermol = 0,
averagemolcov = 0,
semolcov = 0,
averagemolcovbp = 0,
semolcovbp = 0,
N50 = 0,
N75 = 0,
N90 = 0
)
)
} else {
tb$valid <- tb$molecule
tb[tb$valid != "invalidBX", "valid"] <- "validBX"
tb$valid <- gsub("BX", " BX", tb$valid)
# isolate non-singletons b/c molecules with 1 read pair aren't linked reads
multiplex_df <- filter(tb, valid == "valid BX", reads >= 2)
singletons <- sum(tb$reads < 2 & tb$valid == "valid BX")
tot_uniq_bx <- read.table(infile, header = F, sep = "\n", as.is = T, skip = nrow(tb) + 1, comment.char = "+")
tot_uniq_bx <- gsub("#total unique barcodes: ", "", tot_uniq_bx$V1[1]) |> as.integer()
tot_mol <- sum(tb$valid == "valid BX")
tot_valid_reads <- sum(tb[tb$valid == "valid BX", "reads"])
avg_reads_per_mol <- round(mean(multiplex_df$reads),1)
sem_reads_per_mol <- round(std_error(multiplex_df$reads), 2)
tot_invalid_reads <- sum(tb[tb$valid == "invalid BX", "reads"])
avg_mol_cov <- round(mean(multiplex_df$coverage_inserts), 2)
sem_mol_cov <- round(std_error(multiplex_df$coverage_inserts), 4)
avg_mol_cov_bp <- round(mean(multiplex_df$coverage_bp), 2)
sem_mol_cov_bp<- round(std_error(multiplex_df$coverage_bp), 4)
n50 <- NX(multiplex_df$length_inferred, 50)
n75 <- NX(multiplex_df$length_inferred, 75)
n90 <- NX(multiplex_df$length_inferred, 90)
return(
data.frame(
sample = samplename,
totalreads = tot_valid_reads + tot_invalid_reads,
totaluniquemol = tot_mol,
singletons = singletons,
nonsingletons = nrow(multiplex_df),
totaluniquebx = tot_uniq_bx,
molecule2bxratio = round(tot_mol / tot_uniq_bx,2),
totalvalidreads = tot_valid_reads,
totalinvalidreads = tot_invalid_reads,
totalvalidpercent = round(tot_valid_reads/(tot_valid_reads + tot_invalid_reads) * 100,2),
averagereadspermol = avg_reads_per_mol,
sereadspermol = sem_reads_per_mol,
averagemolcov = avg_mol_cov,
semolcov = sem_mol_cov,
averagemolcovbp = avg_mol_cov_bp,
semolcovbp = sem_mol_cov_bp,
N50 = n50,
N75 = n75,
N90 = n90
)
)
}
tb$valid <- tb$molecule
tb[tb$valid != "invalidBX", "valid"] <- "validBX"
tb$valid <- gsub("BX", " BX", tb$valid)
# isolate non-singletons b/c molecules with 1 read pair aren't linked reads
multiplex_df <- filter(tb, valid == "valid BX", reads >= 2)
singletons <- sum(tb$reads < 2 & tb$valid == "valid BX")
tot_uniq_bx <- read.table(infile, header = F, sep = "\n", as.is = T, skip = nrow(tb) + 1, comment.char = "+")
tot_uniq_bx <- gsub("#total unique barcodes: ", "", tot_uniq_bx$V1[1]) |> as.integer()
tot_mol <- sum(tb$valid == "valid BX")
tot_valid_reads <- sum(tb[tb$valid == "valid BX", "reads"])
avg_reads_per_mol <- round(mean(multiplex_df$reads),1)
sem_reads_per_mol <- round(std_error(multiplex_df$reads), 2)
tot_invalid_reads <- sum(tb[tb$valid == "invalid BX", "reads"])
avg_mol_cov <- round(mean(multiplex_df$coverage_inserts), 2)
sem_mol_cov <- round(std_error(multiplex_df$coverage_inserts), 4)
avg_mol_cov_bp <- round(mean(multiplex_df$coverage_bp), 2)
sem_mol_cov_bp<- round(std_error(multiplex_df$coverage_bp), 4)
n50 <- NX(multiplex_df$length_inferred, 50)
n75 <- NX(multiplex_df$length_inferred, 75)
n90 <- NX(multiplex_df$length_inferred, 90)
datarow <- data.frame(
sample = samplename,
totalreads = tot_valid_reads + tot_invalid_reads,
totaluniquemol = tot_mol,
singletons = singletons,
nonsingletons = nrow(multiplex_df),
totaluniquebx = tot_uniq_bx,
molecule2bxratio = round(tot_mol / tot_uniq_bx,2),
totalvalidreads = tot_valid_reads,
totalinvalidreads = tot_invalid_reads,
totalvalidpercent = round(tot_valid_reads/(tot_valid_reads + tot_invalid_reads) * 100,2),
averagereadspermol = avg_reads_per_mol,
sereadspermol = sem_reads_per_mol,
averagemolcov = avg_mol_cov,
semolcov = sem_mol_cov,
averagemolcovbp = avg_mol_cov_bp,
semolcovbp = sem_mol_cov_bp,
N50 = n50,
N75 = n75,
N90 = n90
)
#datarow[is.na(datarow)] <- 0
#datarow[is.nan(datarow)] <- 0
return(datarow)
}
```

```{r setup_df}
infiles <- list.files(params$indir, "bxstats.gz", full.names = TRUE)
aggregate_df <- Reduce(rbind, Map(process_input, infiles))
aggregate_df[is.na(aggregate_df)] <- 0
if(nrow(aggregate_df) == 0){
print("All input files were empty")
Expand Down
3 changes: 0 additions & 3 deletions harpy/reports/align_stats.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ using("dplyr","highcharter","DT","BioCircos")
```

```{r import_file, results = F}
#infile <- "~/test.gz"
#samplename <- "test sample"
#sm_moldist <- 50000
infile <- params$bxstats
samplename <- params$sample
sm_moldist <- params$mol_dist
Expand Down
17 changes: 17 additions & 0 deletions harpy/reports/harpy.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*-- scss:defaults --*/
$navbar-bg: #f4f4f4;
$navbar-fg: #3d3d3d;

// Fonts

$font-family-sans-serif: Roboto, -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Arial, sans-serif !default;
$font-size-root: 1rem !default;
$font-weight-base: 400 !default;

/*-- scss:rules --*/
// Variables

$web-font-path: "https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" !default;
@if $web-font-path {
@import url($web-font-path);
}
25 changes: 9 additions & 16 deletions harpy/scripts/demultiplex_gen1.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def get_read_codes(index_read, left_segment, right_segment):
R1_output = {sample: open(f"{outdir}/{sample}.{part}.R1.fq", 'w') for sample in samples}
R2_output = {sample: open(f"{outdir}/{sample}.{part}.R2.fq", 'w') for sample in samples}
segments = {'A':'', 'B':'', 'C':'', 'D':''}
unclear_read_map={}
clear_read_map={}
with (
pysam.FastxFile(r1) as R1,
Expand All @@ -136,22 +135,18 @@ def get_read_codes(index_read, left_segment, right_segment):
R2_output[sample_name].write(f"{r2_rec}\n")

if "unclear" in statuses:
if BX_code in unclear_read_map:
unclear_read_map[BX_code] += 1
continue
if "corrected" in statuses:
if BX_code in clear_read_map:
clear_read_map[BX_code][1] += 1
else:
unclear_read_map[BX_code] = 1
else:
if "corrected" in statuses:
clear_read_map[BX_code] = [0,1]
else:
if all(status == "found" for status in statuses):
if BX_code in clear_read_map:
clear_read_map[BX_code][1] += 1
clear_read_map[BX_code][0] += 1
else:
clear_read_map[BX_code] = [0,1]
else:
if all(status == "found" for status in statuses):
if BX_code in clear_read_map:
clear_read_map[BX_code][0] += 1
else:
clear_read_map[BX_code] = [1,0]
clear_read_map[BX_code] = [1,0]

for sample_name in samples:
R1_output[sample_name].close()
Expand All @@ -160,6 +155,4 @@ def get_read_codes(index_read, left_segment, right_segment):
BC_log.write("Barcode\tTotal_Reads\tCorrect_Reads\tCorrected_Reads\n")
for code in clear_read_map:
BC_log.write(f"{code}\t{sum(clear_read_map[code])}\t{clear_read_map[code][0]}\t{clear_read_map[code][1]}\n")
for code in unclear_read_map:
BC_log.write(f"{code}\t{unclear_read_map[code]}\t0\t0\n")
f.close()

0 comments on commit ae8b913

Please sign in to comment.