Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tweaks to the alignstats reports and lumen theme #203

Merged
merged 5 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions harpy/reports/_quarto.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
brand:
logo:
medium: https://raw.githubusercontent.com/pdimens/harpy/docs/static/logo_report.png
logo:
medium: https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg
execute:
echo: false
warning: false
embed-resources: true
format:
format:
dashboard:
theme: materia
theme:
- lumen
- harpy.scss
scrolling: true
expandable: false
logo: https://raw.githubusercontent.com/pdimens/harpy/docs/static/logo_report.png
logo: https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg
nav-buttons:
- icon: book-half
href: https://pdimens.github.io/harpy
- icon: bug
href: https://github.com/pdimens/harpy/issues/new/choose
- icon: code-slash
href: https://github.com/pdimens/harpy
href: https://github.com/pdimens/harpy
include-in-header:
text: |
<link rel="shortcut icon" href="https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg" />
<link rel="icon" type="image/x-icon" href="https://raw.githubusercontent.com/pdimens/harpy/docs/static/favicon_report.svg">
115 changes: 46 additions & 69 deletions harpy/reports/align_bxstats.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -32,81 +32,58 @@ std_error <- function(x){
process_input <- function(infile){
samplename <- gsub(".bxstats.gz", "", basename(infile))
tb <- read.table(infile, header = T, sep = "\t") %>% select(-4, -5)
if(nrow(tb) == 0){
return(
data.frame(
sample = samplename,
totalreads = 0,
totaluniquemol = 0,
singletons = 0,
nonsingletons = 0,
totaluniquebx = 0,
molecule2bxratio = 0,
totalvalidreads = 0,
totalinvalidreads = 0,
totalvalidpercent = 0,
averagereadspermol = 0,
sereadspermol = 0,
averagemolcov = 0,
semolcov = 0,
averagemolcovbp = 0,
semolcovbp = 0,
N50 = 0,
N75 = 0,
N90 = 0
)
)
} else {
tb$valid <- tb$molecule
tb[tb$valid != "invalidBX", "valid"] <- "validBX"
tb$valid <- gsub("BX", " BX", tb$valid)
# isolate non-singletons b/c molecules with 1 read pair aren't linked reads
multiplex_df <- filter(tb, valid == "valid BX", reads >= 2)
singletons <- sum(tb$reads < 2 & tb$valid == "valid BX")
tot_uniq_bx <- read.table(infile, header = F, sep = "\n", as.is = T, skip = nrow(tb) + 1, comment.char = "+")
tot_uniq_bx <- gsub("#total unique barcodes: ", "", tot_uniq_bx$V1[1]) |> as.integer()
tot_mol <- sum(tb$valid == "valid BX")
tot_valid_reads <- sum(tb[tb$valid == "valid BX", "reads"])
avg_reads_per_mol <- round(mean(multiplex_df$reads),1)
sem_reads_per_mol <- round(std_error(multiplex_df$reads), 2)
tot_invalid_reads <- sum(tb[tb$valid == "invalid BX", "reads"])
avg_mol_cov <- round(mean(multiplex_df$coverage_inserts), 2)
sem_mol_cov <- round(std_error(multiplex_df$coverage_inserts), 4)
avg_mol_cov_bp <- round(mean(multiplex_df$coverage_bp), 2)
sem_mol_cov_bp<- round(std_error(multiplex_df$coverage_bp), 4)
n50 <- NX(multiplex_df$length_inferred, 50)
n75 <- NX(multiplex_df$length_inferred, 75)
n90 <- NX(multiplex_df$length_inferred, 90)
return(
data.frame(
sample = samplename,
totalreads = tot_valid_reads + tot_invalid_reads,
totaluniquemol = tot_mol,
singletons = singletons,
nonsingletons = nrow(multiplex_df),
totaluniquebx = tot_uniq_bx,
molecule2bxratio = round(tot_mol / tot_uniq_bx,2),
totalvalidreads = tot_valid_reads,
totalinvalidreads = tot_invalid_reads,
totalvalidpercent = round(tot_valid_reads/(tot_valid_reads + tot_invalid_reads) * 100,2),
averagereadspermol = avg_reads_per_mol,
sereadspermol = sem_reads_per_mol,
averagemolcov = avg_mol_cov,
semolcov = sem_mol_cov,
averagemolcovbp = avg_mol_cov_bp,
semolcovbp = sem_mol_cov_bp,
N50 = n50,
N75 = n75,
N90 = n90
)
)
}
tb$valid <- tb$molecule
tb[tb$valid != "invalidBX", "valid"] <- "validBX"
tb$valid <- gsub("BX", " BX", tb$valid)
# isolate non-singletons b/c molecules with 1 read pair aren't linked reads
multiplex_df <- filter(tb, valid == "valid BX", reads >= 2)
singletons <- sum(tb$reads < 2 & tb$valid == "valid BX")
tot_uniq_bx <- read.table(infile, header = F, sep = "\n", as.is = T, skip = nrow(tb) + 1, comment.char = "+")
tot_uniq_bx <- gsub("#total unique barcodes: ", "", tot_uniq_bx$V1[1]) |> as.integer()
tot_mol <- sum(tb$valid == "valid BX")
tot_valid_reads <- sum(tb[tb$valid == "valid BX", "reads"])
avg_reads_per_mol <- round(mean(multiplex_df$reads),1)
sem_reads_per_mol <- round(std_error(multiplex_df$reads), 2)
tot_invalid_reads <- sum(tb[tb$valid == "invalid BX", "reads"])
avg_mol_cov <- round(mean(multiplex_df$coverage_inserts), 2)
sem_mol_cov <- round(std_error(multiplex_df$coverage_inserts), 4)
avg_mol_cov_bp <- round(mean(multiplex_df$coverage_bp), 2)
sem_mol_cov_bp<- round(std_error(multiplex_df$coverage_bp), 4)
n50 <- NX(multiplex_df$length_inferred, 50)
n75 <- NX(multiplex_df$length_inferred, 75)
n90 <- NX(multiplex_df$length_inferred, 90)
datarow <- data.frame(
sample = samplename,
totalreads = tot_valid_reads + tot_invalid_reads,
totaluniquemol = tot_mol,
singletons = singletons,
nonsingletons = nrow(multiplex_df),
totaluniquebx = tot_uniq_bx,
molecule2bxratio = round(tot_mol / tot_uniq_bx,2),
totalvalidreads = tot_valid_reads,
totalinvalidreads = tot_invalid_reads,
totalvalidpercent = round(tot_valid_reads/(tot_valid_reads + tot_invalid_reads) * 100,2),
averagereadspermol = avg_reads_per_mol,
sereadspermol = sem_reads_per_mol,
averagemolcov = avg_mol_cov,
semolcov = sem_mol_cov,
averagemolcovbp = avg_mol_cov_bp,
semolcovbp = sem_mol_cov_bp,
N50 = n50,
N75 = n75,
N90 = n90
)
#datarow[is.na(datarow)] <- 0
#datarow[is.nan(datarow)] <- 0
return(datarow)
}

```

```{r setup_df}
infiles <- list.files(params$indir, "bxstats.gz", full.names = TRUE)
aggregate_df <- Reduce(rbind, Map(process_input, infiles))
aggregate_df[is.na(aggregate_df)] <- 0

if(nrow(aggregate_df) == 0){
print("All input files were empty")
Expand Down
3 changes: 0 additions & 3 deletions harpy/reports/align_stats.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ using("dplyr","highcharter","DT","BioCircos")
```

```{r import_file, results = F}
#infile <- "~/test.gz"
#samplename <- "test sample"
#sm_moldist <- 50000
infile <- params$bxstats
samplename <- params$sample
sm_moldist <- params$mol_dist
Expand Down
17 changes: 17 additions & 0 deletions harpy/reports/harpy.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*-- scss:defaults --*/
$navbar-bg: #f4f4f4;
$navbar-fg: #3d3d3d;

// Fonts

$font-family-sans-serif: Roboto, -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Arial, sans-serif !default;
$font-size-root: 1rem !default;
$font-weight-base: 400 !default;

/*-- scss:rules --*/
// Variables

$web-font-path: "https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" !default;
@if $web-font-path {
@import url($web-font-path);
}
25 changes: 9 additions & 16 deletions harpy/scripts/demultiplex_gen1.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def get_read_codes(index_read, left_segment, right_segment):
R1_output = {sample: open(f"{outdir}/{sample}.{part}.R1.fq", 'w') for sample in samples}
R2_output = {sample: open(f"{outdir}/{sample}.{part}.R2.fq", 'w') for sample in samples}
segments = {'A':'', 'B':'', 'C':'', 'D':''}
unclear_read_map={}
clear_read_map={}
with (
pysam.FastxFile(r1) as R1,
Expand All @@ -136,22 +135,18 @@ def get_read_codes(index_read, left_segment, right_segment):
R2_output[sample_name].write(f"{r2_rec}\n")

if "unclear" in statuses:
if BX_code in unclear_read_map:
unclear_read_map[BX_code] += 1
continue
if "corrected" in statuses:
if BX_code in clear_read_map:
clear_read_map[BX_code][1] += 1
else:
unclear_read_map[BX_code] = 1
else:
if "corrected" in statuses:
clear_read_map[BX_code] = [0,1]
else:
if all(status == "found" for status in statuses):
if BX_code in clear_read_map:
clear_read_map[BX_code][1] += 1
clear_read_map[BX_code][0] += 1
else:
clear_read_map[BX_code] = [0,1]
else:
if all(status == "found" for status in statuses):
if BX_code in clear_read_map:
clear_read_map[BX_code][0] += 1
else:
clear_read_map[BX_code] = [1,0]
clear_read_map[BX_code] = [1,0]

for sample_name in samples:
R1_output[sample_name].close()
Expand All @@ -160,6 +155,4 @@ def get_read_codes(index_read, left_segment, right_segment):
BC_log.write("Barcode\tTotal_Reads\tCorrect_Reads\tCorrected_Reads\n")
for code in clear_read_map:
BC_log.write(f"{code}\t{sum(clear_read_map[code])}\t{clear_read_map[code][0]}\t{clear_read_map[code][1]}\n")
for code in unclear_read_map:
BC_log.write(f"{code}\t{unclear_read_map[code]}\t0\t0\n")
f.close()
Loading