Skip to content

Commit

Permalink
update atac enrich
Browse files Browse the repository at this point in the history
  • Loading branch information
lindenb committed Feb 1, 2024
1 parent f23a309 commit c70f651
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 17 deletions.
7 changes: 5 additions & 2 deletions docs/AtacSeqEnrichment.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,13 @@ java -jar dist/jvarkit.jar atacseqenrich -R ref.fa --gtf jeter.gtf bams.list > o

## Output

output is a multipart text file. Each part can be isolated using, for example
output is a multipart text file. Each part can be isolated using awk. For example the following
commands plot the normalized peaks using R

```
awk '($1=="RAW")' output.txt | cut -f 2-
$ awk '$1=="NORMALIZED"' output.txt | cut -f 2- > jeter.txt
$ awk '$1=="R_PLOT"' output.txt | cut -f 2- | sed 's/__INPUT__/jeter.txt/;s/__OUTPUT__/jeter.svg/' > jeter.R
$ R --vanilla < jeter.R
```


4 changes: 2 additions & 2 deletions docs/JvarkitCentral.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ JVARKIT
=======

Author : Pierre Lindenbaum Phd. Institut du Thorax. Nantes. France.
Version : 5f58f036
Compilation : 20240131195636
Version : f23a30912
Compilation : 20240201171004
Github : https://github.com/lindenb/jvarkit
Issues : https://github.com/lindenb/jvarkit/issues

Expand Down
4 changes: 2 additions & 2 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ JVARKIT
=======

Author : Pierre Lindenbaum Phd. Institut du Thorax. Nantes. France.
Version : 5f58f036
Compilation : 20240131195636
Version : f23a30912
Compilation : 20240201171004
Github : https://github.com/lindenb/jvarkit
Issues : https://github.com/lindenb/jvarkit/issues

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@ of this software and associated documentation files (the "Software"), to deal
import java.util.Objects;
import java.util.function.Supplier;

import htsjdk.samtools.util.RuntimeIOException;

/**
* A writing adding suffix of prefix after/before each carriage return.
*
*/
public class PrefixSuffixWriter extends Writer {
private Writer delegate;
private boolean at_start = true;
Expand Down Expand Up @@ -69,12 +74,29 @@ public PrefixSuffixWriter setSuffix(final String prefix) {
return setSuffix(()->prefix);
}

public final void println(final String s) throws IOException {
public PrefixSuffixWriter setNoPrefix() {
this.prefixSupplier = ()->null;
return this;
}
public PrefixSuffixWriter setNoSuffix() {
this.suffixSupplier = ()->null;
return this;
}

public final void println(final String s) {
print(s);
print("\n");
println();
}
public final void print(final String s) throws IOException {
write(s);
public final void print(final String s) {
try {
write(s);
}
catch(final IOException err) {
throw new RuntimeIOException(err);
}
}
public final void println() {
print("\n");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,13 @@ of this software and associated documentation files (the "Software"), to deal
## Output
output is a multipart text file. Each part can be isolated using, for example
output is a multipart text file. Each part can be isolated using awk. For example the following
commands plot the normalized peaks using R
```
awk '($1=="RAW")' output.txt | cut -f 2-
$ awk '$1=="NORMALIZED"' output.txt | cut -f 2- > jeter.txt
$ awk '$1=="R_PLOT"' output.txt | cut -f 2- | sed 's/__INPUT__/jeter.txt/;s/__OUTPUT__/jeter.svg/' > jeter.R
$ R --vanilla < jeter.R
```
END_DOC
Expand Down Expand Up @@ -428,6 +431,13 @@ else if(V < treshold_low) {
}

final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.referenceFile);
if(SequenceDictionaryUtils.isGRCh38(dict) && (treshold_low!=5 || treshold_high!=7)) {
LOG.warning("for hg38, recommandation for treshold are 5,7 (you: "+treshold_low+","+treshold_high+")");
}
else if(SequenceDictionaryUtils.isGRCh37(dict) && (treshold_low!=6 || treshold_high!=10)) {
LOG.warning("for hg19, recommandation for treshold are 6,10 (you: "+treshold_low+","+treshold_high+")");
}

final long raw_autosome_length = dict.getSequences().stream().
filter(SR->contigRegex.matcher(SR.getSequenceName()).matches()).
mapToLong(SR->SR.getSequenceLength()).
Expand Down Expand Up @@ -497,6 +507,8 @@ else if(V < treshold_low) {


try(PrefixSuffixWriter pw = new PrefixSuffixWriter(super.openPathOrStdoutAsPrintWriter(output))) {
pw.setNoPrefix();
pw.println("# This is the output of "+getProgramName());
pw.setPrefix("GENERAL\t");
pw.println("date\t"+StringUtils.now());
pw.println("version\t"+getVersion());
Expand All @@ -513,13 +525,17 @@ else if(V < treshold_low) {
pw.println("use_strand\t"+this.use_strand_info);
pw.println("use_transcript_type\t"+this.use_transcript_type);
pw.println("command\t"+this.getProgramCommandLine());


pw.setNoPrefix();
pw.println("# List of bams");
pw.setPrefix("FILE\t");
for(Path bamPath: bamPaths) {
pw.println(bamPath.toString());
}

//
pw.setNoPrefix();
pw.println("# Raw outputs");
pw.setPrefix("RAW\t");
pw.write("sample\ttotal_reads\ttotal_reads_in_tss\tmean_depth\tcount_tss_with_reads");
for(int i=0;i< this.extend_tss*2;i++) {
Expand Down Expand Up @@ -547,7 +563,8 @@ else if(V < treshold_low) {
}

// normalisation

pw.setNoPrefix();
pw.println("# normalized outputs");
pw.setPrefix("NORMALIZED\t");
final int winsize = Math.max(1, (this.extend_tss*2)/this.num_bins);
pw.write("sample");
Expand Down Expand Up @@ -577,8 +594,12 @@ else if(V < treshold_low) {
}

// R PLOT
pw.setNoPrefix();
pw.println("#\tR script used to plot the normalized results");
pw.println("#\t$ awk '$1==\"NORMALIZED\"' output.txt | cut -f 2- > jeter.txt");
pw.println("#\t$ awk '$1==\"R_PLOT\"' output.txt | cut -f 2- | sed 's/__INPUT__/jeter.txt/;s/__OUTPUT__/jeter.svg/' > jeter.R");
pw.println("#\t$ R --vanilla < jeter.R");
pw.setPrefix("R_PLOT\t");

pw.write("data <- read.table(\"__INPUT__\", header = TRUE, sep = \"\\t\")\n");
pw.write("data_values <- data[, -1]\n");
pw.write("sample_name <- data[, 1]\n");
Expand Down Expand Up @@ -622,7 +643,8 @@ else if(V < treshold_low) {
pw.write(score2status.apply(summary.max_ratio));
pw.print("\n");
}

pw.setNoPrefix();
pw.println("# work in progress do not use.");
pw.setPrefix("MULTIQC\t");
pw.println("plot_type: \"linegraph\"");
pw.println("description: \"multiqc output is not ready\"");
Expand All @@ -640,7 +662,10 @@ else if(V < treshold_low) {
pw.println(" \""+ (i-this.extend_tss)+"\": "+array2[i/winsize]/genome_mean_cov);
}
}



pw.setNoPrefix();
pw.println("# EOF");
pw.flush();
}

Expand Down

0 comments on commit c70f651

Please sign in to comment.