Skip to content

Commit

Permalink
BRBseqTools v1.1
Browse files Browse the repository at this point in the history
Novel "AnnotateBAM" tool + "-s" option
  • Loading branch information
Gardeux Vincent Roland Julien committed Feb 22, 2018
1 parent 91585eb commit 1977e80
Show file tree
Hide file tree
Showing 13 changed files with 826 additions and 162 deletions.
Binary file added releases/BRBseqTools.1.1.jar
Binary file not shown.
170 changes: 170 additions & 0 deletions src/AnnotateBAMManager.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import java.io.File;
import java.util.HashSet;
import java.util.TreeSet;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import model.ComparableBAMRecord;
import model.Parameters;
import model.Read;
import model.TmpFileManager;
import tools.Utils;

public class AnnotateBAMManager
{
public static SAMFileHeader header = null;

/**
* Using Picard to read the reads from the BAM file created by the alignment tool
* @throws Exception Yes I know...
*/
public static void readR2BAM() throws Exception
{
Long start = System.currentTimeMillis();
System.out.println("\nReading the reads from the BAM file...");
SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
SamReader samReader = samReaderFactory.open(Parameters.inputBAMFileR2);
header = samReader.getFileHeader();
SAMRecordIterator it = samReader.iterator();
HashSet<String> uniqueGenes = new HashSet<String>();
// Start reading the BAM file
TreeSet<ComparableBAMRecord> lines = new TreeSet<ComparableBAMRecord>();
Parameters.nbReads = 0;
Parameters.unmapped = 0;
Parameters.notUnique = 0;
Parameters.ambiguous = 0;
Parameters.mapped = 0;
Parameters.noFeature = 0;
Parameters.toolowAqual = 0;
Parameters.nbTmpBAM = 1;
while(it.hasNext())
{
SAMRecord samRecord = it.next();
ComparableBAMRecord bamRecord = new ComparableBAMRecord(samRecord);
Parameters.nbReads++;

if(Parameters.inputGTFFile != null)
{
if(samRecord.getSupplementaryAlignmentFlag())
{
Parameters.notUnique++;
bamRecord.record.setAttribute("CO", "__alignment_not_unique");
lines.add(bamRecord);
}
else if(samRecord.getReadUnmappedFlag())
{
Parameters.unmapped++;
bamRecord.record.setAttribute("CO", "__not_aligned");
}
else if(samRecord.getMappingQuality() < 10)
{
Parameters.toolowAqual++;
bamRecord.record.setAttribute("CO", "__too_low_aQual");
}
else
{
HashSet<String> overlappingGenes = Utils.getOverlappingGenes(samRecord.getReferenceName(), samRecord.getAlignmentStart(), samRecord.getAlignmentEnd(), samRecord.getCigar(), samRecord.getReadNegativeStrandFlag());
if(overlappingGenes.size() == 0)
{
Parameters.noFeature++;
bamRecord.record.setAttribute("CO", "__no_feature");
}
else if(overlappingGenes.size() == 1)
{
Parameters.mapped++;
String gene = overlappingGenes.iterator().next();
uniqueGenes.add(gene);
bamRecord.record.setAttribute("CO", gene);
}
else
{
Parameters.ambiguous++;
bamRecord.record.setAttribute("CO", "__ambiguous");
}
}
}
lines.add(bamRecord);

if(Parameters.nbReads %Parameters.chunkSize == 0)
{
TmpFileManager.createTmpFileB(header, Parameters.tmpFolder + "sam."+Parameters.nbTmpBAM+".tmp", lines);
Parameters.nbTmpBAM++;
lines = new TreeSet<ComparableBAMRecord>();
System.out.println(Parameters.nbReads + " reads were processed from BAM file [" + Utils.toReadableTime(System.currentTimeMillis() - start) + "]");
}
}
samReader.close();

TmpFileManager.createTmpFileB(header, Parameters.tmpFolder + "sam."+Parameters.nbTmpBAM+".tmp", lines);
System.out.println(Parameters.nbReads + " reads were processed from BAM file [" + Utils.toReadableTime(System.currentTimeMillis() - start) + "]");
System.out.println("Created " + Parameters.nbTmpBAM + " temporary BAM files");

if(Parameters.inputGTFFile != null)
{
System.out.println(uniqueGenes.size() + " Unique genes were detected (at least one read).");
System.out.println(Parameters.mapped + " 'Mapped' reads (" + Parameters.pcFormatter.format(((float)Parameters.mapped / Parameters.nbReads) * 100) + "%)");
System.out.println(Parameters.ambiguous + " 'Ambiguous' reads (" + Parameters.pcFormatter.format(((float)Parameters.ambiguous / Parameters.nbReads) * 100) + "%)");
System.out.println(Parameters.noFeature + " 'No Features' reads (" + Parameters.pcFormatter.format(((float)Parameters.noFeature / Parameters.nbReads) * 100) + "%)");
System.out.println(Parameters.notUnique + " 'Not Unique' reads (" + Parameters.pcFormatter.format(((float)Parameters.notUnique / Parameters.nbReads) * 100) + "%)");
System.out.println(Parameters.unmapped + " 'Not Aligned' reads (" + Parameters.pcFormatter.format(((float)Parameters.unmapped / Parameters.nbReads) * 100) + "%)");
System.out.println(Parameters.toolowAqual + " 'Too Low aQual' reads (" + Parameters.pcFormatter.format(((float)Parameters.toolowAqual / Parameters.nbReads) * 100) + "%)");
}
}

/**
* Read temporary sorted BAM/fastq files and mark/remove duplicates
* @throws Exception Yes I know...
*/
public static void annotate() throws Exception
{
Long start = System.currentTimeMillis();
System.out.println("\nStarting annotation of BAM...");
TmpFileManager fm_fastq = new TmpFileManager("fastq", Parameters.nbTmpFastq);
TmpFileManager fm_bam = new TmpFileManager("sam", Parameters.nbTmpBAM);

// Read Tmp files and generate the new BAM on the go
int nbReadsWritten = 0;
int lastWritten = nbReadsWritten; // For not writing twice the same line [Mehhhh]
SAMFileWriterFactory samWriterFactory = new SAMFileWriterFactory();
SAMFileWriter samWriter = samWriterFactory.makeBAMWriter(header, false, new File(Parameters.outputFolder + Parameters.inputBAMFileR2.getName() + ".annotated.bam"));
Read r_fq = fm_fastq.getFirstRead();
while(r_fq != null)
{
Read r_bam = fm_bam.getReadS(r_fq.name);
if(r_bam != null)
{
nbReadsWritten++;
if(Parameters.l1 != -1)
{
r_bam.samRecord.setAttribute("BC", r_fq.barcode);
r_bam.samRecord.setAttribute("QT", r_fq.qualityBC);
}
if(Parameters.UMILength != -1)
{
r_bam.samRecord.setAttribute("BX", r_fq.UMI);
r_bam.samRecord.setAttribute("QX", r_fq.qualityUMI);
}
samWriter.addAlignment(r_bam.samRecord);
}
r_fq = fm_fastq.getFirstRead();
if(nbReadsWritten %Parameters.chunkSize == 0 && lastWritten != nbReadsWritten)
{
lastWritten = nbReadsWritten;
System.out.println(nbReadsWritten + " reads were written in annotated BAM file [" + Utils.toReadableTime(System.currentTimeMillis() - start) + "]");
}
}
samWriter.close();

// Clean tmp files
fm_bam.closeSAMReaders();
TmpFileManager.delete("fastq", Parameters.nbTmpFastq);
TmpFileManager.delete("sam", Parameters.nbTmpBAM);
System.out.println(nbReadsWritten + " reads were written in annotated BAM file [" + Utils.toReadableTime(System.currentTimeMillis() - start) + "]");
}
}
28 changes: 23 additions & 5 deletions src/BRBseqTools.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public static void main(String[] args) throws Exception
switch(args[0])
{
case "CreateDGEMatrix":
System.out.println("BRBSeqTools [CreateDGEMatrix]\n");
System.out.println("BRBSeqTools 1.1 [CreateDGEMatrix]\n");
Parameters.loadDGE(argsParsed);
Parameters.barcodes = Utils.readConfig();
Parameters.BC1 = new ArrayList<String>();
Expand All @@ -37,12 +37,30 @@ public static void main(String[] args) throws Exception
Parameters.barcodeIndex.put("Unknown", Parameters.barcodeIndex.size());
Utils.patterning();
GTF.readGTF();
DGEMatrixManager.readR1Fastq();
Utils.readR1Fastq(false);
DGEMatrixManager.readR2BAM();
DGEMatrixManager.createOutputDGE();
break;
case "AnnotateBAM":
System.out.println("BRBSeqTools 1.1 [AnnotateBAM]\n");
Parameters.loadAnnoBAM(argsParsed);
if(Parameters.inputConfigFile != null)
{
Parameters.barcodes = Utils.readConfig();
Parameters.BC1 = new ArrayList<String>();
Parameters.barcodeIndex = new HashMap<String, Integer>();
for(String B1:Parameters.barcodes) Parameters.BC1.add(B1);
for(int i = 0; i < Parameters.BC1.size(); i++) Parameters.barcodeIndex.put(Parameters.BC1.get(i), i);
Parameters.barcodeIndex.put("Unknown", Parameters.barcodeIndex.size());
}
Utils.patterning();
if(Parameters.inputGTFFile != null) GTF.readGTF();
Utils.readR1Fastq(true);
AnnotateBAMManager.readR2BAM();
AnnotateBAMManager.annotate();
break;
case "Demultiplex":
System.out.println("BRBSeqTools [Demultiplex]\n");
System.out.println("BRBSeqTools 1.1 [Demultiplex]\n");
Parameters.loadDemultiplex(argsParsed);
Parameters.barcodes = Utils.readConfig();
Parameters.BC1 = new ArrayList<String>();
Expand All @@ -54,7 +72,7 @@ public static void main(String[] args) throws Exception
DemultiplexingManager.demultiplex();
break;
case "Trim":
System.out.println("BRBSeqTools [Trim]\n");
System.out.println("BRBSeqTools 1.1 [Trim]\n");
Parameters.loadTrim(argsParsed);
BufferedWriter bw_excel = new BufferedWriter(new FileWriter(Parameters.outputFolder + "brbseq.trimming.output.txt"));
bw_excel.write("sample\tnbReads\tnbRemaininingReads\tnbContaminated\tnbPolyATrimmed\tnbRemoved\tnbPolyABefore\n");
Expand All @@ -70,7 +88,7 @@ public static void main(String[] args) throws Exception
bw_excel.close();
break;
default:
System.err.println("This method is not implemented. Please use one of the following: [CreateDGEMatrix, Demultiplex, Trim].");
System.err.println("This method is not implemented. Please use one of the following: [CreateDGEMatrix, AnnotateBAM, Demultiplex, Trim].");
Parameters.printHelp();
}
}
Expand Down
Loading

0 comments on commit 1977e80

Please sign in to comment.