Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/origin/master' into NGSDAnnotat…
Browse files Browse the repository at this point in the history
…eSV_pathogenic
  • Loading branch information
Kilian Ilius committed Dec 17, 2024
2 parents f20b7e1 + bba8d44 commit b34a458
Show file tree
Hide file tree
Showing 25 changed files with 558 additions and 100 deletions.
76 changes: 47 additions & 29 deletions src/BamToFastq/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@ class ConcreteTool

//optional
addOutfile("out2", "Read 2 output FASTQ.GZ file (required for pair-end samples).", true);
addEnum("mode", "Determine if BAM/CRAM contains paired-end or single-end reads (default: paired-end)", true, QStringList() << "paired-end" << "single-end", "paired-end");
addString("reg", "Export only reads in the given region. Format: chr:start-end.", true);
addFlag("remove_duplicates", "Does not export duplicate reads into the FASTQ file.");
addFlag("remove_duplicates", "Does not export reads marked as duplicates in SAM flags into the FASTQ file.");
addInt("compression_level", "Output FASTQ compression level from 1 (fastest) to 9 (best compression).", true, 1);
addInt("write_buffer_size", "Output write buffer size (number of FASTQ entry pairs).", true, 100);
addInfile("ref", "Reference genome for CRAM support (mandatory if CRAM is used).", true);
addInt("extend", "Extend all reads to the given length. Base 'N' and base qualiy '2' are used for extension.", true, 0);
addFlag("fix", "Keep only one read pair if several have the same name (note: needs much memory as read names are kept in memory).");

changeLog(2024, 12, 13, "Added 'fix' parameter.");
changeLog(2024, 12, 9, "Added 'extend' parameter.");
changeLog(2020, 11, 27, "Added CRAM support.");
changeLog(2020, 5, 29, "Massive speed-up by writing in background. Added 'compression_level' parameter.");
Expand Down Expand Up @@ -62,21 +63,32 @@ class ConcreteTool
}
}

struct ReadWritten
{
bool read1 = false;
bool read2 = false;

bool done(bool first_read) const
{
return first_read ? read1 : read2;
}
void setDone(bool first_read)
{
if(first_read) read1 = true;
else read2 = true;
}
};

virtual void main()
{
//init
QTime timer;
timer.start();
QTextStream out(stdout);
BamReader reader(getInfile("in"), getInfile("ref"));

QString out1 = getOutfile("out1");
QString out2 = getOutfile("out2");
QString mode = getEnum("mode");

if (mode == "paired-end" && out2.trimmed().isEmpty()) THROW(ArgumentException, "'out2' has to be provided for paired-end reads!");
if (mode == "single-end" && out2.trimmed() != "") THROW(ArgumentException, "'out2' cannot be set for single-end reads!");

bool fix = getFlag("fix");
QString reg = getString("reg");
if (reg!="")
{
Expand All @@ -98,33 +110,29 @@ class ConcreteTool
QThreadPool analysis_pool;
analysis_pool.setMaxThreadCount(1);
OutputWorker* output_worker;
if (mode == "paired-end")
const bool is_pe = !out2.trimmed().isEmpty();
if (is_pe)
{
output_worker = new OutputWorker(pair_pool, out1, out2, compression_level);
}
else if (mode == "single-end")
else //single-end
{
output_worker = new OutputWorker(pair_pool, out1, compression_level);
}
else
{
THROW(ArgumentException, "Invalid mode '" + mode + "' provided!")
}
analysis_pool.start(output_worker);

long long c_unpaired = 0;
long long c_paired = 0;
long long c_duplicates = 0;
long long c_single_end = 0;
long long c_fixed = 0;
int max_cached = 0;

//iterate through reads
QHash<QByteArray, QSharedPointer<BamAlignment>> al_cache;
QHash<QByteArray, ReadWritten> reads_processed;
QSharedPointer<BamAlignment> al = QSharedPointer<BamAlignment>(new BamAlignment());
while (true)
while (reader.getNextAlignment(*al))
{
bool ok = reader.getNextAlignment(*al);
if (!ok) break;
//out << al.name() << " PAIRED=" << al.isPaired() << " SEC=" << al.isSecondaryAlignment() << " PROP=" << al.isProperPair() << endl;

//skip secondary alinments
Expand All @@ -137,7 +145,19 @@ class ConcreteTool
continue;
}

if(mode == "paired-end")
//remove reads with same name (but handle read pairing properly)
if (fix)
{
ReadWritten& written = reads_processed[al->name()];
if (written.done(al->isRead1()))
{
++c_fixed;
continue;
}
written.setDone(al->isRead1());
}

if(is_pe)
{
//skip unpaired
if(!al->isPaired())
Expand Down Expand Up @@ -167,30 +187,25 @@ class ConcreteTool
pair.status = ReadPair::TO_BE_WRITTEN;
++c_paired;
}
//cache read for later retrieval
else
else //cache read for later retrieval
{
al_cache.insert(name, al);
al = QSharedPointer<BamAlignment>(new BamAlignment());
}

max_cached = std::max(max_cached, al_cache.size());
}
else if (mode == "single-end")
else //single-end
{
ReadPair& pair = pair_pool.nextFreePair();
alignmentToFastq(al, pair.e1, extend);
pair.status = ReadPair::TO_BE_WRITTEN;
++c_single_end;
}
else
{
THROW(ArgumentException, "Invalid mode '" + mode + "' provided!")
}
}

//write debug output
if(mode == "paired-end")
if(is_pe)
{
out << "Pair reads (written) : " << c_paired << endl;
out << "Unpaired reads (skipped) : " << c_unpaired << endl;
Expand All @@ -200,10 +215,13 @@ class ConcreteTool
{
out << "Reads (written) : " << c_single_end << endl;
}

if (remove_duplicates)
{
out << "Duplicate reads (skipped) : " << c_duplicates << endl;
out << "Duplicate tagged reads (skipped): " << c_duplicates << endl;
}
if (fix)
{
out << "Duplicate name reads (skipped) : " << c_fixed << endl;
}
out << endl;
out << "Maximum cached reads : " << max_cached << endl;
Expand Down
8 changes: 4 additions & 4 deletions src/ExportcBioportal/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ class ConcreteTool
int idx_icd10_catalog = samples.columnIndex("icd10_catalog");
int idx_oncotree_code = samples.columnIndex("oncotree_code");

for (int i=0; i< samples.rowCount(); i++)
for (int i=0; i< samples.count(); i++)
{
QStringList row = samples.row(i);
const QStringList& row = samples[i];
QString sample_id = db.sampleId(row[idx_tumor_name], true);

SampleMTBmetadata mtb_data;
Expand Down Expand Up @@ -177,9 +177,9 @@ class ConcreteTool
int idx_datatype = getIndex(headers, "datatype");
int idx_prio = getIndex(headers, "priority");

for (int i=0; i<attr_data.rowCount(); i++)
for (int i=0; i<attr_data.count(); i++)
{
QStringList row = attr_data.row(i);
const QStringList& row = attr_data[i];

SampleAttribute attr;
attr.name = row[idx_attr_name];
Expand Down
6 changes: 3 additions & 3 deletions src/GSvar/CohortExpressionDataWidget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ void CohortExpressionDataWidget::loadExpressionData()
}

//set dimensions
ui_->tw_cohort_data->setRowCount(cohort_expression_data.rowCount());
ui_->tw_cohort_data->setRowCount(cohort_expression_data.count());
ui_->tw_cohort_data->setColumnCount(cohort_expression_data.headers().size());

// create header
Expand All @@ -69,9 +69,9 @@ void CohortExpressionDataWidget::loadExpressionData()
}

//fill table
for(int row_idx=0; row_idx<cohort_expression_data.rowCount(); ++row_idx)
for(int row_idx=0; row_idx<cohort_expression_data.count(); ++row_idx)
{
QStringList row = cohort_expression_data.row(row_idx);
const QStringList& row = cohort_expression_data[row_idx];
for (int col_idx = 0; col_idx < tsv_header.size(); ++col_idx)
{
if(col_idx > 0)
Expand Down
4 changes: 2 additions & 2 deletions src/GSvar/DiseaseCourseWidget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,9 @@ void DiseaseCourseWidget::createTableView()
{
for (int col_idx = 0; col_idx < table_data_.cfdna_samples.length(); ++col_idx)
{
if(table_data_.mrd_tables.at(col_idx).rowCount() > 0)
if(table_data_.mrd_tables.at(col_idx).count() > 0)
{
ui_->mrd->setItem(row_idx, col_idx, GUIHelper::createTableItem(table_data_.mrd_tables.at(col_idx).row(0).at(row_idx), Qt::AlignRight));
ui_->mrd->setItem(row_idx, col_idx, GUIHelper::createTableItem(table_data_.mrd_tables.at(col_idx)[0].at(row_idx), Qt::AlignRight));
continue;
}
ui_->mrd->setItem(row_idx, col_idx, GUIHelper::createTableItem(""));
Expand Down
Loading

0 comments on commit b34a458

Please sign in to comment.