Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing some issues with option parsing on macos #247

Merged
merged 2 commits into from
Oct 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions src/analysis/cpgbins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@
#include "MSite.hpp"
#include "OptionParser.hpp"
#include "bsutils.hpp"
#include "xcounts_utils.hpp"
#include "smithlab_utils.hpp"
#include "xcounts_utils.hpp"

#include <bamxx.hpp>

#include <algorithm>
#include <charconv>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <iostream>
Expand All @@ -45,6 +46,8 @@ using std::ostream;
using std::runtime_error;
using std::size;
using std::string;
using std::uint32_t;
using std::uint64_t;
using std::unordered_map;
using std::vector;

Expand Down Expand Up @@ -75,13 +78,13 @@ format_levels_counter(const LevelsCounter &lc) {
return oss.str();
}


static unordered_map<string, uint64_t>
get_chrom_sizes(const string &chrom_sizes_file) {
unordered_map<string, uint64_t> chrom_sizes;

ifstream in(chrom_sizes_file);
if (!in) throw runtime_error("failed to open file: " + chrom_sizes_file);
if (!in)
throw runtime_error("failed to open file: " + chrom_sizes_file);

string line;
while (getline(in, line)) {
Expand All @@ -91,7 +94,8 @@ get_chrom_sizes(const string &chrom_sizes_file) {
if (!(iss >> chrom_name >> chrom_size))
throw runtime_error("bad line in " + chrom_sizes_file + ":\n" + line);
string dummy;
if (iss >> dummy) throw runtime_error("too many columns: " + line);
if (iss >> dummy)
throw runtime_error("too many columns: " + line);

if (chrom_sizes.find(chrom_name) != cend(chrom_sizes))
throw runtime_error("repeated entry " + chrom_name + " in " +
Expand All @@ -105,7 +109,8 @@ get_chrom_sizes(const string &chrom_sizes_file) {
static vector<string>
get_chrom_names(const string &chrom_sizes_file) {
ifstream in(chrom_sizes_file);
if (!in) throw runtime_error("failed to open file: " + chrom_sizes_file);
if (!in)
throw runtime_error("failed to open file: " + chrom_sizes_file);

vector<string> chrom_names;

Expand All @@ -121,7 +126,6 @@ get_chrom_names(const string &chrom_sizes_file) {
return chrom_names;
}


static void
update(LevelsCounter &lc, const xcounts_entry &xce) {
const uint64_t n_reads = xce.n_meth + xce.n_unmeth;
Expand Down Expand Up @@ -149,7 +153,8 @@ process_chrom(const bool report_more_info, const char level_code,

uint64_t j = 0;
for (auto i = 0ul; i < chrom_size; i += bin_size) {
while (j < size(sites) && sites[j].pos < i) ++j;
while (j < size(sites) && sites[j].pos < i)
++j;

LevelsCounter lc;
while (j < size(sites) && sites[j].pos < i + bin_size)
Expand All @@ -166,7 +171,8 @@ process_chrom(const bool report_more_info, const char level_code,
: (level_code == 'u' ? lc.sites_covered
: lc.total_called()))));
out << r;
if (report_more_info) out << '\t' << format_levels_counter(lc);
if (report_more_info)
out << '\t' << format_levels_counter(lc);
out << '\n';
}
}
Expand All @@ -182,7 +188,8 @@ process_chrom(const bool report_more_info, const string &chrom_name,
r.set_start(i);
r.set_end(std::min(i + bin_size, chrom_size));
out << r;
if (report_more_info) out << '\t' << lc_formatted;
if (report_more_info)
out << '\t' << lc_formatted;
out << '\n';
}
}
Expand Down Expand Up @@ -210,7 +217,8 @@ Columns (beyond the first 6) in the BED format output:
bool verbose = false;
bool report_more_info = false;
uint32_t n_threads = 1;
uint64_t bin_size = 1000;
// uint64_t bin_size = 1000;
size_t bin_size = 1000; // ADS: for macOS gcc-14.2.0
string level_code = "w";
string outfile;

Expand Down Expand Up @@ -266,7 +274,8 @@ Columns (beyond the first 6) in the BED format output:
std::ofstream of;
if (!outfile.empty()) {
of.open(outfile);
if (!of) throw runtime_error("failed to open outfile: " + outfile);
if (!of)
throw runtime_error("failed to open outfile: " + outfile);
}
std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf());

Expand Down
66 changes: 42 additions & 24 deletions src/utils/clean-hairpins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#include <algorithm>
#include <array>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint32_t and std::uint64_t
#include <fstream>
#include <iomanip>
#include <iostream>
Expand All @@ -45,7 +47,10 @@ using std::ofstream;
using std::ostream;
using std::ostringstream;
using std::runtime_error;
using std::size_t;
using std::string;
using std::uint32_t;
using std::uint64_t;
using std::vector;

using bamxx::bgzf_file;
Expand Down Expand Up @@ -93,22 +98,28 @@ operator>>(bgzf_file &s, FASTQRecord &r) {

err_code ec = err_code::none;

if (!getline(s, r.name)) return s;
if (!getline(s, r.name))
return s;

if (r.name.empty() || r.name[0] != '@') ec = err_code::bad_name;
if (r.name.empty() || r.name[0] != '@')
ec = err_code::bad_name;

const auto nm_end = r.name.find_first_of(" \t");
const auto nm_sz = (nm_end == string::npos ? r.name.size() : nm_end) - 1;
r.name.erase(copy_n(cbegin(r.name) + 1, nm_sz, begin(r.name)), cend(r.name));

if (!getline(s, r.seq)) ec = err_code::bad_seq;
if (!getline(s, r.seq))
ec = err_code::bad_seq;

string tmp;
if (!getline(s, tmp)) ec = err_code::bad_plus;
if (!getline(s, tmp))
ec = err_code::bad_plus;

if (!getline(s, r.qual)) ec = err_code::bad_qual;
if (!getline(s, r.qual))
ec = err_code::bad_qual;

if (ec != err_code::none) throw error_msg[ec];
if (ec != err_code::none)
throw error_msg[ec];

return s;
}
Expand Down Expand Up @@ -171,15 +182,13 @@ struct hp_summary {
// sum_percent_match_bad over the total hairpin reads.
double mean_percent_match_hairpin{};

auto
assign_values() -> void {
auto assign_values() -> void {
mean_percent_match_non_hairpin =
sum_percent_match_good / (n_reads - n_hairpin_reads);
mean_percent_match_hairpin = sum_percent_match_bad / n_hairpin_reads;
}

auto
tostring() const -> string {
auto tostring() const -> string {
ostringstream oss;
oss << "total_reads_pairs: " << n_reads << '\n'
<< "good_reads_pairs: " << n_good_reads << '\n'
Expand All @@ -197,7 +206,8 @@ struct hp_summary {
static void
write_histogram(const string &hist_outfile, vector<double> hist) {
ofstream hist_out(hist_outfile);
if (!hist_out) throw runtime_error("failed to open file: " + hist_outfile);
if (!hist_out)
throw runtime_error("failed to open file: " + hist_outfile);
const auto total = accumulate(cbegin(hist), cend(hist), 0.0);
transform(cbegin(hist), cend(hist), begin(hist),
[&total](const double t) { return t / total; });
Expand All @@ -213,7 +223,8 @@ static void
write_statistics(const string &filename, hp_summary hps) {
hps.assign_values();
ofstream out(filename);
if (!out) throw runtime_error("failed to open file: " + filename);
if (!out)
throw runtime_error("failed to open file: " + filename);
out << hps.tostring();
}

Expand All @@ -226,17 +237,18 @@ fraction_good_bases(const FASTQRecord &a, const FASTQRecord &b) {

struct clean_hairpin {
double cutoff{0.95};
uint64_t n_reads_to_check{std::numeric_limits<size_t>::max()};
// ADS: this was uint64_t but g++-14.2.0 on macOS had a problem
size_t n_reads_to_check{std::numeric_limits<size_t>::max()};
double min_good_base_percent{0.5};
uint32_t min_read_length{32};
uint32_t n_hist_bins{20};
bool invert_output{false};

hp_summary
analyze_reads(const string &outfile1, const string &outfile2, bgzf_file &in1,
bgzf_file &in2, vector<double> &hist) const;
hp_summary
analyze_reads(bgzf_file &in1, bgzf_file &in2, vector<double> &hist) const;
hp_summary analyze_reads(const string &outfile1, const string &outfile2,
bgzf_file &in1, bgzf_file &in2,
vector<double> &hist) const;
hp_summary analyze_reads(bgzf_file &in1, bgzf_file &in2,
vector<double> &hist) const;
};

hp_summary
Expand All @@ -246,9 +258,11 @@ clean_hairpin::analyze_reads(const string &outfile1, const string &outfile2,

// output files for read1 and read2 with hairpins removed
bgzf_file out1(outfile1, "w");
if (!out1) throw runtime_error("cannot open output file: " + outfile1);
if (!out1)
throw runtime_error("cannot open output file: " + outfile1);
bgzf_file out2(outfile2, "w");
if (!out2) throw runtime_error("cannot open output file: " + outfile2);
if (!out2)
throw runtime_error("cannot open output file: " + outfile2);

hp_summary hps{cutoff};
FASTQRecord r1, r2;
Expand Down Expand Up @@ -384,17 +398,21 @@ main_clean_hairpins(int argc, const char **argv) {

// Input: paired-end reads with end1 and end2
bgzf_file in1(reads_file1, "r");
if (!in1) throw runtime_error("cannot open input file: " + reads_file1);
if (!in1)
throw runtime_error("cannot open input file: " + reads_file1);
bgzf_file in2(reads_file2, "r");
if (!in2) throw runtime_error("cannot open input file: " + reads_file2);
if (!in2)
throw runtime_error("cannot open input file: " + reads_file2);

const hp_summary hps =
write_reads ? ch.analyze_reads(outfile1, outfile2, in1, in2, hist)
: ch.analyze_reads(in1, in2, hist);

if (!stat_outfile.empty()) write_statistics(stat_outfile, hps);
if (!stat_outfile.empty())
write_statistics(stat_outfile, hps);

if (!hist_outfile.empty()) write_histogram(hist_outfile, hist);
if (!hist_outfile.empty())
write_histogram(hist_outfile, hist);
}
catch (const std::exception &e) {
cerr << e.what() << endl;
Expand Down
Loading