Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove or add back non-covered sites from compressed counts files #190

Merged
merged 8 commits into from
Nov 29, 2023
2 changes: 2 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ dnmtools_SOURCES += src/utils/symmetric-cpgs.cpp
dnmtools_SOURCES += src/utils/merge-methcounts.cpp
dnmtools_SOURCES += src/utils/lift-filter.cpp
dnmtools_SOURCES += src/utils/fast-liftover.cpp
dnmtools_SOURCES += src/utils/covered.cpp
dnmtools_SOURCES += src/utils/recovered.cpp
dnmtools_SOURCES += src/utils/kmersites.cpp

dnmtools_SOURCES += src/amrfinder/allelicmeth.cpp
Expand Down
6 changes: 6 additions & 0 deletions src/dnmtools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ main_symmetric_cpgs(int argc, const char **argv);
int
metagene(int argc, const char **argv);
int
main_covered(int argc, const char **argv);
int
main_recovered(int argc, const char **argv);
int
kmersites(int argc, const char **argv);

void
Expand Down Expand Up @@ -194,6 +198,8 @@ main(int argc, const char **argv) {
{"lc", "approximate line counts in a file", main_lc_approx},
{"merge-bsrate", "merge bisulfite conversion rates files from bsrate", main_merge_bsrate},
{"merge", "merge multiple counts files into a counts file or a table", main_merge_methcounts},
{"covered", "filter a counts file for only covered sites", main_covered},
{"recovered", "replace missing sites in a counts file", main_recovered},
{"selectsites", "sites inside a set of genomic intervals", main_selectsites},
{"kmersites", "make track file for sites matching kmer", kmersites}}}}}};
// clang-format on
Expand Down
100 changes: 100 additions & 0 deletions src/utils/covered.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/* covered: filter counts files so they only have covered sites.
*
* Copyright (C) 2023 Andrew D. Smith
*
* Authors: Andrew D. Smith
*
* This program is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/

#include <string>
#include <vector>
#include <iostream>
#include <stdexcept>
#include <bamxx.hpp>

// from smithlab_cpp
#include "OptionParser.hpp"
#include "smithlab_utils.hpp"
#include "smithlab_os.hpp"

#include "MSite.hpp"

using std::string;
using std::cout;
using std::cerr;
using std::endl;

using bamxx::bgzf_file;

int
main_covered(int argc, const char **argv) {
try {

size_t n_threads = 1;

string outfile;
const string description =
"filter counts files so they only have covered sites";

/****************** COMMAND LINE OPTIONS ********************/
OptionParser opt_parse(strip_path(argv[0]), description,
"<methcounts-file>");
opt_parse.add_opt("output", 'o', "output file (required)", true, outfile);
opt_parse.add_opt("threads", 't', "number of threads", false, n_threads);
std::vector<string> leftover_args;
opt_parse.parse(argc, argv, leftover_args);
if (argc == 1 || opt_parse.help_requested()) {
cerr << opt_parse.help_message() << endl
<< opt_parse.about_message() << endl;
return EXIT_SUCCESS;
}
if (opt_parse.about_requested()) {
cerr << opt_parse.about_message() << endl;
return EXIT_SUCCESS;
}
if (opt_parse.option_missing()) {
cerr << opt_parse.option_missing_message() << endl;
return EXIT_SUCCESS;
}
if (leftover_args.size() != 1) {
cerr << opt_parse.help_message() << endl;
return EXIT_SUCCESS;
}
const string filename(leftover_args.front());
/****************** END COMMAND LINE OPTIONS *****************/

bamxx::bam_tpool tpool(n_threads);

// const bool show_progress = VERBOSE && isatty(fileno(stderr));
bgzf_file in(filename, "r");
if (!in) throw std::runtime_error("could not open file: " + filename);

// open the output file
bamxx::bgzf_file out(outfile, "w");
if (!out) throw std::runtime_error("error opening output file: " + outfile);

if (n_threads > 1) {
tpool.set_io(in);
tpool.set_io(out);
}

MSite site;
while (read_site(in, site))
if (site.n_reads > 0 || site.is_mutated())
write_site(out, site);
}
catch (const std::runtime_error &e) {
cerr << e.what() << endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
Loading