Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Cooke committed Mar 31, 2021
2 parents 53bc73a + 43066d5 commit fe7acfd
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 38 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ Daniel Cooke and Gerton Lunter

## Citing

Please cite the preprint: [A unified haplotype-based method for accurate and comprehensive variant calling](https://www.biorxiv.org/content/early/2018/10/29/456103).
Please cite the paper: [A unified haplotype-based method for accurate and comprehensive variant calling](https://www.nature.com/articles/s41587-021-00861-3).

## License

Expand Down
8 changes: 3 additions & 5 deletions scripts/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
forests = ['germline', 'somatic']

latest_llvm = 'llvm'
latest_gcc = 'gcc@10'
latest_gcc = 'gcc'

required_curl_version = 7, 41, 0
required_git_version = 2, 7, 0
Expand Down Expand Up @@ -262,10 +262,8 @@ def get_brewed_compiler_binaries(homebrew_dir):
else:
gcc_dir = cellar_dir / latest_gcc
gcc_version = os.listdir(str(gcc_dir))[0]
gcc_bin_dir = gcc_dir /gcc_version / 'bin'
gcc_bin_name = latest_gcc.replace('@', '-')
gxx_bin_name = gcc_bin_name.replace('cc', '++')
return gcc_bin_dir / gcc_bin_name, gcc_bin_dir / gxx_bin_name
gcc_bin_dir = gcc_dir / gcc_version / 'bin'
return gcc_bin_dir / 'gcc', gcc_bin_dir / 'g++'

def patch_homebrew_centos_gcc9(homebrew_dir):
homebrew_bin_dir = homebrew_dir / 'bin'
Expand Down
10 changes: 9 additions & 1 deletion src/config/option_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include "exceptions/user_error.hpp"
#include "config.hpp"

#include "core/csr/measures/measure_factory.hpp"

namespace po = boost::program_options;
namespace fs = boost::filesystem;

Expand Down Expand Up @@ -776,8 +778,10 @@ OptionMap parse_options(const int argc, const char** argv)
.add(variant_discovery).add(haplotype_generation).add(general_variant_calling)
.add(cancer).add(trio).add(polyclone).add(cell).add(call_filtering);

po::options_description conditional_help_options("Octopus help command line options");
conditional_help_options.add(general).add(call_filtering);
OptionMap vm_init;
po::store(run(po::command_line_parser(argc, argv).options(general).allow_unregistered()), vm_init);
po::store(run(po::command_line_parser(argc, argv).options(conditional_help_options).allow_unregistered()), vm_init);

if (vm_init.count("help") == 1) {
po::store(run(po::command_line_parser(argc, argv).options(general_variant_calling).allow_unregistered()), vm_init);
Expand Down Expand Up @@ -821,6 +825,10 @@ OptionMap parse_options(const int argc, const char** argv)
} else {
std::cout << all << std::endl;
}
if (vm_init.count("annotations") == 1) {
std::cout << "Available annotations:\n" << std::endl;
csr::print_all_measures_help(std::cout);
}
return vm_init;
}

Expand Down
23 changes: 13 additions & 10 deletions src/core/callers/cancer_caller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1492,9 +1492,9 @@ void CancerCaller::log(const ModelPosteriors& model_posteriors) const

namespace debug {

template <typename S>
template <typename S, typename GenotypeReference>
void print_genotype_posteriors(S&& stream,
const std::unordered_map<Genotype<IndexedHaplotype<>>, double>& genotype_posteriors,
std::vector<std::pair<GenotypeReference, double>> genotype_posteriors,
const std::size_t n = std::numeric_limits<std::size_t>::max())
{
const auto m = std::min(n, genotype_posteriors.size());
Expand All @@ -1503,14 +1503,10 @@ void print_genotype_posteriors(S&& stream,
} else {
stream << "Printing top " << m << " genotype posteriors " << '\n';
}
using GenotypeReference = std::reference_wrapper<const Genotype<IndexedHaplotype<>>>;
std::vector<std::pair<GenotypeReference, double>> v {};
v.reserve(genotype_posteriors.size());
std::copy(std::cbegin(genotype_posteriors), std::cend(genotype_posteriors), std::back_inserter(v));
const auto mth = std::next(std::begin(v), m);
std::partial_sort(std::begin(v), mth, std::end(v),
const auto mth = std::next(std::begin(genotype_posteriors), m);
std::partial_sort(std::begin(genotype_posteriors), mth, std::end(genotype_posteriors),
[] (const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; });
std::for_each(std::begin(v), mth,
std::for_each(std::begin(genotype_posteriors), mth,
[&] (const auto& p) {
print_variant_alleles(stream, p.first.get());
stream << " " << p.second << '\n';
Expand Down Expand Up @@ -1544,6 +1540,10 @@ void CancerCaller::log(const GenotypeVector& germline_genotypes,
auto map_somatic = find_map_genotype(cancer_posteriors);
auto map_cancer_genotype = map_somatic->first.get();
somatic_log << "MAP cancer genotype: ";
auto weighted_cancer_posteriors = zip_cref(cancer_genotypes, somatic_inferences.weighted_genotype_posteriors);
auto weighted_somatic_log = stream(*debug_log_);
weighted_somatic_log << "Weighted cancer genotypes... ";
debug::print_genotype_posteriors(weighted_somatic_log, weighted_cancer_posteriors, 10);
debug::print_variant_alleles(somatic_log, map_cancer_genotype);
somatic_log << ' ' << map_somatic->second;
auto map_marginal_germline = find_map_genotype(germline_genotype_posteriors);
Expand All @@ -1553,7 +1553,10 @@ void CancerCaller::log(const GenotypeVector& germline_genotypes,
marginal_germline_log << ' ' << map_marginal_germline->second;
}
if (trace_log_) {
debug::print_genotype_posteriors(stream(*trace_log_), germline_genotype_posteriors);
auto weighted_cancer_posteriors = zip_cref(cancer_genotypes, somatic_inferences.weighted_genotype_posteriors);
auto weighted_somatic_log = stream(*trace_log_);
weighted_somatic_log << "Weighted cancer genotypes... ";
debug::print_genotype_posteriors(weighted_somatic_log, weighted_cancer_posteriors);
}
}

Expand Down
33 changes: 33 additions & 0 deletions src/core/csr/measures/measure_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "exceptions/user_error.hpp"
#include "utils/map_utils.hpp"
#include "utils/string_utils.hpp"
#include "io/variant/vcf_header.hpp"
#include "io/variant/vcf_spec.hpp"

namespace octopus { namespace csr {

Expand Down Expand Up @@ -163,5 +165,36 @@ std::vector<std::string> get_all_measure_names()
return extract_sorted_keys(measure_makers);
}

VcfHeader make_header(const std::vector<MeasureWrapper>& measures)
{
VcfHeader::Builder builder {};
for (const auto& measure : measures) {
measure.annotate(builder);
}
return builder.build_once();
}

void print_help(const std::vector<MeasureWrapper>& measures, std::ostream& os)
{
const auto header = make_header(measures);
os << "Name\tKind\tNumber\tType\tDescription" << std::endl;
for (const auto& measure : measures) {
os << measure.name() << '\t';
using namespace vcfspec::header;
const auto kind = header.has(format, measure.name()) ? format : info;
os << kind << '\t';
os << get_id_field_value(header, kind, measure.name(), meta::struc::number) << '\t';
os << get_id_field_value(header, kind, measure.name(), meta::struc::type) << '\t';
os << get_id_field_value(header, kind, measure.name(), meta::struc::description) << '\t';
os << std::endl;
}
}

void print_all_measures_help(std::ostream& os)
{
const auto measures = make_measures(get_all_measure_names());
print_help(measures, os);
}

} // namespace csr
} // namespace octopus
4 changes: 4 additions & 0 deletions src/core/csr/measures/measure_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <string>
#include <vector>
#include <ostream>

#include "utils/string_utils.hpp"
#include "measure.hpp"
Expand All @@ -18,6 +19,9 @@ std::vector<MeasureWrapper> make_measures(std::vector<std::string> names);

std::vector<std::string> get_all_measure_names();

void print_help(const std::vector<MeasureWrapper>& measures, std::ostream& os);
void print_all_measures_help(std::ostream& os);

} // namespace csr
} // namespace octopus

Expand Down
43 changes: 37 additions & 6 deletions src/core/models/genotype/subclone_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ compute_genotype_likelihoods_with_fixed_mixture_model(const std::vector<SampleNa
return result;
}

auto evaluate(const Genotype<IndexedHaplotype<>>& genotype, const ConstantMixtureGenotypeLikelihoodModel& model)
{
return model.evaluate(genotype);
}
auto evaluate(const CancerGenotype<IndexedHaplotype<>>& genotype, const ConstantMixtureGenotypeLikelihoodModel& model)
{
return model.evaluate(demote(genotype));
Expand Down Expand Up @@ -243,14 +247,41 @@ generate_seeds(const std::vector<SampleName>& samples,
if (result.size() > max_seeds) return result;
max_seeds -= result.size();
result.reserve(max_seeds);
result.push_back(genotype_log_priors);
IndividualModel germline_model {priors.genotype_prior_model};
if (haplotypes) germline_model.prime(*haplotypes);
ConstantMixtureGenotypeLikelihoodModel basic_likelihood_model {haplotype_log_likelihoods};
std::vector<LogProbabilityVector> basic_sample_likelihoods {};
basic_sample_likelihoods.reserve(samples.size());
for (const auto& sample : samples) {
haplotype_log_likelihoods.prime(sample);
auto latents = germline_model.evaluate(genotypes, haplotype_log_likelihoods);
result.push_back(std::move(latents.posteriors.genotype_log_probabilities));
basic_likelihood_model.cache().prime(sample);
basic_sample_likelihoods.push_back(evaluate(genotypes, basic_likelihood_model));
}
auto basic_likelihoods = add_all_and_normalise(basic_sample_likelihoods);
auto basic_posteriors = add_and_normalise(genotype_log_priors, basic_likelihoods);
result.push_back(basic_posteriors);
--max_seeds;
if (max_seeds == 0) return result;
result.push_back(basic_likelihoods);
--max_seeds;
if (max_seeds == 0) return result;
result.push_back(genotype_log_priors);
maths::normalise_logs(result.back());
--max_seeds;
if (max_seeds == 0) return result;
for (auto& likelihoods : basic_sample_likelihoods) {
result.push_back(std::move(likelihoods));
--max_seeds;
if (max_seeds == 0) return result;
}
std::vector<std::pair<double, std::size_t>> ranked_basic_posteriors {};
ranked_basic_posteriors.reserve(genotypes.size());
for (std::size_t i {0}; i < genotypes.size(); ++i) {
ranked_basic_posteriors.push_back({basic_posteriors[i], i});
}
const auto nth_ranked_basic_posteriors_itr = std::next(std::begin(ranked_basic_posteriors), max_seeds);
std::partial_sort(std::begin(ranked_basic_posteriors),
nth_ranked_basic_posteriors_itr,
std::end(ranked_basic_posteriors), std::greater<> {});
std::transform(std::begin(ranked_basic_posteriors), nth_ranked_basic_posteriors_itr,
std::back_inserter(result), [&] (const auto& p) { return make_point_seed(genotypes.size(), p.second); });
return result;
}

Expand Down
24 changes: 16 additions & 8 deletions src/io/variant/vcf_header.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,20 @@ bool VcfHeader::has(const Tag& t) const noexcept

bool VcfHeader::has(const Tag& t, const StructuredKey& k) const noexcept
{
return structured_fields_.count(t) > 0; // TODO: complete this
const auto er = structured_fields_.equal_range(t);
return std::find_if(er.first, er.second,
[&] (const auto& p) {
return p.second.at(vcfspec::header::meta::struc::id) == k;
}) != er.second;
}

const VcfHeader::Value& VcfHeader::at(const BasicKey& k) const
{
return basic_fields_.at(k);
}

const VcfHeader::Value& VcfHeader::find(const Tag& search_tag, const StructuredKey& search_key,
const StructuredKey& id_key, const Value& id_value) const
const VcfHeader::Value& VcfHeader::find(const Tag& search_tag, const StructuredKey& id_key,
const Value& id_value, const StructuredKey& search_key) const
{
const auto er = structured_fields_.equal_range(search_tag);
return std::find_if(er.first, er.second,
Expand Down Expand Up @@ -118,15 +122,19 @@ const VcfHeader::StructuredFieldMap& VcfHeader::structured_fields() const noexce

// non-member methods

const VcfHeader::Value& get_id_field_value(const VcfHeader& header, const VcfHeader::Tag& tag,
const VcfHeader::Value& id_value,
const VcfHeader::StructuredKey& lookup_key)
const VcfHeader::Value&
get_id_field_value(const VcfHeader& header,
const VcfHeader::Tag& tag,
const VcfHeader::StructuredKey& id_value,
const VcfHeader::StructuredKey& lookup_key)
{
return header.find(tag, vcfspec::header::meta::struc::id, id_value, lookup_key);
}

const VcfHeader::Value& get_id_field_type(const VcfHeader& header, const VcfHeader::Tag& tag,
const VcfHeader::Value& id_value)
const VcfHeader::Value&
get_id_field_type(const VcfHeader& header,
const VcfHeader::Tag& tag,
const VcfHeader::Value& id_value)
{
using namespace vcfspec::header::meta;
return header.find(tag, struc::id, id_value, struc::type);
Expand Down
18 changes: 11 additions & 7 deletions src/io/variant/vcf_header.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ class VcfHeader : public Equitable<VcfHeader>
bool has(const Tag& tag, const StructuredKey& k) const noexcept;

const Value& at(const BasicKey& k) const;
const Value& find(const Tag& search_tag, const StructuredKey& search_key,
const StructuredKey& id_key, const Value& id_value) const;
const Value& find(const Tag& search_tag, const StructuredKey& id_key,
const Value& id_value, const StructuredKey& search_key) const;

std::vector<BasicKey> basic_keys() const;
std::vector<Tag> tags() const;
Expand Down Expand Up @@ -162,12 +162,16 @@ VcfHeader::VcfHeader(T&& file_format, U&& samples, B&& basic_fields, S&& structu
, structured_fields_ {std::forward<S>(structured_fields)}
{}

const std::string& get_id_field_value(const VcfHeader& header, const VcfHeader::Tag& t,
const VcfHeader::StructuredKey& id_value,
const VcfHeader::StructuredKey& lookup_key);
const VcfHeader::Value&
get_id_field_value(const VcfHeader& header,
const VcfHeader::Tag& t,
const VcfHeader::StructuredKey& id_value,
const VcfHeader::StructuredKey& lookup_key);

const std::string& get_id_field_type(const VcfHeader& header, const VcfHeader::Tag& t,
const VcfHeader::Value& id_value);
const VcfHeader::Value&
get_id_field_type(const VcfHeader& header,
const VcfHeader::Tag& t,
const VcfHeader::Value& id_value);

VcfType get_typed_value(const VcfHeader& header, const VcfHeader::Tag& t,
const VcfHeader::StructuredKey& key, const VcfHeader::Value& value);
Expand Down

0 comments on commit fe7acfd

Please sign in to comment.