Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Cooke committed May 4, 2017
2 parents 9fa3deb + b03c1d0 commit d6b85ba
Show file tree
Hide file tree
Showing 19 changed files with 584 additions and 302 deletions.
38 changes: 19 additions & 19 deletions src/config/option_collation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,13 +267,14 @@ ReferenceGenome make_reference(const OptionMap& options)

InputRegionMap make_search_regions(const std::vector<GenomicRegion>& regions)
{
InputRegionMap contig_mapped_regions {};
std::map<ContigName, std::deque<GenomicRegion>> contig_mapped_regions {};
for (const auto& region : regions) {
contig_mapped_regions[region.contig_name()].insert(region);
contig_mapped_regions[region.contig_name()].push_back(region);
}
InputRegionMap result {};
result.reserve(contig_mapped_regions.size());
for (const auto& p : contig_mapped_regions) {
for (auto& p : contig_mapped_regions) {
std::sort(std::begin(p.second), std::end(p.second));
auto covered_contig_regions = extract_covered_regions(p.second);
result.emplace(std::piecewise_construct,
std::forward_as_tuple(p.first),
Expand All @@ -288,26 +289,24 @@ InputRegionMap extract_search_regions(const ReferenceGenome& reference)
return make_search_regions(get_all_contig_regions(reference));
}

auto cut(const MappableFlatSet<GenomicRegion>& mappables, const MappableFlatSet<GenomicRegion>& regions)
auto get_unskipped(const MappableFlatSet<GenomicRegion>& regions, const MappableFlatSet<GenomicRegion>& skips)
{
if (mappables.empty()) return regions;
if (skips.empty()) return regions;
MappableFlatSet<GenomicRegion> result {};
for (const auto& region : regions) {
auto overlapped = mappables.overlap_range(region);
const auto overlapped = skips.overlap_range(region);
if (empty(overlapped)) {
result.emplace(region);
} else if (!is_same_region(region, overlapped.front())) {
auto chunk = region;
if (begins_before(overlapped.front(), chunk)) {
chunk = right_overhang_region(chunk, overlapped.front());
overlapped.advance_begin(1);
} else if (!contains(overlapped.front(), region)) {
if (begins_before(region, overlapped.front())) {
result.emplace(left_overhang_region(region, overlapped.front()));
}
std::for_each(std::cbegin(overlapped), std::cend(overlapped), [&] (const auto& region) {
result.emplace(left_overhang_region(chunk, region));
chunk = expand_lhs(chunk, -begin_distance(chunk, region));
});
if (ends_before(overlapped.back(), chunk)) {
result.emplace(right_overhang_region(chunk, overlapped.back()));
auto intervening_chunks = extract_intervening_regions(overlapped);
using std::make_move_iterator;
result.insert(make_move_iterator(std::begin(intervening_chunks)),
make_move_iterator(std::end(intervening_chunks)));
if (ends_before(overlapped.back(), region)) {
result.emplace(right_overhang_region(region, overlapped.back()));
}
}
}
Expand All @@ -323,7 +322,7 @@ InputRegionMap extract_search_regions(const std::vector<GenomicRegion>& regions,
InputRegionMap result {input_regions.size()};
for (auto& p : input_regions) {
if (skipped.count(p.first) == 1) {
result.emplace(p.first, cut(skipped.at(p.first), std::move(p.second)));
result.emplace(p.first, get_unskipped(std::move(p.second), skipped.at(p.first)));
} else {
result.emplace(p.first, std::move(p.second));
}
Expand Down Expand Up @@ -1352,7 +1351,8 @@ CallerFactory make_caller_factory(const ReferenceGenome& reference, ReadPipe& re
vc_builder.set_min_somatic_posterior(min_somatic_posterior);
} else if (caller == "trio") {
vc_builder.set_trio(make_trio(read_pipe.samples(), options, pedigree));
vc_builder.set_denovo_mutation_rate(options.at("denovo-mutation-rate").as<float>());
vc_builder.set_snv_denovo_mutation_rate(options.at("snv-denovo-mutation-rate").as<float>());
vc_builder.set_indel_denovo_mutation_rate(options.at("indel-denovo-mutation-rate").as<float>());
vc_builder.set_min_denovo_posterior(options.at("min-denovo-posterior").as<Phred<double>>());
}

Expand Down
30 changes: 26 additions & 4 deletions src/config/option_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,9 +473,13 @@ OptionMap parse_options(const int argc, const char** argv)
po::value<std::string>(),
"Paternal sample")

("denovo-mutation-rate",
po::value<float>()->default_value(1e-8, "1e-8"),
"Expected de novo mutation rate, per megabase pair, for this sample")
("snv-denovo-mutation-rate",
po::value<float>()->default_value(1e-9, "1e-9"),
"SNV de novo mutation rate, per base per generation")

("indel-denovo-mutation-rate",
po::value<float>()->default_value(1e-10, "1e-10"),
"INDEL de novo mutation rate, per base per generation")

("min-denovo-posterior",
po::value<Phred<double>>()->default_value(Phred<double> {0.5}),
Expand Down Expand Up @@ -518,7 +522,7 @@ OptionMap parse_options(const int argc, const char** argv)
"Include the read mapping quality in the haplotype likelihood calculation")

("max-joint-genotypes",
po::value<int>()->default_value(200000),
po::value<int>()->default_value(1000000),
"The maximum number of joint genotype vectors to consider when computing joint"
" genotype posterior probabilities")

Expand Down Expand Up @@ -794,6 +798,16 @@ void check_strictly_positive(const std::string& option, const OptionMap& vm)
}
}

void check_probability(const std::string& option, const OptionMap& vm)
{
if (vm.count(option) == 1) {
const auto value = vm.at(option).as<float>();
if (value < 0 || value > 1) {
throw InvalidCommandLineOptionValue {option, value, "must be between zero and one" };
}
}
}

void conflicting_options(const OptionMap& vm, const std::string& opt1, const std::string& opt2)
{
if (vm.count(opt1) == 1 && !vm[opt1].defaulted() && vm.count(opt2) == 1 && !vm[opt2].defaulted()) {
Expand Down Expand Up @@ -888,6 +902,11 @@ void validate(const OptionMap& vm)
"max-haplotypes", "haplotype-holdout-threshold", "haplotype-overflow",
"max-joint-genotypes"
};
const std::vector<std::string> probability_options {
"snp-heterozygosity", "snp-heterozygosity-stdev", "indel-heterozygosity",
"somatic-mutation-rate", "min-expected-somatic-frequency", "min-credible-somatic-frequency", "credible-mass",
"snv-denovo-mutation-rate", "indel-denovo-mutation-rate"
};
conflicting_options(vm, "maternal-sample", "normal-sample");
conflicting_options(vm, "paternal-sample", "normal-sample");
for (const auto& option : positive_int_options) {
Expand All @@ -896,6 +915,9 @@ void validate(const OptionMap& vm)
for (const auto& option : strictly_positive_int_options) {
check_strictly_positive(option, vm);
}
for (const auto& option : probability_options) {
check_probability(option, vm);
}
check_reads_present(vm);
check_region_files_consistent(vm);
check_trio_consistent(vm);
Expand Down
12 changes: 9 additions & 3 deletions src/core/callers/caller_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,15 @@ CallerBuilder& CallerBuilder::set_min_denovo_posterior(Phred<double> posterior)
return *this;
}

CallerBuilder& CallerBuilder::set_denovo_mutation_rate(double rate) noexcept
CallerBuilder& CallerBuilder::set_snv_denovo_mutation_rate(double rate) noexcept
{
params_.denovo_mutation_rate = rate;
params_.snv_denovo_mutation_rate = rate;
return *this;
}

CallerBuilder& CallerBuilder::set_indel_denovo_mutation_rate(double rate) noexcept
{
params_.indel_denovo_mutation_rate = rate;
return *this;
}

Expand Down Expand Up @@ -348,7 +354,7 @@ CallerBuilder::CallerFactoryMap CallerBuilder::generate_factory() const
params_.ploidies.of(params_.trio->father(), *requested_contig_),
params_.ploidies.of(params_.trio->child(), *requested_contig_),
make_trio_prior_model(params_.snp_heterozygosity, params_.indel_heterozygosity),
{*params_.denovo_mutation_rate},
{*params_.snv_denovo_mutation_rate, *params_.indel_denovo_mutation_rate},
params_.min_variant_posterior,
params_.min_denovo_posterior,
params_.min_refcall_posterior,
Expand Down
5 changes: 3 additions & 2 deletions src/core/callers/caller_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ class CallerBuilder
// trio
CallerBuilder& set_trio(Trio trio);
CallerBuilder& set_min_denovo_posterior(Phred<double> posterior) noexcept;
CallerBuilder& set_denovo_mutation_rate(double rate) noexcept;
CallerBuilder& set_snv_denovo_mutation_rate(double rate) noexcept;
CallerBuilder& set_indel_denovo_mutation_rate(double rate) noexcept;

// pedigree
CallerBuilder& set_pedigree(Pedigree pedigree);
Expand Down Expand Up @@ -117,7 +118,7 @@ class CallerBuilder
// trio
boost::optional<Trio> trio;
Phred<double> min_denovo_posterior;
boost::optional<double> denovo_mutation_rate;
boost::optional<double> snv_denovo_mutation_rate, indel_denovo_mutation_rate;

// pedigree
boost::optional<Pedigree> pedigree;
Expand Down
9 changes: 6 additions & 3 deletions src/core/callers/trio_caller.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,15 @@ TrioCaller::calculate_model_posterior(const std::vector<Haplotype>& haplotypes,
const Latents& latents) const
{
const auto max_ploidy = std::max({parameters_.maternal_ploidy, parameters_.paternal_ploidy, parameters_.child_ploidy});
const auto genotypes = generate_all_genotypes(haplotypes, max_ploidy + 1);
std::vector<std::vector<unsigned>> genotype_indices {};
const auto genotypes = generate_all_genotypes(haplotypes, max_ploidy + 1, genotype_indices);
const auto germline_prior_model = make_prior_model(haplotypes);
DeNovoModel denovo_model {parameters_.denovo_model_params, haplotypes.size(), DeNovoModel::CachingStrategy::address};
DeNovoModel denovo_model {parameters_.denovo_model_params};
germline_prior_model->prime(haplotypes);
denovo_model.prime(haplotypes);
const model::TrioModel model {parameters_.trio, *germline_prior_model, denovo_model,
TrioModel::Options {parameters_.max_joint_genotypes}};
const auto inferences = model.evaluate(genotypes, haplotype_likelihoods);
const auto inferences = model.evaluate(genotypes, genotype_indices, haplotype_likelihoods);
return octopus::calculate_model_posterior(latents.model_latents.log_evidence, inferences.log_evidence);
}

Expand Down
72 changes: 65 additions & 7 deletions src/core/models/genotype/trio_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ struct GenotypeRefProbabilityPairGenotypeLess
struct ParentsProbabilityPair
{
GenotypeReference maternal, paternal;
double probability;
double probability, maternal_likelihood, paternal_likelihood;
const GenotypeIndiceVector* maternal_indices = nullptr, *paternal_indices = nullptr;
};

Expand Down Expand Up @@ -249,10 +249,65 @@ auto reduce(std::vector<T>& zipped, const TrioModel::Options& options)
return make_reduction_map(zipped, last_to_join, options);
}

struct UniformPriorJointProbabilityHelper
{
std::size_t index;
double probability;
};

bool operator==(const UniformPriorJointProbabilityHelper& lhs, const UniformPriorJointProbabilityHelper& rhs) noexcept
{
return lhs.probability == rhs.probability;
}
bool operator!=(const UniformPriorJointProbabilityHelper& lhs, const UniformPriorJointProbabilityHelper& rhs) noexcept
{
return lhs.probability != rhs.probability;
}
bool operator<(const UniformPriorJointProbabilityHelper& lhs, const UniformPriorJointProbabilityHelper& rhs) noexcept
{
return lhs.probability < rhs.probability;
}
bool operator>(const UniformPriorJointProbabilityHelper& lhs, const UniformPriorJointProbabilityHelper& rhs) noexcept
{
return lhs.probability > rhs.probability;
}

auto reduce(std::vector<ParentsProbabilityPair>& zipped, const TrioModel::Options& options)
{
auto last_to_join = reduce(zipped, get_sample_reduction_count(options.max_joint_genotypes),
options.max_joint_mass_loss);
const auto reduction_count = get_sample_reduction_count(options.max_joint_genotypes);
auto last_to_join = reduce(zipped, reduction_count, options.max_joint_mass_loss);
if (last_to_join != std::cend(zipped)) {
std::vector<UniformPriorJointProbabilityHelper> likelihood_zipped(zipped.size());
for (std::size_t i {0}; i < zipped.size(); ++i) {
likelihood_zipped[i].index = i;
likelihood_zipped[i].probability = zipped[i].maternal_likelihood + zipped[i].paternal_likelihood;
}
likelihood_zipped.erase(reduce(likelihood_zipped, reduction_count, options.max_joint_mass_loss),
std::cend(likelihood_zipped));
std::deque<std::size_t> new_indices {};
const auto num_posterior_joined = static_cast<std::size_t>(std::distance(std::begin(zipped), last_to_join));
for (const auto& p : likelihood_zipped) {
if (p.index >= num_posterior_joined) {
new_indices.push_back(p.index);
}
}
likelihood_zipped.clear();
if (!new_indices.empty()) {
std::sort(std::begin(new_indices), std::end(new_indices));
auto curr_index = num_posterior_joined;
// Can use std::partition as it must do a single left-to-right pass due to ForwardIterator
// and complexity requirements
last_to_join = std::partition(last_to_join, std::end(zipped), [&] (const auto& p) {
if (!new_indices.empty() && curr_index++ == new_indices.front()) {
new_indices.pop_front();
return true;
} else {
return false;
}
});
assert(new_indices.empty());
}
}
return make_reduction_map(zipped, last_to_join, options);
}

Expand Down Expand Up @@ -343,17 +398,20 @@ auto join(const ReducedVectorMap<GenotypeRefProbabilityPair>& maternal,
result.reserve(join_size(maternal, paternal));
std::for_each(maternal.first, maternal.last_to_join, [&] (const auto& m) {
std::for_each(paternal.first, paternal.last_to_join, [&] (const auto& p) {
result.push_back({m.genotype, p.genotype, joint_probability(m, p, model), m.indices, p.indices});
result.push_back({m.genotype, p.genotype, joint_probability(m, p, model),
m.probability, m.probability, m.indices, p.indices});
});
});
std::for_each(maternal.last_to_join, maternal.last, [&] (const auto& m) {
std::for_each(paternal.first, paternal.last_to_partially_join, [&] (const auto& p) {
result.push_back({m.genotype, p.genotype, joint_probability(m, p, model), m.indices, p.indices});
result.push_back({m.genotype, p.genotype, joint_probability(m, p, model),
m.probability, m.probability, m.indices, p.indices});
});
});
std::for_each(paternal.last_to_join, paternal.last, [&] (const auto& p) {
std::for_each(maternal.first, maternal.last_to_partially_join, [&] (const auto& m) {
result.push_back({m.genotype, p.genotype, joint_probability(m, p, model), m.indices, p.indices});
result.push_back({m.genotype, p.genotype, joint_probability(m, p, model),
m.probability, m.probability, m.indices, p.indices});
});
});
return result;
Expand Down Expand Up @@ -586,7 +644,7 @@ auto join(const ReducedVectorMap<ParentsProbabilityPair>& parents,

}
} else if (child_ploidy == 3 && maternal_ploidy == 3 && paternal_ploidy == 3) {
return join(parents, child, ProbabilityOfChildGivenParents<2, 2, 2> {mutation_model});
return join(parents, child, ProbabilityOfChildGivenParents<3, 3, 3> {mutation_model});
}
throw std::runtime_error {"TrioModel: unimplemented joint probability function"};
}
Expand Down
Loading

0 comments on commit d6b85ba

Please sign in to comment.