Skip to content

Commit

Permalink
Move utilities for working with subpaths/base paths somewhere more av…
Browse files Browse the repository at this point in the history
…ailable
  • Loading branch information
adamnovak committed Jan 17, 2025
1 parent b52da6d commit d2baa7e
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 71 deletions.
70 changes: 0 additions & 70 deletions src/hts_alignment_emitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,76 +94,6 @@ unique_ptr<AlignmentEmitter> get_alignment_emitter(const string& filename, const
return emitter;
}

/// Run the given iteratee for each path that is either the path with the given
/// name (if present), or a subrange of a path with the given name as the base
/// name (otherwise).
///
/// If a path and subpaths both exist, only look at the full path.
///
/// If the name describes a subpath, look only at that subpath.
///
/// Iteratee returns false to stop.
///
/// Returns true if we reached the end, and false if asked to stop.
static bool for_each_subpath_of(const PathPositionHandleGraph& graph, const string& path_name, const std::function<bool(const path_handle_t& path)>& iteratee) {
if (graph.has_path(path_name)) {
// Just look at the full path.
return iteratee(graph.get_path_handle(path_name));
}

// Parse out the metadata of the thing we want subpaths of
PathSense sense;
string sample;
string locus;
size_t haplotype;
size_t phase_block;
subrange_t subrange;
PathMetadata::parse_path_name(path_name,
sense,
sample,
locus,
haplotype,
phase_block,
subrange);

if (subrange != PathMetadata::NO_SUBRANGE) {
// The path name described a subpath, and we didn't find it.
// Don't call the iteratee.
return true;
}

// Look at every subpath on it
return graph.for_each_path_matching({sense}, {sample}, {locus}, [&](const path_handle_t& match) {
// TODO: There's no way to search by haplotype and phase block, we have to scan
if (graph.get_haplotype(match) != haplotype) {
// Skip this haplotype
return true;
}
if (graph.get_phase_block(match) != phase_block) {
// Skip this phase block
return true;
}
// Don't need to check subrange, we know we don't have one and this candidate does.
return iteratee(match);
});
}

/// Returns the name of the path this path is a piece of: the path's own name
/// when it has no subrange, and otherwise the same name with the subrange
/// removed.
static string get_path_base_name(const PathPositionHandleGraph& graph, const path_handle_t& path) {
    const bool is_full_path = (graph.get_subrange(path) == PathMetadata::NO_SUBRANGE);
    if (is_full_path) {
        // Nothing to strip; the stored name is already the base name.
        return graph.get_path_name(path);
    }

    // Subpath: rebuild the name from the path's metadata, leaving out the subrange.
    return PathMetadata::create_path_name(graph.get_sense(path),
                                          graph.get_sample_name(path),
                                          graph.get_locus_name(path),
                                          graph.get_haplotype(path),
                                          graph.get_phase_block(path),
                                          PathMetadata::NO_SUBRANGE);
}

pair<vector<pair<string, int64_t>>, unordered_map<string, int64_t>> extract_path_metadata(
const vector<tuple<path_handle_t, size_t, size_t>>& paths, const PathPositionHandleGraph& graph,
bool subpath_support) {
Expand Down
60 changes: 59 additions & 1 deletion src/path.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ const std::function<bool(const string&)> Paths::is_alt = [](const string& path_n
// But std::regex was taking loads and loads of time (probably matching .+) so we're replacing it with special-purpose code.

string prefix("_alt_");

if (path_name.length() < prefix.length() || !std::equal(prefix.begin(), prefix.end(), path_name.begin())) {
// We lack the prefix
return false;
Expand Down Expand Up @@ -2531,6 +2530,65 @@ Alignment alignment_from_path(const HandleGraph& graph, const Path& path) {
return aln;
}

bool for_each_subpath_of(const PathPositionHandleGraph& graph, const string& path_name, const std::function<bool(const path_handle_t& path)>& iteratee) {
if (graph.has_path(path_name)) {
// Just look at the full path.
return iteratee(graph.get_path_handle(path_name));
}

// Parse out the metadata of the thing we want subpaths of
PathSense sense;
string sample;
string locus;
size_t haplotype;
size_t phase_block;
subrange_t subrange;
PathMetadata::parse_path_name(path_name,
sense,
sample,
locus,
haplotype,
phase_block,
subrange);

if (subrange != PathMetadata::NO_SUBRANGE) {
// The path name described a subpath, and we didn't find it.
// Don't call the iteratee.
return true;
}

// Look at every subpath on it
return graph.for_each_path_matching({sense}, {sample}, {locus}, [&](const path_handle_t& match) {
// TODO: There's no way to search by haplotype and phase block, we have to scan
if (graph.get_haplotype(match) != haplotype) {
// Skip this haplotype
return true;
}
if (graph.get_phase_block(match) != phase_block) {
// Skip this phase block
return true;
}
// Don't need to check subrange, we know we don't have one and this candidate does.
return iteratee(match);
});
}

// Produces the path's name with any subrange removed.
std::string get_path_base_name(const PathPositionHandleGraph& graph, const path_handle_t& path) {
    if (graph.get_subrange(path) == PathMetadata::NO_SUBRANGE) {
        // Already a full path, so its name is the base name.
        return graph.get_path_name(path);
    }

    // The path is a subpath. Collect the metadata that identifies the path it
    // was cut from, then compose a name from it without a subrange.
    const auto sense = graph.get_sense(path);
    const auto sample = graph.get_sample_name(path);
    const auto locus = graph.get_locus_name(path);
    const auto haplotype = graph.get_haplotype(path);
    const auto phase_block = graph.get_phase_block(path);
    return PathMetadata::create_path_name(sense,
                                          sample,
                                          locus,
                                          haplotype,
                                          phase_block,
                                          PathMetadata::NO_SUBRANGE);
}


void from_proto_edit(const Edit& proto_edit, edit_t& edit) {
edit.set_from_length(proto_edit.from_length());
edit.set_to_length(proto_edit.to_length());
Expand Down
21 changes: 21 additions & 0 deletions src/path.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,27 @@ Path path_from_path_handle(const PathHandleGraph& graph, path_handle_t path_hand
// Wrap a Path in an Alignment
Alignment alignment_from_path(const HandleGraph& graph, const Path& path);

////
// Functions for working with path subranges.
// TODO: Move to libhandlegraph
////

/// Run the given iteratee for each path that is either the path with the given
/// name (if present), or a subrange of a path with the given name as the base
/// name (otherwise).
///
/// If a path and subpaths both exist, only look at the full path.
///
/// If the name describes a subpath, look only at that subpath. If that exact
/// subpath is not in the graph, the iteratee is never called and the result
/// is true.
///
/// Subpaths are found by matching the sense, sample, and locus parsed from
/// the name, and then filtering on haplotype and phase block.
///
/// Iteratee returns false to stop.
///
/// Returns true if we reached the end, and false if asked to stop.
bool for_each_subpath_of(const PathPositionHandleGraph& graph, const string& path_name, const std::function<bool(const path_handle_t& path)>& iteratee);

/// Returns the base path name for this path (i.e. the path's name without any subrange).
///
/// For a path with no subrange this is the path's own name. For a subpath it
/// is the name built from the path's sense, sample, locus, haplotype, and
/// phase block, with no subrange.
std::string get_path_base_name(const PathPositionHandleGraph& graph, const path_handle_t& path);


/*
* STL implementations of the protobuf object for use in in-memory operations
Expand Down

0 comments on commit d2baa7e

Please sign in to comment.