Skip to content

Commit

Permalink
Merge pull request #70 from hammerlab/variant-sequence-combine-return…
Browse files Browse the repository at this point in the history
…-none

Changed VariantSequence.combine to return None instead of raising exception
  • Loading branch information
iskandr authored Nov 28, 2016
2 parents 89f95b5 + 17ace0e commit f30bf34
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 31 deletions.
2 changes: 1 addition & 1 deletion isovar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@

from __future__ import print_function, division, absolute_import

__version__ = "0.5.0"
__version__ = "0.5.1"
41 changes: 22 additions & 19 deletions isovar/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from collections import defaultdict
import logging

from six.moves import range

from .default_parameters import MIN_VARIANT_SEQUENCE_ASSEMBLY_OVERLAP_SIZE

logger = logging.getLogger(__name__)
Expand All @@ -27,7 +29,7 @@ def greedy_merge_helper(
"""
Returns a list of merged VariantSequence objects, and True if any
were successfully merged.
"""
"""
merged_variant_sequences = {}
merged_any = False

Expand All @@ -37,22 +39,20 @@ def greedy_merge_helper(
sequence1 = variant_sequences[i]
# it works to loop over the triangle (i+1 onwards) because combine() tries flipping the
# arguments if sequence1 is on the right of sequence2
for j in range(i+1, len(variant_sequences)):
for j in range(i + 1, len(variant_sequences)):
sequence2 = variant_sequences[j]
try:
s = sequence1.combine(sequence2)
if s.sequence in merged_variant_sequences:
existing_s = merged_variant_sequences[s.sequence]
# we may have already created the same sequence from another set of reads, in
# which case we need to merge the reads
if s.reads != existing_s.reads:
s = s.add_reads(existing_s.reads)
merged_variant_sequences[s.sequence] = s
unmerged_variant_sequences.discard(sequence1)
unmerged_variant_sequences.discard(sequence2)
merged_any = True
except ValueError:
combined = sequence1.combine(sequence2)
if combined is None:
continue
if combined.sequence in merged_variant_sequences:
existing = merged_variant_sequences[combined.sequence]
# the existing VariantSequence and the newly merged
# VariantSequence should differ only in which reads support them
combined = combined.add_reads(existing.reads)
merged_variant_sequences[combined.sequence] = combined
unmerged_variant_sequences.discard(sequence1)
unmerged_variant_sequences.discard(sequence2)
merged_any = True
result = list(merged_variant_sequences.values()) + list(unmerged_variant_sequences)
return result, merged_any

Expand All @@ -71,7 +71,7 @@ def greedy_merge(
while merged_any:
variant_sequences, merged_any = greedy_merge_helper(
variant_sequences,
MIN_VARIANT_SEQUENCE_ASSEMBLY_OVERLAP_SIZE)
min_overlap_size=min_overlap_size)
return variant_sequences

def collapse_substrings(variant_sequences):
Expand All @@ -85,12 +85,13 @@ def collapse_substrings(variant_sequences):
Returns a (potentially shorter) list without any contained subsequences.
"""
# dictionary mapping VariantSequence objects to lists of reads
# they absorb from substring VariantSequences
if len(variant_sequences) <= 1:
# if we don't have at least two VariantSequences then just
# return your input
return variant_sequences

# dictionary mapping VariantSequence objects to lists of reads
# they absorb from substring VariantSequences
extra_reads_from_substrings = defaultdict(set)
result_list = []
# sort by longest to shortest total length
Expand Down Expand Up @@ -139,4 +140,6 @@ def iterative_overlap_assembly(
n_after_collapse)

merged_variant_sequences = greedy_merge(variant_sequences, min_overlap_size)
return list(sorted(merged_variant_sequences, key=lambda seq: -len(seq.reads)))
return list(sorted(
merged_variant_sequences,
key=lambda seq: -len(seq.reads)))
29 changes: 18 additions & 11 deletions isovar/variant_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,19 @@ def combine(self, other_sequence):
them into a single VariantSequence object. If the other sequence
is contained in this one, then add its reads to this VariantSequence.
Also tries to flip the order (e.g. this sequence is a suffix or
this sequence is a subsequence).
this sequence is a subsequence). If sequences can't be combined
then returns None.
"""
if other_sequence.alt != self.alt:
raise ValueError(
"Cannot combine %s and %s with mismatching alt sequences" % (
self,
other_sequence))
logger.warn(
"Cannot combine %s and %s with mismatching alt sequences",
self,
other_sequence)
return None
elif self.contains(other_sequence):
return self.add_reads(other_sequence.reads)
elif other_sequence.contains(self):
return other_sequence.add_reads(self.reads)
elif self.left_overlaps(other_sequence):
# If sequences are like AABC and ABCC
return VariantSequence(
Expand All @@ -153,13 +159,14 @@ def combine(self, other_sequence):
suffix=other_sequence.suffix,
reads=self.reads.union(other_sequence.reads))
elif other_sequence.left_overlaps(self):
return other_sequence.combine(self)
elif self.contains(other_sequence):
return self.add_reads(other_sequence.reads)
elif other_sequence.contains(self):
return other_sequence.add_reads(self.reads)
return VariantSequence(
prefix=other_sequence.prefix,
alt=self.alt,
suffix=self.suffix,
reads=self.reads.union(other_sequence.reads))
else:
raise ValueError("%s does not overlap with %s" % (self, other_sequence))
# sequences don't overlap
return None

def variant_indices(self):
"""
Expand Down

0 comments on commit f30bf34

Please sign in to comment.