Skip to content

Commit

Permalink
[estimate] gracefully handle cases where time point heuristic fails. …
Browse files Browse the repository at this point in the history
…fixes #79
  • Loading branch information
terhorst committed Mar 31, 2018
1 parent 39f8164 commit e7b26f9
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
6 changes: 6 additions & 0 deletions smcpp/analysis/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def __init__(self, files, args):
pipe.add_filter(data_filter.RecodeMonomorphic())
pipe.add_filter(data_filter.Compress())
pipe.add_filter(data_filter.Validate())
pipe.add_filter(data_filter.DropUninformativeContigs())
pipe.add_filter(data_filter.Summarize())

if self.npop != 1:
Expand All @@ -45,6 +46,11 @@ def __init__(self, files, args):
if args.timepoints == "h":
mc = self._pipeline['mutation_counts'].counts
w = self._pipeline['mutation_counts'].w
if np.all(mc == 0):
logger.error("Heuristic used to calculate time points has failed, "
"possibly due to having a lot of missing data. Please "
"set the --timepoints option manually.")
raise RuntimeError()
q = smcpp.beta_de.quantile(mc / w, 1. / w, np.geomspace(1e-2, .99, args.knots))
tau = q / (2. * self._theta)
self._knots = tau
Expand Down
22 changes: 21 additions & 1 deletion smcpp/data_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,14 +205,34 @@ def run(self, contigs):
for cc in estimation_tools.break_long_spans(c, self.cutoff)]


@attr.s
class DropUninformativeContigs(Filter):
def _n_variable_sites(self, c):
d = c.data
return ((d[:, 1::3].sum(axis=1) > 0) | (d[:, 2::3].sum(axis=1) > 0)).sum()

def run(self, contigs):
ret = []
for c in contigs:
if self._n_variable_sites(c):
ret.append(c)
else:
logger.debug("Dropping a contig derived from %s which has no mutations.", c.fn)
if len(ret) == 0:
logger.error("No contigs have mutation data. Inference is impossible.")
raise RuntimeError()
return ret


@attr.s
class DropSmallContigs(Filter):
cutoff = attr.ib()

def run(self, contigs):
ret = [c for c in contigs if len(c) > self.cutoff]
if len(ret) == 0:
logger.error("All contigs are <.01cM (estimated). Please double check your data")
logger.error("All contigs are <.01cM (estimated). "
"Please double check your data.")
raise RuntimeError()
return ret

Expand Down

0 comments on commit e7b26f9

Please sign in to comment.