diff --git a/setup.py b/setup.py
index 9b0046f..dae67b6 100644
--- a/setup.py
+++ b/setup.py
@@ -40,7 +40,7 @@
 setup(
     name='varcode',
     packages=find_packages(),
-    version="0.3.19",
+    version="0.4.0",
     description="Variant annotation in Python",
     long_description=readme,
     url="https://github.com/hammerlab/varcode",
@@ -62,6 +62,5 @@
         'biopython>=1.64',
         'pyvcf>=0.6.7',
         'memoized_property>=1.0.2',
-        'pysam>=0.8.3',
     ],
 )
diff --git a/test/data/reads/gatk_mini_bundle_extract.bam b/test/data/reads/gatk_mini_bundle_extract.bam
deleted file mode 100644
index 0385e19..0000000
Binary files a/test/data/reads/gatk_mini_bundle_extract.bam and /dev/null differ
diff --git a/test/data/reads/gatk_mini_bundle_extract.bam.bai b/test/data/reads/gatk_mini_bundle_extract.bam.bai
deleted file mode 100644
index c408fe9..0000000
Binary files a/test/data/reads/gatk_mini_bundle_extract.bam.bai and /dev/null differ
diff --git a/test/data/reads/rna_chr17_41244936.bam b/test/data/reads/rna_chr17_41244936.bam
deleted file mode 100644
index 0b970b8..0000000
Binary files a/test/data/reads/rna_chr17_41244936.bam and /dev/null differ
diff --git a/test/data/reads/rna_chr17_41244936.bam.bai b/test/data/reads/rna_chr17_41244936.bam.bai
deleted file mode 100644
index 7afd1fb..0000000
Binary files a/test/data/reads/rna_chr17_41244936.bam.bai and /dev/null differ
diff --git a/test/read_evidence_performance.py b/test/read_evidence_performance.py
deleted file mode 100644
index 16e96ad..0000000
--- a/test/read_evidence_performance.py
+++ /dev/null
@@ -1,54 +0,0 @@
-"""
-Simple script to measure performance of the read_evidence module.
-
-Usage:
-    %(prog)s /path/to/reads.bam ... --num-loci-per-region X
-
-Example:
-    %(prog)s my.bam 8:101723999-101725999 --num-loci-per-region 2000
-
-"""
-import argparse
-import sys
-import random
-import time
-
-import varcode
-import varcode.read_evidence
-from varcode.locus import Locus
-
-PARSER = argparse.ArgumentParser(usage=__doc__)
-PARSER.add_argument("bam_path")
-PARSER.add_argument("regions", nargs="+")
-PARSER.add_argument("--num-loci-per-region", type=int, default=100)
-
-def parse_region(s):
-    (contig, rest) = s.split(":")
-    (start, end) = rest.split("-")
-    return varcode.Locus.from_inclusive_coordinates(
-        contig, int(start), int(end))
-
-def go(argv):
-    args = PARSER.parse_args(argv)
-
-    loci_regions = [parse_region(s) for s in args.regions]
-
-    loci = []
-    for region in loci_regions:
-        new_loci = random.sample(
-            range(region.start, region.end), args.num_loci_per_region)
-        loci.extend(
-            Locus.from_inclusive_coordinates(region.contig, locus)
-            for locus in new_loci)
-
-    print("Loading pileups for %d loci." % len(loci))
-
-    start = time.time()
-    varcode.read_evidence.PileupCollection.from_bam(args.bam_path, loci)
-    elapsed = time.time() - start
-
-    print("Read pileups for %d loci in %.2f seconds = %f loci / sec" % (
-        len(loci), elapsed, len(loci) / elapsed))
-
-if __name__ == '__main__':
-    go(sys.argv[1:])
diff --git a/test/test_read_evidence.py b/test/test_read_evidence.py
deleted file mode 100644
index 4121e46..0000000
--- a/test/test_read_evidence.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import absolute_import
-
-import collections
-from nose.tools import eq_, assert_raises
-import pysam
-from varcode import Variant as VarcodeVariant
-from varcode.read_evidence import PileupCollection
-from varcode import Locus
-from pysam.csamfile import Samfile
-from .data import data_path
-
-# This will be moved into mainline varcode soon. For now, however,
-# these tests use their own simple Variant class.
-Variant = collections.namedtuple("Variant", "locus ref alt")
-
-def filtered_read_names(filtered_evidence):
-    assert filtered_evidence.parent is not None
-    full = set(filtered_evidence.parent.read_attribute('query_name'))
-    filtered = set(filtered_evidence.read_attribute('query_name'))
-    assert filtered - full == set()
-    return full - filtered
-
-def test_read_evidence_rna1_single_base_loci():
-    loci = [
-        Locus.from_inclusive_coordinates("17", 41244936, 41244936), # 0
-        Locus.from_inclusive_coordinates("17", 41244937, 41244937), # 1
-        Locus.from_inclusive_coordinates("17", 41244935, 41244935), # 2
-        Locus.from_inclusive_coordinates("17", 41244933, 41244933), # 3
-        Locus.from_inclusive_coordinates("17", 41244853, 41244853), # 4
-        Locus.from_inclusive_coordinates("17", 41244857, 41244857), # 5
-        Locus.from_inclusive_coordinates("17", 41244864, 41244864), # 6
-        Locus.from_inclusive_coordinates("17", 41244879, 41244879), # 7
-        Locus.from_inclusive_coordinates("17", 41244901, 41244901), # 8
-        Locus.from_inclusive_coordinates("17", 41244910, 41244910), # 9
-        Locus.from_inclusive_coordinates("17", 41244917, 41244917), # 10
-        Locus.from_inclusive_coordinates("17", 41244972, 41244972), # 11
-        Locus.from_inclusive_coordinates("17", 41244973, 41244973), # 12
-        Locus.from_inclusive_coordinates("17", 41245026, 41245026), # 13
-        Locus.from_inclusive_coordinates("17", 41245027, 41245027), # 14
-        Locus.from_inclusive_coordinates("17", 41245019, 41245019), # 15
-        Locus.from_inclusive_coordinates("17", 41245018, 41245018), # 16
-    ]
-    evidence = PileupCollection.from_bam(
-        data_path("reads/rna_chr17_41244936.bam"), loci)
-
-    eq_(evidence.allele_summary(loci[0]), [("A", 11), ("G", 7)])
-    eq_(evidence.allele_summary(loci[1]), [("G", 17), ("A", 1)])
-    eq_(evidence.allele_summary(loci[2]), [("C", 18)])
-    eq_(evidence.allele_summary(loci[3]), [("A", 17), ("G", 1)])
-    eq_(evidence.allele_summary(loci[4]), [("C", 1)])
-    eq_(evidence.allele_summary(loci[5]), [("T", 2)])
-    eq_(evidence.allele_summary(loci[6]), [("T", 4)])
-    eq_(evidence.allele_summary(loci[7]), [("C", 8)])
-    eq_(evidence.allele_summary(loci[8]), [("C", 8)])
-    eq_(evidence.allele_summary(loci[9]), [("C", 9)])
-    eq_(evidence.allele_summary(loci[10]), [("A", 10)])
-    eq_(evidence.allele_summary(loci[11]), [("T", 11)])
-    eq_(evidence.allele_summary(loci[12]), [("T", 11)])
-    eq_(evidence.allele_summary(loci[13]), [("C", 1)])
-    eq_(evidence.allele_summary(loci[14]), [("G", 1)])
-    eq_(evidence.allele_summary(loci[15]), [("T", 8)])
-    eq_(evidence.allele_summary(loci[16]), [("T", 8)])
-
-def test_read_evidence_rna1_multi_base_loci():
-    loci = [
-        Locus.from_inclusive_coordinates("17", 41244853, 41244854), # 0
Locus.from_inclusive_coordinates("17", 41244853, 41244857), # 1 - Locus.from_inclusive_coordinates("17", 41244854, 41244857), # 2 - Locus.from_inclusive_coordinates("17", 41244852, 41244857), # 3 - Locus.from_inclusive_coordinates("17", 41244933, 41244936), # 4 - Locus.from_inclusive_coordinates("17", 41244933, 41244937), # 5 - Locus.from_inclusive_coordinates("17", 41244971, 41244973), # 6 - Locus.from_inclusive_coordinates("17", 41265063, 41265067), # 7 - ] - evidence = PileupCollection.from_bam( - data_path("reads/rna_chr17_41244936.bam"), loci) - eq_(evidence.allele_summary(loci[0]), [("CT", 1)]) - eq_(evidence.allele_summary(loci[1]), [("CTTTT", 1)]) - eq_(evidence.allele_summary(loci[2]), [("TTTT", 1)]) - eq_(evidence.allele_summary(loci[3]), []) - eq_(evidence.allele_summary(loci[4]), - [("AACA", 11), ("AACG", 6), ("GACG", 1)]) - eq_(evidence.allele_summary(loci[5]), - [("AACAG", 10), ("AACGG", 6), ("AACAA", 1), ("GACGG", 1)]) - eq_(evidence.allele_summary(loci[6]), [("ATT", 11)]) - eq_(evidence.allele_summary(loci[7]), [("ACCCG", 1)]) - -def test_read_evidence_gatk_mini_bundle_extract(): - loci = [ - Locus.from_inclusive_coordinates("20", 9999996, 9999996), # 0 - Locus.from_inclusive_coordinates("20", 10260442), # 1 - Locus.from_inclusive_coordinates("20", 10006823), # 2 - Locus.from_inclusive_coordinates("20", 10006819, 10006823), # 3 - Locus.from_inclusive_coordinates("20", 10006819, 10006825), # 4 - Locus.from_inclusive_coordinates("20", 10006822, 10006827), # 5 - Locus.from_inclusive_coordinates("20", 10007175), # 6 - Locus.from_inclusive_coordinates("20", 10007174, 10007176), # 7 - Locus.from_inclusive_coordinates("20", 1, 3), # 8 - Locus.from_inclusive_coordinates("20", 10008796), # 9 - Locus.from_inclusive_coordinates("20", 10008921), # 10 - ] - handle = Samfile(data_path("reads/gatk_mini_bundle_extract.bam")) - evidence = PileupCollection.from_bam(handle, loci) - - eq_(evidence.allele_summary(loci[0]), [("ACT", 9)]) - eq_(evidence.filter(drop_duplicates=True).allele_summary(loci[0]), - [("ACT", 8)]) - eq_(evidence.allele_summary(loci[1]), [("T", 7)]) - eq_(evidence.filter().allele_summary(loci[2]), [("", 6), ("C", 2)]) - eq_(evidence.filter( - drop_duplicates=True, min_base_quality=50).allele_summary(loci[2]), - []) - eq_(evidence.filter(drop_duplicates=True).allele_summary(loci[2]), - [("", 5), ("C", 1)]) - eq_(evidence.filter( - drop_duplicates=True, min_mapping_quality=60).allele_summary( - loci[2]), - [("", 5), ("C", 1)]) - eq_(evidence.filter(drop_duplicates=True, - min_mapping_quality=61).allele_summary(loci[2]), [("", 2)]) - eq_(evidence.filter(drop_duplicates=True, - min_mapping_quality=61).allele_summary(loci[3]), [("A", 2)]) - eq_(evidence.filter(drop_duplicates=True, - min_mapping_quality=61).allele_summary(loci[4]), [("AAA", 2)]) - eq_(evidence.filter(drop_duplicates=True, - min_mapping_quality=61).allele_summary(loci[5]), [("AAAC", 2)]) - eq_(evidence.filter().allele_summary(loci[6]), [("T", 5), ("C", 3)]) - eq_(evidence.filter(min_base_quality=30).allele_summary(loci[6]), - [("T", 4), ("C", 3)]) - eq_(evidence.filter().allele_summary(loci[7]), - [("CTT", 5), ("CCT", 3)]) - eq_(evidence.filter(min_base_quality=30).allele_summary(loci[7]), - [("CTT", 3), ("CCT", 2)]) - eq_(evidence.filter(min_base_quality=32).allele_summary(loci[2]), - [("", 6), ("C", 1)]) - eq_(filtered_read_names(evidence.at(loci[2]).filter(min_base_quality=32)), - {'20GAVAAXX100126:4:3:18352:43857'}) - eq_(evidence.allele_summary(loci[8]), []) - 
-    eq_(evidence.filter(drop_duplicates=True).allele_summary(loci[8]), [])
-    assert_raises(KeyError,
-        evidence.allele_summary,
-        Locus.from_inclusive_coordinates("20", 10009174, 10009176))
-    eq_(filtered_read_names(
-        evidence.at(loci[9]).filter(drop_improper_mate_pairs=True)),
-        {'20FUKAAXX100202:8:68:1530:49310'})
-    eq_(len(evidence.at(loci[8]).read_attribute('mapping_quality')), 0)
-    eq_(list(evidence.at(loci[9]).read_attribute('mapping_quality')),
-        list(evidence.at(loci[9]).read_attributes().mapping_quality))
-    eq_(evidence.filter(drop_duplicates=True).allele_summary(loci[10]),
-        [('C', 2), ('CA', 1), ('CAA', 1)])
-    eq_(evidence.filter(drop_duplicates=True).allele_summary(
-        Locus.from_interbase_coordinates(
-            loci[10].contig, loci[10].start, loci[10].start)),
-        [('', 2), ('A', 1), ('AA', 1)])
-
-
-def test_read_evidence_variant_matching_gatk_mini_bundle_extract():
-    handle = Samfile(data_path("reads/gatk_mini_bundle_extract.bam"))
-
-    loci = [
-        Locus.from_inclusive_coordinates("20", 10008951), # 0
-        Locus.from_inclusive_coordinates("20", 10009053), # 1
-        Locus.from_inclusive_coordinates("20", 10009053, 10009054), # 2
-        Locus.from_inclusive_coordinates("20", 10006822), # 3
-        Locus.from_inclusive_coordinates("20", 10006822, 10006823), # 4
-
-    ]
-    evidence = PileupCollection.from_bam(handle, loci)
-
-    eq_(evidence.match_summary(Variant(loci[0], "A", "C")),
-        [('A', 1), ('C', 4)])
-    eq_(evidence.filter(drop_duplicates=True).match_summary(
-        Variant(loci[0], "A", "C")),
-        [('A', 0), ('C', 3)])
-    eq_(evidence.match_summary(Variant(loci[1], "A", "C")),
-        [('A', 3), ('C', 0)])
-    eq_(evidence.match_summary(Variant(loci[1], "A", "CC")),
-        [('A', 3), ('CC', 0)])
-    eq_(evidence.match_summary(Variant(loci[1], "A", "")),
-        [('A', 3), ('', 0)])
-    eq_(evidence.match_summary(Variant(loci[1], "A", "")),
-        [('A', 3), ('', 0)])
-    eq_(evidence.match_summary(Variant(loci[2], "AT", "")),
-        [('AT', 3), ('', 0)])
-    eq_(evidence.match_summary(Variant(loci[3], "A", "")),
-        [('A', 2), ('', 6)])
-    eq_(evidence.match_summary(Variant(loci[4], "AC", "")),
-        [('AC', 2), ('', 6)])
-    eq_(evidence.match_summary(
-        Variant(loci[4], "AC", ""),
-        lambda e: e.read_attributes().mapping_quality.mean()),
-        [('AC', 60.0), ('', 65.0)])
-
-def test_read_evidence_variant_matching_gatk_bundle_native_varcode_variant():
-    # Try native varcode Variant.
-    handle = Samfile(data_path("reads/gatk_mini_bundle_extract.bam"))
-    locus = Locus.from_inclusive_coordinates("20", 10008951)
-    variant = VarcodeVariant(
-        locus.contig,
-        locus.position + 1, # inclusive not interbase
-        "A",
-        "C")
-    evidence = PileupCollection.from_bam(handle, [variant])
-    eq_(evidence.match_summary(variant),
-        [('A', 1), ('C', 4)])
-
-
-def test_read_evidence_variant_matching_gatk_mini_bundle_extract_warning():
-    filename = data_path("reads/gatk_mini_bundle_extract.bam")
-
-    # Should log a warning but pass.
-    loci = [
-        Locus.from_inclusive_coordinates("20", 10009053, 10009054), # 0
-    ]
-    evidence = PileupCollection.from_bam(filename, loci)
-    eq_(evidence.match_summary(Variant(loci[0], "A", "")),
-        [('A', 0), ('', 0), ('AT', 3)])
-
-
diff --git a/varcode/__init__.py b/varcode/__init__.py
index 582b64a..915b3d4 100644
--- a/varcode/__init__.py
+++ b/varcode/__init__.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .locus import Locus
 from .variant import Variant
 from .collection import Collection
 from .effect_collection import EffectCollection
@@ -52,7 +51,6 @@
 
 __all__ = [
     # basic classes
-    "Locus",
     "Variant",
     "Collection",
    "EffectCollection",
diff --git a/varcode/locus.py b/varcode/locus.py
deleted file mode 100644
index d758f23..0000000
--- a/varcode/locus.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import typechecks
-from collections import namedtuple
-
-class Locus(namedtuple("Locus", "contig start end")):
-    '''
-    A genomic interval in 0-indexed interbase coordinates.
-
-    See this blog post for a discussion on coordinate systems:
-    http://alternateallele.blogspot.com/2012/03/genome-coordinate-conventions.html
-    '''
-
-    @property
-    def positions(self):
-        '''
-        A Python range object giving the bases included in this locus.
-        '''
-        return range(self.start, self.end)
-
-    @property
-    def position(self):
-        '''
-        If this locus spans a single base, this property gives that position.
-        Otherwise, raises a ValueError.
-        '''
-        if self.end != self.start + 1:
-            raise ValueError("Not a single base: %s" % str(self))
-        return self.start
-
-    # Factory functions.
-    @staticmethod
-    def from_inclusive_coordinates(contig, start, end=None):
-        '''
-        Given 1-based coordinates that are inclusive on start and end, return
-        a Locus instance. Locus instances always use 0-based "interbase"
-        coordinates.
-        '''
-        typechecks.require_string(contig)
-        typechecks.require_integer(start)
-        if end is None:
-            end = start
-        typechecks.require_integer(end)
-        return Locus(contig, start - 1, end)
-
-    @staticmethod
-    def from_interbase_coordinates(contig, start, end=None):
-        '''
-        Given coordinates in 0-based interbase coordinates, return a Locus
-        instance.
-        '''
-        typechecks.require_string(contig)
-        typechecks.require_integer(start)
-        if end is None:
-            end = start + 1
-        typechecks.require_integer(end)
-        return Locus(contig, start, end)
diff --git a/varcode/read_evidence/__init__.py b/varcode/read_evidence/__init__.py
deleted file mode 100644
index c89c94c..0000000
--- a/varcode/read_evidence/__init__.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-'''
-This subpackage provides functionality for collecting and filtering aligned
-sequencing reads from a BAM file, determining the alleles they suggest at
-a locus, and assessing the evidence for particular variants.
-
-In this subpackage, the records stored in the BAM file are referred to as
-"alignments," whereas the term "read" may be more familiar. We use the term
-"alignment" for consistency with the SAM specification, and since an
-individual read from the sequencer may generate any number of alignments in
-the case of chimeric alignments and secondary alignments.
-'''
-
-from .util import alignment_key, read_key
-from .pileup import Pileup
-from .pileup_element import PileupElement
-from .pileup_collection import PileupCollection
-
-__all__ = [
-    "PileupCollection",
-    "Pileup",
-    "PileupElement",
-    "alignment_key",
-    "read_key",
-]
diff --git a/varcode/read_evidence/pileup.py b/varcode/read_evidence/pileup.py
deleted file mode 100644
index 6966e9e..0000000
--- a/varcode/read_evidence/pileup.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-
-from collections import OrderedDict
-
-
-class Pileup(object):
-    '''
-    A Pileup is a collection of PileupElement instances at a particular locus.
-
-    Attributes
-    ----------
-    locus : Varcode.Locus
-        The reference locus. Must be length 1, i.e. a single base.
-
-    elements : OrderedDict of PileupElement instances
-        This is logically an ordered set, which we implement as an OrderedDict
-        with all values mapping to None.
-    '''
-    def __init__(self, locus, elements):
-        '''
-        Construct a new Pileup.
-
-        Parameters
-        ----------
-        locus : Varcode.Locus
-            The reference locus. Must be length 1, i.e. a single base.
-
-        elements : iterable of PileupElement
-            The pileup elements. The locus field of these instances must
-            match the locus parameter.
-        '''
-        self.locus = locus
-        self.elements = OrderedDict((e, None) for e in elements)
-        assert all(e.locus == self.locus for e in self.elements)
-
-    def __iter__(self):
-        return iter(self.elements)
-
-    def __len__(self):
-        return len(self.elements)
-
-    def append(self, element):
-        '''
-        Append a PileupElement to this Pileup. If an identical PileupElement is
-        already part of this Pileup, do nothing.
-        '''
-        assert element.locus == self.locus, (
-            "Element locus (%s) != Pileup locus (%s)"
-            % (element.locus, self.locus))
-        self.elements[element] = None
-
-    def update(self, other):
-        '''
-        Add all pileup elements from other into self.
-        '''
-        assert self.locus == other.locus
-        self.elements.update(other.elements)
-
-    def filter(self, filters):
-        '''
-        Apply filters to the pileup elements, and return a new Pileup with the
-        filtered elements removed.
-
-        Parameters
-        ----------
-        filters : list of PileupElement -> bool callables
-            A PileupElement is retained if all filters return True when
-            called on it.
-        '''
-        new_elements = [
-            e for e in self.elements
-            if all(function(e) for function in filters)]
-        return Pileup(self.locus, new_elements)
diff --git a/varcode/read_evidence/pileup_collection.py b/varcode/read_evidence/pileup_collection.py
deleted file mode 100644
index f301faa..0000000
--- a/varcode/read_evidence/pileup_collection.py
+++ /dev/null
@@ -1,624 +0,0 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-
-from collections import namedtuple, defaultdict, OrderedDict
-import logging
-import itertools
-import re
-
-import pandas
-import pysam
-from pysam.csamfile import Samfile
-import typechecks
-import pyensembl
-
-from .. import Locus
-from . import Pileup, PileupElement, alignment_key, read_key
-
-MatchingEvidence = namedtuple("MatchingEvidence", "ref alt other")
-
-class PileupCollection(object):
-    '''
-    A collection of Pileup instances for some loci.
-
-    Attributes
-    ----------
-    pileups (optional) : dict of Locus -> Pileup
-        A map from 1-base Locus instances to the Pileup at that locus.
-
-    parent (optional) : PileupCollection
-        For PileupCollection instances that are derived (e.g. by filtering)
-        from another instance, this attribute tracks the parent instance.
-    '''
-    def __init__(self, pileups=None, parent=None):
-        '''
-        Construct a new PileupCollection.
-        '''
-        if pileups is None:
-            pileups = {}
-        self.pileups = pileups
-        self.parent = parent
-
-    def pileup(self, locus):
-        '''
-        Given a 1-base locus, return the Pileup at that locus.
-
-        Raises a KeyError if this PileupCollection does not have a Pileup at
-        the specified locus.
-        '''
-        locus = to_locus(locus)
-        if len(locus.positions) != 1:
-            raise ValueError("Not a single-base locus: %s" % locus)
-        return self.pileups[locus]
-
-    def at(self, *loci):
-        '''
-        Return a new PileupCollection instance including only pileups for
-        the specified loci.
-        '''
-        loci = [to_locus(obj) for obj in loci]
-        single_position_loci = []
-        for locus in loci:
-            for position in locus.positions:
-                single_position_loci.append(
-                    Locus.from_interbase_coordinates(locus.contig, position))
-        pileups = dict(
-            (locus, self.pileups[locus]) for locus in single_position_loci)
-        return PileupCollection(pileups, self)
-
-    def loci(self):
-        '''
-        Returns the loci included in this instance.
-        '''
-        return list(self.pileups)
-
-    def reads(self):
-        '''
-        The reads in this PileupCollection. All reads will have an alignment
-        that overlaps at least one of the included loci.
-
-        Since SAM (and pysam) have no real notion of a "read", the returned
-        instances are actually pysam.AlignedSegment instances, (i.e.
-        alignments). However, only one alignment will be returned by this
-        method per read.
-
-        Returns
-        ----------
-        List of pysam.AlignedSegment instances. If a particular read has more
-        than one alignment in this PileupCollection (e.g. one primary and one
-        secondary), then the alignment returned is the one with the highest
-        mapping quality.
-        '''
-        # TODO: Optimize this.
-
-        def alignment_precedence(pysam_alignment_record):
-            return pysam_alignment_record.mapping_quality
-
-        result = {}
-        for pileup in self.pileups.values():
-            for e in pileup.elements:
-                key = read_key(e.alignment)
-                if key not in result or (
-                        alignment_precedence(e.alignment) >
-                        alignment_precedence(result[key])):
-                    result[key] = e.alignment
-        return list(result.values())
-
-    def num_reads(self):
-        '''
-        Returns the number of reads in this PileupCollection.
-        '''
-        return len(self.reads())
-
-    def read_attribute(self, attribute):
-        '''
-        Query a read attribute across all reads in this PileupCollection.
-
-        Parameters
-        ----------
-        attribute : string
-            Attribute to query. See `PileupCollection.read_attributes` for
-            possible attributes.
-
-        Returns
-        ----------
-        pandas.Series instance giving values for the attribute in each read.
-        The order of reads is fixed, so multiple calls to this function will
-        return corresponding values.
-        '''
-        return self.read_attributes([attribute])[attribute]
-
-    def read_attributes(self, attributes=None):
-        '''
-        Collect read attributes across reads in this PileupCollection into a
-        pandas.DataFrame.
-
-        Valid attributes are the following properties of a pysam.AlignedSegment
-        instance. See:
-        http://pysam.readthedocs.org/en/latest/api.html#pysam.AlignedSegment
-        for the meaning of these attributes.
-
-        * cigarstring
-        * flag
-        * inferred_length
-        * is_duplicate
-        * is_paired
-        * is_proper_pair
-        * is_qcfail
-        * is_read1
-        * is_read2
-        * is_reverse
-        * is_secondary
-        * is_unmapped
-        * mapping_quality
-        * mate_is_reverse
-        * mate_is_unmapped
-        * next_reference_id
-        * next_reference_start
-        * query_alignment_end
-        * query_alignment_length
-        * query_alignment_qualities
-        * query_alignment_sequence
-        * query_alignment_start
-        * query_length
-        * query_name
-        * reference_end
-        * reference_id
-        * reference_length
-        * reference_start
-        * template_length
-
-        (Note: the above list is parsed into the _READ_ATTRIBUTE_NAMES class
-        variable, so be careful when modifying it.)
-
-        Additionally, for alignment "tags" (arbitrary key values associated
-        with an alignment), a column of the form "TAG_{tag name}" is
-        included.
-
-        Finally, the column "pysam_alignment_record" gives the underlying
-        `pysam.AlignedSegment` instances.
-
-        Parameters
-        ----------
-        attributes (optional): list of strings
-            List of columns to include. If unspecified, all columns are
-            included in the result.
-
-        Returns
-        ----------
-        pandas.DataFrame of read attributes.
-
-        '''
-        def include(attribute):
-            return attributes is None or attribute in attributes
-
-        reads = self.reads()
-
-        possible_column_names = list(PileupCollection._READ_ATTRIBUTE_NAMES)
-        result = OrderedDict(
-            (name, [getattr(read, name) for read in reads])
-            for name in PileupCollection._READ_ATTRIBUTE_NAMES
-            if include(name))
-
-        # Add tag columns.
-        if reads:
-            tag_dicts = [dict(x.get_tags()) for x in reads]
-            tag_keys = set.union(
-                *[set(item.keys()) for item in tag_dicts])
-            for tag_key in sorted(tag_keys):
-                column_name = "TAG_%s" % tag_key
-                possible_column_names.append(column_name)
-                if include(column_name):
-                    result[column_name] = [d.get(tag_key) for d in tag_dicts]
-
-        # Lastly, we include the underlying pysam alignment record.
-        possible_column_names.append("pysam_alignment_record")
-        if include("pysam_alignment_record"):
-            result["pysam_alignment_record"] = reads
-
-        # If particular attributes were requested, check that they're here.
-        if attributes is not None:
-            for attribute in attributes:
-                if attribute not in result:
-                    raise ValueError(
-                        "No such attribute: %s. Valid attributes are: %s"
-                        % (attribute, " ".join(possible_column_names)))
-            assert set(attributes) == set(result)
-
-        return pandas.DataFrame(result)
-
-    # Parse the bulleted list of attributes in the previous method's docstring.
-    _READ_ATTRIBUTE_NAMES = []
-    for line in read_attributes.__doc__.split("\n"):
-        match = re.match("\s+\*\s*(\w+)\s*", line)
-        if match:
-            _READ_ATTRIBUTE_NAMES.append(match.groups()[0])
-
-    def group_by_allele(self, locus):
-        '''
-        Split the PileupCollection by the alleles suggested by the reads at the
-        specified locus.
-
-        If a read has an insertion immediately following the locus, then the
-        insertion is included in the allele. For example, if locus is the
-        1-base range [5,6), one allele might be "AGA", indicating that at
-        locus 5 some read has an "A" followed by a 2-base insertion ("GA"). If
-        a read has a deletion at the specified locus, the allele is the empty
-        string.
-
-        The given locus may include any number of bases. If the locus includes
-        multiple bases, then the alleles consist of all bases aligning to that
-        range in any read. Note that only sequences actually sequenced in a
-        particular read are included. For example, if one read has "ATT" at a
-        locus and another read has "GCC", then the alleles are "ATT" and
-        "GCC", but not "GTT". That is, the bases in each allele are phased. For
-        this reason, only reads that overlap the entire locus are included.
-
-        If the locus is an empty interval (e.g. [5,5) ), then the alleles
-        consist only of inserted bases. In this example, only bases inserted
-        immediately after locus 5 would be included (but *not* the base
-        actually at locus 5). In the previous insertion example, the allele
-        would be "GA", indicating a 2-base insertion. Reads that have no
-        insertion at that position (matches or deletions) would have the empty
-        string as their allele.
-
-        Parameters
-        ----------
-        locus : Locus
-            The reference locus, encompassing 0 or more bases.
-
-        Returns
-        ----------
-        A dict of string -> PileupCollection. The keys are nucleotide strings
-        giving the bases sequenced at the locus, and the values are
-        PileupCollection instances of the alignments that support that allele.
-
-        '''
-        locus = to_locus(locus)
-        read_to_allele = None
-        loci = []
-        if locus.positions:
-            # Our locus includes at least one reference base.
-            for position in locus.positions:
-                base_position = Locus.from_interbase_coordinates(
-                    locus.contig, position)
-                loci.append(base_position)
-                new_read_to_allele = {}
-                for element in self.pileups[base_position]:
-                    allele_prefix = ""
-                    key = alignment_key(element.alignment)
-                    if read_to_allele is not None:
-                        try:
-                            allele_prefix = read_to_allele[key]
-                        except KeyError:
-                            continue
-                    allele = allele_prefix + element.bases
-                    new_read_to_allele[key] = allele
-                read_to_allele = new_read_to_allele
-        else:
-            # Our locus is between reference bases.
-            position_before = Locus.from_interbase_coordinates(
-                locus.contig, locus.start)
-            loci.append(position_before)
-            read_to_allele = {}
-            for element in self.pileups[position_before]:
-                allele = element.bases[1:]
-                read_to_allele[alignment_key(element.alignment)] = allele
-
-        split = defaultdict(lambda: PileupCollection(pileups={}, parent=self))
-        for locus in loci:
-            pileup = self.pileups[locus]
-            for e in pileup.elements:
-                key = read_to_allele.get(alignment_key(e.alignment))
-                if key is not None:
-                    if locus in split[key].pileups:
-                        split[key].pileups[locus].append(e)
-                    else:
-                        split[key].pileups[locus] = Pileup(locus, [e])
-
-        # Sort by number of reads (descending). Break ties with the
-        # lexicographic ordering of the allele string.
-        def sorter(pair):
-            (allele, pileup_collection) = pair
-            return (-1 * pileup_collection.num_reads(), allele)
-        return OrderedDict(sorted(split.items(), key=sorter))
-
-    def allele_summary(self, locus, score=lambda x: x.num_reads()):
-        '''
-        Convenience method to summarize the evidence for each of the alleles
-        present at a locus. Applies a score function to the PileupCollection
-        associated with each allele.
-
-        See also `PileupCollection.group_by_allele`.
-
-        Parameters
-        ----------
-        locus : Locus
-            The reference locus, encompassing 0 or more bases.
-
-        score (optional) : PileupCollection -> object
-            Function to apply to summarize the evidence for each allele.
-            Default: count number of reads.
-
-        Returns
-        ----------
-        List of (allele, score) pairs.
-        '''
-        locus = to_locus(locus)
-        return [
-            (allele, score(x))
-            for (allele, x) in self.group_by_allele(locus).items()
-        ]
-
-    def group_by_match(self, variant):
-        '''
-        Given a variant, split the PileupCollection based on whether the
-        data supports the reference allele, the alternate allele, or neither.
-
-        Parameters
-        ----------
-        variant : Variant
-            The variant. Must have fields 'locus', 'ref', and 'alt'.
-
-        Returns
-        ----------
-        A MatchingEvidence named tuple with fields (ref, alt, other),
-        each of which is a string -> PileupCollection dict mapping alleles
-        to the PileupCollection of evidence supporting them.
-        '''
-        locus = to_locus(variant)
-        if len(variant.ref) != len(locus.positions):
-            logging.warning(
-                "Ref is length %d but locus has %d bases in variant: %s" %
-                (len(variant.ref), len(locus.positions), str(variant)))
-
-        alleles_dict = self.group_by_allele(locus)
-        single_base_loci = [
-            Locus.from_interbase_coordinates(locus.contig, position)
-            for position in locus.positions
-        ]
-        empty_pileups = dict(
-            (locus, Pileup(locus=locus, elements=[]))
-            for locus in single_base_loci)
-        empty_collection = PileupCollection(pileups=empty_pileups, parent=self)
-
-        ref = {variant.ref: alleles_dict.pop(variant.ref, empty_collection)}
-        alt = {variant.alt: alleles_dict.pop(variant.alt, empty_collection)}
-        other = alleles_dict
-
-        # TODO: consider end of read issues for insertions
-        return MatchingEvidence(ref, alt, other)
-
-    def match_summary(self, variant, score=lambda x: x.num_reads()):
-        '''
-        Convenience method to summarize the evidence for and against a variant
-        using a user-specified score function.
-
-        See also `PileupCollection.group_by_match`.
-
-        Parameters
-        ----------
-        variant : Variant
-            The variant. Must have fields 'locus', 'ref', and 'alt'.
-
-        score (optional) : PileupCollection -> object
-            Function to apply to summarize the evidence for each allele.
-            Default: count number of reads.
-
-        Returns
-        ----------
-        List of (allele, score) pairs.
-        This list will always have at least two elements. The first pair in
-        the list is the reference allele. The second pair is the alternate.
-        The subsequent items give the "third" alleles (neither ref nor alt),
-        if any.
-        '''
-        split = self.group_by_match(variant)
-
-        def name(allele_to_pileup_collection):
-            return ",".join(allele_to_pileup_collection)
-
-        def aggregate_and_score(pileup_collections):
-            merged = PileupCollection.merge(*pileup_collections)
-            return score(merged)
-
-        result = [
-            (name(split.ref), aggregate_and_score(split.ref.values())),
-            (name(split.alt), aggregate_and_score(split.alt.values())),
-        ]
-        result.extend(
-            (allele, score(collection))
-            for (allele, collection) in split.other.items())
-        return result
-
-    def filter(self,
-            drop_duplicates=False,
-            drop_improper_mate_pairs=False,
-            min_mapping_quality=None,
-            min_base_quality=None,
-            filters=None):
-        '''
-        Return a new PileupCollection that includes only pileup elements
-        satisfying the specified criteria.
-
-        Parameters
-        ----------
-        drop_duplicates (optional, default False) : boolean
-            Remove alignments with the is_duplicate flag.
-
-        drop_improper_mate_pairs (optional, default False) : boolean
-            Retain only alignments that have mapped mate pairs, where one
-            alignment in the pair is on the forward strand and the other
-            is on the reverse.
-
-        min_mapping_quality (optional) : int
-            If specified, retain only alignments with mapping quality >= the
-            specified threshold.
-
-        min_base_quality (optional) : int
-            If specified, retain only pileup elements where the base quality
-            for the bases aligning to the pileup's locus are >= the specified
-            threshold.
-
-        filters (optional) : list of PileupElement -> bool functions
-            User-specified filter functions to apply. This will be called on
-            each PileupElement, and should return True if the element should
-            be retained.
-
-        Returns
-        ----------
-        A new PileupCollection that includes the subset of the pileup elements
-        matching all the specified filters.
-        '''
-
-        if filters is None:
-            filters = []
-        if drop_duplicates:
-            filters.append(lambda e: not e.alignment.is_duplicate)
-        if drop_improper_mate_pairs:
-            filters.append(lambda e: e.alignment.is_proper_pair)
-        if min_mapping_quality is not None:
-            filters.append(
-                lambda e: e.alignment.mapping_quality >= min_mapping_quality)
-        if min_base_quality is not None:
-            filters.append(
-                lambda e: e.min_base_quality >= min_base_quality)
-        pileups = OrderedDict(
-            (locus, pileup.filter(filters))
-            for (locus, pileup)
-            in self.pileups.items())
-        return PileupCollection(pileups=pileups, parent=self)
-
-    def merge(self, *others):
-        '''
-        Return a new PileupCollection that is the union of self and the other
-        specified collections.
-        '''
-        new_pileups = {}
-        for collection in (self,) + others:
-            for (locus, pileup) in collection.pileups.items():
-                if locus in new_pileups:
-                    new_pileups[locus].update(pileup)
-                else:
-                    new_pileups[locus] = Pileup(locus, pileup.elements)
-        return PileupCollection(new_pileups, parent=self)
-
-    @staticmethod
-    def from_bam(pysam_samfile, loci):
-        '''
-        Create a PileupCollection for a set of loci from a BAM file.
-
-        Parameters
-        ----------
-        pysam_samfile : `pysam.csamfile.Samfile` instance, or filename string
-            to a BAM file. The BAM file must be indexed.
-
-        loci : list of Locus instances
-            Loci to collect pileups for.
-
-        Returns
-        ----------
-        PileupCollection instance containing pileups for the specified loci.
-        All alignments in the BAM file are included (e.g.
-        duplicate reads, secondary alignments, etc.). See
-        `PileupCollection.filter` if these need to be removed.
-        '''
-
-        loci = [to_locus(obj) for obj in loci]
-
-        close_on_completion = False
-        if typechecks.is_string(pysam_samfile):
-            pysam_samfile = Samfile(pysam_samfile)
-            close_on_completion = True
-
-        try:
-            # Map from pyensembl normalized chromosome names used in Variant to
-            # the names used in the BAM file.
-            chromosome_name_map = {}
-            for name in pysam_samfile.references:
-                normalized = pyensembl.locus.normalize_chromosome(name)
-                chromosome_name_map[normalized] = name
-
-            result = PileupCollection({})
-
-            # Optimization: we sort variants so our BAM reads are localized.
-            locus_iterator = itertools.chain.from_iterable(
-                (Locus.from_interbase_coordinates(locus_interval.contig, pos)
-                    for pos
-                    in locus_interval.positions)
-                for locus_interval in sorted(loci))
-            for locus in locus_iterator:
-                result.pileups[locus] = Pileup(locus, [])
-                try:
-                    chromosome = chromosome_name_map[locus.contig]
-                except KeyError:
-                    logging.warn("No such contig in bam: %s" % locus.contig)
-                    continue
-                columns = pysam_samfile.pileup(
-                    chromosome,
-                    locus.position,
-                    locus.position + 1, # exclusive, 0-indexed
-                    truncate=True,
-                    stepper="nofilter")
-                try:
-                    column = next(columns)
-                except StopIteration:
-                    # No reads align to this locus.
-                    continue
-
-                # Note that storing the pileups here is necessary, since the
-                # subsequent assertion will invalidate our column.
-                pileups = column.pileups
-                assert list(columns) == [] # column is invalid after this.
-                for pileup_read in pileups:
-                    if not pileup_read.is_refskip:
-                        element = PileupElement.from_pysam_alignment(
-                            locus, pileup_read)
-                        result.pileups[locus].append(element)
-            return result
-        finally:
-            if close_on_completion:
-                pysam_samfile.close()
-
-def to_locus(variant_or_locus):
-    """
-    Return a Locus object for a Variant instance.
-
-    This is necessary since the read evidence module expects Variant instances
-    to have a locus attribute set to a varcode.Locus instance of interbase
-    genomic coordinates. The rest of varcode uses a different Variant class,
-    but will eventually be transitioned to interbase coordinates.
-
-    See test/test_read_evidence.py for a definition of the Variant class that
-    the read_evidence module is meant to work with.
-
-    This function can be passed a regular varcode.Variant instance (with fields
-    start, end, contig, etc.), a different kind of variant object that has a
-    'locus' field, or a varcode.Locus. It will return a varcode.Locus instance.
-
-    This should all get cleaned up once varcode switches to interbase
-    coordinates and we standardize on a Variant class.
-    """
-    if isinstance(variant_or_locus, Locus):
-        return variant_or_locus
-    try:
-        return variant_or_locus.locus
-    except AttributeError:
-        # IMPORTANT: if varcode someday changes from inclusive to interbase
-        # coordinates, this will need to be updated.
-        return Locus.from_inclusive_coordinates(
-            variant_or_locus.contig,
-            variant_or_locus.start,
-            variant_or_locus.end)
diff --git a/varcode/read_evidence/pileup_element.py b/varcode/read_evidence/pileup_element.py
deleted file mode 100644
index 42cc365..0000000
--- a/varcode/read_evidence/pileup_element.py
+++ /dev/null
@@ -1,207 +0,0 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-
-from . import alignment_key
-
-class PileupElement(object):
-    '''
-    A PileupElement represents the segment of an alignment that aligns to a
-    particular base in the reference.
-
-    Attributes
-    ----------
-    locus : Varcode.Locus
-        The reference locus. Must be length 1, i.e. a single base.
-
-    offset_start : int
-        0-based start offset into the alignment sequence, inclusive
-
-    offset_end : int
-        0-based end offset into the alignment sequence, exclusive
-
-    alignment : pysam.AlignedSegment
-        pysam alignment instance
-
-    alignment_key : tuple
-        value computed from the alignment instance that uniquely specifies its
-        properties. Used for comparisons since pysam.AlignedSegment instances
-        do not support a useful notion of equality (they compare using object
-        identity). See `read_evidence.alignment_key` for the implementation of
-        this key.
-    '''
-    def __init__(self, locus, offset_start, offset_end, alignment):
-        '''
-        Construct a PileupElement object.
-        '''
-        assert offset_end >= offset_start, \
-            "offset_start=%d > offset_end=%d" % (offset_start, offset_end)
-        self.locus = locus
-        self.offset_start = offset_start
-        self.offset_end = offset_end
-        self.alignment = alignment
-        self.alignment_key = alignment_key(self.alignment)
-
-    def fields(self):
-        '''
-        Fields that should be considered for our notion of object equality.
-        '''
-        return (
-            self.locus, self.offset_start, self.offset_end, self.alignment_key)
-
-    def __eq__(self, other):
-        return hasattr(other, "fields") and self.fields() == other.fields()
-
-    def __hash__(self):
-        return hash(self.fields())
-
-    @property
-    def bases(self):
-        '''
-        The sequenced bases in the alignment that align to this locus in the
-        genome, as a string.
-
-        Empty string in the case of a deletion. String of length > 1 if there
-        is an insertion here.
-        '''
-        sequence = self.alignment.query_sequence
-        assert self.offset_end <= len(sequence), \
-            "End offset=%d > sequence length=%d. CIGAR=%s. SEQUENCE=%s" % (
-                self.offset_end,
-                len(sequence),
-                self.alignment.cigarstring,
-                sequence)
-        return sequence[self.offset_start:self.offset_end]
-
-    @property
-    def base_qualities(self):
-        '''
-        The phred-scaled base quality scores corresponding to `self.bases`, as
-        a list.
-        '''
-        return self.alignment.query_qualities[
-            self.offset_start:self.offset_end]
-
-    @property
-    def min_base_quality(self):
-        '''
-        The minimum of the base qualities. In the case of a deletion, where
-        there are no bases in this PileupElement, the minimum is taken over
-        the sequenced bases immediately before and after the deletion.
-        '''
-        try:
-            return min(self.base_qualities)
-        except ValueError:
-            # We are mid-deletion. We return the minimum of the adjacent bases.
-            assert self.offset_start == self.offset_end
-            adjacent_qualities = [
-                self.alignment.query_qualities[offset]
-                for offset in [self.offset_start - 1, self.offset_start]
-                if 0 <= offset < len(self.alignment.query_qualities)
-            ]
-            return min(adjacent_qualities)
-
-    @staticmethod
-    def from_pysam_alignment(locus, pileup_read):
-        '''
-        Factory function to create a new PileupElement from a pysam
-        `PileupRead`.
-
-        Parameters
-        ----------
-        locus : varcode.Locus
-            Reference locus for which to construct a PileupElement. Must
-            include exactly one base.
-
-        pileup_read : pysam.calignmentfile.PileupRead
-            pysam PileupRead instance. Its alignment must overlap the locus.
-
-        Returns
-        ----------
-        PileupElement
-
-        '''
-        assert not pileup_read.is_refskip, (
-            "Can't create a PileupElement in a refskip (typically an intronic "
-            "gap in an RNA alignment)")
-
-        # Pysam has an `aligned_pairs` method that gives a list of
-        # (offset, locus) pairs indicating the correspondence between bases in
-        # the alignment and reference loci. Here we use that to compute
-        # offset_start and offset_end.
-        #
-        # This is slightly tricky in the case of insertions and deletions.
-        # Here are examples of the desired logic.
-        #
-        # Target locus = 1000
-        #
-        # (1) Simple case: matching bases.
-        #
-        # OFFSET   LOCUS
-        # 0        999
-        # 1        1000
-        # 2        1001
-        #
-        # DESIRED RESULT: offset_start=1, offset_end=2.
-        #
-        #
-        # (2) A 1 base insertion at offset 2.
-        #
-        # OFFSET   LOCUS
-        # 0        999
-        # 1        1000
-        # 2        None
-        # 3        1001
-        #
-        # DESIRED RESULT: offset_start = 1, offset_end=3.
-        #
-        #
-        # (3) A 2 base deletion at loci 1000 and 1001.
-        #
-        # OFFSET   LOCUS
-        # 0        999
-        # None     1000
-        # None     1001
-        # 1        1002
-        #
-        # DESIRED RESULT: offset_start = 1, offset_end=1.
-        #
-        offset_start = None
-        offset_end = len(pileup_read.alignment.query_sequence)
-        # TODO: doing this with get_blocks() may be faster.
-        for (offset, position) in pileup_read.alignment.aligned_pairs:
-            if offset is not None and position is not None:
-                if position == locus.position:
-                    offset_start = offset
-                elif position > locus.position:
-                    offset_end = offset
-                    break
-        if offset_start is None:
-            offset_start = offset_end
-
-        assert pileup_read.is_del == (offset_end - offset_start == 0), \
-            "Deletion=%s but | [%d,%d) |=%d for locus %d in: \n%s" % (
-                pileup_read.is_del,
-                offset_start,
-                offset_end,
-                offset_end - offset_start,
-                locus.position,
-                pileup_read.alignment.aligned_pairs)
-
-        assert offset_end >= offset_start
-        result = PileupElement(
-            locus, offset_start, offset_end, pileup_read.alignment)
-        return result
-
diff --git a/varcode/read_evidence/util.py b/varcode/read_evidence/util.py
deleted file mode 100644
index 252d4a8..0000000
--- a/varcode/read_evidence/util.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (c) 2015. Mount Sinai School of Medicine
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-
-def alignment_key(pysam_alignment_record):
-    '''
-    Return the identifying attributes of a `pysam.AlignedSegment` instance.
-    This is necessary since these objects do not support a useful notion of
-    equality (they compare on identity by default).
-    '''
-    return (
-        read_key(pysam_alignment_record),
-        pysam_alignment_record.query_alignment_start,
-        pysam_alignment_record.query_alignment_end,
-    )
-
-def read_key(pysam_alignment_record):
-    '''
-    Given a `pysam.AlignedSegment` instance, return the attributes identifying
-    the *read* it comes from (not the alignment). There may be more than one
-    alignment for a read, e.g. chimeric and secondary alignments.
-    '''
-    return (
-        pysam_alignment_record.query_name,
-        pysam_alignment_record.is_duplicate,
-        pysam_alignment_record.is_read1,
-        pysam_alignment_record.is_read2,
-    )
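
Reviewer note: the Locus class deleted above was the public helper for converting between 1-based inclusive and 0-based interbase coordinates. A minimal sketch of the conversion it performed, runnable only against varcode 0.3.x (a checkout from before this patch):

    from varcode import Locus  # removed from the public API by this patch

    # chr17 position 41,244,936 in 1-based inclusive coordinates is the
    # half-open interbase interval [41244935, 41244936).
    a = Locus.from_inclusive_coordinates("17", 41244936)
    b = Locus.from_interbase_coordinates("17", 41244935)
    assert a == b == Locus("17", 41244935, 41244936)
    assert a.position == 41244935  # single-base locus: its interbase start
    assert list(a.positions) == [41244935]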
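
The deleted read_evidence workflow, for reference: build a PileupCollection from an indexed BAM for a set of loci, optionally filter the pileup elements, then summarize the alleles. A sketch against the removed 0.3.x API; the BAM below is also deleted by this patch, and the expected summaries are copied from the deleted tests:

    from varcode import Locus
    from varcode.read_evidence import PileupCollection  # removed by this patch

    bam_path = "test/data/reads/gatk_mini_bundle_extract.bam"  # also removed
    locus = Locus.from_inclusive_coordinates("20", 10006823)
    evidence = PileupCollection.from_bam(bam_path, [locus])

    # Unfiltered: duplicate reads and low-quality bases are included.
    print(evidence.filter().allele_summary(locus))  # [("", 6), ("C", 2)]

    # filter() returns a new PileupCollection whose .parent is the original.
    strict = evidence.filter(drop_duplicates=True, min_mapping_quality=60)
    print(strict.allele_summary(locus))             # [("", 5), ("C", 1)]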
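
Similarly, match_summary() scored reads for and against a specific variant; it accepted any object with locus, ref, and alt fields (or a native varcode.Variant). A sketch mirroring the deleted matching test, under the same pre-patch assumptions:

    import collections

    from varcode import Locus
    from varcode.read_evidence import PileupCollection  # removed by this patch

    # The lightweight duck-typed variant the deleted tests used.
    Variant = collections.namedtuple("Variant", "locus ref alt")

    bam_path = "test/data/reads/gatk_mini_bundle_extract.bam"  # also removed
    locus = Locus.from_inclusive_coordinates("20", 10008951)
    variant = Variant(locus, "A", "C")

    evidence = PileupCollection.from_bam(bam_path, [variant])
    print(evidence.match_summary(variant))
    # [('A', 1), ('C', 4)] in the deleted test: one read supports the ref,
    # four support the alt.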