Skip to content

Commit

Permalink
added some filter settings to CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
iskandr committed Feb 20, 2016
1 parent a090ee9 commit 0294814
Showing 1 changed file with 29 additions and 11 deletions.
40 changes: 29 additions & 11 deletions script/isovar-translate-variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import varcode
import skbio
import numpy as np
from pysam import AlignmentFile

from isovar import gather_variant_reads, sequence_counts
Expand All @@ -39,13 +40,19 @@
default=None)

parser.add_argument(
"--min-count", type=int, default=3)
"--min-read-count",
type=int,
default=3)

parser.add_argument(
"--context-size",
default=45,
"--sequence-length",
default=105,
type=int)

parser.add_argument(
"--max-sequences-per-variant",
type=int,
default=5)

if __name__ == "__main__":
args = parser.parse_args()
Expand All @@ -54,31 +61,42 @@
samfile = AlignmentFile(args.bam)

for variant in variants:
print(variant)
variant_reads = gather_variant_reads(
samfile=samfile,
chromosome="chr" + variant.contig,
base1_location=variant.start,
ref=variant.ref,
alt=variant.alt)
if len(variant_reads) < args.min_count:
if len(variant_reads) < args.min_read_count:
continue

# the number of context nucleotides on either side of the variant
# is half the desired length (minus the number of variant nucleotides)
context_size = int(
np.ceil((args.sequence_length - len(variant.alt)) / 2.0))
sequence_count_info = sequence_counts(
variant_reads, context_size=args.context_size)
for ((prefix, suffix), count) in sorted(
variant_reads,
context_size=context_size)
for i, ((prefix, suffix), count) in enumerate(sorted(
sequence_count_info.full_read_counts.items(),
key=lambda x: -x[1]):
if count < args.min_count:
key=lambda x: -x[1])):
if i >= args.max_sequences_per_variant:
break

variant = sequence_count_info.variant_nucleotides
if count < args.min_read_count:
break

variant_seq = sequence_count_info.variant_nucleotides

print("\t%s_%s_%s: %d" % (
prefix,
variant,
variant_seq,
suffix,
count))

# translate in three reading frames:
seq = "%s%s%s" % (prefix, variant, suffix)
seq = "%s%s%s" % (prefix, variant_seq, suffix)
for offset in range(3):
dna = skbio.DNA(seq[offset:])
print("\t\tframe=%d: %s" % (offset, dna.translate()))

0 comments on commit 0294814

Please sign in to comment.