Skip to content

Commit

Permalink
By default, filter out CDR3s shorter than 6 amino acids since they cause problems with the distance calculation.
Browse files Browse the repository at this point in the history
TODO: find a better solution.
  • Loading branch information
phbradley committed Sep 10, 2018
1 parent f9304fa commit 5ea156b
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions compute_probs.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
# Command-line option declarations on the parser object `p`.
p.flag('add_masked_seqs') # --add_masked_seqs (boolean flag, no argument passed)
p.flag('filter') # --filter (boolean flag, no argument passed)
p.int('max_cdr3_length_for_filtering').default(30) # integer option, default 30: upper bound on CDR3 length, applied when filtering
p.int('min_cdr3_length_for_filtering').default(6) # integer option, default 6: lower bound on CDR3 length when filtering; otherwise compute_distances.py will fail...
p.flag('no_probabilities').described_as('Assign a probability of 1 to all TCRs.')
#p.flag('find_exact_matches') # --flag_arg (no argument passed)
#p.range('range_arg') # --range_arg 1:2
Expand Down Expand Up @@ -92,8 +93,10 @@
if filter:
if 'UNK' in va_gene+ja_gene or 'TRa' in va_gene+ja_gene: continue
if 'UNK' in va_gene+ja_gene or 'TRa' in va_gene+ja_gene: continue
if len(cdr3a_protseq)>max_cdr3_length_for_filtering:continue
if len(cdr3b_protseq)>max_cdr3_length_for_filtering:continue
if ( len(cdr3a_protseq) > max_cdr3_length_for_filtering or
len(cdr3b_protseq) > max_cdr3_length_for_filtering or
len(cdr3a_protseq) < min_cdr3_length_for_filtering or
len(cdr3b_protseq) < min_cdr3_length_for_filtering ): continue

## check for stop codons
skip_me = False
Expand Down

0 comments on commit 5ea156b

Please sign in to comment.