Skip to content

Commit

Permalink
minor i/o tweaks on branch
Browse files Browse the repository at this point in the history
  • Loading branch information
phbradley committed Oct 6, 2019
1 parent 4f83df3 commit 47391cb
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 6 deletions.
4 changes: 3 additions & 1 deletion compute_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@
p.str('organism')
p.flag('intrasubject_nbrdists')
p.multiword('clones_files').cast(lambda x:x.split())
p.multiword('dist_chains').cast(lambda x:x.split()).default('A B AB').described_as('The chains over which the distance calcn will be performed; default is all three ("A B AB")')
p.multiword('epitope_prefixes').cast(lambda x:x.split())
p.multiword('nbrdist_percentiles').cast(lambda x: [int(val) for val in x.split()]).default("5 10 25")#"5 10 25 -1 -5 -10")
p.str('distance_params')


#internal legacy hack
new_nbrdists = not intrasubject_nbrdists

Expand Down Expand Up @@ -104,7 +106,7 @@



for chains in ['A','B','AB']:
for chains in dist_chains:
epitopes = list( set( [x['epitope'] for x in all_info ] ) )
epitopes.sort()

Expand Down
4 changes: 4 additions & 0 deletions file_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,10 @@ def map_field_names( l, input_format, output_format ):
continue
outl[ field ] = l[ field ]

if field == 'cdr3a_nucseq' or field == 'cdr3b_nucseq':
outl[ field ] = outl[ field ].lower()


# check the genes
if check_genes:
bad_genes = False
Expand Down
5 changes: 5 additions & 0 deletions make_10x_clones_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ def make_clones_file( organism, outfile, clonotype2tcrs, clonotype2barcodes ):
'''

tmpfile = outfile+'.tmp' # a temporary intermediate file
bc_mapfile = outfile+'.barcode_mapping.tsv'
outmap = open(bc_mapfile,'w')
outmap.write('clone_id\tbarcodes\n')

out = open(tmpfile,'w')
outfields = 'clone_id subject clone_size va_gene ja_gene vb_gene jb_gene cdr3a cdr3a_nucseq cdr3b cdr3b_nucseq'\
Expand Down Expand Up @@ -169,7 +172,9 @@ def make_clones_file( organism, outfile, clonotype2tcrs, clonotype2barcodes ):
outl['beta_umi'] = str(btcr_umi)
outl['num_betas'] = str(len(btcrs))
out.write( make_tsv_line(outl,outfields)+'\n' )
outmap.write('{}\t{}\n'.format(clonotype,','.join(clonotype2barcodes[clonotype])))
out.close()
outmap.close()


cmd = 'python {}/file_converter.py --input_format clones --output_format clones --input_file {} --output_file {} --organism {} --clobber --epitope UNK_E --check_genes --extra_fields {} '\
Expand Down
12 changes: 9 additions & 3 deletions make_kpca_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
p.flag('paper_figs')
p.flag('vertical') ## each epitope is a vertical stack of scatter plots
p.flag('showmotifs')
p.flag('use_tsne')
p.multiword('epitopes').cast(lambda x: x.split())

if pngfile_prefix is None:
Expand Down Expand Up @@ -92,9 +93,14 @@
D = np.minimum( D, np.full( D.shape, Dmax ) )
print 'true_Dmax:',old_Dmax,'using_Dmax:',Dmax,epitope

pca = KernelPCA(kernel='precomputed')
gram = 1 - ( D / Dmax )
xy = pca.fit_transform(gram)
if use_tsne:
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, metric='precomputed')
xy = tsne.fit_transform(D)
else:
pca = KernelPCA(kernel='precomputed')
gram = 1 - ( D / Dmax )
xy = pca.fit_transform(gram)

xs = xy[:,0]
ys = xy[:,1]
Expand Down
12 changes: 10 additions & 2 deletions read_motifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ def get_amino_acid_consensus( seqs ):
## new approach
all_tcr_infos = parse_tsv.parse_tsv_file( clones_file, ['epitope'], [], True )

fake_chains = util.detect_fake_chains( clones_file )

if epitopes is None:
epitopes = all_tcr_infos.keys()[:]
epitopes.sort()
Expand Down Expand Up @@ -192,8 +194,8 @@ def get_amino_acid_consensus( seqs ):
jb = l['jb_rep']
cdr3a = l['cdr3a']
cdr3b = l['cdr3b']
cdr3a_nucseq = l['cdr3a_nucseq']
cdr3b_nucseq = l['cdr3b_nucseq']
cdr3a_nucseq = l['cdr3a_nucseq'].lower()
cdr3b_nucseq = l['cdr3b_nucseq'].lower()

## note-- we are using mm1 reps here, same as in find_cdr3_motifs.py
##
Expand Down Expand Up @@ -244,6 +246,8 @@ def get_amino_acid_consensus( seqs ):
## index these by the v_rep and the j_rep

for ab in 'AB':
if ab in fake_chains:
continue
ng_logfile = '{}/new_nextgen_chains_{}_{}.tsv'.format( path_to_current_db_files(), organism, ab )
if not exists(ng_logfile):
Log('WARNING:: read_motifs.py: missing nextgen TCR chains file: {}'.format(ng_logfile))
Expand Down Expand Up @@ -313,7 +317,10 @@ def get_amino_acid_consensus( seqs ):
all_distances = {}
all_neighbors = {}
for ab in 'AB':
if ab in fake_chains:
continue
distfile = '{}_{}_{}.dist'.format(clones_file[:-4],ab,epitope)
print 'reading:',distfile
assert exists(distfile)
N=0
all_nbrs = []
Expand Down Expand Up @@ -557,6 +564,7 @@ def analyze_matches_using_ngseqs( matches, matched_tcrs, ab ):
if target_num != None and target_num != num: continue

if ABs != None and ab not in ABs: continue
if ab in fake_chains: continue

expected_fraction = max(expect_random,expect_nextgen)/num_tcrs

Expand Down

0 comments on commit 47391cb

Please sign in to comment.