Skip to content

Commit

Permalink
Merge pull request #88 from IBM/rfd-dev
Browse files (browse the repository at this point in the history)
🪲 Bug-fix: Include the foldseek tmp in hestia_tmp
  • Loading branch information
RaulFD-creator authored Jan 20, 2025
2 parents fc35089 + d6d085b commit 68f07dd
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions hestia/similarity.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -535,9 +535,10 @@ def protein_structure_similarity(
if df_target is None:
df_target = df_query
tmp_dir = f'hestia_tmp_{time.time()}'

if os.path.isdir(tmp_dir):
shutil.rmtree(tmp_dir)
- os.mkdir(tmp_dir)
+ os.makedirs(tmp_dir)
if verbose > 2:
mmseqs_v = 3
else:
Expand Down Expand Up @@ -575,7 +576,7 @@ def protein_structure_similarity(
prefilter = '0' if prefilter else '2'

subprocess.run([foldseek, 'search', db_query, db_target, alignment_db,
- 'tmp', '-s', '9.5', '-a', '-e', 'inf',
+ f'{tmp_dir}/tmp', '-s', '9.5', '-a', '-e', 'inf',
'--seq-id-mode', denominator, '--threads',
str(threads), '--alignment-type', representation,
'--prefilter-mode', prefilter, '-v', str(mmseqs_v)
Expand All @@ -602,7 +603,8 @@ def protein_structure_similarity(
else:
df = df.rename({'prob': 'metric'})
df = df.with_columns(pl.col("metric").map_elements(
- lambda x: qry2idx[x.split('.pdb')[0].split('_')[0]]
+ lambda x: qry2idx[x.split('.pdb')[0].split('_')[0]],
+ return_dtype=pl.Int64
))
df = df.with_columns(pl.col('query').map_elements(
lambda x: qry2idx[x.split('.pdb')[0].split('_')[0]],
Expand All @@ -618,7 +620,7 @@ def protein_structure_similarity(
if filename is None:
filename = time.time()
df.write_csv(f'{filename}.csv.gz', compression='gzip')

shutil.rmtree(tmp_dir)
return df


Expand Down

0 comments on commit 68f07dd

Please sign in to comment.