Skip to content

Commit

Permalink
ENH SemiBin1 cannot use --abundances
Browse files Browse the repository at this point in the history
  • Loading branch information
psj1997 committed Mar 4, 2024
1 parent 3c4f79a commit b3c7d5b
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 53 deletions.
4 changes: 4 additions & 0 deletions SemiBin/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,10 @@ def main2(args=None, is_semibin2=True):
if is_semibin2 and getattr(args, 'training_type', None) == 'semi':
logger.info('Currently using semi-supervised mode. This is generally only useful for backwards compability.')

if not is_semibin2 and getattr(args, 'abundances', None) is not None:
logger.error(f'--abundances cannot be used in SemiBin1.')
sys.exit(1)

if args.cmd == 'citation':
from . import citation
if args.cite_format == 'bibtex':
Expand Down
2 changes: 1 addition & 1 deletion integration-tests/generate_data_multi_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
assert data_split.shape == (40, 146)

# running with abundance file from strobealign-aemb
subprocess.check_call('SemiBin1 generate_sequence_features_multi '
subprocess.check_call('SemiBin2 generate_sequence_features_multi '
'-i test/multi_samples_data/input_multi.fasta '
'-o test-outputs/output_multi_fa -m 2500 '
'--ratio 0.05 --ml-threshold 4000 -p 1 '
Expand Down
53 changes: 1 addition & 52 deletions script/generate_split.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,8 @@
import argparse
from atomicwrites import atomic_write
from SemiBin.fasta import fasta_iter
import os

def fasta_iter(fname, full_header=False):
    '''Stream (header, sequence) records from a FASTA source

    Parameters
    ----------
    fname : str or file-like
        Path of the FASTA file, or an object exposing ``readline`` which is
        then read from directly.
        A path ending in .gz is opened with gzip, .bz2 with bz2 and
        .xz with lzma; anything else is opened as plain text.
    full_header : boolean (optional)
        If True, the whole header line (minus the leading '>') is reported.
        Otherwise (the default), only its first whitespace-delimited word.

    Yields
    ------
    (h,seq): tuple of (str, str)
        Header and the concatenation of the record's sequence lines.
    '''
    # Choose how to obtain a text stream; compression modules are only
    # imported when the matching extension is actually seen.
    if hasattr(fname, 'readline'):
        def opener(handle, _mode):
            return handle
    elif fname.endswith('.gz'):
        from gzip import open as opener
    elif fname.endswith('.bz2'):
        from bz2 import open as opener
    elif fname.endswith('.xz'):
        from lzma import open as opener
    else:
        opener = open

    current = None
    pieces = []
    with opener(fname, 'rt') as stream:
        for raw in stream:
            if raw[0] != '>':
                # Sequence line: buffer it (lines seen before the first
                # header are dropped when that header resets the buffer).
                pieces.append(raw.strip())
                continue

            # A new header closes the record collected so far.
            if current is not None:
                yield current, ''.join(pieces)

            title = raw[1:].strip()
            if not title:
                current = ''
            elif full_header:
                current = title
            else:
                current = title.split()[0]
            pieces = []

        # Flush the last record, if any header was seen at all.
        if current is not None:
            yield current, ''.join(pieces)


def generate_file(contig_file, output, min_length, name):
os.makedirs(output, exist_ok=True)

Expand Down

0 comments on commit b3c7d5b

Please sign in to comment.