ENH SemiBin1 cannot use --abundances

BigDataBiology · Mar 4, 2024 · b3c7d5b
1 parent 3c4f79a
commit b3c7d5b
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 53 deletions.
diff --git a/SemiBin/main.py b/SemiBin/main.py
@@ -1402,6 +1402,10 @@ def main2(args=None, is_semibin2=True):
     if is_semibin2 and getattr(args, 'training_type', None) == 'semi':
         logger.info('Currently using semi-supervised mode. This is generally only useful for backwards compability.')
 
+    if not is_semibin2 and getattr(args, 'abundances', None) is not None:
+        logger.error(f'--abundances cannot be used in SemiBin1.')
+        sys.exit(1)
+
     if args.cmd == 'citation':
         from . import citation
         if args.cite_format == 'bibtex':

diff --git a/integration-tests/generate_data_multi_command.py b/integration-tests/generate_data_multi_command.py
@@ -13,7 +13,7 @@
     assert data_split.shape == (40, 146)
 
 # running with abundance file from strobealign-aemb
-subprocess.check_call('SemiBin1 generate_sequence_features_multi '
+subprocess.check_call('SemiBin2 generate_sequence_features_multi '
                       '-i test/multi_samples_data/input_multi.fasta '
                       '-o test-outputs/output_multi_fa -m 2500 '
                       '--ratio 0.05 --ml-threshold 4000 -p 1 '

diff --git a/script/generate_split.py b/script/generate_split.py
@@ -1,59 +1,8 @@
 import argparse
 from atomicwrites import atomic_write
+from SemiBin.fasta import fasta_iter
 import os
 
-def fasta_iter(fname, full_header=False):
-    '''Iterate over a (possibly gzipped) FASTA file
-
-    Parameters
-    ----------
-    fname : str
-        Filename.
-            If it ends with .gz, gzip format is assumed
-            If .bz2 then bzip2 format is assumed
-            If .xz, then lzma format is assumed
-    full_header : boolean (optional)
-        If True, yields the full header. Otherwise (the default), only the
-        first word
-
-    Yields
-    ------
-    (h,seq): tuple of (str, str)
-    '''
-    header = None
-    chunks = []
-    if hasattr(fname, 'readline'):
-        op = lambda f,_ : f
-    elif fname.endswith('.gz'):
-        import gzip
-        op = gzip.open
-    elif fname.endswith('.bz2'):
-        import bz2
-        op = bz2.open
-    elif fname.endswith('.xz'):
-        import lzma
-        op = lzma.open
-    else:
-        op = open
-    with op(fname, 'rt') as f:
-        for line in f:
-            if line[0] == '>':
-                if header is not None:
-                    yield header,''.join(chunks)
-                line = line[1:].strip()
-                if not line:
-                    header = ''
-                elif full_header:
-                    header = line.strip()
-                else:
-                    header = line.split()[0]
-                chunks = []
-            else:
-                chunks.append(line.strip())
-        if header is not None:
-            yield header, ''.join(chunks)
-
-
 def generate_file(contig_file, output, min_length, name):
     os.makedirs(output, exist_ok=True)