SpikeInterface · samuelgarcia · May 21, 2024 · May 15, 2024 · alejoe91 · May 20, 2024
diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py
@@ -10,16 +10,6 @@
 from math import prod
 
 import numpy as np
-from tqdm import tqdm
-
-from .job_tools import (
-    ensure_chunk_size,
-    ensure_n_jobs,
-    divide_segment_into_chunks,
-    fix_job_kwargs,
-    ChunkRecordingExecutor,
-    _shared_job_kwargs_doc,
-)
 
 
 def define_function_from_class(source_class, name):
@@ -447,6 +437,42 @@ def convert_bytes_to_str(byte_value: int) -> str:
     return f"{byte_value:.2f} {suffixes[i]}"
 
 
+_exponents = {
+    "k": 1e3,  # Decimal (SI) prefixes: powers of 1000, written as float literals
+    "M": 1e6,
+    "G": 1e9,
+    "T": 1e12,
+    "P": 1e15,
+    "Ki": 1024**1,  # Binary (IEC) prefixes: powers of 1024, integer-valued
+    "Mi": 1024**2,
+    "Gi": 1024**3,
+    "Ti": 1024**4,
+    "Pi": 1024**5,
+}
+
+
+def convert_string_to_bytes(memory_string: str) -> int:
+    """
+    Convert a memory size string to the corresponding number of bytes.
+
+    Parameters
+    ----------
+    memory_string : str
+        A number followed by a key of `_exponents` (e.g., "1G", "512Mi", "2T").
+
+    Returns
+    -------
+    int
+        Number of bytes (the product is truncated to an integer).
+    """
+    # Try the two-character suffix first so that "Mi" is not mistaken for an unknown "i".
+    two_chars = memory_string[-2:]
+    suffix = two_chars if two_chars in _exponents else memory_string[-1:]
+    # Slicing (not indexing) lets an empty string fail the check below instead of raising IndexError.
+    assert suffix in _exponents, f"Unknown suffix: {suffix}"
+    return int(float(memory_string[: -len(suffix)]) * _exponents[suffix])
+
+
 def is_editable_mode() -> bool:
     """
     Check if spikeinterface is installed in editable mode

diff --git a/src/spikeinterface/core/job_tools.py b/src/spikeinterface/core/job_tools.py
@@ -8,9 +8,9 @@
 import platform
 import os
 import warnings
+from spikeinterface.core.core_tools import convert_string_to_bytes
 
 import sys
-import contextlib
 from tqdm.auto import tqdm
 
 from concurrent.futures import ProcessPoolExecutor
@@ -23,7 +23,7 @@
                 - chunk_size: int
                     Number of samples per chunk
                 - chunk_memory: str
-                    Memory usage for each job (e.g. "100M", "1G")
+                    Memory usage for each job (e.g. "100M", "1G", "500Mi", "2Gi")
                 - total_memory: str
                     Total memory usage (e.g. "500M", "2G")
                 - chunk_duration : str or float or None
@@ -149,16 +149,6 @@ def divide_recording_into_chunks(recording, chunk_size):
     return all_chunks
 
 
-_exponents = {"k": 1e3, "M": 1e6, "G": 1e9}
-
-
-def _mem_to_int(mem):
-    suffix = mem[-1]
-    assert suffix in _exponents
-    mem = int(float(mem[:-1]) * _exponents[suffix])
-    return mem
-
-
 def ensure_n_jobs(recording, n_jobs=1):
     if n_jobs == -1:
         n_jobs = os.cpu_count()
@@ -206,9 +196,11 @@ def ensure_chunk_size(
     chunk_size: int or None
         size for one chunk per job
     chunk_memory: str or None
-        must end with "k", "M" or "G"
+        must end with "k", "M", "G", etc. for decimal units and "Ki", "Mi", "Gi", etc. for
+        binary units. (e.g. "1k", "500M", "2G", "1Ki", "500Mi", "2Gi")
     total_memory: str or None
-        must end with "k", "M" or "G"
+        must end with "k", "M", "G", etc. for decimal units and "Ki", "Mi", "Gi", etc. for
+        binary units. (e.g. "1k", "500M", "2G", "1Ki", "500Mi", "2Gi")
     chunk_duration: None or float or str
         Units are second if float.
         If str then the str must contain units(e.g. "1s", "500ms")
@@ -219,14 +211,14 @@ def ensure_chunk_size(
     elif chunk_memory is not None:
         assert total_memory is None
         # set by memory per worker size
-        chunk_memory = _mem_to_int(chunk_memory)
+        chunk_memory = convert_string_to_bytes(chunk_memory)
         n_bytes = np.dtype(recording.get_dtype()).itemsize
         num_channels = recording.get_num_channels()
         chunk_size = int(chunk_memory / (num_channels * n_bytes))
     elif total_memory is not None:
         # clip by total memory size
         n_jobs = ensure_n_jobs(recording, n_jobs=n_jobs)
-        total_memory = _mem_to_int(total_memory)
+        total_memory = convert_string_to_bytes(total_memory)
         n_bytes = np.dtype(recording.get_dtype()).itemsize
         num_channels = recording.get_num_channels()
         chunk_size = int(total_memory / (num_channels * n_bytes * n_jobs))

diff --git a/src/spikeinterface/core/tests/test_core_tools.py b/src/spikeinterface/core/tests/test_core_tools.py
@@ -1,8 +1,6 @@
 import platform
 import math
-from multiprocessing.shared_memory import SharedMemory
 from pathlib import Path
-import importlib
 import pytest
 import numpy as np
 
@@ -12,10 +10,8 @@
     make_paths_absolute,
     check_paths_relative,
     normal_pdf,
+    convert_string_to_bytes,
 )
-from spikeinterface.core.binaryrecordingextractor import BinaryRecordingExtractor
-from spikeinterface.core.generate import NoiseGeneratorRecording
-from spikeinterface.core.numpyextractors import NumpySorting
 
 
 if hasattr(pytest, "global_test_folder"):
@@ -88,6 +84,37 @@ def test_path_utils_functions():
         # UNC can be relative to the same UNC
         assert check_paths_relative(d, r"\\host\share")
 
+
+def test_convert_string_to_bytes():
+    # Test SI prefixes
+    assert convert_string_to_bytes("1k") == 1000
+    assert convert_string_to_bytes("1M") == 1000000
+    assert convert_string_to_bytes("1G") == 1000000000
+    assert convert_string_to_bytes("1T") == 1000000000000
+    assert convert_string_to_bytes("1P") == 1000000000000000
+    # Test IEC prefixes
+    assert convert_string_to_bytes("1Ki") == 1024
+    assert convert_string_to_bytes("1Mi") == 1048576
+    assert convert_string_to_bytes("1Gi") == 1073741824
+    assert convert_string_to_bytes("1Ti") == 1099511627776
+    assert convert_string_to_bytes("1Pi") == 1125899906842624
+    # Test mixed values
+    assert convert_string_to_bytes("1.5k") == 1500
+    assert convert_string_to_bytes("2.5M") == 2500000
+    assert convert_string_to_bytes("0.5G") == 500000000
+    assert convert_string_to_bytes("1.2T") == 1200000000000
+    assert convert_string_to_bytes("1.5Pi") == 1688849860263936
+    # Test zero values
+    assert convert_string_to_bytes("0k") == 0
+    assert convert_string_to_bytes("0Ki") == 0
+    # Test invalid inputs: messages are checked after each `with` block, once the exception is captured
+    with pytest.raises(AssertionError) as excinfo:
+        convert_string_to_bytes("1Z")
+    assert str(excinfo.value) == "Unknown suffix: Z"
+    with pytest.raises(AssertionError) as excinfo:
+        convert_string_to_bytes("1Xi")
+    assert str(excinfo.value) == "Unknown suffix: i"  # "Xi" is not a known suffix, so only "i" is reported
+
 
 def test_normal_pdf() -> None:
     mu = 4.160771