From 4ff55ec73500c6f3916a7c737af33216d09f2724 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Wed, 15 May 2024 17:27:07 -0600 Subject: [PATCH] add binary suffixes to job tools --- src/spikeinterface/core/core_tools.py | 46 +++++++++++++++---- src/spikeinterface/core/job_tools.py | 24 ++++------ .../core/tests/test_core_tools.py | 37 +++++++++++++-- 3 files changed, 76 insertions(+), 31 deletions(-) diff --git a/src/spikeinterface/core/core_tools.py b/src/spikeinterface/core/core_tools.py index ad16e00f98..74c281683f 100644 --- a/src/spikeinterface/core/core_tools.py +++ b/src/spikeinterface/core/core_tools.py @@ -10,16 +10,6 @@ from math import prod import numpy as np -from tqdm import tqdm - -from .job_tools import ( - ensure_chunk_size, - ensure_n_jobs, - divide_segment_into_chunks, - fix_job_kwargs, - ChunkRecordingExecutor, - _shared_job_kwargs_doc, -) def define_function_from_class(source_class, name): @@ -447,6 +437,42 @@ def convert_bytes_to_str(byte_value: int) -> str: return f"{byte_value:.2f} {suffixes[i]}" +_exponents = { + "k": 1e3, + "M": 1e6, + "G": 1e9, + "T": 1e12, + "P": 1e15, # Decimal (SI) prefixes + "Ki": 1024**1, + "Mi": 1024**2, + "Gi": 1024**3, + "Ti": 1024**4, + "Pi": 1024**5, # Binary (IEC) prefixes +} + + +def convert_string_to_bytes(memory_string: str) -> int: + """ + Convert a memory size string to the corresponding number of bytes. + + Parameters: + memory_string (str): Memory size string (e.g., "1G", "512Mi", "2T"). + + Returns: + int: Number of bytes. 
+ """ + if memory_string[-2:] in _exponents: + suffix = memory_string[-2:] + mem_value = memory_string[:-2] + else: + suffix = memory_string[-1] + mem_value = memory_string[:-1] + + assert suffix in _exponents, f"Unknown suffix: {suffix}" + bytes_value = int(float(mem_value) * _exponents[suffix]) + return bytes_value + + def is_editable_mode() -> bool: """ Check if spikeinterface is installed in editable mode diff --git a/src/spikeinterface/core/job_tools.py b/src/spikeinterface/core/job_tools.py index 779414b337..6161d5f064 100644 --- a/src/spikeinterface/core/job_tools.py +++ b/src/spikeinterface/core/job_tools.py @@ -8,9 +8,9 @@ import platform import os import warnings +from spikeinterface.core.core_tools import convert_string_to_bytes import sys -import contextlib from tqdm.auto import tqdm from concurrent.futures import ProcessPoolExecutor @@ -23,7 +23,7 @@ - chunk_size: int Number of samples per chunk - chunk_memory: str - Memory usage for each job (e.g. "100M", "1G") + Memory usage for each job (e.g. "100M", "1G", "500Mi", "2Gi") - total_memory: str Total memory usage (e.g. "500M", "2G") - chunk_duration : str or float or None @@ -149,16 +149,6 @@ def divide_recording_into_chunks(recording, chunk_size): return all_chunks -_exponents = {"k": 1e3, "M": 1e6, "G": 1e9} - - -def _mem_to_int(mem): - suffix = mem[-1] - assert suffix in _exponents - mem = int(float(mem[:-1]) * _exponents[suffix]) - return mem - - def ensure_n_jobs(recording, n_jobs=1): if n_jobs == -1: n_jobs = os.cpu_count() @@ -206,9 +196,11 @@ def ensure_chunk_size( chunk_size: int or None size for one chunk per job chunk_memory: str or None - must end with "k", "M" or "G" + must end with "k", "M", "G", etc for decimal units and "Ki", "Mi", "Gi", etc for + binary units. (e.g. "1k", "500M", "2G", "1Ki", "500Mi", "2Gi") total_memory: str or None - must end with "k", "M" or "G" + must end with "k", "M", "G", etc for decimal units and "Ki", "Mi", "Gi", etc for + binary units. (e.g. 
"1k", "500M", "2G", "1Ki", "500Mi", "2Gi") chunk_duration: None or float or str Units are second if float. If str then the str must contain units(e.g. "1s", "500ms") @@ -219,14 +211,14 @@ def ensure_chunk_size( elif chunk_memory is not None: assert total_memory is None # set by memory per worker size - chunk_memory = _mem_to_int(chunk_memory) + chunk_memory = convert_string_to_bytes(chunk_memory) n_bytes = np.dtype(recording.get_dtype()).itemsize num_channels = recording.get_num_channels() chunk_size = int(chunk_memory / (num_channels * n_bytes)) elif total_memory is not None: # clip by total memory size n_jobs = ensure_n_jobs(recording, n_jobs=n_jobs) - total_memory = _mem_to_int(total_memory) + total_memory = convert_string_to_bytes(total_memory) n_bytes = np.dtype(recording.get_dtype()).itemsize num_channels = recording.get_num_channels() chunk_size = int(total_memory / (num_channels * n_bytes * n_jobs)) diff --git a/src/spikeinterface/core/tests/test_core_tools.py b/src/spikeinterface/core/tests/test_core_tools.py index 5214ed3242..c37f356900 100644 --- a/src/spikeinterface/core/tests/test_core_tools.py +++ b/src/spikeinterface/core/tests/test_core_tools.py @@ -1,8 +1,6 @@ import platform import math -from multiprocessing.shared_memory import SharedMemory from pathlib import Path -import importlib import pytest import numpy as np @@ -12,10 +10,8 @@ make_paths_absolute, check_paths_relative, normal_pdf, + convert_string_to_bytes, ) -from spikeinterface.core.binaryrecordingextractor import BinaryRecordingExtractor -from spikeinterface.core.generate import NoiseGeneratorRecording -from spikeinterface.core.numpyextractors import NumpySorting if hasattr(pytest, "global_test_folder"): @@ -88,6 +84,37 @@ def test_path_utils_functions(): # UNC can be relative to the same UNC assert check_paths_relative(d, r"\\host\share") + def test_convert_string_to_bytes(): + # Test SI prefixes + assert convert_string_to_bytes("1k") == 1000 + assert convert_string_to_bytes("1M") == 
1000000 + assert convert_string_to_bytes("1G") == 1000000000 + assert convert_string_to_bytes("1T") == 1000000000000 + assert convert_string_to_bytes("1P") == 1000000000000000 + # Test IEC prefixes + assert convert_string_to_bytes("1Ki") == 1024 + assert convert_string_to_bytes("1Mi") == 1048576 + assert convert_string_to_bytes("1Gi") == 1073741824 + assert convert_string_to_bytes("1Ti") == 1099511627776 + assert convert_string_to_bytes("1Pi") == 1125899906842624 + # Test mixed values + assert convert_string_to_bytes("1.5k") == 1500 + assert convert_string_to_bytes("2.5M") == 2500000 + assert convert_string_to_bytes("0.5G") == 500000000 + assert convert_string_to_bytes("1.2T") == 1200000000000 + assert convert_string_to_bytes("1.5Pi") == 1688849860263936 + # Test zero values + assert convert_string_to_bytes("0k") == 0 + assert convert_string_to_bytes("0Ki") == 0 + # Test invalid inputs (should raise assertion error) + with pytest.raises(AssertionError) as e: + convert_string_to_bytes("1Z") + assert str(e.value) == "Unknown suffix: Z" + + with pytest.raises(AssertionError) as e: + convert_string_to_bytes("1Xi") + assert str(e.value) == "Unknown suffix: Xi" + def test_normal_pdf() -> None: mu = 4.160771