diff --git a/.codecov.yml b/.codecov.yml index 70f8cec9..0e323f2f 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -10,6 +10,8 @@ flags: numba: paths: - cellfinder/core/detect/filters/plane/tile_walker.py + - cellfinder/core/detect/filters/plane/classical_filter.py + - cellfinder/core/detect/filters/plane/plane_filter.py - cellfinder/core/detect/filters/volume/ball_filter.py - cellfinder/core/detect/filters/volume/structure_detection.py carryforward: true diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index 4bb002de..1cdc1ad8 100644 --- a/.github/workflows/test_and_deploy.yml +++ b/.github/workflows/test_and_deploy.yml @@ -38,11 +38,14 @@ jobs: test: needs: [linting, manifest] name: Run package tests - timeout-minutes: 60 + timeout-minutes: 120 runs-on: ${{ matrix.os }} env: KERAS_BACKEND: torch CELLFINDER_TEST_DEVICE: cpu + # pooch cache dir + BRAINGLOBE_TEST_DATA_DIR: "~/.pooch_cache" + strategy: matrix: # Run all supported Python versions on linux @@ -56,6 +59,14 @@ jobs: python-version: "3.12" steps: + - uses: actions/checkout@v4 + - name: Cache pooch data + uses: actions/cache@v4 + with: + path: "~/.pooch_cache" + # hash on conftest in case url changes + key: ${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/pooch_registry.txt') }} + # Cache the tensorflow model so we don't have to remake it every time - name: Cache brainglobe directory uses: actions/cache@v3 with: @@ -78,12 +89,16 @@ jobs: test_numba_disabled: needs: [linting, manifest] name: Run tests with numba disabled - timeout-minutes: 60 + timeout-minutes: 120 runs-on: ubuntu-latest env: - NUMBA_DISABLE_JIT: "1" + NUMBA_DISABLE_JIT: "1" + PYTORCH_JIT: "0" + # pooch cache dir + BRAINGLOBE_TEST_DATA_DIR: "~/.pooch_cache" steps: + - uses: actions/checkout@v4 - name: Cache brainglobe directory uses: actions/cache@v3 with: @@ -91,6 +106,13 @@ jobs: ~/.brainglobe !~/.brainglobe/atlas.tar.gz key: brainglobe + + - name: Cache pooch data + uses: actions/cache@v4 + with: + path: "~/.pooch_cache" + key: ${{ runner.os }}-3.10-${{ hashFiles('**/pooch_registry.txt') }} + # Setup pyqt libraries - name: Setup qtpy libraries uses: tlambert03/setup-qt-libs@v1 @@ -108,7 +130,7 @@ jobs: test_brainmapper_cli: needs: [linting, manifest] name: Run brainmapper tests to check for breakages - timeout-minutes: 60 + timeout-minutes: 120 runs-on: ubuntu-latest env: KERAS_BACKEND: torch diff --git a/benchmarks/benchmark_tools.py b/benchmarks/benchmark_tools.py new file mode 100644 index 00000000..16c5d160 --- /dev/null +++ b/benchmarks/benchmark_tools.py @@ -0,0 +1,86 @@ +from pathlib import Path + +import pooch +import torch +from torch.profiler import ProfilerActivity, profile +from torch.utils.benchmark import Compare, Timer + +from cellfinder.core.tools.IO import fetch_pooch_directory + + +def get_test_data_path(path): + """ + Create a test data registry for BrainGlobe. + + Returns: + pooch.Pooch: The test data registry object. + + """ + registry = pooch.create( + path=pooch.os_cache("brainglobe_test_data"), + base_url="https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/", + env="BRAINGLOBE_TEST_DATA_DIR", + ) + + registry.load_registry( + Path(__file__).parent.parent / "tests" / "data" / "pooch_registry.txt" + ) + + return fetch_pooch_directory(registry, path) + + +def time_filters(repeat, run, run_args, label): + timer = Timer( + stmt="run(*args)", + globals={"run": run, "args": run_args}, + label=label, + num_threads=4, + description="", # must be not None due to pytorch bug + ) + return timer.timeit(number=repeat) + + +def compare_results(*results): + # prints the results of all the timed tests + compare = Compare(results) + compare.trim_significant_figures() + compare.colorize() + compare.print() + + +def profile_cpu(repeat, run, run_args): + with profile( + activities=[ProfilerActivity.CPU], + record_shapes=True, + profile_memory=True, + with_stack=True, + with_modules=True, + ) as prof: + for _ in range(repeat): + run(*run_args) + + print( + prof.key_averages(group_by_stack_n=1).table( + sort_by="self_cpu_time_total", row_limit=20 + ) + ) + + +def profile_cuda(repeat, run, run_args): + with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + record_shapes=True, + profile_memory=True, + with_stack=True, + with_modules=True, + ) as prof: + for _ in range(repeat): + run(*run_args) + # make sure it's fully done filtering + torch.cuda.synchronize("cuda") + + print( + prof.key_averages(group_by_stack_n=1).table( + sort_by="self_cuda_time_total", row_limit=20 + ) + ) diff --git a/benchmarks/filter_2d.py b/benchmarks/filter_2d.py index 1ae24d94..067fc5d8 100644 --- a/benchmarks/filter_2d.py +++ b/benchmarks/filter_2d.py @@ -1,27 +1,131 @@ +import os +import sys + +sys.path.append(os.path.dirname(__file__)) + import numpy as np -from pyinstrument import Profiler +import torch +from benchmark_tools import ( + compare_results, + get_test_data_path, + profile_cpu, + profile_cuda, + time_filters, +) +from brainglobe_utils.IO.image.load import read_with_dask from cellfinder.core.detect.filters.plane import TileProcessor -from cellfinder.core.detect.filters.setup_filters import setup_tile_filtering +from cellfinder.core.detect.filters.setup_filters import DetectionSettings -# Use random 16-bit integer data for signal plane -shape = (10000, 10000) -signal_array_plane = np.random.randint( - low=0, high=65536, size=shape, dtype=np.uint16 -) +def setup_filter( + signal_path, + batch_size=1, + num_z=None, + torch_device="cpu", + dtype=np.uint16, + use_scipy=False, +): + signal_array = read_with_dask(signal_path) + num_z = num_z or len(signal_array) + signal_array = np.asarray(signal_array[:num_z]).astype(dtype) + shape = signal_array.shape + + settings = DetectionSettings( + plane_original_np_dtype=dtype, + plane_shape=shape[1:], + torch_device=torch_device, + voxel_sizes=(5.06, 4.5, 4.5), + soma_diameter_um=30, + ball_xy_size_um=6, + ball_z_size_um=15, + ) + signal_array = settings.filter_data_converter_func(signal_array) + signal_array = torch.from_numpy(signal_array).to(torch_device) + + tile_processor = TileProcessor( + plane_shape=shape[1:], + clipping_value=settings.clipping_value, + threshold_value=settings.threshold_value, + soma_diameter=settings.soma_diameter, + log_sigma_size=settings.log_sigma_size, + n_sds_above_mean_thresh=settings.n_sds_above_mean_thresh, + torch_device=torch_device, + dtype=settings.filtering_dtype.__name__, + use_scipy=use_scipy, + ) + + return tile_processor, signal_array, batch_size + + +def run_filter(tile_processor, signal_array, batch_size): + for i in range(0, len(signal_array), batch_size): + tile_processor.get_tile_mask(signal_array[i : i + batch_size]) + -clipping_value, threshold_value = setup_tile_filtering(signal_array_plane) -tile_processor = TileProcessor( - clipping_value=clipping_value, - threshold_value=threshold_value, - soma_diameter=16, - log_sigma_size=0.2, - n_sds_above_mean_thresh=10, -) if __name__ == "__main__": - profiler = Profiler() - profiler.start() - plane, tiles = tile_processor.get_tile_mask(signal_array_plane) - profiler.stop() - profiler.print(show_all=True) + with torch.inference_mode(True): + n = 5 + batch_size = 2 + signal_path = get_test_data_path("bright_brain/signal") + + compare_results( + time_filters( + n, + run_filter, + setup_filter( + signal_path, + batch_size=batch_size, + torch_device="cpu", + use_scipy=False, + ), + "cpu-no_scipy", + ), + time_filters( + n, + run_filter, + setup_filter( + signal_path, + batch_size=batch_size, + torch_device="cpu", + use_scipy=True, + ), + "cpu-scipy", + ), + time_filters( + n, + run_filter, + setup_filter( + signal_path, batch_size=batch_size, torch_device="cuda" + ), + "cuda", + ), + ) + + profile_cpu( + n, + run_filter, + setup_filter( + signal_path, + batch_size=batch_size, + torch_device="cpu", + use_scipy=False, + ), + ) + profile_cpu( + n, + run_filter, + setup_filter( + signal_path, + batch_size=batch_size, + torch_device="cpu", + use_scipy=True, + ), + ) + profile_cuda( + n, + run_filter, + setup_filter( + signal_path, batch_size=batch_size, torch_device="cuda" + ), + ) diff --git a/benchmarks/filter_3d.py b/benchmarks/filter_3d.py index 5509dd52..0a8dd0af 100644 --- a/benchmarks/filter_3d.py +++ b/benchmarks/filter_3d.py @@ -1,51 +1,129 @@ +import os +import sys + +sys.path.append(os.path.dirname(__file__)) + import numpy as np -from pyinstrument import Profiler +import torch +from benchmark_tools import ( + compare_results, + get_test_data_path, + profile_cpu, + profile_cuda, + time_filters, +) +from brainglobe_utils.IO.image.load import read_with_dask -from cellfinder.core.detect.filters.volume.volume_filter import VolumeFilter +from cellfinder.core.detect.filters.setup_filters import DetectionSettings +from cellfinder.core.detect.filters.volume.ball_filter import BallFilter -# Use random data for signal data -ball_z_size = 3 +def setup_filter( + plane_path, + tiles_path, + batch_size=1, + num_z=None, + torch_device="cpu", + dtype=np.uint16, +): + filtered = read_with_dask(plane_path) + tiles = read_with_dask(tiles_path) + num_z = num_z or len(filtered) + filtered = np.asarray(filtered[:num_z]) + tiles = np.asarray(tiles[:num_z]) + shape = filtered.shape -def gen_signal_array(ny, nx): - shape = (ball_z_size, ny, nx) - return np.random.randint(low=0, high=65536, size=shape, dtype=np.uint16) + settings = DetectionSettings( + plane_original_np_dtype=dtype, + plane_shape=shape[1:], + torch_device=torch_device, + soma_diameter_um=30, + ball_xy_size_um=6, + ball_z_size_um=15, + ) + filtered = filtered.astype(settings.filtering_dtype) + filtered = torch.from_numpy(filtered).to(torch_device) + tiles = tiles.astype(np.bool_) + tiles = torch.from_numpy(tiles).to(torch_device) + ball_filter = BallFilter( + plane_height=settings.plane_height, + plane_width=settings.plane_width, + ball_xy_size=settings.ball_xy_size, + ball_z_size=settings.ball_z_size, + overlap_fraction=settings.ball_overlap_fraction, + threshold_value=settings.threshold_value, + soma_centre_value=settings.soma_centre_value, + tile_height=settings.tile_height, + tile_width=settings.tile_width, + dtype=settings.filtering_dtype.__name__, + batch_size=batch_size, + torch_device=torch_device, + use_mask=True, + ) -signal_array = gen_signal_array(667, 510) + return ball_filter, filtered, tiles, batch_size -soma_diameter = 8 -setup_params = ( - signal_array[0, :, :].T, - soma_diameter, - 3, # ball_xy_size, - ball_z_size, - 0.6, # ball_overlap_fraction, - 0, # start_plane, -) -mp_3d_filter = VolumeFilter( - soma_diameter=soma_diameter, - setup_params=setup_params, - n_planes=len(signal_array), - n_locks_release=1, -) +def run_filter(ball_filter, filtered, tiles, batch_size): + for i in range(0, len(filtered), batch_size): + ball_filter.append( + filtered[i : i + batch_size], tiles[i : i + batch_size] + ) + if ball_filter.ready: + ball_filter.walk() + ball_filter.get_processed_planes() -# Use random data for mask data -mask = np.random.randint(low=0, high=2, size=(42, 32), dtype=bool) -# Fill up the 3D filter with planes -for plane in signal_array: - mp_3d_filter.ball_filter.append(plane, mask) - if mp_3d_filter.ball_filter.ready: - break +if __name__ == "__main__": + with torch.inference_mode(True): + n = 5 + batch_size = 4 + plane_path = get_test_data_path("bright_brain/2d_filter") + tiles_path = get_test_data_path("bright_brain/tiles") -# Run the 3D filter -profiler = Profiler() -profiler.start() -for i in range(10): - # Repeat same filter 10 times to increase runtime - mp_3d_filter._run_filter() + compare_results( + time_filters( + n, + run_filter, + setup_filter( + plane_path, + tiles_path, + batch_size=batch_size, + torch_device="cpu", + ), + "cpu", + ), + time_filters( + n, + run_filter, + setup_filter( + plane_path, + tiles_path, + batch_size=batch_size, + torch_device="cuda", + ), + "cuda", + ), + ) -profiler.stop() -profiler.print(show_all=True) + profile_cpu( + n, + run_filter, + setup_filter( + plane_path, + tiles_path, + batch_size=batch_size, + torch_device="cpu", + ), + ) + profile_cuda( + n, + run_filter, + setup_filter( + plane_path, + tiles_path, + batch_size=batch_size, + torch_device="cuda", + ), + ) diff --git a/cellfinder/core/classify/classify.py b/cellfinder/core/classify/classify.py index 3488c666..3908b385 100644 --- a/cellfinder/core/classify/classify.py +++ b/cellfinder/core/classify/classify.py @@ -30,7 +30,7 @@ def main( max_workers: int = 3, *, callback: Optional[Callable[[int], None]] = None, -) -> List: +) -> List[Cell]: """ Parameters ---------- diff --git a/cellfinder/core/detect/detect.py b/cellfinder/core/detect/detect.py index 9d70e541..7562bbcb 100644 --- a/cellfinder/core/detect/detect.py +++ b/cellfinder/core/detect/detect.py @@ -13,76 +13,47 @@ - (max_val) is used to mark bright points during 3D filtering """ -import multiprocessing +import dataclasses from datetime import datetime -from queue import Queue -from threading import Lock -from typing import Callable, List, Optional, Sequence, Tuple, TypeVar +from typing import Callable, List, Optional, Tuple import numpy as np +import torch from brainglobe_utils.cells.cells import Cell -from brainglobe_utils.general.system import get_num_processes -from numba import set_num_threads from cellfinder.core import logger, types from cellfinder.core.detect.filters.plane import TileProcessor -from cellfinder.core.detect.filters.setup_filters import setup_tile_filtering +from cellfinder.core.detect.filters.setup_filters import DetectionSettings from cellfinder.core.detect.filters.volume.volume_filter import VolumeFilter +from cellfinder.core.tools.tools import inference_wrapper -def calculate_parameters_in_pixels( - voxel_sizes: Tuple[float, float, float], - soma_diameter_um: float, - max_cluster_size_um3: float, - ball_xy_size_um: float, - ball_z_size_um: float, -) -> Tuple[int, int, int, int]: - """ - Convert the command-line arguments from real (um) units to pixels - """ - - mean_in_plane_pixel_size = 0.5 * ( - float(voxel_sizes[2]) + float(voxel_sizes[1]) - ) - voxel_volume = ( - float(voxel_sizes[2]) * float(voxel_sizes[1]) * float(voxel_sizes[0]) - ) - soma_diameter = int(round(soma_diameter_um / mean_in_plane_pixel_size)) - max_cluster_size = int(round(max_cluster_size_um3 / voxel_volume)) - ball_xy_size = int(round(ball_xy_size_um / mean_in_plane_pixel_size)) - ball_z_size = int(round(ball_z_size_um / float(voxel_sizes[0]))) - - if ball_z_size == 0: - raise ValueError( - "Ball z size has been calculated to be 0 voxels." - " This may be due to large axial spacing of your data or the " - "ball_z_size_um parameter being too small. " - "Please check input parameters are correct. " - "Note that cellfinder requires high resolution data in all " - "dimensions, so that cells can be detected in multiple " - "image planes." - ) - return soma_diameter, max_cluster_size, ball_xy_size, ball_z_size - - +@inference_wrapper def main( signal_array: types.array, - start_plane: int, - end_plane: int, - voxel_sizes: Tuple[float, float, float], - soma_diameter: float, - max_cluster_size: float, - ball_xy_size: float, - ball_z_size: float, - ball_overlap_fraction: float, - soma_spread_factor: float, - n_free_cpus: int, - log_sigma_size: float, - n_sds_above_mean_thresh: float, + start_plane: int = 0, + end_plane: int = -1, + voxel_sizes: Tuple[float, float, float] = (5, 2, 2), + soma_diameter: float = 16, + max_cluster_size: float = 100_000, + ball_xy_size: float = 6, + ball_z_size: float = 15, + ball_overlap_fraction: float = 0.6, + soma_spread_factor: float = 1.4, + n_free_cpus: int = 2, + log_sigma_size: float = 0.2, + n_sds_above_mean_thresh: float = 10, outlier_keep: bool = False, artifact_keep: bool = False, save_planes: bool = False, plane_directory: Optional[str] = None, + batch_size: Optional[int] = None, + torch_device: str = "cpu", + use_scipy: bool = True, + split_ball_xy_size: int = 3, + split_ball_z_size: int = 3, + split_ball_overlap_fraction: float = 0.8, + split_soma_diameter: int = 7, *, callback: Optional[Callable[[int], None]] = None, ) -> List[Cell]: @@ -101,7 +72,7 @@ def main( Index of the ending plane for detection. voxel_sizes : Tuple[float, float, float] - Tuple of voxel sizes in each dimension (x, y, z). + Tuple of voxel sizes in each dimension (z, y, x). soma_diameter : float Diameter of the soma in physical units. @@ -142,6 +113,18 @@ def main( plane_directory : str, optional Directory path to save the planes. Defaults to None. + batch_size : int, optional + The number of planes to process in each batch. Defaults to 1. + For CPU, there's no benefit for a larger batch size. Only a memory + usage increase. For CUDA, the larger the batch size the better the + performance. Until it fills up the GPU memory - after which it + becomes slower. + + torch_device : str, optional + The device on which to run the computation. By default, it's "cpu". + To run on a gpu, specify the PyTorch device name, such as "cuda" to + run on the first GPU. + callback : Callable[int], optional A callback function that is called every time a plane has finished being processed. Called with the plane number that has finished. @@ -151,151 +134,103 @@ def main( List[Cell] List of detected cells. """ - if not np.issubdtype(signal_array.dtype, np.integer): - raise ValueError( - "signal_array must be integer datatype, but has datatype " + start_time = datetime.now() + if batch_size is None: + if torch_device == "cpu": + batch_size = 4 + else: + batch_size = 1 + + if not np.issubdtype(signal_array.dtype, np.number): + raise TypeError( + "signal_array must be a numpy datatype, but has datatype " f"{signal_array.dtype}" ) - n_processes = get_num_processes(min_free_cpu_cores=n_free_cpus) - n_ball_procs = max(n_processes - 1, 1) - - # we parallelize 2d filtering, which typically lags behind the 3d - # processing so for n_ball_procs 2d filtering threads, ball_z_size will - # typically be in use while the others stall waiting for 3d processing - # so we can use those for other things, such as numba threading - set_num_threads(max(n_ball_procs - int(ball_z_size), 1)) - - start_time = datetime.now() - - ( - soma_diameter, - max_cluster_size, - ball_xy_size, - ball_z_size, - ) = calculate_parameters_in_pixels( - voxel_sizes, - soma_diameter, - max_cluster_size, - ball_xy_size, - ball_z_size, - ) - - if end_plane == -1: - end_plane = len(signal_array) - signal_array = signal_array[start_plane:end_plane] - signal_array = signal_array.astype(np.uint32) - - callback = callback or (lambda *args, **kwargs: None) if signal_array.ndim != 3: raise ValueError("Input data must be 3D") - setup_params = ( - signal_array[0, :, :], - soma_diameter, - ball_xy_size, - ball_z_size, - ball_overlap_fraction, - start_plane, - ) - - # Create 3D analysis filter - mp_3d_filter = VolumeFilter( - soma_diameter=soma_diameter, - setup_params=setup_params, - soma_size_spread_factor=soma_spread_factor, - n_planes=len(signal_array), - n_locks_release=n_ball_procs, - save_planes=save_planes, - plane_directory=plane_directory, + if end_plane < 0: + end_plane = len(signal_array) + end_plane = min(len(signal_array), end_plane) + + torch_device = torch_device.lower() + batch_size = max(batch_size, 1) + # brainmapper can pass them in as str + voxel_sizes = list(map(float, voxel_sizes)) + + settings = DetectionSettings( + plane_shape=signal_array.shape[1:], + plane_original_np_dtype=signal_array.dtype, + voxel_sizes=voxel_sizes, + soma_spread_factor=soma_spread_factor, + soma_diameter_um=soma_diameter, + max_cluster_size_um3=max_cluster_size, + ball_xy_size_um=ball_xy_size, + ball_z_size_um=ball_z_size, start_plane=start_plane, - max_cluster_size=max_cluster_size, + end_plane=end_plane, + n_free_cpus=n_free_cpus, + ball_overlap_fraction=ball_overlap_fraction, + log_sigma_size=log_sigma_size, + n_sds_above_mean_thresh=n_sds_above_mean_thresh, outlier_keep=outlier_keep, artifact_keep=artifact_keep, + save_planes=save_planes, + plane_directory=plane_directory, + batch_size=batch_size, + torch_device=torch_device, + ) + + # replicate the settings specific to splitting, before we access anything + # of the original settings, causing cached properties + kwargs = dataclasses.asdict(settings) + kwargs["ball_z_size_um"] = split_ball_z_size * settings.z_pixel_size + kwargs["ball_xy_size_um"] = ( + split_ball_xy_size * settings.in_plane_pixel_size ) + kwargs["ball_overlap_fraction"] = split_ball_overlap_fraction + kwargs["soma_diameter_um"] = ( + split_soma_diameter * settings.in_plane_pixel_size + ) + # always run on cpu because copying to gpu overhead is likely slower than + # any benefit for detection on smallish volumes + kwargs["torch_device"] = "cpu" + # for splitting, we only do 3d filtering. Its input is a zero volume + # with cell voxels marked with threshold_value. So just use float32 + # for input because the filters will also use float(32). So there will + # not be need to convert the input a different dtype before passing to + # the filters. + kwargs["plane_original_np_dtype"] = np.float32 + splitting_settings = DetectionSettings(**kwargs) + + # Create 3D analysis filter + mp_3d_filter = VolumeFilter(settings=settings) - clipping_val, threshold_value = setup_tile_filtering(signal_array[0, :, :]) # Create 2D analysis filter mp_tile_processor = TileProcessor( - clipping_val, - threshold_value, - soma_diameter, - log_sigma_size, - n_sds_above_mean_thresh, + plane_shape=settings.plane_shape, + clipping_value=settings.clipping_value, + threshold_value=settings.threshold_value, + n_sds_above_mean_thresh=n_sds_above_mean_thresh, + log_sigma_size=log_sigma_size, + soma_diameter=settings.soma_diameter, + torch_device=torch_device, + dtype=settings.filtering_dtype.__name__, + use_scipy=use_scipy, ) - # Force spawn context - mp_ctx = multiprocessing.get_context("spawn") - with mp_ctx.Pool(n_ball_procs) as worker_pool: - async_results, locks = _map_with_locks( - mp_tile_processor.get_tile_mask, - signal_array, # type: ignore - worker_pool, - ) - - # Release the first set of locks for the 2D filtering - for i in range(min(n_ball_procs + ball_z_size, len(locks))): - logger.debug(f"🔓 Releasing lock for plane {i}") - locks[i].release() + orig_n_threads = torch.get_num_threads() + torch.set_num_threads(settings.n_torch_comp_threads) - # Start 3D filter - # - # This runs in the main thread, and blocks until the all the 2D and - # then 3D filtering has finished. As batches of planes are filtered - # by the 3D filter, it releases the locks of subsequent 2D filter - # processes. - mp_3d_filter.process(async_results, locks, callback=callback) + # process the data + mp_3d_filter.process(mp_tile_processor, signal_array, callback=callback) + cells = mp_3d_filter.get_results(splitting_settings) - # it's now done filtering, get results with pool - cells = mp_3d_filter.get_results(worker_pool) + torch.set_num_threads(orig_n_threads) time_elapsed = datetime.now() - start_time - logger.debug( - f"All Planes done. Found {len(cells)} cells in {format(time_elapsed)}" - ) - print("Detection complete - all planes done in : {}".format(time_elapsed)) + s = f"Detection complete. Found {len(cells)} cells in {time_elapsed}" + logger.debug(s) + print(s) return cells - - -Tin = TypeVar("Tin") -Tout = TypeVar("Tout") - - -def _run_func_with_lock( - func: Callable[[Tin], Tout], arg: Tin, lock: Lock -) -> Tout: - """ - Run a function after acquiring a lock. - """ - lock.acquire(blocking=True) - return func(arg) - - -def _map_with_locks( - func: Callable[[Tin], Tout], - iterable: Sequence[Tin], - worker_pool: multiprocessing.pool.Pool, -) -> Tuple[Queue, List[Lock]]: - """ - Map a function to arguments, blocking execution. - - Maps *func* to args in *iterable*, but blocks all execution and - return a queue of asyncronous results and locks for each of the - results. Execution can be enabled by releasing the returned - locks in order. - """ - # Setup a manager to handle the locks - m = multiprocessing.Manager() - # Setup one lock per argument to be mapped - locks = [m.Lock() for _ in range(len(iterable))] - [lock.acquire(blocking=False) for lock in locks] - - async_results: Queue = Queue() - - for arg, lock in zip(iterable, locks): - async_result = worker_pool.apply_async( - _run_func_with_lock, args=(func, arg, lock) - ) - async_results.put(async_result) - - return async_results, locks diff --git a/cellfinder/core/detect/filters/plane/classical_filter.py b/cellfinder/core/detect/filters/plane/classical_filter.py index af331d52..6504d312 100644 --- a/cellfinder/core/detect/filters/plane/classical_filter.py +++ b/cellfinder/core/detect/filters/plane/classical_filter.py @@ -1,45 +1,347 @@ import numpy as np +import torch +import torch.nn.functional as F from scipy.ndimage import gaussian_filter, laplace from scipy.signal import medfilt2d -def enhance_peaks( - img: np.ndarray, clipping_value: float, gaussian_sigma: float = 2.5 -) -> np.ndarray: +@torch.jit.script +def normalize( + filtered_planes: torch.Tensor, + flip: bool, + max_value: float = 1.0, +) -> None: """ - Enhances the peaks (bright pixels) in an input image. + Normalizes the 3d tensor so each z-plane is independently scaled to be + in the [0, max_value] range. If `flip` is `True`, the sign of the tensor + values are flipped before any processing. - Parameters: + It is done to filtered_planes inplace. + """ + num_z = filtered_planes.shape[0] + filtered_planes_1d = filtered_planes.view(num_z, -1) + + if flip: + filtered_planes_1d.mul_(-1) + + planes_min = torch.min(filtered_planes_1d, dim=1, keepdim=True)[0] + filtered_planes_1d.sub_(planes_min) + # take max after subtraction + planes_max = torch.max(filtered_planes_1d, dim=1, keepdim=True)[0] + # if min = max = zero, divide by 1 - it'll stay zero + planes_max[planes_max == 0] = 1 + filtered_planes_1d.div_(planes_max) + + if max_value != 1.0: + # To leave room to label in the 3d detection. + filtered_planes_1d.mul_(max_value) + + +@torch.jit.script +def filter_for_peaks( + planes: torch.Tensor, + med_kernel: torch.Tensor, + gauss_kernel: torch.Tensor, + gauss_kernel_size: int, + lap_kernel: torch.Tensor, + device: str, + clipping_value: float, +) -> torch.Tensor: + """ + Takes the 3d z-stack and returns a new z-stack where the peaks are + highlighted. + + It applies a median filter -> gaussian filter -> laplacian filter. + """ + filtered_planes = planes.unsqueeze(1) # ZYX -> ZCYX input, C=channels + + # ------------------ median filter ------------------ + # extracts patches to compute median over for each pixel + # We go from ZCYX -> ZCYX, C=1 to C=9 with C containing the elements around + # each Z,X,Y voxel over which we compute the median + # Zero padding is ok here + filtered_planes = F.conv2d(filtered_planes, med_kernel, padding="same") + # we're going back to ZCYX=Z1YX by taking median of patches in C dim + filtered_planes = filtered_planes.median(dim=1, keepdim=True)[0] + + # ------------------ gaussian filter ------------------ + # normalize the input data to 0-1 range. Otherwise, if the values are + # large, we'd need a float64 so conv result is accurate + normalize(filtered_planes, flip=False) + + # we need to do reflection padding around the tensor for parity with scipy + # gaussian filtering. Scipy does reflection in a manner typically called + # symmetric: (dcba|abcd|dcba). Torch does it like this: (dcb|abcd|cba). So + # we manually do symmetric padding below + pad = gauss_kernel_size // 2 + padding_mode = "reflect" + # if data is too small for reflect, just use constant border value + if pad >= filtered_planes.shape[-1] or pad >= filtered_planes.shape[-2]: + padding_mode = "replicate" + filtered_planes = F.pad(filtered_planes, (pad,) * 4, padding_mode, 0.0) + # We reflected torch style, so copy/shift everything by one to be symmetric + filtered_planes[:, :, :pad, :] = filtered_planes[ + :, :, 1 : pad + 1, : + ].clone() + filtered_planes[:, :, -pad:, :] = filtered_planes[ + :, :, -pad - 1 : -1, : + ].clone() + filtered_planes[:, :, :, :pad] = filtered_planes[ + :, :, :, 1 : pad + 1 + ].clone() + filtered_planes[:, :, :, -pad:] = filtered_planes[ + :, :, :, -pad - 1 : -1 + ].clone() + + # We apply the 1D gaussian filter twice, once for Y and once for X. The + # filter shape passed in is 11K1 or 111K, depending on device. Where + # K=filter size + # see https://discuss.pytorch.org/t/performance-issue-for-conv2d-with-1d- + # filter-along-a-dim/201734/2 for the reason for the moveaxis depending + # on the device + if device == "cpu": + # kernel shape is 11K1. First do Y (second to last axis) + filtered_planes = F.conv2d( + filtered_planes, gauss_kernel, padding="valid" + ) + # To do X, exchange X,Y axis, filter, change back. On CPU, Y (second + # to last) axis is faster. + filtered_planes = F.conv2d( + filtered_planes.moveaxis(-1, -2), gauss_kernel, padding="valid" + ).moveaxis(-1, -2) + else: + # kernel shape is 111K + # First do Y (second to last axis). Exchange X,Y axis, filter, change + # back. On CUDA, X (last) axis is faster. + filtered_planes = F.conv2d( + filtered_planes.moveaxis(-1, -2), gauss_kernel, padding="valid" + ).moveaxis(-1, -2) + # now do X, last axis + filtered_planes = F.conv2d( + filtered_planes, gauss_kernel, padding="valid" + ) + + # ------------------ laplacian filter ------------------ + # it's a 2d filter. Need to pad using symmetric for scipy parity. But, + # torch doesn't have it, and we used a kernel of size 3, so for padding of + # 1, replicate == symmetric. That's enough for parity with past scipy. If + # we change kernel size in the future, we may have to do as above + padding = lap_kernel.shape[-1] // 2 + filtered_planes = F.pad(filtered_planes, (padding,) * 4, "replicate") + filtered_planes = F.conv2d(filtered_planes, lap_kernel, padding="valid") + + # we don't need the channel axis + filtered_planes = filtered_planes[:, 0, :, :] + + # scale back to full scale, filtered values are negative so flip + normalize(filtered_planes, flip=True, max_value=clipping_value) + return filtered_planes + + +class PeakEnhancer: + """ + A class that filters each plane in a z-stack such that peaks are + visualized. + + It uses a series of 2D filters of median -> gaussian -> + laplacian. Then normalizes each plane to be between [0, clipping_value]. + + Parameters ---------- - img : np.ndarray - Input image. - clipping_value : float - Maximum value for the enhanced image. - gaussian_sigma : float, optional - Standard deviation for the Gaussian filter. Default is 2.5. - - Returns: - ------- - np.ndarray - Enhanced image with peaks. - - Notes: - ------ - The enhancement process includes the following steps: - 1. Applying a 2D median filter. - 2. Applying a Laplacian of Gaussian filter (LoG). - 3. Multiplying by -1 (bright spots respond negative in a LoG). - 4. Rescaling image values to range from 0 to clipping value. - """ - type_in = img.dtype - filtered_img = medfilt2d(img.astype(np.float64)) - filtered_img = gaussian_filter(filtered_img, gaussian_sigma) - filtered_img = laplace(filtered_img) - filtered_img *= -1 - - filtered_img -= filtered_img.min() - filtered_img /= filtered_img.max() - - # To leave room to label in the 3d detection. - filtered_img *= clipping_value - return filtered_img.astype(type_in) + torch_device: str + The device on which the data and processing occurs on. Can be e.g. + "cpu", "cuda" etc. Any data passed to the filter must be on this + device. Returned data will also be on this device. + dtype : torch.dtype + The data-type of the input planes and the type to use internally. + E.g. `torch.float32`. + clipping_value : int + The value such that after normalizing, the max value will be this + clipping_value. + laplace_gaussian_sigma : float + Size of the sigma for the gaussian filter. + use_scipy : bool + If running on the CPU whether to use the scipy filters or the same + pytorch filters used on CUDA. Scipy filters can be faster. + """ + + # binary kernel that generates square patches for each pixel so we can find + # the median around the pixel + med_kernel: torch.Tensor + + # gaussian 1D kernel with kernel/weight shape 11K1 or 111K, depending + # on device. Where K=filter size + gauss_kernel: torch.Tensor + + # 2D laplacian kernel with kernel/weight shape KxK. Where + # K=filter size + lap_kernel: torch.Tensor + + # the value such that after normalizing, the max value will be this + # clipping_value + clipping_value: float + + # sigma value for gaussian filter + laplace_gaussian_sigma: float + + # the torch device to run on. E.g. cpu/cuda. + torch_device: str + + # when running on CPU whether to use pytorch or scipy for filters + use_scipy: bool + + median_filter_size: int = 3 + """ + The median filter size in x/y direction. + + **Must** be odd. + """ + + def __init__( + self, + torch_device: str, + dtype: torch.dtype, + clipping_value: float, + laplace_gaussian_sigma: float, + use_scipy: bool, + ): + super().__init__() + self.torch_device = torch_device.lower() + self.clipping_value = clipping_value + self.laplace_gaussian_sigma = laplace_gaussian_sigma + self.use_scipy = use_scipy + + # all these kernels are odd in size + self.med_kernel = self._get_median_kernel(torch_device, dtype) + self.gauss_kernel = self._get_gaussian_kernel( + torch_device, dtype, laplace_gaussian_sigma + ) + self.lap_kernel = self._get_laplacian_kernel(torch_device, dtype) + + @property + def gaussian_filter_size(self) -> int: + """ + The gaussian filter 1d size. + + It is odd. + """ + return 2 * int(round(4 * self.laplace_gaussian_sigma)) + 1 + + def _get_median_kernel( + self, torch_device: str, dtype: torch.dtype + ) -> torch.Tensor: + """ + Gets a median patch generator kernel, already on the correct + device. + + Based on how kornia does it for median filtering. + """ + # must be odd kernel + kernel_n = self.median_filter_size + if not (kernel_n % 2): + raise ValueError("The median filter size must be odd") + + # extract patches to compute median over for each pixel. When passing + # input we go from ZCYX -> ZCYX, C=1 to C=9 and containing the elements + # around each Z,X,Y over which we can then compute the median + window_range = kernel_n * kernel_n # e.g. 3x3 + kernel = torch.zeros( + (window_range, window_range), device=torch_device, dtype=dtype + ) + idx = torch.arange(window_range, device=torch_device) + # diagonal of e.g. 9x9 is 1 + kernel[idx, idx] = 1.0 + # out channels, in channels, n*y, n*x. The kernel collects all the 3x3 + # elements around a pixel, using a binary mask for each element, as a + # separate channel. So we go from 1 to 9 channels in the output + kernel = kernel.view(window_range, 1, kernel_n, kernel_n) + + return kernel + + def _get_gaussian_kernel( + self, + torch_device: str, + dtype: torch.dtype, + laplace_gaussian_sigma: float, + ) -> torch.Tensor: + """Gets the 1D gaussian kernel used to filter the data.""" + # we do 2 1D filters, once on each y, x dim. + # shape of kernel will be 11K1 with dims Z, C, Y, X. C=1, Z is expanded + # to number of z during filtering. + kernel_size = self.gaussian_filter_size + + # to get the values of a 1D gaussian kernel, we pass a single impulse + # data through the filter, which recovers the filter values. We do this + # because scipy doesn't make their kernel available in public API and + # we want parity with scipy filtering + impulse = np.zeros(kernel_size) + # the impulse needs to be to the left of center + impulse[kernel_size // 2] = 1 + kernel = gaussian_filter( + impulse, laplace_gaussian_sigma, mode="constant", cval=0 + ) + # kernel should be fully symmetric + assert kernel[0] == kernel[-1] + gauss_kernel = torch.from_numpy(kernel).type(dtype).to(torch_device) + + # default shape is (y, x) with y axis filtered only - we transpose + # input to filter on x + gauss_kernel = gauss_kernel.view(1, 1, -1, 1) + + # see https://discuss.pytorch.org/t/performance-issue-for-conv2d- + # with-1d-filter-along-a-dim/201734. Conv2d is faster on a specific dim + # for 1D filters depending on CPU/CUDA. See also filter_for_peaks + # on CPU, we only do conv2d on the (1st) dim + if torch_device != "cpu": + # on CUDA, we only filter on the x dim, flipping input to filter y + gauss_kernel = gauss_kernel.view(1, 1, 1, -1) + + return gauss_kernel + + def _get_laplacian_kernel( + self, torch_device: str, dtype: torch.dtype + ) -> torch.Tensor: + """Gets a 2d laplacian kernel, based on scipy's laplace.""" + # for parity with scipy, scipy computes the laplacian with default + # parameters and kernel size 3 using filter coefficients [1, -2, 1]. + # Each filtered pixel is the sum of the filter around the pixel + # vertically and horizontally. We can do it in 2d at once with + # coefficients below (faster than 2x1D for such small filter) + return torch.as_tensor( + [[0, 1, 0], [1, -4, 1], [0, 1, 0]], + dtype=dtype, + device=torch_device, + ).view(1, 1, 3, 3) + + def enhance_peaks(self, planes: torch.Tensor) -> torch.Tensor: + """ + Applies the filtering and normalization to the 3d z-stack (not inplace) + and returns the filtered z-stack. + """ + if self.torch_device == "cpu" and self.use_scipy: + filtered_planes = planes.clone() + for i in range(planes.shape[0]): + img = planes[i, :, :].numpy() + img = medfilt2d(img) + img = gaussian_filter(img, self.laplace_gaussian_sigma) + img = laplace(img) + filtered_planes[i, :, :] = torch.from_numpy(img) + + # laplace makes values negative so flip + normalize( + filtered_planes, + flip=True, + max_value=self.clipping_value, + ) + return filtered_planes + + filtered_planes = filter_for_peaks( + planes, + self.med_kernel, + self.gauss_kernel, + self.gaussian_filter_size, + self.lap_kernel, + self.torch_device, + self.clipping_value, + ) + return filtered_planes diff --git a/cellfinder/core/detect/filters/plane/plane_filter.py b/cellfinder/core/detect/filters/plane/plane_filter.py index f7e7868e..82434d16 100644 --- a/cellfinder/core/detect/filters/plane/plane_filter.py +++ b/cellfinder/core/detect/filters/plane/plane_filter.py @@ -1,87 +1,169 @@ -from dataclasses import dataclass -from threading import Lock -from typing import Optional, Tuple +from dataclasses import dataclass, field +from typing import Tuple -import dask.array as da -import numpy as np +import torch -from cellfinder.core import types -from cellfinder.core.detect.filters.plane.classical_filter import enhance_peaks +from cellfinder.core.detect.filters.plane.classical_filter import PeakEnhancer from cellfinder.core.detect.filters.plane.tile_walker import TileWalker @dataclass class TileProcessor: """ - Attributes + Processor that filters each plane to highlight the peaks and also + tiles and thresholds each plane returning a mask indicating which + tiles are inside the brain. + + Each input plane is: + + 1. Clipped to [0, clipping_value]. + 2. Tiled and compared to the corner tile. Any tile that is "bright" + according to `TileWalker` is marked as being in the brain. + 3. Filtered + 1. Run through the peak enhancement filter (see `PeakEnhancer`) + 2. Thresholded. Any values that are larger than + (mean + stddev * n_sds_above_mean_thresh) are set to + threshold_value. + + Parameters ---------- - clipping_value : - Upper value that the input plane is clipped to. - threshold_value : + plane_shape : tuple(int, int) + Height/width of the planes. + clipping_value : int + Upper value that the input planes are clipped to. Result is scaled so + max is this value. + threshold_value : int Value used to mark bright features in the input planes after they have been run through the 2D filter. + n_sds_above_mean_thresh : float + Number of standard deviations above the mean threshold to use for + determining whether a voxel is bright. + log_sigma_size : float + Size of the sigma for the gaussian filter. + soma_diameter : float + Diameter of the soma in voxels. + torch_device: str + The device on which the data and processing occurs on. Can be e.g. + "cpu", "cuda" etc. Any data passed to the filter must be on this + device. Returned data will also be on this device. + dtype : str + The data-type of the input planes and the type to use internally. + E.g. "float32". + use_scipy : bool + If running on the CPU whether to use the scipy filters or the same + pytorch filters used on CUDA. Scipy filters can be faster. """ + # Upper value that the input plane is clipped to. Result is scaled so + # max is this value clipping_value: int + # Value used to mark bright features in the input planes after they have + # been run through the 2D filter threshold_value: int - soma_diameter: int - log_sigma_size: float + # voxels who are this many std above mean or more are set to + # threshold_value n_sds_above_mean_thresh: float - def get_tile_mask( - self, plane: types.array, lock: Optional[Lock] = None - ) -> Tuple[np.ndarray, np.ndarray]: - """ - This thresholds the input plane, and returns a mask indicating which - tiles are inside the brain. + # filter that finds the peaks in the planes + peak_enhancer: PeakEnhancer = field(init=False) + # generates tiles of the planes, with each tile marked as being inside + # or outside the brain based on brightness + tile_walker: TileWalker = field(init=False) - The input plane is: + def __init__( + self, + plane_shape: Tuple[int, int], + clipping_value: int, + threshold_value: int, + n_sds_above_mean_thresh: float, + log_sigma_size: float, + soma_diameter: int, + torch_device: str, + dtype: str, + use_scipy: bool, + ): + self.clipping_value = clipping_value + self.threshold_value = threshold_value + self.n_sds_above_mean_thresh = n_sds_above_mean_thresh - 1. Clipped to [0, self.clipping_value] - 2. Run through a peak enhancement filter (see `classical_filter.py`) - 3. Thresholded. Any values that are larger than - (mean + stddev * self.n_sds_above_mean_thresh) are set to - self.threshold_value in-place. + laplace_gaussian_sigma = log_sigma_size * soma_diameter + self.peak_enhancer = PeakEnhancer( + torch_device=torch_device, + dtype=getattr(torch, dtype), + clipping_value=self.clipping_value, + laplace_gaussian_sigma=laplace_gaussian_sigma, + use_scipy=use_scipy, + ) + + self.tile_walker = TileWalker( + plane_shape=plane_shape, + soma_diameter=soma_diameter, + ) + + def get_tile_mask( + self, planes: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Applies the filtering listed in the class description. Parameters ---------- - plane : - Input plane. - lock : - If given, block reading the plane into memory until the lock - can be acquired. + planes : torch.Tensor + Input planes (z-stack). Note, the input data is modified. Returns ------- - plane : - Thresholded plane. - inside_brain_tiles : + planes : torch.Tensor + Filtered and thresholded planes (z-stack). + inside_brain_tiles : torch.Tensor Boolean mask indicating which tiles are inside (1) or outside (0) the brain. + It's a z-stack whose planes are the shape of the number of tiles + in each planar axis. """ - laplace_gaussian_sigma = self.log_sigma_size * self.soma_diameter - plane = plane.T - np.clip(plane, 0, self.clipping_value, out=plane) - if lock is not None: - lock.acquire(blocking=True) - # Read plane from a dask array into memory as a numpy array - if isinstance(plane, da.Array): - plane = np.array(plane) - + torch.clip_(planes, 0, self.clipping_value) # Get tiles that are within the brain - walker = TileWalker(plane, self.soma_diameter) - walker.mark_bright_tiles() - inside_brain_tiles = walker.bright_tiles_mask - + inside_brain_tiles = self.tile_walker.get_bright_tiles(planes) # Threshold the image - thresholded_img = enhance_peaks( - plane.copy(), - self.clipping_value, - gaussian_sigma=laplace_gaussian_sigma, + enhanced_planes = self.peak_enhancer.enhance_peaks(planes) + + _threshold_planes( + planes, + enhanced_planes, + self.n_sds_above_mean_thresh, + self.threshold_value, ) - avg = np.mean(thresholded_img) - sd = np.std(thresholded_img) - threshold = avg + self.n_sds_above_mean_thresh * sd - plane[thresholded_img > threshold] = self.threshold_value - return plane, inside_brain_tiles + return planes, inside_brain_tiles + + def get_tiled_buffer(self, depth: int, device: str): + return self.tile_walker.get_tiled_buffer(depth, device) + + +@torch.jit.script +def _threshold_planes( + planes: torch.Tensor, + enhanced_planes: torch.Tensor, + n_sds_above_mean_thresh: float, + threshold_value: int, +) -> None: + """ + Sets each plane (in-place) to threshold_value, where the corresponding + enhanced_plane > mean + n_sds_above_mean_thresh*std. Each plane will be + set to zero elsewhere. + """ + planes_1d = enhanced_planes.view(enhanced_planes.shape[0], -1) + + # add back last dim + avg = torch.mean(planes_1d, dim=1, keepdim=True).unsqueeze(2) + sd = torch.std(planes_1d, dim=1, keepdim=True).unsqueeze(2) + threshold = avg + n_sds_above_mean_thresh * sd + + above = enhanced_planes > threshold + planes[above] = threshold_value + # subsequent steps only care about the values that are set to threshold or + # above in planes. We set values in *planes* to threshold based on the + # value in *enhanced_planes*. So, there could be values in planes that are + # at threshold already, but in enhanced_planes they are not. So it's best + # to zero all other values, so voxels previously at threshold don't count + planes[torch.logical_not(above)] = 0 diff --git a/cellfinder/core/detect/filters/plane/tile_walker.py b/cellfinder/core/detect/filters/plane/tile_walker.py index e4abe5cf..e8b994b6 100644 --- a/cellfinder/core/detect/filters/plane/tile_walker.py +++ b/cellfinder/core/detect/filters/plane/tile_walker.py @@ -1,8 +1,8 @@ import math -from typing import Generator, Tuple +from typing import Tuple -import numpy as np -from numba import njit +import torch +import torch.nn.functional as F class TileWalker: @@ -15,74 +15,140 @@ class TileWalker: The mean and standard deviation of this tile is calculated, and the threshold set at 1 + mean + (2 * stddev). - Attributes + Parameters ---------- - bright_tiles_mask : - An boolean array whose entries correspond to whether each tile is - bright (1) or dark (0). The values are set in - self.mark_bright_tiles(). + plane_shape : tuple(int, int) + Height/width of the planes. + soma_diameter : float + Diameter of the soma in voxels. """ - def __init__(self, img: np.ndarray, soma_diameter: int) -> None: - self.img = img - self.img_width, self.img_height = img.shape - self.tile_width = soma_diameter * 2 - self.tile_height = soma_diameter * 2 + def __init__( + self, plane_shape: Tuple[int, int], soma_diameter: int + ) -> None: - n_tiles_width = math.ceil(self.img_width / self.tile_width) - n_tiles_height = math.ceil(self.img_height / self.tile_height) - self.bright_tiles_mask = np.zeros( - (n_tiles_width, n_tiles_height), dtype=bool - ) + self.img_height, self.img_width = plane_shape + self.tile_height = soma_diameter * 2 + self.tile_width = soma_diameter * 2 - corner_tile = img[0 : self.tile_width, 0 : self.tile_height] - corner_intensity = np.mean(corner_tile) - corner_sd = np.std(corner_tile) - # add 1 to ensure not 0, as disables - self.out_of_brain_threshold = (corner_intensity + (2 * corner_sd)) + 1 + self.n_tiles_height = math.ceil(self.img_height / self.tile_height) + self.n_tiles_width = math.ceil(self.img_width / self.tile_width) - def _get_tiles(self) -> Generator[Tuple[int, int, np.ndarray], None, None]: + def get_bright_tiles(self, planes: torch.Tensor) -> torch.Tensor: """ - Generator that yields tiles of the 2D image. + Takes a 3d z-stack. For each z it computes the mean/std of the corner + tile and uses that to get a in/out of brain threshold for each z. - Notes - ----- - The final tile in each dimension can have a smaller size than the - rest of the tiles if the tile shape does not exactly divide the - image shape. - """ - for y in range( - 0, self.img_height - self.tile_height, self.tile_height - ): - for x in range( - 0, self.img_width - self.tile_width, self.tile_width - ): - tile = self.img[ - x : x + self.tile_width, y : y + self.tile_height - ] - yield x, y, tile - - def mark_bright_tiles(self) -> None: - """ - Loop through tiles, and if the average value of a tile is - greater than the intensity threshold mark the tile as bright - in self.bright_tiles_mask. + Parameters + ---------- + planes : torch.Tensor + 3d z-stack. + + Returns + ------- + out_of_brain_thresholds : torch.Tensor + 3d z-stack whose planar shape is the number of tiles in a plane. + The returned data will be on the same torch device as the input + planes. """ - threshold = self.out_of_brain_threshold - if threshold == 0: - return + return _get_bright_tiles( + planes, + self.n_tiles_height, + self.n_tiles_width, + self.tile_height, + self.tile_width, + ) - for x, y, tile in self._get_tiles(): - if not is_low_average(tile, threshold): - mask_x = x // self.tile_width - mask_y = y // self.tile_height - self.bright_tiles_mask[mask_x, mask_y] = True + def get_tiled_buffer(self, depth: int, device: str): + return torch.zeros( + (depth, self.n_tiles_height, self.n_tiles_width), + dtype=torch.bool, + device=device, + ) -@njit -def is_low_average(tile: np.ndarray, threshold: float) -> bool: +@torch.jit.script +def _get_out_of_brain_threshold( + planes: torch.Tensor, tile_height: int, tile_width: int +) -> torch.Tensor: """ - Return `True` if the average value of *tile* is below *threshold*. + Takes a 3d z-stack. For each z it computes the mean/std of the corner tile + and uses that to get a in/out of brain threshold for each z-stack. + + Parameters + ---------- + planes : + 3d z-stack. + tile_height : + Height of each tile. + tile_width : + Width of each tile. + + Returns + ------- + out_of_brain_thresholds : + 1d z-stack. + """ + # get corner tile + corner_tiles = planes[:, 0:tile_height, 0:tile_width] + # convert from ZYX -> ZK, where K is the elements in the corner tile + corner_tiles = corner_tiles.reshape((planes.shape[0], -1)) + + # need to operate in float64, in case the values are large + corner64 = corner_tiles.type(torch.float64) + corner_intensity = torch.mean(corner64, dim=1).type(planes.dtype) + # for parity with past when we used np.std, which defaults to ddof=0 + corner_sd = torch.std(corner64, dim=1, correction=0).type(planes.dtype) + # add 1 to ensure not 0, as disables + out_of_brain_thresholds = corner_intensity + 2 * corner_sd + 1 + + return out_of_brain_thresholds + + +@torch.jit.script +def _get_bright_tiles( + planes: torch.Tensor, + n_tiles_height: int, + n_tiles_width: int, + tile_height: int, + tile_width: int, +) -> torch.Tensor: """ - avg = np.mean(tile) - return avg < threshold + Loop through the tiles of the plane for each plane. And if the average + value of a tile is greater than the intensity threshold of that plain, + mark the tile as bright. + """ + bright_tiles_mask = torch.zeros( + (planes.shape[0], n_tiles_height, n_tiles_width), + dtype=torch.bool, + device=planes.device, + ) + # if we don't have enough size for a single tile, it's all outside + if planes.shape[1] < tile_height or planes.shape[2] < tile_width: + return bright_tiles_mask + + # for each plane, the threshold + out_of_brain_thresholds = _get_out_of_brain_threshold( + planes, tile_height, tile_width + ) + # thresholds Z -> ZYX shape + thresholds = out_of_brain_thresholds.view(-1, 1, 1) + + # ZYX -> ZCYX required for function (C=1) + planes = planes.unsqueeze(1) + # get the average of each tile + tile_avg = F.avg_pool2d( + planes, + (tile_height, tile_width), + ceil_mode=False, # default is False, but to make sure + ) + # go back from ZCYX -> ZYX + tile_avg = tile_avg[:, 0, :, :] + + bright = tile_avg >= thresholds + # tile_avg and bright may be smaller than bright_tiles_mask because + # avg_pool2d first subtracts the kernel size before computing # tiles. + # So contain view to that size + bright_tiles_mask[:, : bright.shape[1], : bright.shape[2]][bright] = True + + return bright_tiles_mask diff --git a/cellfinder/core/detect/filters/setup_filters.py b/cellfinder/core/detect/filters/setup_filters.py index d68387fc..15db72ff 100644 --- a/cellfinder/core/detect/filters/setup_filters.py +++ b/cellfinder/core/detect/filters/setup_filters.py @@ -1,70 +1,427 @@ +""" +Container for all the settings used during 2d/3d filtering and cell detection. +""" + import math -from typing import Tuple +from dataclasses import dataclass +from functools import cached_property +from typing import Callable, Optional, Tuple, Type import numpy as np +from brainglobe_utils.general.system import get_num_processes -from cellfinder.core.detect.filters.volume.ball_filter import BallFilter -from cellfinder.core.detect.filters.volume.structure_detection import ( - CellDetector, +from cellfinder.core.tools.tools import ( + get_data_converter, + get_max_possible_int_value, ) -from cellfinder.core.tools.tools import get_max_possible_value - - -def get_ball_filter( - *, - plane: np.ndarray, - soma_diameter: int, - ball_xy_size: int, - ball_z_size: int, - ball_overlap_fraction: float = 0.6, -) -> BallFilter: - # thrsh_val is used to clip the data in plane to make sure - # a number is available to mark cells. soma_centre_val is the - # number used to mark cells. - max_value = get_max_possible_value(plane) - thrsh_val = max_value - 1 - soma_centre_val = max_value - - tile_width = soma_diameter * 2 - plane_height, plane_width = plane.shape - - ball_filter = BallFilter( - plane_width, - plane_height, - ball_xy_size, - ball_z_size, - overlap_fraction=ball_overlap_fraction, - tile_step_width=tile_width, - tile_step_height=tile_width, - threshold_value=thrsh_val, - soma_centre_value=soma_centre_val, - ) - return ball_filter - - -def get_cell_detector( - *, plane_shape: Tuple[int, int], ball_z_size: int, z_offset: int = 0 -) -> CellDetector: - plane_height, plane_width = plane_shape - start_z = z_offset + int(math.floor(ball_z_size / 2)) - return CellDetector(plane_width, plane_height, start_z=start_z) - - -def setup_tile_filtering(plane: np.ndarray) -> Tuple[int, int]: - """ - Setup values that are used to threshold the plane during 2D filtering. - - Returns - ------- - clipping_value : - Upper value used to clip planes before 2D filtering. This is chosen - to leave two numbers left that can later be used to mark bright points - during the 2D and 3D filtering stages. - threshold_value : - Value used to mark bright pixels after 2D filtering. - """ - max_value = get_max_possible_value(plane) - clipping_value = max_value - 2 - thrsh_val = max_value - 1 - - return clipping_value, thrsh_val + +MAX_TORCH_COMP_THREADS = 12 +# As seen in the benchmarks in the original PR, when running on CPU using +# more than ~12 cores it starts to result in slowdowns. So limit to this +# many cores when doing computational work (e.g. torch.functional.Conv2D). +# +# This prevents thread contention. + + +@dataclass +class DetectionSettings: + """ + Configuration class with all the parameters used during 2d and 3d filtering + and structure splitting. + """ + + plane_original_np_dtype: Type[np.number] = np.uint16 + """ + The numpy data type of the input data that will be passed to the filtering + pipeline. + + Throughout filtering at key stages, the data range is kept such + that we can convert the data back to this data type without having to + scale. I.e. the min/max of the data fits in this data type. + + Except for the cell detection stage, in that stage the data range can be + larger because the values are cell IDs and not intensity data anymore. + + During structure splitting, we do just 3d filtering/cell detection. This is + again the data type used as input to the filtering. + + Defaults to `uint16` + """ + + detection_dtype: Type[np.number] = np.uint64 + """ + The numpy data type that the cell detection code expects our filtered + data to be in. + + After filtering, where the voxels are intensity values, we pass the data + to cell detection where the voxels turn into cell IDs. So the data type + needs to be large enough to support the number of cells in the data. + + To get the data from the filtering data type to the detection data type + use `detection_data_converter_func`. + + Defaults to `uint64`. + """ + + plane_shape: Tuple[int, int] = (1, 1) + """ + The shape of each plane of the input data as (height, width) - i.e. + (axis 1, axis 2) in the z-stack where z is the first axis. + """ + + start_plane: int = 0 + """The index of first plane to process, in the input data (inclusive).""" + + end_plane: int = 1 + """ + The index of the last plane at which to stop processing the input data + (not inclusive). + """ + + voxel_sizes: Tuple[float, float, float] = (1.0, 1.0, 1.0) + """ + Tuple of voxel sizes in each dimension (z, y, x). We use this to convert + from `um` to pixel sizes. + """ + + soma_spread_factor: float = 1.4 + """Spread factor for soma size - how much it may stretch in the images.""" + + soma_diameter_um: float = 16 + """ + Diameter of a typical soma in um. Bright areas larger than this will be + split. + """ + + max_cluster_size_um3: float = 100_000 + """ + Maximum size of a cluster (bright area) that will be processed, in um. + Larger bright areas are skipped as artifacts. + """ + + ball_xy_size_um: float = 6 + """ + Diameter of the 3d spherical kernel filter in the x/y dimensions in um. + See `ball_xy_size` for size in voxels. + """ + + ball_z_size_um: float = 15 + """ + Diameter of the 3d spherical kernel filter in the z dimension in um. + See `ball_z_size` for size in voxels. + + `ball_z_size` also determines to the minimum number of planes that are + stacked before can filter the central plane of the stack. + """ + + ball_overlap_fraction: float = 0.6 + """ + Fraction of overlap between a bright area and the spherical kernel, + for the area to be considered a single ball. + """ + + log_sigma_size: float = 0.2 + """Size of the sigma for the 2d Gaussian filter.""" + + n_sds_above_mean_thresh: float = 10 + """ + Number of standard deviations above the mean intensity to use for a + threshold to define bright areas. Below it, it's not considered bright. + """ + + outlier_keep: bool = False + """Whether to keep outlier structures during detection.""" + + artifact_keep: bool = False + """Whether to keep artifact structures during detection.""" + + save_planes: bool = False + """ + Whether to save the 2d/3d filtered planes during after filtering. + + It is saved as tiffs of data type `plane_original_np_dtype`. + """ + + plane_directory: Optional[str] = None + """Directory path where to save the planes, if saving.""" + + batch_size: int = 1 + """ + The number of planes to process in each batch of the 2d/3d filters. + + For CPU, each plane in a batch is 2d filtered (the slowest filters) in its + own sub-process. But 3d filtering happens in a single thread. So larger + batches will use more processes but can speed up filtering until IO/3d + filters become the bottleneck. + + For CUDA, 2d and 3d filtering happens on the GPU and the larger the batch + size, the better the performance. Until it fills up the GPU memory - after + which it becomes slower. + + In all cases, higher batch size means more RAM used. + """ + + num_prefetch_batches: int = 2 + """ + The number of batches to load into memory. + + This many batches are loaded in memory so the next batch is ready to be + sent to the filters as soon as the previous batch is done. + + The higher the number the more RAM used, but it can also speed up + processing if IO becomes a limiting factor. + """ + + torch_device: str = "cpu" + """ + The device on which to run the 2d and/or 3d filtering. + + Either `"cpu"` or PyTorch's GPU device name, such as `"cuda"` or `"cuda:0"` + to run on the first GPU. + """ + + n_free_cpus: int = 2 + """ + Number of free CPU cores to keep available and not use during parallel + processing. Internally, more cores may actually be used by the system, + which we don't control. + """ + + n_splitting_iter: int = 10 + """ + During the structure splitting phase we iteratively shrink the bright areas + and re-filter with the 3d filter. This is the number of iterations to do. + + This is a maximum because we also stop if there are no more structures left + during any iteration. + """ + + def __getstate__(self): + d = self.__dict__.copy() + # when sending across processes, we need to be able to pickle. This + # property cannot be pickled (and doesn't need to be) + if "filter_data_converter_func" in d: + del d["filter_data_converter_func"] + return d + + @cached_property + def filter_data_converter_func(self) -> Callable[[np.ndarray], np.ndarray]: + """ + A callable that takes a numpy array of type + `plane_original_np_dtype` and converts it into the `filtering_dtype` + type. + + We use this to convert the input data into the data type used for + filtering. + """ + return get_data_converter( + self.plane_original_np_dtype, self.filtering_dtype + ) + + @cached_property + def filtering_dtype(self) -> Type[np.floating]: + """ + The numpy data type that the 2d/3d filters expect our data to be in. + Use `filter_data_converter_func` to convert to this type. + + The data will be used in the form of torch tensors, but it'll be this + data type. + + Currently, it's either float32 or float64. + """ + original_dtype = self.plane_original_np_dtype + original_max_int = get_max_possible_int_value(original_dtype) + + # does original data fit in float32 + if original_max_int <= get_max_possible_int_value(np.float32): + return np.float32 + # what about float64 + if original_max_int <= get_max_possible_int_value(np.float64): + return np.float64 + raise TypeError("Input array data type is too big for a float64") + + @cached_property + def clipping_value(self) -> int: + """ + The maximum value used to clip the input to, as well as the value to + which the filtered data is scaled to during filtering. + + This ensures the filtered data fits in the `plane_original_np_dtype`. + """ + return get_max_possible_int_value(self.plane_original_np_dtype) - 2 + + @cached_property + def threshold_value(self) -> int: + """ + The value used to set bright areas as indicating it's above a + brightness threshold, during 2d filtering. + """ + return get_max_possible_int_value(self.plane_original_np_dtype) - 1 + + @cached_property + def soma_centre_value(self) -> int: + """ + The value used to mark bright areas as the location of a soma center, + during 3d filtering. + """ + return get_max_possible_int_value(self.plane_original_np_dtype) + + @cached_property + def detection_soma_centre_value(self) -> int: + """ + The value used to mark bright areas as the location of a soma center, + during detection. See `detection_data_converter_func`. + """ + return get_max_possible_int_value(self.detection_dtype) + + @cached_property + def detection_data_converter_func( + self, + ) -> Callable[[np.ndarray], np.ndarray]: + """ + A callable that takes a numpy array of type + `filtering_dtype` and converts it into the `detection_dtype` + type. + + It takes the filtered data where somas are marked with the + `soma_centre_value` and returns a volume of the same size where the + somas are marked with `detection_soma_centre_value`. Other voxels are + zeroed. + + We use this to convert the output of the 3d filter into the data + passed to cell detection. + """ + + def convert_for_cell_detection(data: np.ndarray) -> np.ndarray: + detection_data = np.zeros_like(data, dtype=self.detection_dtype) + detection_data[data == self.soma_centre_value] = ( + self.detection_soma_centre_value + ) + return detection_data + + return convert_for_cell_detection + + @property + def tile_height(self) -> int: + """ + The height of each tile of the tiled input image, used during filtering + to mark individual tiles as inside/outside the brain. + """ + return self.soma_diameter * 2 + + @property + def tile_width(self) -> int: + """ + The width of each tile of the tiled input image, used during filtering + to mark individual tiles as inside/outside the brain. + """ + return self.soma_diameter * 2 + + @property + def plane_height(self) -> int: + """The height of each input plane of the z-stack.""" + return self.plane_shape[0] + + @property + def plane_width(self) -> int: + """The width of each input plane of the z-stack.""" + return self.plane_shape[1] + + @property + def n_planes(self) -> int: + """The number of planes in the z-stack.""" + return self.end_plane - self.start_plane + + @property + def n_processes(self) -> int: + """The maximum number of process we can use during detection.""" + n = get_num_processes(min_free_cpu_cores=self.n_free_cpus) + return max(n - 1, 1) + + @property + def n_torch_comp_threads(self) -> int: + """ + The maximum number of process we should use during filtering, + using pytorch. + + This is less than `n_processes` because we account for thread + contention. Specifically it's limited by `MAX_TORCH_COMP_THREADS`. + """ + # Reserve batch_size cores for batch multiprocess parallelization on + # CPU, 1 per plane. for GPU it doesn't matter either way because it + # doesn't use threads. Also reserve for data feeding thread and + # cell detection. Don't let it go below 4. + n = max(4, self.n_processes - self.batch_size - 2) + n = min(n, self.n_processes) + return min(n, MAX_TORCH_COMP_THREADS) + + @property + def in_plane_pixel_size(self) -> float: + """Returns the average in-plane (xy) um/pixel.""" + voxel_sizes = self.voxel_sizes + return (voxel_sizes[2] + voxel_sizes[1]) / 2 + + @cached_property + def soma_diameter(self) -> int: + """The `soma_diameter_um`, but in voxels.""" + return int(round(self.soma_diameter_um / self.in_plane_pixel_size)) + + @cached_property + def max_cluster_size(self) -> int: + """The `max_cluster_size_um3`, but in voxels.""" + voxel_sizes = self.voxel_sizes + voxel_volume = ( + float(voxel_sizes[2]) + * float(voxel_sizes[1]) + * float(voxel_sizes[0]) + ) + return int(round(self.max_cluster_size_um3 / voxel_volume)) + + @cached_property + def ball_xy_size(self) -> int: + """The `ball_xy_size_um`, but in voxels.""" + return int(round(self.ball_xy_size_um / self.in_plane_pixel_size)) + + @property + def z_pixel_size(self) -> float: + """Returns the um/pixel in the z direction.""" + return self.voxel_sizes[0] + + @cached_property + def ball_z_size(self) -> int: + """The `ball_z_size_um`, but in voxels.""" + ball_z_size = int(round(self.ball_z_size_um / self.z_pixel_size)) + + if not ball_z_size: + raise ValueError( + "Ball z size has been calculated to be 0 voxels." + " This may be due to large axial spacing of your data or the " + "ball_z_size_um parameter being too small. " + "Please check input parameters are correct. " + "Note that cellfinder requires high resolution data in all " + "dimensions, so that cells can be detected in multiple " + "image planes." + ) + return ball_z_size + + @property + def max_cell_volume(self) -> float: + """ + The maximum cell volume to consider as a single cell, in voxels. + + If we find a bright area larger than that, we will split it. + """ + radius = self.soma_spread_factor * self.soma_diameter / 2 + return (4 / 3) * math.pi * radius**3 + + @property + def plane_prefix(self) -> str: + """ + The prefix of the filename to use to save the 2d/3d filtered planes. + + To save plane `k`, do `plane_prefix.format(n=k)`. You can then add + an extension etc. + """ + n = max(4, int(math.ceil(math.log10(self.n_planes)))) + name = f"plane_{{n:0{n}d}}" + return name diff --git a/cellfinder/core/detect/filters/volume/ball_filter.py b/cellfinder/core/detect/filters/volume/ball_filter.py index c5f5f5b8..a2f1ef1c 100644 --- a/cellfinder/core/detect/filters/volume/ball_filter.py +++ b/cellfinder/core/detect/filters/volume/ball_filter.py @@ -1,29 +1,35 @@ +import math from functools import lru_cache +from typing import Optional import numpy as np -from numba import njit, objmode, prange -from numba.core import types -from numba.experimental import jitclass +import torch +import torch.nn.functional as F from cellfinder.core.tools.array_operations import bin_mean_3d from cellfinder.core.tools.geometry import make_sphere -DEBUG = False -uint32_3d_type = types.uint32[:, :, :] -bool_3d_type = types.bool_[:, :, :] -float_3d_type = types.float64[:, :, :] +class InvalidVolume(ValueError): + """ + Raised when the volume passed to BallFilter is too small or does not meet + requirements. + """ + + pass @lru_cache(maxsize=50) def get_kernel(ball_xy_size: int, ball_z_size: int) -> np.ndarray: - # Create a spherical kernel. - # - # This is done by: - # 1. Generating a binary sphere at a resolution *upscale_factor* larger - # than desired. - # 2. Downscaling the binary sphere to get a 'fuzzy' sphere at the - # original intended scale + """ + Create a spherical kernel. + + This is done by: + 1. Generating a binary sphere at a resolution *upscale_factor* larger + than desired. + 2. Downscaling the binary sphere to get a 'fuzzy' sphere at the + original intended scale + """ upscale_factor: int = 7 upscaled_kernel_shape = ( upscale_factor * ball_xy_size, @@ -42,11 +48,11 @@ def get_kernel(ball_xy_size: int, ball_z_size: int) -> np.ndarray: upscaled_ball_radius, upscaled_ball_centre_position, ) - sphere_kernel = sphere_kernel.astype(np.float64) + sphere_kernel = sphere_kernel.astype(np.float32) kernel = bin_mean_3d( sphere_kernel, - bin_height=upscale_factor, bin_width=upscale_factor, + bin_height=upscale_factor, bin_depth=upscale_factor, ) @@ -59,359 +65,351 @@ def get_kernel(ball_xy_size: int, ball_z_size: int) -> np.ndarray: return kernel -# volume indices/size is 64 bit for very large brains(!) -spec = [ - ("ball_xy_size", types.uint32), - ("ball_z_size", types.uint32), - ("tile_step_width", types.uint64), - ("tile_step_height", types.uint64), - ("THRESHOLD_VALUE", types.uint32), - ("SOMA_CENTRE_VALUE", types.uint32), - ("overlap_fraction", types.float64), - ("overlap_threshold", types.float64), - ("middle_z_idx", types.uint32), - ("_num_z_added", types.uint32), - ("kernel", float_3d_type), - ("volume", uint32_3d_type), - ("inside_brain_tiles", bool_3d_type), -] - - -@jitclass(spec=spec) class BallFilter: """ A 3D ball filter. - This runs a spherical kernel across the (x, y) dimensions + This runs a spherical kernel across the 2d planar dimensions of a *ball_z_size* stack of planes, and marks pixels in the middle - plane of the stack that have a high enough intensity within the - spherical kernel. + plane of the stack that have a high enough intensity over the + the spherical kernel. + + Parameters + ---------- + plane_height, plane_width : int + Height/width of the planes. + ball_xy_size : int + Diameter of the spherical kernel in the x/y dimensions. + ball_z_size : int + Diameter of the spherical kernel in the z dimension. + Equal to the number of planes stacked to filter + the central plane of the stack. + overlap_fraction : float + The fraction of pixels within the spherical kernel that + have to be over *threshold_value* for a pixel to be marked + as having a high intensity. + threshold_value : int + Value above which an individual pixel is considered to have + a high intensity. + soma_centre_value : int + Value used to mark pixels with a high enough intensity. + tile_height, tile_width : int + Width/height of individual tiles in the mask generated by + 2D filtering. + dtype : str + The data-type of the input planes and the type to use internally. + E.g. "float32". + batch_size: int + The number of planes that will be typically passed in a single batch to + `append`. This is only used to calculate `num_batches_before_ready`. + Defaults to 1. + torch_device: str + The device on which the data and processing occurs on. Can be e.g. + "cpu", "cuda" etc. Defaults to "cpu". Any data passed to the filter + must be on this device. Returned data will also be on this device. + use_mask : bool + Whether tiling masks will be used in `append`. If False, tile masks + won't be passed in and/or will be ignored. Defaults to True. + """ + + num_batches_before_ready: int """ + The number of batches of size `batch_size` passed to `append` + before `ready` would return True. + """ + + # the inside brain tiled mask, if tiles are used (use_mask is True) + inside_brain_tiles: Optional[torch.Tensor] = None def __init__( self, - plane_width: int, plane_height: int, + plane_width: int, ball_xy_size: int, ball_z_size: int, overlap_fraction: float, - tile_step_width: int, - tile_step_height: int, threshold_value: int, soma_centre_value: int, + tile_height: int, + tile_width: int, + dtype: str, + batch_size: int = 1, + torch_device: str = "cpu", + use_mask: bool = True, ): - """ - Parameters - ---------- - plane_width, plane_height : - Width/height of the planes. - ball_xy_size : - Diameter of the spherical kernel in the x/y dimensions. - ball_z_size : - Diameter of the spherical kernel in the z dimension. - Equal to the number of planes that stacked to filter - the central plane of the stack. - overlap_fraction : - The fraction of pixels within the spherical kernel that - have to be over *threshold_value* for a pixel to be marked - as having a high intensity. - tile_step_width, tile_step_height : - Width/height of individual tiles in the mask generated by - 2D filtering. - threshold_value : - Value above which an individual pixel is considered to have - a high intensity. - soma_centre_value : - Value used to mark pixels with a high enough intensity. - """ self.ball_xy_size = ball_xy_size self.ball_z_size = ball_z_size self.overlap_fraction = overlap_fraction - self.tile_step_width = tile_step_width - self.tile_step_height = tile_step_height + self.tile_step_height = tile_height + self.tile_step_width = tile_width + + d1 = plane_height + d2 = plane_width + ball_xy_size = self.ball_xy_size + if d1 < ball_xy_size or d2 < ball_xy_size: + raise InvalidVolume( + f"Invalid plane size {d1}x{d2}. Needs to be at least " + f"{ball_xy_size} in each dimension" + ) self.THRESHOLD_VALUE = threshold_value self.SOMA_CENTRE_VALUE = soma_centre_value - # getting kernel is not jitted - with objmode(kernel=float_3d_type): - kernel = get_kernel(ball_xy_size, ball_z_size) - self.kernel = kernel - - self.overlap_threshold = np.sum(self.overlap_fraction * self.kernel) + # kernel comes in as XYZ, change to ZYX so it aligns with data + kernel = np.moveaxis(get_kernel(ball_xy_size, self.ball_z_size), 2, 0) + self.overlap_threshold = np.sum(self.overlap_fraction * kernel) + self.kernel_xy_size = kernel.shape[-2:] + self.kernel_z_size = self.ball_z_size + + # convert to right type and pin for faster copying + kernel = torch.from_numpy(kernel).type(getattr(torch, dtype)) + if torch_device != "cpu": + # torch at one point threw a cuda memory error when splitting cells + # on cpu due to pinning. It's best to only pin on using cuda + kernel.pin_memory() + # add 2 dimensions at the start so we have 11ZYX We need this shape in + # the conv step + self.kernel = ( + kernel.unsqueeze(0) + .unsqueeze(0) + .to(device=torch_device, non_blocking=True) + ) - # Stores the current planes that are being filtered - # first axis is z for faster rotating the z-axis - self.volume = np.empty( - (ball_z_size, plane_width, plane_height), - dtype=np.uint32, + self.num_batches_before_ready = int( + math.ceil(self.ball_z_size / batch_size) + ) + # Stores the current planes that are being filtered. Start with no data + self.volume = torch.empty( + (0, plane_height, plane_width), + dtype=getattr(torch, dtype), ) # Index of the middle plane in the volume - self.middle_z_idx = int(np.floor(ball_z_size / 2)) - self._num_z_added = 0 + self.middle_z_idx = int(np.floor(self.ball_z_size / 2)) + if not use_mask: + return # first axis is z - self.inside_brain_tiles = np.empty( + n_vertical_tiles = int(np.ceil(plane_height / self.tile_step_height)) + n_horizontal_tiles = int(np.ceil(plane_width / self.tile_step_width)) + # Stores tile masks. We start with no data + self.inside_brain_tiles = torch.empty( ( - ball_z_size, - int(np.ceil(plane_width / tile_step_width)), - int(np.ceil(plane_height / tile_step_height)), + 0, + n_vertical_tiles, + n_horizontal_tiles, ), - dtype=np.bool_, + dtype=torch.bool, ) @property - def ready(self) -> bool: + def first_valid_plane(self) -> int: """ - Return `True` if enough planes have been appended to run the filter. + The index in `self.volume` (or the planes passed in) that will be the + first plane returned from `get_processed_planes`. + + E.g. if `ball_z_size` is 3, then this may return 1. Meaning the second + plane passed to `append` (index 1), will be the first returned plane + by `get_processed_planes`. """ - return self._num_z_added >= self.ball_z_size + return int(math.floor(self.ball_z_size / 2)) - def append(self, plane: np.ndarray, mask: np.ndarray) -> None: + @property + def remaining_planes(self) -> int: """ - Add a new 2D plane to the filter. + The number of planes in `self.volume` (or the planes passed in) that + will remain unprocessed after all the planes have been `walk`ed + and `get_processed_planes` called. + + E.g. if `ball_z_size` is 3, then this may return 1. Meaning the last + plane passed to `append`, will never be returned by + `get_processed_planes` because the filter "center" never overlapped + with it. """ - if DEBUG: - assert [e for e in plane.shape[:2]] == [ - e for e in self.volume.shape[1:] - ], 'plane shape mismatch, expected "{}", got "{}"'.format( - [e for e in self.volume.shape[1:]], - [e for e in plane.shape[:2]], - ) - assert [e for e in mask.shape[:2]] == [ - e for e in self.inside_brain_tiles.shape[1:] - ], 'mask shape mismatch, expected"{}", got {}"'.format( - [e for e in self.inside_brain_tiles.shape[1:]], - [e for e in mask.shape[:2]], - ) + return self.ball_z_size - self.first_valid_plane - 1 - if self.ready: - # Shift everything down by one to make way for the new plane - # this is faster than np.roll, especially with z-axis first - self.volume[:-1, :, :] = self.volume[1:, :, :] - self.inside_brain_tiles[:-1, :, :] = self.inside_brain_tiles[ - 1:, :, : - ] + @property + def ready(self) -> bool: + """ + Return whether enough planes have been appended to run the filter + using `walk`. + """ + return self.volume.shape[0] >= self.kernel_z_size + + def append( + self, planes: torch.Tensor, masks: Optional[torch.Tensor] = None + ) -> None: + """ + Add a new z-stack to the filter. - # index for *next* slice is num we added *so far* until max - idx = min(self._num_z_added, self.ball_z_size - 1) - self._num_z_added += 1 + Previous stacks passed to `append` are removed, except enough planes + at the top of the previous z-stack to provide padding so we can filter + starting from the first plane in `planes`. The first time we call + `append`, `first_valid_plane` is the first plane to actually be + filtered in the z-stack due to lack of padding. - # Add the new plane to the top of volume and inside_brain_tiles - self.volume[idx, :, :] = plane - self.inside_brain_tiles[idx, :, :] = mask + So make sure to call `walk`/`get_processed_planes` before calling + `append` again. - def get_middle_plane(self) -> np.ndarray: - """ - Get the plane in the middle of self.volume. + Parameters + ---------- + planes : torch.Tensor + The z-stack. There can be one or more planes in the stack, but it + must have 3 dimensions. Input data is not modified. + masks : torch.Tensor + A z-stack tile mask, indicating for each tile whether it's in or + outside the brain. If the latter it's excluded. + + If `use_mask` was True, this must be provided. If False, this + parameter will be ignored. + + Input data is not modified. """ - return self.volume[self.middle_z_idx, :, :].copy() - - def walk(self, parallel: bool = False) -> None: - # **don't** pass parallel as keyword arg - numba struggles with it - # Highly optimised because most time critical - ball_radius = self.ball_xy_size // 2 - # Get extents of image that are covered by tiles - tile_mask_covered_img_width = ( - self.inside_brain_tiles.shape[1] * self.tile_step_width - ) - tile_mask_covered_img_height = ( - self.inside_brain_tiles.shape[2] * self.tile_step_height - ) - # Get maximum offsets for the ball - max_width = tile_mask_covered_img_width - self.ball_xy_size - max_height = tile_mask_covered_img_height - self.ball_xy_size - - # we have to pass the raw volume so walk doesn't use its edits as it - # processes the volume. self.volume is the one edited in place - input_volume = self.volume.copy() - - if parallel: - _walk_parallel( - max_height, - max_width, - self.tile_step_width, - self.tile_step_height, - self.inside_brain_tiles, - input_volume, - self.volume, - self.kernel, - ball_radius, - self.middle_z_idx, - self.overlap_threshold, - self.THRESHOLD_VALUE, - self.SOMA_CENTRE_VALUE, + if self.volume.shape[0]: + if self.volume.shape[0] < self.kernel_z_size: + num_remaining_with_padding = 0 + else: + num_remaining = self.kernel_z_size - (self.middle_z_idx + 1) + num_remaining_with_padding = num_remaining + self.middle_z_idx + + self.volume = torch.cat( + [self.volume[-num_remaining_with_padding:, :, :], planes], + dim=0, ) + + if self.inside_brain_tiles is not None: + self.inside_brain_tiles = torch.cat( + [ + self.inside_brain_tiles[ + -num_remaining_with_padding:, :, : + ], + masks, + ], + dim=0, + ) else: - _walk_single( - max_height, - max_width, - self.tile_step_width, - self.tile_step_height, - self.inside_brain_tiles, - input_volume, - self.volume, - self.kernel, - ball_radius, - self.middle_z_idx, - self.overlap_threshold, - self.THRESHOLD_VALUE, - self.SOMA_CENTRE_VALUE, - ) + self.volume = planes.clone() + if self.inside_brain_tiles is not None: + self.inside_brain_tiles = masks.clone() + def get_processed_planes(self) -> np.ndarray: + """ + After passing enough planes to `append`, and after `walk`, this returns + a copy of the processed planes as a numpy z-stack. -@njit(cache=True) -def _cube_overlaps( - volume: np.ndarray, - x_start: int, - x_end: int, - y_start: int, - y_end: int, - overlap_threshold: float, - threshold_value: int, - kernel: np.ndarray, -) -> bool: # Highly optimised because most time critical - """ - For each pixel in cube in volume that is greater than THRESHOLD_VALUE, sum - up the corresponding pixels in *kernel*. If the total is less than - overlap_threshold, return False, otherwise return True. + It only starts returning planes corresponding to plane + `first_valid_plane` relative to the first planes passed to `append`. + E.g. if `ball_z_size` is 3 and `first_valid_plane` is 1, and you passed + 5 planes total to `append`, then this will have returned planes [1, 3]. - Halfway through scanning the z-planes, if the total overlap is - less than 0.4 * overlap_threshold, this will return False early - without scanning the second half of the z-planes. + Notice the last plane was not included, because we return only "middle" + planes - planes that can correspond to the center of a ball. + """ + if not self.ready: + raise TypeError("Not enough planes were appended") + + num_processed = self.volume.shape[0] - self.kernel_z_size + 1 + assert num_processed + middle = self.middle_z_idx + planes = ( + self.volume[middle : middle + num_processed, :, :] + .cpu() + .numpy() + .copy() + ) + return planes - Parameters - ---------- - volume : - 3D array. - x_start, x_end, y_start, y_end : - The start and end indices in volume that form the cube. End is - exclusive - overlap_threshold : - Threshold above which to return True. - threshold_value : - Value above which a pixel is marked as being part of a cell. - kernel : - 3D array, with the same shape as *cube* in the volume. - """ - current_overlap_value = 0.0 - - middle = np.floor(volume.shape[0] / 2) + 1 - halfway_overlap_thresh = ( - overlap_threshold * 0.4 - ) # FIXME: do not hard code value - - for z in range(volume.shape[0]): - # TODO: OPTIMISE: step from middle to outer boundaries to check - # more data first - # - # If halfway through the array, and the overlap value isn't more than - # 0.4 * the overlap threshold, return - if z == middle and current_overlap_value < halfway_overlap_thresh: - return False # DEBUG: optimisation attempt - - for y in range(y_start, y_end): - for x in range(x_start, x_end): - # includes self.SOMA_CENTRE_VALUE - if volume[z, x, y] >= threshold_value: - # x/y must be shifted in kernel because we x/y is relative - # to the full volume, so shift it to relative to the cube - current_overlap_value += kernel[ - x - x_start, y - y_start, z - ] - - return current_overlap_value > overlap_threshold - - -@njit -def _is_tile_to_check( - x: int, - y: int, - middle_z: int, - tile_step_width: int, - tile_step_height: int, - inside_brain_tiles: np.ndarray, -) -> bool: # Highly optimised because most time critical - """ - Check if the tile containing pixel (x, y) is a tile that needs checking. - """ - x_in_mask = x // tile_step_width # TEST: test bounds (-1 range) - y_in_mask = y // tile_step_height # TEST: test bounds (-1 range) - return inside_brain_tiles[middle_z, x_in_mask, y_in_mask] + def walk(self) -> None: + """ + Applies the filter to all the planes passed to `append`. + May only be called if `ready` was True. -def _walk_base( - max_height: int, - max_width: int, - tile_step_width: int, + You can get the processed planes from `get_processed_planes`. + """ + if not self.ready: + raise TypeError("Called walk before enough planes were appended") + + _walk( + self.kernel_z_size, + self.middle_z_idx, + self.tile_step_height, + self.tile_step_width, + self.overlap_threshold, + self.THRESHOLD_VALUE, + self.SOMA_CENTRE_VALUE, + self.kernel, + self.volume, + self.inside_brain_tiles, + ) + + +@torch.jit.script +def _walk( + kernel_z_size: int, + middle: int, tile_step_height: int, - inside_brain_tiles: np.ndarray, - input_volume: np.ndarray, - volume: np.ndarray, - kernel: np.ndarray, - ball_radius: int, - middle_z: int, + tile_step_width: int, overlap_threshold: float, threshold_value: int, soma_centre_value: int, -) -> None: - """ - Scan through *volume*, and mark pixels where there are enough surrounding - pixels with high enough intensity. + kernel: torch.Tensor, + volume: torch.Tensor, + inside_brain_tiles: Optional[torch.Tensor], +): + num_process = volume.shape[0] - kernel_z_size + 1 + height, width = volume.shape[1:] + num_z = kernel.shape[2] + + # threshold volume so it's zero/one. And add two dims at start so + # it's 11ZYX + volume_tresh = ( + (volume >= threshold_value) + .unsqueeze(0) + .unsqueeze(0) + .type(kernel.dtype) + ) - The surrounding area is defined by the *kernel*. + # we do a plane at a time, volume: i:i+num_z, for plane i+middle + for i in range(num_process): + # spherical kernel is symmetric so convolution=correlation. Use + # binary threshold mask over the kernel to sum the value of the + # kernel at voxels that are bright + overlap = F.conv3d( + volume_tresh[:, :, i : i + num_z, :, :], + kernel, + stride=1, + padding="valid", + )[0, 0, 0, :, :] + overlaps = overlap > overlap_threshold + + # only edit the volume that is valid - conv excludes edges so we + # only edit the plane parts returned by conv3d + height_valid, width_valid = overlaps.shape + height_offset = (height - height_valid) // 2 + width_offset = (width - width_valid) // 2 + sub_volume = volume[ + i + middle, + height_offset : height_offset + height_valid, + width_offset : width_offset + width_valid, + ] + + # do we use tile masks or just overlapping? + if inside_brain_tiles is not None: + # unfold tiles to cover the full voxel area each tile covers + inside = ( + inside_brain_tiles[i + middle, :, :] + .repeat_interleave(tile_step_height, dim=0) + .repeat_interleave(tile_step_width, dim=1) + ) + # again only process pixels in the valid area + inside = inside[ + height_offset : height_offset + height_valid, + width_offset : width_offset + width_valid, + ] - Parameters - ---------- - max_height, max_width : - Maximum offsets for the ball filter. - inside_brain_tiles : - 3d array containing information on whether a tile is - inside the brain or not. Tiles outside the brain are skipped. - input_volume : - 3D array containing the plane-filtered data passed to the function - before walking. volume is edited in place, so this is the original - volume to prevent the changes for some cubes affective other cubes - during a single walk call. - volume : - 3D array containing the plane-filtered data - edited in place. - kernel : - 3D array - ball_radius : - Radius of the ball in the xy plane. - soma_centre_value : - Value that is used to mark pixels in *volume*. - - Notes - ----- - Warning: modifies volume in place! - """ - for y in prange(max_height): - for x in prange(max_width): - ball_centre_x = x + ball_radius - ball_centre_y = y + ball_radius - if _is_tile_to_check( - ball_centre_x, - ball_centre_y, - middle_z, - tile_step_width, - tile_step_height, - inside_brain_tiles, - ): - if _cube_overlaps( - input_volume, - x, - x + kernel.shape[0], - y, - y + kernel.shape[1], - overlap_threshold, - threshold_value, - kernel, - ): - volume[middle_z, ball_centre_x, ball_centre_y] = ( - soma_centre_value - ) - - -_walk_parallel = njit(parallel=True)(_walk_base) -_walk_single = njit(parallel=False)(_walk_base) + # must have enough ball overlap to be bright/tile is in brain + sub_volume[torch.logical_and(overlaps, inside)] = soma_centre_value + + else: + # must have enough ball overlap to be bright + sub_volume[overlaps] = soma_centre_value diff --git a/cellfinder/core/detect/filters/volume/structure_detection.py b/cellfinder/core/detect/filters/volume/structure_detection.py index 536f00ad..da99633d 100644 --- a/cellfinder/core/detect/filters/volume/structure_detection.py +++ b/cellfinder/core/detect/filters/volume/structure_detection.py @@ -4,18 +4,20 @@ import numba.typed import numpy as np import numpy.typing as npt -from numba import njit, typed +from numba import njit, objmode, typed from numba.core import types from numba.experimental import jitclass from numba.types import DictType +from cellfinder.core.tools.tools import get_max_possible_int_value + T = TypeVar("T") # type used for the domain of the volume - the size of the vol vol_np_type = np.int64 vol_numba_type = types.int64 # type used for the structure id -sid_np_type = np.int64 -sid_numba_type = types.int64 +sid_np_type = np.uint64 +sid_numba_type = types.uint64 @dataclass @@ -26,14 +28,17 @@ class Point: @njit -def get_non_zero_dtype_min(values: np.ndarray) -> int: +def get_non_zero_dtype_min(values: np.ndarray) -> sid_numba_type: """ Get the minimum of non-zero entries in *values*. If all entries are zero, returns maximum storeable number in the values array. """ - min_val = np.iinfo(values.dtype).max + # we don't know how big the int is, so make it as large as possible (64) + with objmode(min_val="u8"): + min_val = get_max_possible_int_value(values.dtype) + for v in values: if v != 0 and v < min_val: min_val = v @@ -97,6 +102,7 @@ def _get_structure_centre(structure: types.ListType) -> np.ndarray: spec = [ ("z", vol_numba_type), ("next_structure_id", sid_numba_type), + ("soma_centre_value", sid_numba_type), # as large as possible ("shape", types.UniTuple(vol_numba_type, 2)), ("obsolete_ids", DictType(sid_numba_type, sid_numba_type)), ("coords_maps", DictType(sid_numba_type, list_of_points_type)), @@ -133,18 +139,25 @@ class CellDetector: points. """ - def __init__(self, width: int, height: int, start_z: int): + def __init__( + self, + height: int, + width: int, + start_z: int, + soma_centre_value: sid_numba_type, + ): """ Parameters ---------- - width, height + height, width: Shape of the planes input to self.process() start_z: The z-coordinate of the first processed plane. """ - self.shape = width, height + self.shape = height, width self.z = start_z self.next_structure_id = 1 + self.soma_centre_value = soma_centre_value # Mapping from obsolete IDs to the IDs that they have been # made obsolete by @@ -156,11 +169,18 @@ def __init__(self, width: int, height: int, start_z: int): key_type=sid_numba_type, value_type=list_of_points_type ) + def _set_soma(self, soma_centre_value: sid_numba_type): + # Due to https://github.com/numba/numba/issues/9576. For testing we try + # different data types. Because of that issue we cannot pass a uint64 + # soma_centre_value to constructor after we pass a uint32. This is the + # only way for now until numba fixes the issue + self.soma_centre_value = soma_centre_value + def process( self, plane: np.ndarray, previous_plane: Optional[np.ndarray] ) -> np.ndarray: """ - Process a new plane. + Process a new plane (should be in Y, X axis order). """ if plane.shape[:2] != self.shape: raise ValueError("plane does not have correct shape") @@ -185,21 +205,21 @@ def connect_four( ------- plane : Plane with pixels either set to zero (no structure) or labelled - with their structure ID. + with their structure ID. Plane is in Y, X axis order. """ - SOMA_CENTRE_VALUE = np.iinfo(plane.dtype).max - for y in range(plane.shape[1]): - for x in range(plane.shape[0]): - if plane[x, y] == SOMA_CENTRE_VALUE: + soma_centre_value = self.soma_centre_value + for y in range(plane.shape[0]): + for x in range(plane.shape[1]): + if plane[y, x] == soma_centre_value: # Labels of structures below, left and behind neighbour_ids = np.zeros(3, dtype=sid_np_type) # If in bounds look at neighbours - if x > 0: - neighbour_ids[0] = plane[x - 1, y] if y > 0: - neighbour_ids[1] = plane[x, y - 1] + neighbour_ids[0] = plane[y - 1, x] + if x > 0: + neighbour_ids[1] = plane[y, x - 1] if previous_plane is not None: - neighbour_ids[2] = previous_plane[x, y] + neighbour_ids[2] = previous_plane[y, x] if is_new_structure(neighbour_ids): neighbour_ids[0] = self.next_structure_id @@ -210,17 +230,20 @@ def connect_four( # structure in next iterations struct_id = 0 - plane[x, y] = struct_id + plane[y, x] = struct_id return plane def get_cell_centres(self) -> np.ndarray: + """ + Returns the 2D array of cell centers. It's (N, 3) with X, Y, Z columns. + """ return self.structures_to_cells() def get_structures(self) -> Dict[int, np.ndarray]: """ Gets the structures as a dict of structure IDs mapped to the 2D array - of structure points. + of structure points (points vs x, y, z columns). """ d = {} for sid, points in self.coords_maps.items(): @@ -228,7 +251,10 @@ def get_structures(self) -> Dict[int, np.ndarray]: # `item = np.array(points, dtype=vol_np_type)` so we need to create # array and then fill in the point item = np.empty((len(points), 3), dtype=vol_np_type) - d[sid] = item + # need to cast to int64, otherwise when dict is used we can get + # warnings as in numba issue #8829 b/c it assumes it's uint64. + # Python uses int(64) as the type + d[types.int64(sid)] = item for i, point in enumerate(points): item[i, :] = point @@ -239,33 +265,41 @@ def add_point( self, sid: int, point: Union[tuple, list, np.ndarray] ) -> None: """ - Add single 3d *point* to the structure with the given *sid*. + Add single 3d (x, y, z) *point* to the structure with the given *sid*. """ - if sid not in self.coords_maps: - self.coords_maps[sid] = typed.List.empty_list(tuple_point_type) + # cast in case user passes in int64 (default type for int in python) + # and numba complains + key = sid_numba_type(sid) + if key not in self.coords_maps: + self.coords_maps[key] = typed.List.empty_list(tuple_point_type) - self._add_point(sid, (int(point[0]), int(point[1]), int(point[2]))) + self._add_point(key, (int(point[0]), int(point[1]), int(point[2]))) def add_points(self, sid: int, points: np.ndarray): """ Adds ndarray of *points* to the structure with the given *sid*. - Each row is a 3d point. + Each row is a 3-column (x, y, z) point. """ - if sid not in self.coords_maps: - self.coords_maps[sid] = typed.List.empty_list(tuple_point_type) + # cast in case user passes in int64 (default type for int in python) + # and numba complains + key = sid_numba_type(sid) + if key not in self.coords_maps: + self.coords_maps[key] = typed.List.empty_list(tuple_point_type) - append = self.coords_maps[sid].append + append = self.coords_maps[key].append pts = np.round(points).astype(vol_np_type) for point in pts: append((point[0], point[1], point[2])) - def _add_point(self, sid: int, point: Tuple[int, int, int]) -> None: + def _add_point( + self, sid: sid_numba_type, point: Tuple[int, int, int] + ) -> None: # sid must exist self.coords_maps[sid].append(point) def add( self, x: int, y: int, z: int, neighbour_ids: npt.NDArray[sid_np_type] - ) -> int: + ) -> sid_numba_type: """ For the current coordinates takes all the neighbours and find the minimum structure including obsolete structures mapping to any of @@ -287,7 +321,9 @@ def add( self._add_point(updated_id, (int(x), int(y), int(z))) return updated_id - def sanitise_ids(self, neighbour_ids: npt.NDArray[sid_np_type]) -> int: + def sanitise_ids( + self, neighbour_ids: npt.NDArray[sid_np_type] + ) -> sid_numba_type: """ Get the smallest ID of all the structures that are connected to IDs in `neighbour_ids`. @@ -300,15 +336,17 @@ def sanitise_ids(self, neighbour_ids: npt.NDArray[sid_np_type]) -> int: """ for i, neighbour_id in enumerate(neighbour_ids): # walk up the chain of obsolescence - neighbour_id = int(traverse_dict(self.obsolete_ids, neighbour_id)) + neighbour_id = traverse_dict(self.obsolete_ids, neighbour_id) neighbour_ids[i] = neighbour_id # Get minimum of all non-obsolete IDs updated_id = get_non_zero_dtype_min(neighbour_ids) - return int(updated_id) + return updated_id def merge_structures( - self, updated_id: int, neighbour_ids: npt.NDArray[sid_np_type] + self, + updated_id: sid_numba_type, + neighbour_ids: npt.NDArray[sid_np_type], ) -> None: """ For all the neighbours, reassign all the points of neighbour to diff --git a/cellfinder/core/detect/filters/volume/structure_splitting.py b/cellfinder/core/detect/filters/volume/structure_splitting.py index 7480e8ec..e1d4a5f0 100644 --- a/cellfinder/core/detect/filters/volume/structure_splitting.py +++ b/cellfinder/core/detect/filters/volume/structure_splitting.py @@ -1,9 +1,14 @@ -from typing import List, Tuple +from typing import List, Tuple, Type import numpy as np +import torch from cellfinder.core import logger -from cellfinder.core.detect.filters.volume.ball_filter import BallFilter +from cellfinder.core.detect.filters.setup_filters import DetectionSettings +from cellfinder.core.detect.filters.volume.ball_filter import ( + BallFilter, + InvalidVolume, +) from cellfinder.core.detect.filters.volume.structure_detection import ( CellDetector, get_structure_centre, @@ -14,41 +19,62 @@ class StructureSplitException(Exception): pass -def get_shape(xs: np.ndarray, ys: np.ndarray, zs: np.ndarray) -> List[int]: +def get_shape( + xs: np.ndarray, ys: np.ndarray, zs: np.ndarray +) -> Tuple[int, int, int]: + """ + Takes a list of x, y, z coordinates and returns a volume size such that + all the points will fit into it. With axis order = x, y, z. + """ # +1 because difference. TEST: - shape = [int((dim.max() - dim.min()) + 1) for dim in (xs, ys, zs)] + shape = tuple(int((dim.max() - dim.min()) + 1) for dim in (xs, ys, zs)) return shape def coords_to_volume( - xs: np.ndarray, ys: np.ndarray, zs: np.ndarray, ball_radius: int = 1 -) -> np.ndarray: + xs: np.ndarray, + ys: np.ndarray, + zs: np.ndarray, + volume_shape: Tuple[int, int, int], + ball_radius: int, + dtype: Type[np.number], + threshold_value: int, +) -> torch.Tensor: + """ + Takes the series of x, y, z points along with the shape of the volume + that fully enclose them (also x, y, z order). It than expands the + shape by the ball diameter in each axis. Then, each point, shifted + by the radius internally is set to the threshold value. + + The volume is then transposed and returned in the Z, Y, X order. + """ + # it's faster doing the work in numpy and then returning as torch array, + # than doing the work in torch ball_diameter = ball_radius * 2 # Expanded to ensure the ball fits even at the border - expanded_shape = [ - dim_size + ball_diameter for dim_size in get_shape(xs, ys, zs) - ] - volume = np.zeros(expanded_shape, dtype=np.uint32) + expanded_shape = [dim_size + ball_diameter for dim_size in volume_shape] + # volume is now x, y, z order + volume = np.zeros(expanded_shape, dtype=dtype) x_min, y_min, z_min = xs.min(), ys.min(), zs.min() + # shift the points so any sphere centered on it would not have its + # radius expand beyond the volume relative_xs = np.array((xs - x_min + ball_radius), dtype=np.int64) relative_ys = np.array((ys - y_min + ball_radius), dtype=np.int64) relative_zs = np.array((zs - z_min + ball_radius), dtype=np.int64) - # OPTIMISE: vectorize + # set each point as the center with a value of threshold for rel_x, rel_y, rel_z in zip(relative_xs, relative_ys, relative_zs): - volume[rel_x, rel_y, rel_z] = np.iinfo(volume.dtype).max - 1 - return volume + volume[rel_x, rel_y, rel_z] = threshold_value + + volume = volume.swapaxes(0, 2) + return torch.from_numpy(volume) def ball_filter_imgs( - volume: np.ndarray, - threshold_value: int, - soma_centre_value: int, - ball_xy_size: int = 3, - ball_z_size: int = 3, -) -> Tuple[np.ndarray, np.ndarray]: + volume: torch.Tensor, settings: DetectionSettings +) -> np.ndarray: """ Apply ball filtering to a 3D volume and detect cell centres. @@ -56,105 +82,118 @@ def ball_filter_imgs( and the `CellDetector` class to detect cell centres. Args: - volume (np.ndarray): The 3D volume to be filtered. - threshold_value (int): The threshold value for ball filtering. - soma_centre_value (int): The value representing the soma centre. - ball_xy_size (int, optional): - The size of the ball filter in the XY plane. Defaults to 3. - ball_z_size (int, optional): - The size of the ball filter in the Z plane. Defaults to 3. + volume (torch.Tensor): The 3D volume to be filtered (Z, Y, X order). + settings (DetectionSettings): + The settings to use. Returns: - Tuple[np.ndarray, np.ndarray]: - A tuple containing the filtered volume and the cell centres. + The 2D array of cell centres (N, 3) - X, Y, Z order. """ - # OPTIMISE: reuse ball filter instance - - good_tiles_mask = np.ones((1, 1, volume.shape[2]), dtype=np.bool_) - - plane_width, plane_height = volume.shape[:2] - current_z = ball_z_size // 2 - - bf = BallFilter( - plane_width, - plane_height, - ball_xy_size, - ball_z_size, - overlap_fraction=0.8, - tile_step_width=plane_width, - tile_step_height=plane_height, - threshold_value=threshold_value, - soma_centre_value=soma_centre_value, + detection_convert = settings.detection_data_converter_func + batch_size = settings.batch_size + + # make sure volume is not less than kernel etc + try: + bf = BallFilter( + plane_height=settings.plane_height, + plane_width=settings.plane_width, + ball_xy_size=settings.ball_xy_size, + ball_z_size=settings.ball_z_size, + overlap_fraction=settings.ball_overlap_fraction, + threshold_value=settings.threshold_value, + soma_centre_value=settings.soma_centre_value, + tile_height=settings.tile_height, + tile_width=settings.tile_width, + dtype=settings.filtering_dtype.__name__, + batch_size=batch_size, + torch_device=settings.torch_device, + use_mask=False, # we don't need a mask here + ) + except InvalidVolume: + return np.empty((0, 3)) + + start_z = bf.first_valid_plane + cell_detector = CellDetector( + settings.plane_height, + settings.plane_width, + start_z=start_z, + soma_centre_value=settings.detection_soma_centre_value, ) - cell_detector = CellDetector(plane_width, plane_height, start_z=current_z) - # FIXME: hard coded type - ball_filtered_volume = np.zeros(volume.shape, dtype=np.uint32) previous_plane = None - for z in range(volume.shape[2]): - bf.append(volume[:, :, z].astype(np.uint32), good_tiles_mask[:, :, z]) + for z in range(0, volume.shape[0], batch_size): + bf.append(volume[z : z + batch_size, :, :]) + if bf.ready: bf.walk() - middle_plane = bf.get_middle_plane() - # first valid middle plane is the current_z, not z - ball_filtered_volume[:, :, current_z] = middle_plane[:] - current_z += 1 + middle_planes = bf.get_processed_planes() + n = middle_planes.shape[0] - # DEBUG: TEST: transpose - previous_plane = cell_detector.process( - middle_plane.copy(), previous_plane + # we edit volume, but only for planes already processed that won't + # be passed to the filter in this run + volume[start_z : start_z + n, :, :] = torch.from_numpy( + middle_planes ) - return ball_filtered_volume, cell_detector.get_cell_centres() + start_z += n + + # convert to type needed for detection + middle_planes = detection_convert(middle_planes) + for plane in middle_planes: + previous_plane = cell_detector.process(plane, previous_plane) + + return cell_detector.get_cell_centres() def iterative_ball_filter( - volume: np.ndarray, n_iter: int = 10 + volume: torch.Tensor, settings: DetectionSettings ) -> Tuple[List[int], List[np.ndarray]]: """ Apply iterative ball filtering to the given volume. The volume is eroded at each iteration, by subtracting 1 from the volume. Parameters: - volume (np.ndarray): The input volume. - n_iter (int): The number of iterations to perform. Default is 10. + volume (torch.Tensor): The input volume. It is edited inplace. + Of shape Z, Y, X. + settings (DetectionSettings): The settings to use. Returns: - Tuple[List[int], List[np.ndarray]]: A tuple containing two lists: - The structures found in each iteration. + tuple: A tuple containing two lists: + The number of structures found in each iteration. The cell centres found in each iteration. """ ns = [] centres = [] - threshold_value = np.iinfo(volume.dtype).max - 1 - soma_centre_value = np.iinfo(volume.dtype).max - - vol = volume.copy() # TODO: check if required - - for i in range(n_iter): - vol, cell_centres = ball_filter_imgs( - vol, threshold_value, soma_centre_value - ) - - # vol is unsigned, so can't let zeros underflow to max value - vol[:, :, :] = np.where(vol != 0, vol - 1, 0) + for i in range(settings.n_splitting_iter): + cell_centres = ball_filter_imgs(volume, settings) + volume.sub_(1) n_structures = len(cell_centres) ns.append(n_structures) centres.append(cell_centres) if n_structures == 0: break + return ns, centres def check_centre_in_cuboid(centre: np.ndarray, max_coords: np.ndarray) -> bool: """ - Checks whether a coordinate is in a cuboid - :param centre: x,y,z coordinate - :param max_coords: far corner of cuboid - :return: True if within cuboid, otherwise False + Checks whether a coordinate is in a cuboid. + + Parameters + ---------- + + centre : np.ndarray + x, y, z coordinate. + max_coords : np.ndarray + Far corner of cuboid. + + Returns + ------- + True if within cuboid, otherwise False. """ relative_coords = centre if (relative_coords > max_coords).all(): @@ -168,7 +207,7 @@ def check_centre_in_cuboid(centre: np.ndarray, max_coords: np.ndarray) -> bool: def split_cells( - cell_points: np.ndarray, outlier_keep: bool = False + cell_points: np.ndarray, settings: DetectionSettings ) -> np.ndarray: """ Split the given cell points into individual cell centres. @@ -177,28 +216,24 @@ def split_cells( cell_points (np.ndarray): Array of cell points with shape (N, 3), where N is the number of cell points and each point is represented by its x, y, and z coordinates. - outlier_keep (bool, optional): Flag indicating whether to keep outliers - during the splitting process. Defaults to False. + settings (DetectionSettings) : The settings to use for splitting. It is + modified inplace. Returns: np.ndarray: Array of absolute cell centres with shape (M, 3), where M is the number of individual cells and each centre is represented by its x, y, and z coordinates. """ + # these points are in x, y, z order columnwise, in absolute pixels orig_centre = get_structure_centre(cell_points) xs = cell_points[:, 0] ys = cell_points[:, 1] zs = cell_points[:, 2] - orig_corner = np.array( - [ - orig_centre[0] - (orig_centre[0] - xs.min()), - orig_centre[1] - (orig_centre[1] - ys.min()), - orig_centre[2] - (orig_centre[2] - zs.min()), - ] - ) - + # corner coordinates in absolute pixels + orig_corner = np.array([xs.min(), ys.min(), zs.min()]) + # volume center relative to corner relative_orig_centre = np.array( [ orig_centre[0] - orig_corner[0], @@ -207,22 +242,51 @@ def split_cells( ] ) + # total volume enclosing all points original_bounding_cuboid_shape = get_shape(xs, ys, zs) - ball_radius = 1 - vol = coords_to_volume(xs, ys, zs, ball_radius=ball_radius) + ball_radius = settings.ball_xy_size // 2 + # they should be the same dtype so as to not need a conversion before + # passing the input data with marked cells to the filters (we currently + # set both to float32) + assert settings.filtering_dtype == settings.plane_original_np_dtype + # volume will now be z, y, x order + vol = coords_to_volume( + xs, + ys, + zs, + volume_shape=original_bounding_cuboid_shape, + ball_radius=ball_radius, + dtype=settings.filtering_dtype, + threshold_value=settings.threshold_value, + ) + + # get an estimate of how much memory processing a single batch of original + # input planes takes. For this much smaller volume, our batch will be such + # that it uses at most that much memory + total_vol_size = ( + settings.plane_height * settings.plane_width * settings.batch_size + ) + batch_size = total_vol_size // (vol.shape[1] * vol.shape[2]) + batch_size = min(batch_size, vol.shape[0]) + + # update settings with our volume data + settings.plane_shape = vol.shape[1:] + settings.start_plane = 0 + settings.end_plane = vol.shape[0] + settings.batch_size = batch_size # centres is a list of arrays of centres (1 array of centres per ball run) - ns, centres = iterative_ball_filter(vol) + # in x, y, z order + ns, centres = iterative_ball_filter(vol, settings) ns.insert(0, 1) centres.insert(0, np.array([relative_orig_centre])) best_iteration = ns.index(max(ns)) - # TODO: put constraint on minimum centres distance ? relative_centres = centres[best_iteration] - if not outlier_keep: + if not settings.outlier_keep: # TODO: change to checking whether in original cluster shape original_max_coords = np.array(original_bounding_cuboid_shape) relative_centres = np.array( @@ -234,7 +298,7 @@ def split_cells( ) absolute_centres = np.empty((len(relative_centres), 3)) - # FIXME: extract functionality + # convert centers to absolute pixels absolute_centres[:, 0] = orig_corner[0] + relative_centres[:, 0] absolute_centres[:, 1] = orig_corner[1] + relative_centres[:, 1] absolute_centres[:, 2] = orig_corner[2] + relative_centres[:, 2] diff --git a/cellfinder/core/detect/filters/volume/volume_filter.py b/cellfinder/core/detect/filters/volume/volume_filter.py index 949f9f91..1cf432bf 100644 --- a/cellfinder/core/detect/filters/volume/volume_filter.py +++ b/cellfinder/core/detect/filters/volume/volume_filter.py @@ -1,151 +1,461 @@ -import math -import multiprocessing.pool +import multiprocessing as mp import os from functools import partial -from queue import Queue -from threading import Lock -from typing import Any, Callable, List, Optional, Tuple +from typing import Callable, List, Optional, Tuple import numpy as np +import torch from brainglobe_utils.cells.cells import Cell from tifffile import tifffile from tqdm import tqdm -from cellfinder.core import logger -from cellfinder.core.detect.filters.setup_filters import ( - get_ball_filter, - get_cell_detector, -) +from cellfinder.core import logger, types +from cellfinder.core.detect.filters.plane import TileProcessor +from cellfinder.core.detect.filters.setup_filters import DetectionSettings +from cellfinder.core.detect.filters.volume.ball_filter import BallFilter from cellfinder.core.detect.filters.volume.structure_detection import ( + CellDetector, get_structure_centre, ) from cellfinder.core.detect.filters.volume.structure_splitting import ( StructureSplitException, split_cells, ) +from cellfinder.core.tools.threading import ( + EOFSignal, + ProcessWithException, + ThreadWithException, +) +from cellfinder.core.tools.tools import inference_wrapper + + +@inference_wrapper +def _plane_filter( + process: ProcessWithException, + tile_processor: TileProcessor, + n_threads: int, + buffers: List[Tuple[torch.Tensor, torch.Tensor]], +): + """ + When running on cpu, we spin up a process for each plane in the batch. + This function runs in the process. + + For every new batch, main process sends a buffer token and plane index + to this function. We process that plane and let the main process know + we are done. + """ + # more than about 4 threads seems to slow down computation + torch.set_num_threads(min(n_threads, 4)) + + while True: + msg = process.get_msg_from_mainthread() + if msg == EOFSignal: + return + # with torch multiprocessing, tensors are shared in memory - so + # just update in place + token, i = msg + tensor, masks = buffers[token] + + plane, mask = tile_processor.get_tile_mask(tensor[i : i + 1, :, :]) + tensor[i : i + 1, :, :] = plane + masks[i : i + 1, :, :] = mask + + # tell the main thread we processed all the planes for this tensor + process.send_msg_to_mainthread(None) + + +class VolumeFilter: + """ + Filters and detects cells in the input data. + + This will take a 3d data array, filter each plane first with 2d filters + finding bright spots. Then it filters the stack with a ball filter to + find voxels that are potential cells. Then it runs cell detection on it + to actually identify the cells. + + Parameters + ---------- + settings : DetectionSettings + Settings object that contains all the configuration data. + """ + + def __init__(self, settings: DetectionSettings): + self.settings = settings + + self.ball_filter = BallFilter( + plane_height=settings.plane_height, + plane_width=settings.plane_width, + ball_xy_size=settings.ball_xy_size, + ball_z_size=settings.ball_z_size, + overlap_fraction=settings.ball_overlap_fraction, + threshold_value=settings.threshold_value, + soma_centre_value=settings.soma_centre_value, + tile_height=settings.tile_height, + tile_width=settings.tile_width, + dtype=settings.filtering_dtype.__name__, + batch_size=settings.batch_size, + torch_device=settings.torch_device, + use_mask=True, + ) + self.z = settings.start_plane + self.ball_filter.first_valid_plane -class VolumeFilter(object): - def __init__( - self, - *, - soma_diameter: float, - soma_size_spread_factor: float = 1.4, - setup_params: Tuple[np.ndarray, Any, int, int, float, Any], - n_planes: int, - n_locks_release: int, - save_planes: bool = False, - plane_directory: Optional[str] = None, - start_plane: int = 0, - max_cluster_size: int = 5000, - outlier_keep: bool = False, - artifact_keep: bool = True, - ): - self.soma_diameter = soma_diameter - self.soma_size_spread_factor = soma_size_spread_factor - self.n_planes = n_planes - self.z = start_plane - self.save_planes = save_planes - self.plane_directory = plane_directory - self.max_cluster_size = max_cluster_size - self.outlier_keep = outlier_keep - self.n_locks_release = n_locks_release - - self.artifact_keep = artifact_keep - - self.clipping_val = None - self.threshold_value = None - self.setup_params = setup_params - - self.previous_plane: Optional[np.ndarray] = None - - self.ball_filter = get_ball_filter( - plane=self.setup_params[0], - soma_diameter=self.setup_params[1], - ball_xy_size=self.setup_params[2], - ball_z_size=self.setup_params[3], - ball_overlap_fraction=self.setup_params[4], + self.cell_detector = CellDetector( + settings.plane_height, + settings.plane_width, + start_z=self.z, + soma_centre_value=settings.detection_soma_centre_value, ) - - self.cell_detector = get_cell_detector( - plane_shape=self.setup_params[0].shape, # type: ignore - ball_z_size=self.setup_params[3], - z_offset=self.setup_params[5], + # make sure we load enough data to filter. Otherwise, we won't be ready + # to filter and the data loading thread will wait for data to be + # processed before loading more data, but that will never happen + self.n_queue_buffer = max( + self.settings.num_prefetch_batches, + self.ball_filter.num_batches_before_ready, ) + def _get_filter_buffers( + self, cpu: bool, tile_processor: TileProcessor + ) -> List[Tuple[torch.Tensor, torch.Tensor]]: + """ + Generates buffers to use for data loading and filtering. + + It creates pinned tensors ahead of time for faster copying to gpu. + Pinned tensors are kept in RAM and are faster to copy to GPU because + they can't be paged. So loaded data is copied to the tensor and then + sent to the device. + + For CPU even though we don't pin, it's useful to create the buffers + ahead of time and reuse it so we can filter in sub-processes + (see `_plane_filter`). + For tile masks, we only create buffers for CPU. On CUDA, they are + generated every time new on the device. + """ + batch_size = self.settings.batch_size + torch_dtype = getattr(torch, self.settings.filtering_dtype.__name__) + + buffers = [] + for _ in range(self.n_queue_buffer): + # the tensor used for data loading + tensor = torch.empty( + (batch_size, *self.settings.plane_shape), + dtype=torch_dtype, + pin_memory=not cpu, + device="cpu", + ) + + # tile mask buffer - only for cpu + masks = None + if cpu: + masks = tile_processor.get_tiled_buffer( + batch_size, self.settings.torch_device + ) + + buffers.append((tensor, masks)) + + return buffers + + @inference_wrapper + def _feed_signal_batches( + self, + thread: ThreadWithException, + data: types.array, + processors: List[ProcessWithException], + buffers: List[Tuple[torch.Tensor, torch.Tensor]], + ) -> None: + """ + Runs in its own thread. It loads the input data planes, converts them + to torch tensors of the right data-type, and sends them to cuda or to + subprocesses for cpu to be filtered etc. + """ + batch_size = self.settings.batch_size + device = self.settings.torch_device + start_plane = self.settings.start_plane + end_plane = start_plane + self.settings.n_planes + data_converter = self.settings.filter_data_converter_func + cpu = self.settings.torch_device == "cpu" + # should only have 2d filter processors on the cpu + assert bool(processors) == cpu + + # seed the queue with tokens for the buffers + for token in range(len(buffers)): + thread.send_msg_to_thread(token) + + for z in range(start_plane, end_plane, batch_size): + # convert the data to the right type + np_data = data_converter(data[z : z + batch_size, :, :]) + # if we ran out of batches, we are done! + n = np_data.shape[0] + assert n + + # thread/underlying queues get first crack at msg. Unless we get + # eof, this will block until a buffer is returned from the main + # thread for reuse + token = thread.get_msg_from_mainthread() + if token is EOFSignal: + return + + # buffer is free, get it from token + tensor, masks = buffers[token] + + # for last batch, it can be smaller than normal so only set up to n + tensor[:n, :, :] = torch.from_numpy(np_data) + tensor = tensor[:n, :, :] + if not cpu: + # send to device - it won't block here because we pinned memory + tensor = tensor.to(device=device, non_blocking=True) + + # if used, send each plane in batch to processor + used_processors = [] + if cpu: + used_processors = processors[:n] + for i, process in enumerate(used_processors): + process.send_msg_to_thread((token, i)) + + # tell the main thread to wait for processors (if used) + msg = token, tensor, masks, used_processors, n + + if n < batch_size: + # on last batch, we are also done after this + thread.send_msg_to_mainthread(msg) + return + # send the data to the main thread + thread.send_msg_to_mainthread(msg) + def process( self, - async_result_queue: Queue, - locks: List[Lock], + tile_processor: TileProcessor, + signal_array, *, - callback: Callable[[int], None], + callback: Optional[Callable[[int], None]], ) -> None: - progress_bar = tqdm(total=self.n_planes, desc="Processing planes") - for z in range(self.n_planes): - # Get result from the queue. - # - # It is important to remove the result from the queue here - # to free up memory once this plane has been processed by - # the 3D filter here - logger.debug(f"🏐 Waiting for plane {z}") - result = async_result_queue.get() - # .get() blocks until the result is available - plane, mask = result.get() - logger.debug(f"🏐 Got plane {z}") - - self.ball_filter.append(plane, mask) - - if self.ball_filter.ready: - # Let the next 2D filter run - z_release = z + self.n_locks_release + 1 - if z_release < len(locks): - logger.debug(f"🔓 Releasing lock for plane {z_release}") - locks[z_release].release() - - self._run_filter() + """ + Takes the processor and the data and passes them through the filtering + and cell detection stages. + + If the callback is provided, we call it after every plane with the + current z index to update the status. It may be called from secondary + threads. + """ + progress_bar = tqdm( + total=self.settings.n_planes, desc="Processing planes" + ) + cpu = self.settings.torch_device == "cpu" + n_threads = self.settings.n_torch_comp_threads + + # we re-use these tensors for data loading, so we have a fixed number + # of planes in memory. The feeder thread will wait to load more data + # until a tensor is free to be reused. + # We have to keep the tensors in memory in main process while it's + # in used elsewhere + buffers = self._get_filter_buffers(cpu, tile_processor) + + # on cpu these processes will 2d filter each plane in the batch + plane_processes = [] + if cpu: + for _ in range(self.settings.batch_size): + process = ProcessWithException( + target=_plane_filter, + args=(tile_processor, n_threads, buffers), + pass_self=True, + ) + process.start() + plane_processes.append(process) + + # thread that reads and sends us data + feed_thread = ThreadWithException( + target=self._feed_signal_batches, + args=(signal_array, plane_processes, buffers), + pass_self=True, + ) + feed_thread.start() - callback(self.z) - self.z += 1 - progress_bar.update() + # thread that takes the 3d filtered data and does cell detection + cells_thread = ThreadWithException( + target=self._run_filter_thread, + args=(callback, progress_bar), + pass_self=True, + ) + cells_thread.start() + + try: + self._process(feed_thread, cells_thread, tile_processor, cpu) + finally: + # if we end, make sure to tell the threads to stop + feed_thread.notify_to_end_thread() + cells_thread.notify_to_end_thread() + for process in plane_processes: + process.notify_to_end_thread() + + # the notification above ensures this won't block forever + feed_thread.join() + cells_thread.join() + for process in plane_processes: + process.join() + + # in case these threads sent us an exception but we didn't yet read + # it, make sure to process them + feed_thread.clear_remaining() + cells_thread.clear_remaining() + for process in plane_processes: + process.clear_remaining() progress_bar.close() logger.debug("3D filter done") - def _run_filter(self) -> None: - logger.debug(f"🏐 Ball filtering plane {self.z}") - # filtering original images, the images should be large enough in x/y - # to benefit from parallelization. Note: don't pass arg as keyword arg - # because numba gets stuck (probably b/c class jit is new) - self.ball_filter.walk(True) - - middle_plane = self.ball_filter.get_middle_plane() - if self.save_planes: - self.save_plane(middle_plane) - - logger.debug(f"🏫 Detecting structures for plane {self.z}") - self.previous_plane = self.cell_detector.process( - middle_plane, self.previous_plane - ) + def _process( + self, + feed_thread: ThreadWithException, + cells_thread: ThreadWithException, + tile_processor: TileProcessor, + cpu: bool, + ) -> None: + """ + Processes the loaded data from feeder thread. If on cpu it is already + 2d filtered so just 3d filter. On cuda we need to do both 2d and 3d + filtering. Then, it sends the filtered data off to the detection thread + for cell detection. + """ + processing_tokens = [] + + while True: + # thread/underlying queues get first crack at msg. Unless we get + # eof, this will block until we get more loaded data until no more + # data or exception + msg = feed_thread.get_msg_from_thread() + # feeder thread exits at the end, causing a eof to be sent + if msg is EOFSignal: + break + token, tensor, masks, used_processors, n = msg + # this token is in use until we return it + processing_tokens.append(token) + + if cpu: + # we did 2d filtering in different process. Make sure all the + # planes are done filtering. Each msg from feeder thread has + # corresponding msg for each used processor (unless exception) + for process in used_processors: + process.get_msg_from_thread() + # batch size can change at the end so resize buffer + planes = tensor[:n, :, :] + masks = masks[:n, :, :] + else: + # we're not doing 2d filtering in different process + planes, masks = tile_processor.get_tile_mask(tensor) - logger.debug(f"🏫 Structures done for plane {self.z}") + self.ball_filter.append(planes, masks) + if self.ball_filter.ready: + self.ball_filter.walk() + middle_planes = self.ball_filter.get_processed_planes() + + # at this point we know input tensor can be reused - return + # it so feeder thread can load more data into it + for token in processing_tokens: + feed_thread.send_msg_to_thread(token) + processing_tokens.clear() + + # thread/underlying queues get first crack at msg. Unless + # we get eof, this will block until we get a token, + # indicating we can send more data. The cells thread has a + # fixed token supply, ensuring we don't send it too much + # data, in case detection takes longer than filtering + # Also, error messages incoming are at most # tokens behind + token = cells_thread.get_msg_from_thread() + if token is EOFSignal: + break + # send it more data and return the token + cells_thread.send_msg_to_thread((middle_planes, token)) + + @inference_wrapper + def _run_filter_thread( + self, thread: ThreadWithException, callback, progress_bar + ) -> None: + """ + Runs in its own thread and takes the filtered planes and passes them + through the cell detection system. Also saves the planes as needed. + """ + detector = self.cell_detector + original_dtype = self.settings.plane_original_np_dtype + detection_converter = self.settings.detection_data_converter_func + save_planes = self.settings.save_planes + previous_plane = None + bf = self.ball_filter + + # these many planes are not processed at start because 3d filter uses + # it as padding at the start of filter + progress_bar.update(bf.first_valid_plane) + + # main thread needs a token to send us planes - populate with some + for _ in range(self.n_queue_buffer): + thread.send_msg_to_mainthread(object()) + + while True: + # thread/underlying queues get first crack at msg. Unless we get + # eof, this will block until we get more data + msg = thread.get_msg_from_mainthread() + # requested that we return. This can mean the main thread finished + # sending data and it appended eof - so we get eof after all planes + if msg is EOFSignal: + # these many planes are not processed at the end because 3d + # filter uses it as padding at the end of the filter + progress_bar.update(bf.remaining_planes) + return + + # convert plane to the type needed by detection system + # we should not need scaling because throughout + # filtering we make sure result fits in this data type + middle_planes, token = msg + detection_middle_planes = detection_converter(middle_planes) + + logger.debug(f"🏫 Detecting structures for planes {self.z}+") + for plane, detection_plane in zip( + middle_planes, detection_middle_planes + ): + if save_planes: + self.save_plane(plane.astype(original_dtype)) + + previous_plane = detector.process( + detection_plane, previous_plane + ) + + if callback is not None: + callback(self.z) + self.z += 1 + progress_bar.update() + + # we must return the token, otherwise the main thread will run out + # and won't send more data to us + thread.send_msg_to_mainthread(token) + logger.debug(f"🏫 Structures done for planes {self.z}+") def save_plane(self, plane: np.ndarray) -> None: - if self.plane_directory is None: + """ + Saves the plane as an image according to the settings. + """ + if self.settings.plane_directory is None: raise ValueError( "plane_directory must be set to save planes to file" ) - plane_name = f"plane_{str(self.z).zfill(4)}.tif" - f_path = os.path.join(self.plane_directory, plane_name) - tifffile.imsave(f_path, plane.T) - - def get_results(self, worker_pool: multiprocessing.Pool) -> List[Cell]: + # self.z is zero based, we should save names as 1-based. + plane_name = self.settings.plane_prefix.format(n=self.z + 1) + ".tif" + f_path = os.path.join(self.settings.plane_directory, plane_name) + tifffile.imwrite(f_path, plane) + + def get_results(self, settings: DetectionSettings) -> List[Cell]: + """ + Returns the detected cells. + + After filtering, this parses the resulting cells and splits large + bright regions into individual cells. + """ logger.info("Splitting cell clusters and writing results") - max_cell_volume = sphere_volume( - self.soma_size_spread_factor * self.soma_diameter / 2 - ) + root_settings = self.settings + max_cell_volume = settings.max_cell_volume + # valid cells cells = [] + # regions that must be split into cells needs_split = [] structures = self.cell_detector.get_structures().items() logger.debug(f"Processing {len(structures)} found cells") @@ -158,7 +468,7 @@ def get_results(self, worker_pool: multiprocessing.Pool) -> List[Cell]: cell_centre = get_structure_centre(cell_points) cells.append(Cell(cell_centre.tolist(), Cell.UNKNOWN)) else: - if cell_volume < self.max_cluster_size: + if cell_volume < settings.max_cluster_size: needs_split.append((cell_id, cell_points)) else: cell_centre = get_structure_centre(cell_points) @@ -174,13 +484,22 @@ def get_results(self, worker_pool: multiprocessing.Pool) -> List[Cell]: total=len(needs_split), desc="Splitting cell clusters" ) - # we are not returning Cell instances from func because it'd be pickled - # by multiprocess which slows it down - func = partial(_split_cells, outlier_keep=self.outlier_keep) - for cell_centres in worker_pool.imap_unordered(func, needs_split): - for cell_centre in cell_centres: - cells.append(Cell(cell_centre.tolist(), Cell.UNKNOWN)) - progress_bar.update() + # the settings is pickled and re-created for each process, which is + # important because splitting can modify the settings, so we don't want + # parallel modifications for same object + f = partial(_split_cells, settings=settings) + ctx = mp.get_context("spawn") + # we can't use the context manager because of coverage issues: + # https://pytest-cov.readthedocs.io/en/latest/subprocess-support.html + pool = ctx.Pool(processes=root_settings.n_processes) + try: + for cell_centres in pool.imap_unordered(f, needs_split): + for cell_centre in cell_centres: + cells.append(Cell(cell_centre.tolist(), Cell.UNKNOWN)) + progress_bar.update() + finally: + pool.close() + pool.join() progress_bar.close() logger.debug( @@ -190,13 +509,15 @@ def get_results(self, worker_pool: multiprocessing.Pool) -> List[Cell]: return cells -def _split_cells(arg, outlier_keep): +@inference_wrapper +def _split_cells(arg, settings: DetectionSettings): + # runs in its own process for a bright region to be split. + # For splitting cells, we only run with one thread. Because the volume is + # likely small and using multiple threads would cost more in overhead than + # is worth. num threads can be set only at processes level. + torch.set_num_threads(1) cell_id, cell_points = arg try: - return split_cells(cell_points, outlier_keep=outlier_keep) + return split_cells(cell_points, settings=settings) except (ValueError, AssertionError) as err: raise StructureSplitException(f"Cell {cell_id}, error; {err}") - - -def sphere_volume(radius: float) -> float: - return (4 / 3) * math.pi * radius**3 diff --git a/cellfinder/core/main.py b/cellfinder/core/main.py index 5aad49f7..f9cc5c48 100644 --- a/cellfinder/core/main.py +++ b/cellfinder/core/main.py @@ -26,7 +26,7 @@ def main( ball_z_size: int = 15, ball_overlap_fraction: float = 0.6, log_sigma_size: float = 0.2, - n_sds_above_mean_thresh: int = 10, + n_sds_above_mean_thresh: float = 10, soma_spread_factor: float = 1.4, max_cluster_size: int = 100000, cube_width: int = 50, @@ -36,11 +36,13 @@ def main( skip_detection: bool = False, skip_classification: bool = False, detected_cells: List[Cell] = None, + classification_batch_size: Optional[int] = None, + classification_torch_device: str = "cpu", *, detect_callback: Optional[Callable[[int], None]] = None, classify_callback: Optional[Callable[[int], None]] = None, detect_finished_callback: Optional[Callable[[list], None]] = None, -) -> List: +) -> List[Cell]: """ Parameters ---------- @@ -74,6 +76,8 @@ def main( n_free_cpus, log_sigma_size, n_sds_above_mean_thresh, + batch_size=classification_batch_size, + torch_device=classification_torch_device, callback=detect_callback, ) diff --git a/cellfinder/core/tools/IO.py b/cellfinder/core/tools/IO.py new file mode 100644 index 00000000..109580bb --- /dev/null +++ b/cellfinder/core/tools/IO.py @@ -0,0 +1,45 @@ +import pooch + + +def fetch_pooch_directory( + registry: pooch.Pooch, + directory_name: str, + processor=None, + downloader=None, + progressbar=False, +): + """ + Fetches files from the Pooch registry that belong to a specific directory. + Parameters: + registry (pooch.Pooch): The Pooch registry object. + directory_name (str): + The remote relative path of the directory to fetch files from. + processor (callable, optional): + A function to process the fetched files. Defaults to None. + downloader (callable, optional): + A function to download the files. Defaults to None. + progressbar (bool, optional): + Whether to display a progress bar during the fetch. + Defaults to False. + Returns: + str: The local absolute path to the fetched directory. + """ + names = [] + for name in registry.registry_files: + if name.startswith(f"{directory_name}/"): + names.append(name) + + if not names: + raise FileExistsError( + f"Unable to find files in directory {directory_name}" + ) + + for name in names: + registry.fetch( + name, + processor=processor, + downloader=downloader, + progressbar=progressbar, + ) + + return str(registry.abspath / directory_name) diff --git a/cellfinder/core/tools/threading.py b/cellfinder/core/tools/threading.py new file mode 100644 index 00000000..5c3e54ff --- /dev/null +++ b/cellfinder/core/tools/threading.py @@ -0,0 +1,380 @@ +""" +Provides classes that can run a function in another thread or process and +allow passing data to and from the threads/processes. It also passes on any +exceptions that occur in the secondary thread/sub-process in the main thread +or when it exits. + +If using a sub-process and pytorch Tensors are sent from/to the main +process, pytorch will memory map the tensor so the same data is shared and +edits in the main process will be visible in the sub-process. However, there +are limitations (such so not re-sharing a tensor shared with us). See +https://pytorch.org/docs/stable/multiprocessing.html#sharing-cuda-tensors for +details. + +Typical example:: + + from cellfinder.core.tools.threading import ThreadWithException, \\ + EOFSignal, ProcessWithException + import torch + + + def worker(thread: ThreadWithException, power: float): + while True: + # if the main thread wants us to exit, it'll wake us up + msg = thread.get_msg_from_mainthread() + # we were asked to exit + if msg == EOFSignal: + return + + tensor_id, tensor, add = msg + # tensors are memory mapped for subprocess (and obv threads) so we + # can do the work inplace and result will be visible in main thread + tensor += add + tensor.pow_(power) + + # we should not share a tensor shared with us as per pytorch docs, + # just send the id back + thread.send_msg_to_mainthread(tensor_id) + + # we can also handle errors here, which will be re-raised in the main + # process + if tensor_id == 7: + raise ValueError("I fell asleep") + + + if __name__ == "__main__": + data = torch.rand((5, 10)) + thread = ThreadWithException( + target=worker, args=(2.5,), pass_self=True + ) + # use thread or sub-process + # thread = ProcessWithException( + # target=worker, args=(2.5,), pass_self=True + # ) + thread.start() + + try: + for i in range(10): + thread.send_msg_to_thread((i, data, i / 2)) + # if the thread raises an exception, get_msg_from_thread will + # re-raise it here + msg = thread.get_msg_from_thread() + if msg == EOFSignal: + # thread exited for whatever reason (not exception) + break + + print(f"Thread processed tensor {i}") + finally: + # whatever happens, make sure thread is told to finish so it + # doesn't get stuck + thread.notify_to_end_thread() + thread.join() + +When run, this prints:: + + Thread processed tensor 0 + Thread processed tensor 1 + Thread processed tensor 2 + Thread processed tensor 3 + Thread processed tensor 4 + Thread processed tensor 5 + Thread processed tensor 6 + Thread processed tensor 7 + Traceback (most recent call last): + File "threading.py", line 139, in user_func_runner + self.user_func(self, *self.args) + File "play.py", line 24, in worker + raise ValueError("I fell asleep") + ValueError: I fell asleep + + The above exception was the direct cause of the following exception: + + Traceback (most recent call last): + File "play.py", line 38, in + msg = thread.get_msg_from_thread() + File "threading.py", line 203, in get_msg_from_thread + raise ExecutionFailure( + ExecutionFailure: Reporting failure from other thread/process +""" + +from queue import Queue +from threading import Thread +from typing import Any, Callable, Optional + +import torch.multiprocessing as mp + + +class EOFSignal: + """ + This class object (not instance) is returned by the thread/process as an + indicator that someone exited or that you should exit. + """ + + pass + + +class ExecutionFailure(Exception): + """ + Exception class raised in the main thread when the function running in the + thread/process raises an exception. + + Get the original exception using the `__cause__` property of this + exception. + """ + + pass + + +class ExceptionWithQueueMixIn: + """ + A mix-in that can be used with a secondary thread or sub-process to + facilitate communication with them and the passing back of exceptions and + signals such as when the thread/sub-process exits or when the main + thread/process want the sub-process/thread to end. + + Communication happens bi-directionally via queues. These queues are not + limited in buffer size, so if there's potential for the queue to be backed + up because the main thread or sub-thread/process is unable to read and + process messages quickly enough, you must implement some kind of + back-pressure to prevent the queue from growing unlimitedly in size. + E.g. using a limited number of tokens that must be held to send a message + to prevent a sender from thrashing the queue. And these tokens can be + passed between main thread to sub-thread/process etc on each message and + when the message is done processing. + + The main process uses `get_msg_from_thread` to raise exceptions in the + main thread that occurred in the sub-thread/process. So that must be + called in the main thread if you want to know if the sub-thread/process + exited. + """ + + to_thread_queue: Queue + """ + Internal queue used to send messages to the thread from the main thread. + """ + + from_thread_queue: Queue + """ + Internal queue used to send messages from the thread to the main thread. + """ + + args: tuple = () + """ + The args provided by the caller that will be passed to the function running + in the thread/process when it starts. + """ + + # user_func_runner must end with an eof msg to the main thread. This tracks + # whether the main thread saw the eof. If it didn't, we know there are more + # msgs in the queue waiting to be read + _saw_eof: bool = False + + def __init__(self, target: Callable, pass_self: bool = False): + self.user_func = target + self.pass_self = pass_self + + def user_func_runner(self) -> None: + """ + The internal function that runs the target function provided by the + user. + """ + try: + if self.user_func is not None: + if self.pass_self: + self.user_func(self, *self.args) + else: + self.user_func(*self.args) + except BaseException as e: + self.from_thread_queue.put( + ("exception", e), block=True, timeout=None + ) + finally: + self.from_thread_queue.put(("eof", None), block=True, timeout=None) + + def send_msg_to_mainthread(self, value: Any) -> None: + """ + Sends the `value` to the main thread, from the sub-thread/process. + + The value must be pickleable if it's running in a sub-process. The main + thread then can read it using `get_msg_from_thread`. + + The queue is not limited in size, so if there's potential for the + queue to be backed up because the main thread is unable to read quickly + enough, you must implement some kind of back-pressure to prevent + the queue from growing unlimitedly in size. + """ + self.from_thread_queue.put(("user", value), block=True, timeout=None) + + def clear_remaining(self) -> None: + """ + Celled in the main-thread as part of cleanup when we expect the + secondary thread to have exited (e.g. we sent it a message telling it + to). + + It will drop any waiting messages sent by the secondary thread, but + more importantly, it will handle exceptions raised in the secondary + thread before it exited, that may not have yet been processed in the + main thread (e.g. we stopped listening to messages from the secondary + thread before we got an eof from it). + """ + while not self._saw_eof: + self.get_msg_from_thread() + + def get_msg_from_thread(self, timeout: Optional[float] = None) -> Any: + """ + Gets a message from the sub-thread/process sent to the main thread. + + This blocks forever until the message is sent by the sub-thread/process + and received by us. If `timeout` is not None, that's how long we block + here, in seconds, before raising an `Empty` Exception if no message was + received by then. + + If the return value is the `EOFSignal` object, it means the + sub-thread/process has or is about to exit. + + If the sub-thread/process has raised an exception, that exception is + caught and re-raised in the main thread when this method is called. + The exception raised is an `ExecutionFailure` and its `__cause__` + property is the original exception raised in the sub-thread/process. + + A typical pattern is:: + + >>> try: + ... msg = thread.get_msg_from_thread() + ... if msg == EOFSignal: + ... # thread exited + ... pass + ... else: + ... # do something with the msg + ... pass + ... except ExecutionFailure as e: + ... print(f"got exception {type(e.__cause__)}") + ... print(f"with message {e.__cause__.args[0]}") + """ + msg, value = self.from_thread_queue.get(block=True, timeout=timeout) + if msg == "eof": + self._saw_eof = True + return EOFSignal + if msg == "exception": + raise ExecutionFailure( + "Reporting failure from other thread/process" + ) from value + + return value + + def send_msg_to_thread(self, value: Any) -> None: + """ + Sends the `value` to the sub-thread/process, from the main thread. + + The value must be pickleable if it's sent to a sub-process. The thread + then can read it using `get_msg_from_mainthread`. + + The queue is not limited in size, so if there's potential for the + queue to be backed up because the thread is unable to read quickly + enough, you must implement some kind of back-pressure to prevent + the queue from growing unlimitedly in size. + """ + self.to_thread_queue.put(("user", value), block=True, timeout=None) + + def notify_to_end_thread(self) -> None: + """ + Sends a message to the sub-process/thread that the main process wants + them to end. The sub-process/thread sees it by receiving an `EOFSignal` + message from `get_msg_from_mainthread` and it should exit asap. + """ + self.to_thread_queue.put(("eof", None), block=True, timeout=None) + + def get_msg_from_mainthread(self, timeout: Optional[float] = None) -> Any: + """ + Gets a message from the main thread sent to the sub-thread/process. + + This blocks forever until the message is sent by the main thread + and received by us. If `timeout` is not None, that's how long we block + here, in seconds, before raising an `Empty` Exception if no message was + received by then. + + If the return value is the `EOFSignal` object, it means the + main thread has sent us an EOF message using `notify_to_end_thread` + because it wants us to exit. + + A typical pattern is:: + + >>> msg = thread.get_msg_from_mainthread() + ... if msg == EOFSignal: + ... # we should exit asap + ... return + ... # do something with the msg + """ + msg, value = self.to_thread_queue.get(block=True, timeout=timeout) + if msg == "eof": + return EOFSignal + + return value + + +class ThreadWithException(ExceptionWithQueueMixIn): + """ + Runs a target function in a secondary thread. + """ + + thread: Thread = None + """The thread running the function.""" + + def __init__(self, target, args=(), **kwargs): + super().__init__(target=target, **kwargs) + self.to_thread_queue = Queue(maxsize=0) + self.from_thread_queue = Queue(maxsize=0) + self.args = args + self.thread = Thread(target=self.user_func_runner) + + def start(self) -> None: + """Starts the thread that runs the target function.""" + self.thread.start() + + def join(self, timeout: Optional[float] = None) -> None: + """ + Waits and blocks until the thread exits. If timeout is given, + it's the duration to wait, in seconds, before returning. + + To know if it exited, you need to check `is_alive` of the `thread`. + """ + self.thread.join(timeout=timeout) + + +class ProcessWithException(ExceptionWithQueueMixIn): + """ + Runs a target function in a sub-process. + + Any data sent between the processes must be pickleable. + + We run the function using `torch.multiprocessing`. Any tensors sent between + the main process and sub-process is memory mapped so that it doesn't copy + the tensor. So any edits in the main process/sub-process is seen in the + other as well. See https://pytorch.org/docs/stable/multiprocessing.html + for more details on this. + """ + + process: mp.Process = None + """The sub-process running the function.""" + + def __init__(self, target, args=(), **kwargs): + super().__init__(target=target, **kwargs) + ctx = mp.get_context("spawn") + self.to_thread_queue = ctx.Queue(maxsize=0) + self.from_thread_queue = ctx.Queue(maxsize=0) + self.process = ctx.Process(target=self.user_func_runner) + + self.args = args + + def start(self) -> None: + """Starts the sub-process that runs the target function.""" + self.process.start() + + def join(self, timeout: Optional[float] = None) -> None: + """ + Waits and blocks until the process exits. If timeout is given, + it's the duration to wait, in seconds, before returning. + + To know if it exited, you need to check `is_alive` of the `process`. + """ + self.process.join(timeout=timeout) diff --git a/cellfinder/core/tools/tools.py b/cellfinder/core/tools/tools.py index f7a628d5..231515de 100644 --- a/cellfinder/core/tools/tools.py +++ b/cellfinder/core/tools/tools.py @@ -1,19 +1,141 @@ +from functools import wraps from random import getrandbits, uniform -from typing import Optional +from typing import Callable, Optional, Type import numpy as np +import torch from natsort import natsorted -def get_max_possible_value(obj_in: np.ndarray) -> int: +def inference_wrapper(func): """ - Returns the maximum allowed value for a numpy array of integer data type. + Decorator that makes the decorated function/method run with + `torch.inference_mode` set to True. + """ + + @wraps(func) + def inner_function(*args, **kwargs): + with torch.inference_mode(True): + return func(*args, **kwargs) + + return inner_function + + +def get_max_possible_int_value(dtype: Type[np.number]) -> int: + """ + Returns the maximum allowed integer for a numpy array of given type. + + If dtype is of integer type, it's the maximum value. If it's a floating + type, it's the maximum integer that can be accurately represented. + E.g. for float32, only integers up to 2**24 can be represented (due to + the number of bits representing the mantissa (significand). """ - dtype = obj_in.dtype if np.issubdtype(dtype, np.integer): return np.iinfo(dtype).max - else: - raise ValueError("obj_in must be a numpy array of integer data type.") + if np.issubdtype(dtype, np.floating): + mant = np.finfo(dtype).nmant + return 2**mant + raise ValueError("datatype must be of integer or floating data type") + + +def get_min_possible_int_value(dtype: Type[np.number]) -> int: + """ + Returns the minimum allowed integer for a numpy array of given type. + + If dtype is of integer type, it's the minimum value. If it's a floating + type, it's the minimum integer that can be accurately represented. + E.g. for float32, only integers up to -2**24 can be represented (due to + the number of bits representing the mantissa (significand). + """ + if np.issubdtype(dtype, np.integer): + return np.iinfo(dtype).min + if np.issubdtype(dtype, np.floating): + mant = np.finfo(dtype).nmant + # the sign bit is separate so we have the full mantissa for value + return -(2**mant) + raise ValueError("datatype must be of integer or floating data type") + + +def get_data_converter( + src_dtype: Type[np.number], dest_dtype: Type[np.floating] +) -> Callable[[np.ndarray], np.ndarray]: + """ + Returns a function that can be called to convert one data-type to another, + scaling the data down as needed. + + If the maximum value supported by the input data-type is smaller than that + supported by destination data-type, the data will be scaled by the ratio + of maximum integer representable by the `output / input` data-types. + If the max is equal or less, it's simply converted to the target type. + + Parameters + ---------- + src_dtype : np.dtype + The data-type of the input data. + dest_dtype : np.dtype + The data-type of the returned data. Currently, it must be a floating + type and `np.float32` or `np.float64`. + + Returns + ------- + callable: function + A function that takes a single input data parameter and returns + the converted data. + """ + if not np.issubdtype(dest_dtype, np.float32) and not np.issubdtype( + dest_dtype, np.float64 + ): + raise ValueError( + f"Destination dtype must be a float32 or float64, " + f"but it is {dest_dtype}" + ) + + in_min = get_min_possible_int_value(src_dtype) + in_max = get_max_possible_int_value(src_dtype) + out_min = get_min_possible_int_value(dest_dtype) + out_max = get_max_possible_int_value(dest_dtype) + in_abs_max = max(in_max, abs(in_min)) + out_abs_max = max(out_max, abs(out_min)) + + def unchanged(data: np.ndarray) -> np.ndarray: + return np.asarray(data) + + def float_to_float_scale_down(data: np.ndarray) -> np.ndarray: + return ((np.asarray(data) / in_abs_max) * out_abs_max).astype( + dest_dtype + ) + + def int_to_float_scale_down(data: np.ndarray) -> np.ndarray: + # data must fit in float64 + data = np.asarray(data).astype(np.float64) + return ((data / in_abs_max) * out_abs_max).astype(dest_dtype) + + def to_float_unscaled(data: np.ndarray) -> np.ndarray: + return np.asarray(data).astype(dest_dtype) + + if src_dtype == dest_dtype: + return unchanged + + # out can hold the largest in values - just convert to float + if out_min <= in_min < in_max <= out_max: + return to_float_unscaled + + # need to scale down before converting to float + if np.issubdtype(src_dtype, np.integer): + # if going to float32 and it didn't fit input must fit in 64-bit float + # so we can temp store it there to scale. If going to float64, same. + if in_max > get_max_possible_int_value( + np.float64 + ) or in_min < get_min_possible_int_value(np.float64): + raise ValueError( + f"The input datatype {src_dtype} cannot fit in a " + f"64-bit float" + ) + return int_to_float_scale_down + + # for float input, however big it is, we can always scale it down in the + # input data type before changing type + return float_to_float_scale_down def union(a, b): diff --git a/pyproject.toml b/pyproject.toml index 0be3a158..a2982d61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "scikit-image", "scikit-learn", "keras==3.5.0", - "torch>=2.1.0", + "torch>=2.1.0,!=2.4", "tifffile", "tqdm", ] @@ -51,6 +51,7 @@ dev = [ "pytest-timeout", "pytest", "tox", + "pooch >= 1", ] napari = [ "brainglobe-napari-io", @@ -129,10 +130,12 @@ setenv = KERAS_BACKEND = torch passenv = NUMBA_DISABLE_JIT + PYTORCH_JIT CI GITHUB_ACTIONS DISPLAY XAUTHORITY NUMPY_EXPERIMENTAL_ARRAY_FUNCTION PYVISTA_OFF_SCREEN + BRAINGLOBE_TEST_DATA_DIR """ diff --git a/tests/core/conftest.py b/tests/core/conftest.py index d78e5cf5..eb4d1e8a 100644 --- a/tests/core/conftest.py +++ b/tests/core/conftest.py @@ -1,7 +1,9 @@ import os -from typing import Tuple +from pathlib import Path +from typing import List, Tuple import numpy as np +import pooch import pytest import torch.backends.mps from skimage.filters import gaussian @@ -27,6 +29,61 @@ def set_device_arm_macos_ci(): force_cpu() +def pytest_collection_modifyitems(session, config, items: List[pytest.Item]): + # this hook is called by pytest after test collection. Move the + # test_detection test to the end because if it's run in the middle we run + # into numba issue #9576 and the tests fail + # end_files are moved to the end, in the given order + end_files = [ + "test_connected_components_labelling.py", + "test_structure_detection.py", + ] + + items_new = [t for t in items if t.path.name not in end_files] + for name in end_files: + items_new.extend([t for t in items if t.path.name == name]) + + items[:] = items_new + + +@pytest.fixture +def test_data_registry(): + """ + Create a test data registry for BrainGlobe. + + Returns: + pooch.Pooch: The test data registry object. + + """ + registry = pooch.create( + path=pooch.os_cache("brainglobe_test_data"), + base_url="https://gin.g-node.org/BrainGlobe/test-data/raw/master/cellfinder/", + env="BRAINGLOBE_TEST_DATA_DIR", + ) + + registry.load_registry( + Path(__file__).parent.parent / "data" / "pooch_registry.txt" + ) + return registry + + +def mark_sphere( + data_zyx: np.ndarray, center_xyz, radius: int, fill_value: int +) -> None: + shape_zyx = data_zyx.shape + + z, y, x = np.mgrid[ + 0 : shape_zyx[0] : 1, 0 : shape_zyx[1] : 1, 0 : shape_zyx[2] : 1 + ] + dist = np.sqrt( + (x - center_xyz[0]) ** 2 + + (y - center_xyz[1]) ** 2 + + (z - center_xyz[2]) ** 2 + ) + # 100 seems to be the right size so std is not too small for filters + data_zyx[dist <= radius] = fill_value + + @pytest.fixture(scope="session") def no_free_cpus() -> int: """ @@ -85,3 +142,70 @@ def synthetic_bright_spots() -> Tuple[np.ndarray, np.ndarray]: background_array = np.zeros_like(signal_array) return signal_array, background_array + + +@pytest.fixture(scope="session") +def synthetic_single_spot() -> ( + Tuple[np.ndarray, np.ndarray, Tuple[int, int, int]] +): + """ + Creates a synthetic signal array with a single spherical spot + in a 3d numpy array to be used for cell detection testing. + + The max value is 100 and min is zero. The array is a floating type. + You must convert it to the right data type for your tests. + Also, `n_sds_above_mean_thresh` must be 1 or larger. + """ + shape_zyx = 20, 50, 50 + c_xyz = 25, 25, 10 + + signal_array = np.zeros(shape_zyx) + background_array = np.zeros_like(signal_array) + mark_sphere(signal_array, center_xyz=c_xyz, radius=2, fill_value=100) + + # 1 std should be larger, so it can be considered bright + assert np.mean(signal_array) + np.std(signal_array) > 1 + + return signal_array, background_array, c_xyz + + +@pytest.fixture(scope="session") +def synthetic_spot_clusters() -> ( + Tuple[np.ndarray, np.ndarray, List[Tuple[int, int, int]]] +): + """ + Creates a synthetic signal array with a 4 overlapping spherical spots + in a 3d numpy array to be used for cell cluster splitting testing. + + The max value is 100 and min is zero. The array is a floating type. + You must convert it to the right data type for your tests. + Also, `n_sds_above_mean_thresh` must be 1 or larger. + """ + shape_zyx = 20, 100, 100 + radius = 5 + s = 50 - radius * 4 + centers_xyz = [ + (s, 50, 10), + (s + 2 * radius - 1, 50, 10), + (s + 4 * radius - 2, 50, 10), + (s + 6 * radius - 3, 50, 10), + ] + + signal_array = np.zeros(shape_zyx) + background_array = np.zeros_like(signal_array) + + for center in centers_xyz: + mark_sphere( + signal_array, center_xyz=center, radius=radius, fill_value=100 + ) + + return signal_array, background_array, centers_xyz + + +@pytest.fixture(scope="session") +def repo_data_path() -> Path: + """ + The root path where the data used during test is stored + """ + # todo: use mod relative paths to find data instead of depending on cwd + return Path(__file__).parent.parent / "data" diff --git a/tests/core/test_integration/test_detection.py b/tests/core/test_integration/test_detection.py index fc7bf2f3..63a5fc41 100644 --- a/tests/core/test_integration/test_detection.py +++ b/tests/core/test_integration/test_detection.py @@ -4,9 +4,13 @@ import brainglobe_utils.IO.cells as cell_io import numpy as np import pytest +import torch +from brainglobe_utils.cells.cells import Cell from brainglobe_utils.general.system import get_num_processes from brainglobe_utils.IO.image.load import read_with_dask +from cellfinder.core.detect.detect import main as detect_main +from cellfinder.core.detect.filters.volume.ball_filter import InvalidVolume from cellfinder.core.main import main data_dir = os.path.join( @@ -133,9 +137,14 @@ def detect_finished_callback(points): classify_callback=classify_callback, detect_finished_callback=detect_finished_callback, n_free_cpus=no_free_cpus, + ball_z_size=15, ) - np.testing.assert_equal(planes_done, np.arange(len(signal_array))) + skipped_planes = int(round(15 / voxel_sizes[0])) - 1 + skip_start = skipped_planes // 2 + skip_end = skipped_planes - skip_start + n = len(signal_array) - skip_end + np.testing.assert_equal(planes_done, np.arange(skip_start, n)) np.testing.assert_equal(batches_classified, [0]) ncalls = len(points_found) @@ -144,12 +153,6 @@ def detect_finished_callback(points): assert npoints == 120, f"Expected 120 points, found {npoints}" -def test_floating_point_error(signal_array, background_array): - signal_array = signal_array.astype(float) - with pytest.raises(ValueError, match="signal_array must be integer"): - main(signal_array, background_array, voxel_sizes) - - def test_synthetic_data(synthetic_bright_spots, no_free_cpus): signal_array, background_array = synthetic_bright_spots detected = main( @@ -174,3 +177,141 @@ def test_data_dimension_error(ndim): with pytest.raises(ValueError, match="Input data must be 3D"): main(signal_array, background_array, voxel_sizes) + + +@pytest.mark.parametrize("device", ["cuda", "cpu"]) +@pytest.mark.parametrize( + "dtype", + [ + np.uint8, + np.uint16, + np.uint32, + np.int8, + np.int16, + np.int32, + np.float32, + np.float64, + ], +) +def test_signal_data_types(synthetic_single_spot, no_free_cpus, dtype, device): + + if device == "cuda" and not torch.cuda.is_available(): + pytest.skip("Cuda is not available") + + signal_array, background_array, center = synthetic_single_spot + signal_array = signal_array.astype(dtype) + # for signed ints, make some data negative + if np.issubdtype(dtype, np.signedinteger): + # min of signal_array is zero + assert np.isclose(0, np.min(signal_array)) + shift = (np.max(signal_array) - np.min(signal_array)) // 2 + signal_array = signal_array - shift + + background_array = background_array.astype(dtype) + detected = main( + signal_array, + background_array, + n_sds_above_mean_thresh=1.0, + voxel_sizes=voxel_sizes, + n_free_cpus=no_free_cpus, + skip_classification=True, + classification_torch_device=device, + ) + + assert len(detected) == 1 + assert detected[0] == Cell(center, Cell.UNKNOWN) + + +@pytest.mark.parametrize("use_scipy", [True, False]) +@pytest.mark.parametrize("device", ["cuda", "cpu"]) +def test_detection_scipy_torch( + synthetic_single_spot, no_free_cpus, use_scipy, device +): + + if device == "cuda" and not torch.cuda.is_available(): + pytest.xfail("Cuda is not available") + + signal_array, background_array, center = synthetic_single_spot + signal_array = signal_array.astype(np.float32) + + detected = detect_main( + signal_array, + n_sds_above_mean_thresh=1.0, + voxel_sizes=voxel_sizes, + n_free_cpus=no_free_cpus, + torch_device=device, + use_scipy=use_scipy, + ) + + assert len(detected) == 1 + assert detected[0] == Cell(center, Cell.UNKNOWN) + + +@pytest.mark.parametrize("device", ["cuda", "cpu"]) +def test_detection_cluster_splitting( + synthetic_spot_clusters, no_free_cpus, device +): + """ + Test cluster splitting for overlapping cells. + + Test filtering/detection on cpu and cuda. Because splitting is only on cpu + so make sure if detection is on cuda, splitting still works. + """ + + if device == "cuda" and not torch.cuda.is_available(): + pytest.xfail("Cuda is not available") + + signal_array, background_array, centers_xyz = synthetic_spot_clusters + signal_array = signal_array.astype(np.float32) + + detected = detect_main( + signal_array, + n_sds_above_mean_thresh=1.0, + voxel_sizes=voxel_sizes, + n_free_cpus=no_free_cpus, + torch_device=device, + ) + + assert len(detected) == len(centers_xyz) + for cell, center in zip(detected, centers_xyz): + p = [cell.x, cell.y, cell.z] + d = np.sqrt(np.sum(np.square(np.subtract(center, p)))) + assert d <= 3 + assert cell.type == Cell.UNKNOWN + + +def test_detection_cell_too_large(synthetic_spot_clusters, no_free_cpus): + """ + Test we detect one big artifact if the signal has a too large foreground + structure. + """ + # max_cell_volume is volume of soma * spread sphere. For values below + # radius is 7 pixels. So volume is ~1500 pixels + signal_array = np.zeros((15, 100, 100), dtype=np.float32) + # set volume larger than max volume to bright + signal_array[6 : 6 + 6, 40 : 40 + 26, 40 : 40 + 26] = 1000 + + detected = detect_main( + signal_array, + n_sds_above_mean_thresh=1.0, + voxel_sizes=(5, 2, 2), + soma_diameter=20, + n_free_cpus=no_free_cpus, + max_cluster_size=2000 * 5 * 2 * 2, + ) + + assert len(detected) == 1 + # not sure why it subtracts one to center, but probably rounding + assert detected[0] == Cell([39 + 13, 39 + 13, 5 + 3], Cell.ARTIFACT) + + +@pytest.mark.parametrize("y,x", [(100, 30), (30, 100)]) +def test_detection_plane_too_small(synthetic_spot_clusters, y, x): + # plane smaller than ball filter kernel should cause error + with pytest.raises(InvalidVolume): + detect_main( + np.zeros((5, y, x)), + n_sds_above_mean_thresh=1.0, + voxel_sizes=(1, 1, 1), + ball_xy_size=50, + ) diff --git a/tests/core/test_integration/test_detection_structure_splitting.py b/tests/core/test_integration/test_detection_structure_splitting.py index 31f04672..5fe74a77 100644 --- a/tests/core/test_integration/test_detection_structure_splitting.py +++ b/tests/core/test_integration/test_detection_structure_splitting.py @@ -6,38 +6,45 @@ real life data. """ -import os - import numpy as np import pytest from brainglobe_utils.IO.image.load import read_with_dask +from cellfinder.core.detect.filters.setup_filters import DetectionSettings from cellfinder.core.detect.filters.volume.structure_splitting import ( split_cells, ) from cellfinder.core.main import main -data_dir = os.path.join( - os.getcwd(), "tests", "data", "integration", "detection" -) -signal_data_path = os.path.join(data_dir, "structure_split_test", "signal") -background_data_path = os.path.join( - data_dir, "structure_split_test", "background" -) - voxel_sizes = [5, 2.31, 2.31] @pytest.fixture -def signal_array(): +def signal_array(repo_data_path): """A signal array that contains a structure that needs splitting.""" - return read_with_dask(signal_data_path) + return read_with_dask( + str( + repo_data_path + / "integration" + / "detection" + / "structure_split_test" + / "signal" + ) + ) @pytest.fixture -def background_array(): +def background_array(repo_data_path): """A background array that contains a structure that needs splitting.""" - return read_with_dask(background_data_path) + return read_with_dask( + str( + repo_data_path + / "integration" + / "detection" + / "structure_split_test" + / "background" + ) + ) def test_structure_splitting(signal_array, background_array): @@ -75,7 +82,16 @@ def test_underflow_issue_435(): bright_voxels[np.logical_or(inside1, inside2)] = True bright_indices = np.argwhere(bright_voxels) - centers = split_cells(bright_indices) + settings = DetectionSettings( + plane_shape=(100, 100), + plane_original_np_dtype=np.float32, + voxel_sizes=(1, 1, 1), + ball_xy_size_um=3, + ball_z_size_um=3, + ball_overlap_fraction=0.8, + soma_diameter_um=7, + ) + centers = split_cells(bright_indices, settings) # for some reason, same with pytorch, it's shifted by 1. Probably rounding expected = {(10, 11, 11), (20, 11, 11)} diff --git a/tests/core/test_unit/test_detect/test_detect.py b/tests/core/test_unit/test_detect/test_detect.py index b74d2ed9..0ad3291c 100644 --- a/tests/core/test_unit/test_detect/test_detect.py +++ b/tests/core/test_unit/test_detect/test_detect.py @@ -1,23 +1,123 @@ -import multiprocessing +from unittest.mock import MagicMock -from cellfinder.core.detect.detect import _map_with_locks +import numpy as np +import pytest +from pytest_mock.plugin import MockerFixture +from cellfinder.core.detect.detect import main -def add_one(a: int) -> int: - return a + 1 +@pytest.fixture +def mocked_main(mocker: MockerFixture): + from cellfinder.core.detect.filters.volume.volume_filter import ( + VolumeFilter, + ) -def test_map_with_locks(): - args = [1, 2, 3, 2, 10] + process = mocker.patch.object(VolumeFilter, "process", autospec=True) + get_results = mocker.patch.object( + VolumeFilter, "get_results", autospec=True + ) - with multiprocessing.Pool(2) as worker_pool: - result_queue, locks = _map_with_locks(add_one, args, worker_pool) + return process, get_results - async_results = [result_queue.get() for _ in range(len(args))] - assert len(async_results) == len(locks) == len(args) - for lock in locks: - lock.release() +def test_main_bad_signal_arg(mocked_main): + # should work + main(signal_array=np.empty((5, 50, 50))) - results = [res.get() for res in async_results] - assert results == [2, 3, 4, 3, 11] + with pytest.raises(ValueError): + main(signal_array=np.empty((1, 1, 1, 1))) + + with pytest.raises(ValueError): + main(signal_array=np.empty((1, 1))) + + with pytest.raises(TypeError): + main(signal_array=np.empty((5, 50, 50), dtype=np.str_)) + + with pytest.raises(TypeError): + main(signal_array=np.empty((5, 50, 50), dtype=np.uint64)) + + with pytest.raises(TypeError): + main(signal_array=np.empty((5, 50, 50), dtype=np.int64)) + + +@pytest.mark.parametrize( + "dtype", + [ + np.uint8, + np.uint16, + np.uint32, + np.int8, + np.int16, + np.int32, + np.float32, + np.float64, + ], +) +def test_main_good_signal_arg(mocked_main, dtype): + main(signal_array=np.empty((5, 50, 50))) + + +def test_main_bad_or_default_args(mocked_main): + process, get_results = mocked_main + main( + signal_array=np.empty((5, 8, 19), dtype=np.uint16), + end_plane=-1, + batch_size=None, + torch_device="cpu", + ) + process.assert_called() + get_results.assert_called() + + vol_filter, mp_tile_processor, signal_array = process.call_args.args + ( + _, + splitting_settings, + ) = get_results.call_args.args + settings = vol_filter.settings + + assert settings.plane_shape == (8, 19) + assert settings.plane_original_np_dtype == np.uint16 + # for uint16 input we should use float32 + assert settings.filtering_dtype == np.float32 + assert settings.detection_dtype == np.uint64 + assert settings.end_plane == 5 + assert settings.n_planes == 5 + assert settings.batch_size == 4 # cpu default is 4 + + assert splitting_settings.torch_device == "cpu" + + +def test_main_planes_size(mocked_main): + process, get_results = mocked_main + main(signal_array=np.empty((5, 8, 19)), end_plane=4, start_plane=1) + + process.assert_called() + vol_filter, mp_tile_processor, signal_array = process.call_args.args + settings = vol_filter.settings + + assert settings.plane_shape == (8, 19) + assert settings.end_plane == 4 + assert settings.n_planes == 3 + + +def test_main_splitting_cpu_cuda(mocker: MockerFixture): + # checks that even if main filtering runs on cuda, the structure splitting + # only runs on cpu + # patch anything that would do with cuda - in case there's no cuda + vol: MagicMock = mocker.patch( + "cellfinder.core.detect.detect.VolumeFilter", autospec=True + ) + mocker.patch("cellfinder.core.detect.detect.TileProcessor", autospec=True) + + main( + signal_array=np.empty((5, 8, 19)), batch_size=None, torch_device="cuda" + ) + + settings = vol.call_args.kwargs["settings"] + (splitting_settings,) = vol.return_value.get_results.call_args.args + + assert settings.torch_device == "cuda" + assert settings.batch_size == 1 # cuda default is 1 + + assert splitting_settings.torch_device == "cpu" diff --git a/tests/core/test_unit/test_detect/test_filters/test_plane_filters/test_classical_filters.py b/tests/core/test_unit/test_detect/test_filters/test_plane_filters/test_classical_filters.py new file mode 100644 index 00000000..181c912c --- /dev/null +++ b/tests/core/test_unit/test_detect/test_filters/test_plane_filters/test_classical_filters.py @@ -0,0 +1,245 @@ +import numpy as np +import pytest +import torch +from brainglobe_utils.IO.image.load import read_with_dask + +from cellfinder.core.detect.filters.plane import TileProcessor +from cellfinder.core.detect.filters.plane.classical_filter import PeakEnhancer +from cellfinder.core.detect.filters.plane.tile_walker import TileWalker +from cellfinder.core.detect.filters.setup_filters import DetectionSettings +from cellfinder.core.tools.IO import fetch_pooch_directory +from cellfinder.core.tools.tools import ( + get_max_possible_int_value, + inference_wrapper, +) + + +def load_pooch_dir(test_data_registry, path): + data_path = fetch_pooch_directory(test_data_registry, path) + return read_with_dask(data_path) + + +@pytest.mark.parametrize( + "signal,enhanced,soma_diameter", + [ + ("edge_cells_brain/signal", "edge_cells_brain/peak_enhanced", 16), + ("bright_brain/signal", "bright_brain/peak_enhanced", 30), + ], +) +@pytest.mark.parametrize( + "torch_device,use_scipy", [("cpu", False), ("cpu", True), ("cuda", False)] +) +@inference_wrapper +def test_2d_filtering_peak_enhance_parity( + signal, + enhanced, + soma_diameter, + torch_device, + use_scipy, + test_data_registry, +): + # test that the pure 2d plane filtering (median, gauss, laplacian) matches + # exactly. We use float64 in the test, like original code we compare to + # used + if torch_device == "cuda" and not torch.cuda.is_available(): + pytest.skip("Cuda is not available") + + # check input data size/type is as expected + data = np.asarray(load_pooch_dir(test_data_registry, signal)) + enhanced = np.asarray(load_pooch_dir(test_data_registry, enhanced)) + assert data.dtype == np.uint16 + assert enhanced.dtype == np.uint32 + assert data.shape == enhanced.shape + + # convert to working type and send to cpu/cuda, use float64, the type + # used originally + data = data.astype(np.float64) + data = torch.from_numpy(data) + data = data.to(torch_device) + + # first check that the raw filters produce the same output + clip = get_max_possible_int_value(np.uint32) + enhancer = PeakEnhancer( + torch_device=torch_device, + dtype=torch.float64, + clipping_value=clip, + laplace_gaussian_sigma=soma_diameter * 0.2, + use_scipy=use_scipy, + ) + enhanced_our = enhancer.enhance_peaks(data) + enhanced_our = enhanced_our.cpu().numpy().astype(np.uint32) + + assert enhanced_our.shape == enhanced.shape + # the number of pixels per plane that are different + different = np.sum( + np.sum(np.logical_not(np.isclose(enhanced_our, enhanced)), axis=2), + axis=1, + ) + assert np.all(different == 0) + + +@pytest.mark.parametrize( + "signal,filtered,tiles,soma_diameter,max_different", + [ + ( + "edge_cells_brain/signal", + "edge_cells_brain/2d_filter", + "edge_cells_brain/tiles", + 16, + 1, + ), + ( + "bright_brain/signal", + "bright_brain/2d_filter", + "bright_brain/tiles", + 30, + 2, + ), + ], +) +@pytest.mark.parametrize( + "torch_device,use_scipy", [("cpu", False), ("cpu", True), ("cuda", False)] +) +@inference_wrapper +def test_2d_filtering_parity( + signal, + filtered, + tiles, + soma_diameter, + max_different, + torch_device, + use_scipy, + test_data_registry, +): + # test that the pixels marked as bright after 2d plane filtering matches + # now we don't always use float64, but the bright pixels will still stay + # the same. Unlike test_2d_filtering_peak_enhance, we use float32 if the + # input data fits in it, like we do in the codebase. So we want to be sure + # that the number of bright pixels doesn't change much. Because running at + # full float64 is expensive + if torch_device == "cuda" and not torch.cuda.is_available(): + pytest.skip("Cuda is not available") + + # check input data size/type is as expected + data = np.asarray(load_pooch_dir(test_data_registry, signal)) + filtered = np.asarray(load_pooch_dir(test_data_registry, filtered)) + tiles = np.asarray(load_pooch_dir(test_data_registry, tiles)) + assert data.dtype == np.uint16 + assert filtered.dtype == np.uint16 + assert data.shape == filtered.shape + + settings = DetectionSettings(plane_original_np_dtype=np.uint16) + # convert to working type and send to cpu/cuda + data = torch.from_numpy(settings.filter_data_converter_func(data)) + data = data.to(torch_device) + + tile_processor = TileProcessor( + plane_shape=data[0, :, :].shape, + clipping_value=settings.clipping_value, + threshold_value=settings.threshold_value, + soma_diameter=soma_diameter, + log_sigma_size=0.2, + n_sds_above_mean_thresh=10, + torch_device=torch_device, + dtype=settings.filtering_dtype.__name__, + use_scipy=use_scipy, + ) + + # apply filter and get data back + filtered_our, tiles_our = tile_processor.get_tile_mask(data) + filtered_our = filtered_our.cpu().numpy().astype(np.uint16) + tiles_our = tiles_our.cpu().numpy() + + assert filtered_our.shape == filtered.shape + # we don't care about exact pixel values, only which pixels are marked + # bright and which aren't. Bright per plane + bright = np.sum( + np.sum(filtered == settings.threshold_value, axis=2), axis=1 + ) + bright_our = np.sum( + np.sum(filtered_our == settings.threshold_value, axis=2), axis=1 + ) + # the number of pixels different should be less than 2! + assert np.all(np.less(np.abs(bright - bright_our), max_different + 1)) + + # the in/out of brain tiles though should be identical + assert tiles_our.shape == tiles.shape + assert tiles_our.dtype == tiles.dtype + assert np.array_equal(tiles_our, tiles) + + +@pytest.mark.parametrize( + "plane_size", + [(1, 2), (2, 1), (2, 2), (2, 3), (3, 3), (2, 5), (22, 33), (200, 200)], +) +@inference_wrapper +def test_2d_filter_padding(plane_size): + # check that filter padding works correctly for different sized inputs - + # even if the input is smaller than filter sizes + settings = DetectionSettings(plane_original_np_dtype=np.uint16) + data = np.random.randint(0, 500, size=(1, *plane_size)) + data = data.astype(settings.filtering_dtype) + + tile_processor = TileProcessor( + plane_shape=plane_size, + clipping_value=settings.clipping_value, + threshold_value=settings.threshold_value, + soma_diameter=16, + log_sigma_size=0.2, + n_sds_above_mean_thresh=10, + torch_device="cpu", + dtype=settings.filtering_dtype.__name__, + use_scipy=False, + ) + + filtered, _ = tile_processor.get_tile_mask(torch.from_numpy(data)) + filtered = filtered.numpy() + assert filtered.shape == data.shape + + +@inference_wrapper +def test_even_filter_kernel(): + with pytest.raises(ValueError): + try: + n = PeakEnhancer.median_filter_size + PeakEnhancer.median_filter_size = 4 + PeakEnhancer( + "cpu", + torch.float32, + clipping_value=5, + laplace_gaussian_sigma=3.0, + use_scipy=False, + ) + finally: + PeakEnhancer.median_filter_size = n + + enhancer = PeakEnhancer( + "cpu", + torch.float32, + clipping_value=5, + laplace_gaussian_sigma=3.0, + use_scipy=False, + ) + + assert enhancer.gaussian_filter_size % 2 + + _, _, x, y = enhancer.lap_kernel.shape + assert x % 2, "Should be odd" + assert y % 2, "Should be odd" + assert x == y + + +@pytest.mark.parametrize( + "sizes", + [((1, 1), (1, 1)), ((1, 2), (1, 1)), ((2, 1), (1, 1)), ((22, 33), (3, 4))], +) +@inference_wrapper +def test_tile_walker_size(sizes, soma_diameter=5): + plane_size, tile_size = sizes + walker = TileWalker(plane_size, soma_diameter=soma_diameter) + assert walker.tile_height == 10 + assert walker.tile_width == 10 + + data = torch.rand((1, *plane_size), dtype=torch.float32) + tiles = walker.get_bright_tiles(data) + assert tiles.shape == (1, *tile_size) diff --git a/tests/core/test_unit/test_detect/test_filters/test_setup_filters.py b/tests/core/test_unit/test_detect/test_filters/test_setup_filters.py new file mode 100644 index 00000000..12c2ce1d --- /dev/null +++ b/tests/core/test_unit/test_detect/test_filters/test_setup_filters.py @@ -0,0 +1,131 @@ +import pickle + +import numpy as np +import pytest + +import cellfinder.core.tools.tools as tools +from cellfinder.core.detect.filters.setup_filters import DetectionSettings + + +@pytest.mark.parametrize( + "in_dtype,filter_dtype", + [ + (np.uint8, np.float32), + (np.uint16, np.float32), + (np.uint32, np.float64), + (np.int8, np.float32), + (np.int16, np.float32), + (np.int32, np.float64), + (np.float32, np.float32), + (np.float64, np.float64), + ], +) +@pytest.mark.parametrize( + "detect_dtype", + [ + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.int8, + np.int16, + np.int32, + np.int64, + np.float32, + np.float64, + ], +) +def test_good_input_dtype(in_dtype, filter_dtype, detect_dtype): + """ + These input and filter types doesn't require any conversion because the + filter type we use for the given input type is large enough to not need + scaling. So data should be identical after conversion to filtering type. + """ + settings = DetectionSettings( + plane_original_np_dtype=in_dtype, detection_dtype=detect_dtype + ) + converter = settings.filter_data_converter_func + detection_converter = settings.detection_data_converter_func + + assert settings.filtering_dtype == filter_dtype + assert settings.detection_dtype == detect_dtype + + # min input value can be converted to filter/detection type + src = np.full( + 5, tools.get_min_possible_int_value(in_dtype), dtype=in_dtype + ) + dest = converter(src) + assert np.array_equal(src, dest) + assert dest.dtype == filter_dtype + # it is safe to do this conversion for any data type because it only uses + # the soma_centre_value, and ignores everything else (e.g. outside range) + assert detection_converter(dest).dtype == detect_dtype + + # typical input value can be converted to filter/detection type + src = np.full(5, 3, dtype=in_dtype) + dest = converter(src) + assert np.array_equal(src, dest) + assert dest.dtype == filter_dtype + assert detection_converter(dest).dtype == detect_dtype + + # max input value can be converted to filter/detection type + src = np.full( + 5, tools.get_max_possible_int_value(in_dtype), dtype=in_dtype + ) + dest = converter(src) + assert np.array_equal(src, dest) + assert dest.dtype == filter_dtype + assert detection_converter(dest).dtype == detect_dtype + + # soma_centre_value can be converted to filter/detection type + src = np.full(5, settings.soma_centre_value, dtype=in_dtype) + dest = converter(src) + # data type is larger - so value is unchanged + assert np.array_equal(src, dest) + assert dest.dtype == filter_dtype + # for detect, we convert soma_centre_value to detection_soma_centre_value + detect = detection_converter(dest) + assert detect.dtype == detect_dtype + assert np.all(detect == settings.detection_soma_centre_value) + + +@pytest.mark.parametrize("in_dtype", [np.uint64, np.int64]) +def test_bad_input_dtype(in_dtype): + """ + For this input type, to be able to fit it into our largest filtering + type - float64 we'd need to scale the data. Although `converter` can do + it, we don't support it right now (maybe as an option?) + """ + settings = DetectionSettings(plane_original_np_dtype=in_dtype) + + with pytest.raises(TypeError): + # do assert to quiet linter complaints + assert settings.filter_data_converter_func + + with pytest.raises(TypeError): + assert settings.filtering_dtype + + # detection type should be available + assert settings.detection_dtype == np.uint64 + + +def test_pickle_settings(): + settings = DetectionSettings() + + # get some properties, both cached and not cached + assert settings.filter_data_converter_func is not None + assert settings.filtering_dtype is not None + assert settings.detection_dtype is not None + assert settings.threshold_value is not None + assert settings.plane_shape is not None + + # make sure pickle works + s = pickle.dumps(settings) + assert s + + +def test_bad_ball_z_size(): + settings = DetectionSettings(ball_z_size_um=0) + with pytest.raises(ValueError): + # do something with value to quiet linter + assert settings.ball_z_size diff --git a/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_ball_filter.py b/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_ball_filter.py new file mode 100644 index 00000000..2841af0b --- /dev/null +++ b/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_ball_filter.py @@ -0,0 +1,42 @@ +import pytest + +from cellfinder.core.detect.filters.volume.ball_filter import BallFilter + +bf_kwargs = { + "plane_height": 50, + "plane_width": 50, + "ball_xy_size": 3, + "ball_z_size": 3, + "overlap_fraction": 0.5, + "threshold_value": 1, + "soma_centre_value": 1, + "tile_height": 10, + "tile_width": 10, + "dtype": "float32", +} + + +def test_filter_not_ready(): + bf = BallFilter(**bf_kwargs) + assert not bf.ready + + with pytest.raises(TypeError): + bf.get_processed_planes() + + with pytest.raises(TypeError): + bf.walk() + + +@pytest.mark.parametrize( + "sizes", [(1, 0, 0), (2, 1, 0), (3, 1, 1), (4, 2, 1), (5, 2, 2), (6, 3, 2)] +) +def test_filter_unprocessed_planes(sizes): + kernel_size, start_offset, remaining = sizes + assert kernel_size == start_offset + 1 + remaining + + kwargs = bf_kwargs.copy() + kwargs["ball_z_size"] = kernel_size + bf = BallFilter(**kwargs) + + assert bf.first_valid_plane == start_offset + assert bf.remaining_planes == remaining diff --git a/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_connected_components_labelling.py b/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_connected_components_labelling.py index a8028ede..31fd4295 100644 --- a/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_connected_components_labelling.py +++ b/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_connected_components_labelling.py @@ -1,9 +1,12 @@ +from typing import Type + import numpy as np import pytest from cellfinder.core.detect.filters.volume.structure_detection import ( CellDetector, ) +from cellfinder.core.tools.tools import get_max_possible_int_value @pytest.mark.parametrize( @@ -21,7 +24,9 @@ (258, np.uint32), ], ) -def test_connect_four_limits(linear_size, datatype): +def test_connect_four_limits( + linear_size: int, datatype: Type[np.number] +) -> None: """ Test for `connect_four` with a rectangular plane (2-to-1 length ratio) containing a checkerboard of pixels marked as cells ("structures"). @@ -36,12 +41,15 @@ def test_connect_four_limits(linear_size, datatype): * there is exactly one structure with the maximum id... * ...and that structure is in the expected place (top-right pixel) """ - SOMA_CENTRE_VALUE = np.iinfo(datatype).max - checkerboard = np.zeros((linear_size * 2, linear_size), dtype=datatype) - for i in range(linear_size * 2): - for j in range(linear_size): - if (i + j) % 2 == 0: - checkerboard[i, j] = SOMA_CENTRE_VALUE + height = linear_size * 2 + width = linear_size + # use a very large value - similar to how it is normally used + soma_centre_value = get_max_possible_int_value(datatype) + + checkerboard = np.zeros((height, width), dtype=datatype) + i = np.arange(height)[:, np.newaxis] # rows + j = np.arange(width)[np.newaxis, :] # cols + checkerboard[(i + j) % 2 == 0] = soma_centre_value actual_nonzeros = np.count_nonzero(checkerboard) expected_nonzeros = linear_size**2 @@ -49,11 +57,14 @@ def test_connect_four_limits(linear_size, datatype): actual_nonzeros == expected_nonzeros ), "Checkerboard didn't have the expected number of non-zeros" - cell_detector = CellDetector(linear_size * 2, linear_size, 0) + cell_detector = CellDetector(height, width, 0, soma_centre_value) labelled_plane = cell_detector.connect_four(checkerboard, None) one_count = np.count_nonzero(labelled_plane == 1) assert one_count == 1, "There was not exactly one pixel with label 1." assert ( - labelled_plane[linear_size * 2 - 1, linear_size - 1] == actual_nonzeros + labelled_plane[height - 1, width - 1] == actual_nonzeros ), "The last labelled pixel did not have the maximum struct id." + assert np.all( + (labelled_plane != 0) == (checkerboard != 0) + ), "Structures should be exactly where centers were marked" diff --git a/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_structure_detection.py b/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_structure_detection.py index d1e1af7a..9623fedd 100644 --- a/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_structure_detection.py +++ b/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_structure_detection.py @@ -1,6 +1,9 @@ +from typing import Dict, List, Tuple, Type + import numpy as np import pytest +from cellfinder.core.detect.filters.setup_filters import DetectionSettings from cellfinder.core.detect.filters.volume.structure_detection import ( CellDetector, Point, @@ -9,7 +12,9 @@ ) -def coords_to_points(coords_arrays): +def coords_to_points( + coords_arrays: Dict[int, np.ndarray] +) -> Dict[int, List[Point]]: # Convert from arrays to dicts coords = {} for sid in coords_arrays: @@ -26,15 +31,17 @@ def coords_to_points(coords_arrays): (np.uint32, 2**32 - 1), (np.uint16, 2**16 - 1), (np.uint8, 2**8 - 1), + (np.float32, 2**23), # mantissa determine whole max int representable + (np.float64, 2**52), ], ) -def test_get_non_zero_dtype_min(dtype, expected): +def test_get_non_zero_dtype_min(dtype: Type[np.number], expected: int) -> None: assert get_non_zero_dtype_min(np.arange(10, dtype=dtype)) == 1 assert get_non_zero_dtype_min(np.zeros(10, dtype=dtype)) == expected @pytest.fixture() -def three_d_cross(): +def three_d_cross() -> np.ndarray: return np.array( [ [[0, 0, 0], [0, 1, 0], [0, 0, 0]], @@ -45,12 +52,12 @@ def three_d_cross(): @pytest.fixture() -def structure(three_d_cross) -> np.ndarray: +def structure(three_d_cross: np.ndarray) -> np.ndarray: coords = np.array(np.where(three_d_cross)).transpose() return coords -def test_get_structure_centre(structure): +def test_get_structure_centre(structure: np.ndarray) -> None: result_point = get_structure_centre(structure) assert (result_point[0], result_point[1], result_point[2]) == ( 1, @@ -65,7 +72,7 @@ def test_get_structure_centre(structure): # Each item in the test data contains: # -# - A list of indices to mark as structure pixels (ordering: [x, z, y]) +# - A list of indices to mark as structure pixels (ordering: [z, y, x]) # - A dict of expected structure coordinates test_data = [ ( @@ -75,12 +82,12 @@ def test_get_structure_centre(structure): ), ( # Two pixels connected in a single structure along x - [(0, 0, 0), (0, 1, 0)], + [(0, 0, 0), (0, 0, 1)], {1: [Point(0, 0, 0), Point(1, 0, 0)]}, ), ( # Two pixels connected in a single structure along y - [(0, 0, 0), (0, 0, 1)], + [(0, 0, 0), (0, 1, 0)], {1: [Point(0, 0, 0), Point(0, 1, 0)]}, ), ( @@ -90,13 +97,13 @@ def test_get_structure_centre(structure): ), ( # Four pixels all connected and spread across x-y-z - [(0, 0, 0), (1, 0, 0), (1, 1, 0), (1, 0, 1)], + [(0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)], {1: [Point(0, 0, 0), Point(0, 0, 1), Point(1, 0, 1), Point(0, 1, 1)]}, ), ( # three initially disconnected pixels that then get merged # by a fourth pixel - [(1, 1, 0), (0, 1, 1), (1, 0, 1), (1, 1, 1)], + [(0, 1, 1), (1, 0, 1), (1, 1, 0), (1, 1, 1)], { 1: [ Point(1, 1, 0), @@ -108,7 +115,7 @@ def test_get_structure_centre(structure): ), ( # Three pixels in x-y plane that require structure merging - [(1, 0, 0), (0, 1, 0), (1, 1, 0)], + [(0, 0, 1), (1, 0, 0), (1, 0, 1)], { 1: [ Point(1, 0, 0), @@ -119,7 +126,7 @@ def test_get_structure_centre(structure): ), ( # Two disconnected single-pixel structures - [(0, 0, 0), (0, 2, 0)], + [(0, 0, 0), (0, 0, 2)], {1: [Point(0, 0, 0)], 2: [Point(2, 0, 0)]}, ), ( @@ -130,16 +137,77 @@ def test_get_structure_centre(structure): ] -@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64]) +# Due to https://github.com/numba/numba/issues/9576 we need to run np.uint64 +# before smaller sizes +# we don't use floats in cell detection, but it works +@pytest.mark.parametrize( + "dtype", + [ + np.uint64, + np.int64, + np.uint8, + np.int8, + np.uint16, + np.int16, + np.uint32, + np.int32, + np.float32, + np.float64, + ], +) @pytest.mark.parametrize("pixels,expected_coords", test_data) -def test_detection(dtype, pixels, expected_coords): - data = np.zeros((depth, width, height)).astype(dtype) - detector = CellDetector(width, height, start_z=0) +@pytest.mark.parametrize( + "detect_dtype", + [ + np.uint64, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.int8, + np.int16, + np.int32, + np.float32, + np.float64, + ], +) +def test_detection( + dtype: Type[np.number], + pixels: List[Tuple[int, int, int]], + expected_coords: Dict[int, List[Point]], + detect_dtype: Type[np.number], +) -> None: + # original dtype is the dtype of the original data. filtering_dtype + # is the data type used during ball filtering. Currently, it can be at most + # float64. So original dtype cannot support 64-bit ints because it won't + # fit in float64. + # detection_dtype is the type that must be used during detection to fit a + # count of the number of cells present + settings = DetectionSettings( + plane_original_np_dtype=dtype, detection_dtype=detect_dtype + ) + + # should raise error for (u)int64 - too big for float64 so can't filter + if dtype in (np.uint64, np.int64): + with pytest.raises(TypeError): + filtering_dtype = settings.filtering_dtype + # do something with so linter doesn't complain + assert filtering_dtype + return + # pretend we got the intensity data from filtering + data = np.zeros((depth, height, width), dtype=settings.filtering_dtype) # This is the value used by BallFilter to mark pixels - max_poss_value = np.iinfo(dtype).max for pix in pixels: - data[pix] = max_poss_value + data[pix] = settings.soma_centre_value + + # convert intensity data to values expected by detector + data = settings.detection_data_converter_func(data) + + detector = CellDetector(height, width, 0, 0) + # similar to numba issue #9576 we can't pass to init a large value once + # a 32 bit type was used for detector. So pass it with custom method + detector._set_soma(settings.detection_soma_centre_value) previous_plane = None for plane in data: @@ -147,3 +215,28 @@ def test_detection(dtype, pixels, expected_coords): coords = detector.get_structures() assert coords_to_points(coords) == expected_coords + + +def test_add_point(): + detector = CellDetector(50, 50, 0, 0) + detector.add_point(0, (5, 5, 5)) + detector.add_point(0, (6, 5, 5)) + detector.add_point(1, (7, 5, 5)) + + +def test_add_points(): + detector = CellDetector(50, 50, 0, 0) + + points = np.array([(5, 5, 5), (6, 6, 6)], dtype=np.uint32) + points2 = np.array([(7, 5, 5), (8, 6, 6)], dtype=np.uint32) + points3 = np.array([(8, 5, 5), (8, 6, 6)], dtype=np.uint32) + detector.add_points(0, points) + detector.add_points(0, points2) + detector.add_points(1, points3) + + +def test_change_plane_size(): + # check that changing plane size errors out + detector = CellDetector(50, 50, 0, 5000) + with pytest.raises(ValueError): + detector.process(np.zeros((100, 50), dtype=np.uint32), None) diff --git a/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_volume_filter.py b/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_volume_filter.py new file mode 100644 index 00000000..2d1ff70f --- /dev/null +++ b/tests/core/test_unit/test_detect/test_filters/test_volume_filters/test_volume_filter.py @@ -0,0 +1,279 @@ +import numpy as np +import pytest +import torch +from brainglobe_utils.IO.cells import get_cells_xml +from brainglobe_utils.IO.image.load import read_with_dask +from pytest_mock.plugin import MockerFixture + +from cellfinder.core.detect.detect import main +from cellfinder.core.tools.IO import fetch_pooch_directory +from cellfinder.core.tools.threading import ExecutionFailure +from cellfinder.core.tools.tools import get_max_possible_int_value + +# even though we are testing volume filter as unit test, we are running through +# main and mocking VolumeFilter because that's the easiest way to instantiate +# it with proper args and run it + + +class ExceptionTest(Exception): + pass + + +def load_pooch_dir(test_data_registry, path): + data_path = fetch_pooch_directory(test_data_registry, path) + return read_with_dask(data_path) + + +def raise_exception(*args, **kwargs): + raise ExceptionTest("Bad times") + + +def run_main_assert_exception(): + # run volume filter - it should raise the ExceptionTest via + # ExecutionFailure from thread/process + try: + # must be on cpu b/c only on cpu do we do 2d filtering in subprocess + # lots of planes so it doesn't end naturally quickly + main( + signal_array=np.zeros((500, 500, 500), dtype=np.uint16), + torch_device="cpu", + ) + assert False, "should have raised exception" + except ExecutionFailure as e: + e2 = e.__cause__ + assert type(e2) is ExceptionTest and e2.args == ("Bad times",) + + +def test_2d_filter_process_exception(mocker: MockerFixture): + # check sub-process that does 2d filter. That exception ends things clean + mocker.patch( + "cellfinder.core.detect.filters.volume.volume_filter._plane_filter", + new=raise_exception, + ) + run_main_assert_exception() + + +def test_2d_filter_feeder_thread_exception(mocker: MockerFixture): + # check data feeder thread. That exception ends things clean + from cellfinder.core.detect.filters.volume.volume_filter import ( + VolumeFilter, + ) + + mocker.patch.object( + VolumeFilter, "_feed_signal_batches", new=raise_exception + ) + run_main_assert_exception() + + +def test_2d_filter_cell_detection_thread_exception(mocker: MockerFixture): + # check cell detection thread. That exception ends things clean + from cellfinder.core.detect.filters.volume.volume_filter import ( + VolumeFilter, + ) + + mocker.patch.object( + VolumeFilter, "_run_filter_thread", new=raise_exception + ) + run_main_assert_exception() + + +def test_3d_filter_main_thread_exception(mocker: MockerFixture): + # raises exception in the _process method in the main thread - after the + # subprocess and secondary threads were spun up. This makes sure that those + # subprocess and threads don't get stuck if main thread crashes + from cellfinder.core.detect.filters.volume.volume_filter import ( + VolumeFilter, + ) + + mocker.patch.object(VolumeFilter, "_process", new=raise_exception) + with pytest.raises(ExceptionTest): + main(signal_array=np.zeros((500, 500, 500)), torch_device="cpu") + + +@pytest.mark.parametrize("batch_size", [1, 2, 3, 4]) +def test_feeder_thread_batch(batch_size: int): + # checks various batch sizes to see if there are issues + # this also tests a batch size of 3 but 5 planes. So the feeder thread + # will feed us a batch of 3 and a batch of 2. It tests that filters can + # handle unequal batch sizes + planes = [] + + def callback(z): + planes.append(z) + + main( + signal_array=np.zeros((5, 50, 50)), + torch_device="cpu", + batch_size=batch_size, + callback=callback, + ) + + assert planes == list(range(1, 4)) + + +def test_not_enough_planes(): + # checks that even if there are not enough planes for volume filtering, it + # doesn't raise errors or gets stuck + planes = [] + + def callback(z): + planes.append(z) + + main( + signal_array=np.zeros((2, 50, 50)), + torch_device="cpu", + callback=callback, + ) + + assert not planes + + +def test_filtered_plane_range(mocker: MockerFixture): + # check that even if input data is negative, filtered data is non-negative + detector = mocker.patch( + "cellfinder.core.detect.filters.volume.volume_filter.CellDetector", + autospec=True, + ) + + # input data in range (-500, 500) + data = ((np.random.random((6, 50, 50)) - 0.5) * 1000).astype(np.float32) + data[1:3, 25:30, 25:30] = 5000 + main(signal_array=data) + + calls = detector.return_value.process.call_args_list + assert len(calls) + for call in calls: + plane, *_ = call.args + # should have either zero or soma value or both + assert len(np.unique(plane)) in (1, 2) + assert np.min(plane) >= 0 + + +def test_saving_filtered_planes(tmp_path): + # check that we can save filtered planes + path = tmp_path / "save_planes" + path.mkdir() + + main( + signal_array=np.zeros((6, 50, 50)), + save_planes=True, + plane_directory=str(path), + ) + + files = [p.name for p in path.iterdir() if p.is_file()] + # we're skipping first and last plane that isn't filtered due to kernel + assert len(files) == 4 + assert set(files) == { + "plane_0002.tif", + "plane_0003.tif", + "plane_0004.tif", + "plane_0005.tif", + } + + +def test_saving_filtered_planes_no_dir(): + # asked to save but didn't provide directory + with pytest.raises(ExecutionFailure) as exc_info: + main( + signal_array=np.zeros((6, 50, 50)), + save_planes=True, + plane_directory=None, + ) + assert type(exc_info.value.__cause__) is ValueError + + +@pytest.mark.parametrize( + "signal,filtered,cells,soma_diameter,voxel_sizes,cell_tol", + [ + ( + "edge_cells_brain/signal", + "edge_cells_brain/3d_filter", + "edge_cells_brain/detected_cells.xml", + 16, + (5, 2, 2), + 0, + ), + ( + "bright_brain/signal", + "bright_brain/3d_filter", + "bright_brain/detected_cells.xml", + 30, + (5.06, 4.5, 4.5), + 1, + ), + ], +) +@pytest.mark.parametrize( + "torch_device,use_scipy", [("cpu", False), ("cpu", True), ("cuda", False)] +) +def test_3d_filtering( + signal, + filtered, + cells, + soma_diameter, + voxel_sizes, + cell_tol, + torch_device, + use_scipy, + no_free_cpus, + tmp_path, + test_data_registry, +): + # test that the full 2d/3d matches the saved data + if torch_device == "cuda" and not torch.cuda.is_available(): + pytest.skip("Cuda is not available") + + # check input data size/type is as expected + data = np.asarray(load_pooch_dir(test_data_registry, signal)) + filtered = np.asarray(load_pooch_dir(test_data_registry, filtered)) + cells = get_cells_xml(test_data_registry.fetch(cells)) + assert data.dtype == np.uint16 + assert filtered.dtype == np.uint32 + assert data.shape == (filtered.shape[0] + 2, *filtered.shape[1:]) + + path = tmp_path / "3d_filter" + path.mkdir() + cells_our = main( + signal_array=data, + voxel_sizes=voxel_sizes, + soma_diameter=soma_diameter, + max_cluster_size=100000, + ball_xy_size=6, + ball_z_size=15, + ball_overlap_fraction=0.6, + soma_spread_factor=1.4, + n_free_cpus=no_free_cpus, + log_sigma_size=0.2, + n_sds_above_mean_thresh=10, + save_planes=True, + plane_directory=str(path), + batch_size=1, + ) + + filtered_our = np.asarray(read_with_dask(str(path))) + assert filtered_our.shape == filtered.shape + assert filtered_our.dtype == np.uint16 + # we need to rescale our data because the original data saved to uint32 + # (even though it fit in uint16), so rescale the max value the soma was + # saved as, to make comparison better + filtered_our = filtered_our.astype(np.uint32) + max16 = get_max_possible_int_value(np.uint16) + max32 = get_max_possible_int_value(np.uint32) + filtered_our[filtered_our == max16] = max32 + + # we only care about the soma value as only that is used in the next step + # in cell detection, so set everything else to zero + filtered_our[filtered_our != max16] = 0 + filtered[filtered_our != max32] = 0 + + # the number of pixels per plane that are different (marked bright/not) + diff = np.sum(np.sum(filtered_our != filtered, axis=2), axis=1) + # 100% same + assert np.all(diff == 0) + + # check that the resulting cells are the same. We expect them to be the + # same, cells at different pos count as different + cells_our = set(cells_our) + cells = set(cells) + diff = len(cells_our - cells) + len(cells - cells_our) + assert diff <= cell_tol diff --git a/tests/core/test_unit/test_tools/test_threading.py b/tests/core/test_unit/test_tools/test_threading.py new file mode 100644 index 00000000..0a341068 --- /dev/null +++ b/tests/core/test_unit/test_tools/test_threading.py @@ -0,0 +1,141 @@ +import pytest + +from cellfinder.core.tools.threading import ( + EOFSignal, + ExceptionWithQueueMixIn, + ExecutionFailure, + ProcessWithException, + ThreadWithException, +) + +cls_to_test = [ThreadWithException, ProcessWithException] + + +class ExceptionTest(Exception): + pass + + +def raise_exc(*args): + raise ExceptionTest("I'm a test") + + +def raise_exc_about_self(*args): + arg_msg = "No arg" + if args and args[-1] == "7": + arg_msg = "Got 7" + + if args and isinstance(args[0], ExceptionWithQueueMixIn): + raise ExceptionTest("Got self" + arg_msg) + raise ExceptionTest("No self" + arg_msg) + + +def do_nothing(*args): + pass + + +def send_back_msg(thread: ExceptionWithQueueMixIn): + # do this single op and exit + thread.send_msg_to_mainthread(("back", thread.get_msg_from_mainthread())) + + +def send_multiple_msgs(thread: ExceptionWithQueueMixIn): + thread.send_msg_to_mainthread("hello") + thread.send_msg_to_mainthread("to") + thread.send_msg_to_mainthread("you") + + +@pytest.mark.parametrize("cls", cls_to_test) +def test_reraise_exception_in_main_thread_from_thread(cls): + # exception in thread will show up in main thread + thread = cls(target=raise_exc, args=(1, "4")) + thread.start() + + with pytest.raises(ExecutionFailure) as exc_info: + thread.get_msg_from_thread() + assert type(exc_info.value.__cause__) is ExceptionTest + assert exc_info.value.__cause__.args[0] == "I'm a test" + # thread will have exited + thread.join() + + +@pytest.mark.parametrize("cls", cls_to_test) +def test_get_eof_in_main_thread_from_thread(cls): + # we should get eof when thread exits + thread = cls(target=do_nothing, args=(1, "4")) + thread.start() + + assert thread.get_msg_from_thread() is EOFSignal + # thread will have exited + thread.join() + + +@pytest.mark.parametrize("cls", cls_to_test) +def test_get_eof_in_thread_from_main_thread(cls): + # thread should get eof when we send it + thread = cls(target=send_back_msg, pass_self=True) + thread.start() + + thread.notify_to_end_thread() + assert thread.get_msg_from_thread() == ("back", EOFSignal) + # thread will have exited + thread.join() + + +@pytest.mark.parametrize("args", [(), (55, "7")]) +@pytest.mark.parametrize("pass_self", [(True, "Got self"), (False, "No self")]) +@pytest.mark.parametrize("cls", cls_to_test) +def test_pass_self_arg_to_func(cls, pass_self, args): + # check that passing self to the function works with/without args + # without passing self, there's no way for the thread to respond + # other than by the text of an error + thread = cls( + target=raise_exc_about_self, pass_self=pass_self[0], args=args + ) + thread.start() + + arg_msg = "No arg" + if args: + arg_msg = "Got 7" + + with pytest.raises(ExecutionFailure) as exc_info: + thread.get_msg_from_thread() + assert type(exc_info.value.__cause__) is ExceptionTest + assert exc_info.value.__cause__.args[0] == pass_self[1] + arg_msg + thread.join() + + +@pytest.mark.parametrize("cls", cls_to_test) +def test_send_to_and_recv_from_thread(cls): + # tests sending to the thread and receiving a message from the thread + thread = cls(target=send_back_msg, pass_self=True) + thread.start() + + msg = thread.get_msg_from_thread(thread.send_msg_to_thread("hello")) + assert msg == ("back", "hello") + thread.join() + + +@pytest.mark.parametrize("cls", cls_to_test) +def test_get_multiple_messages(cls): + # tests getting multiple msgs from thread + thread = cls(target=send_multiple_msgs, pass_self=True) + thread.start() + + assert thread.get_msg_from_thread() == "hello" + assert thread.get_msg_from_thread() == "to" + assert thread.get_msg_from_thread() == "you" + assert thread.get_msg_from_thread() == EOFSignal + thread.join() + + +@pytest.mark.parametrize("cls", cls_to_test) +def test_skip_until_eof(cls): + # tests skipping reading everything queued until we get eof + thread = cls(target=send_multiple_msgs, pass_self=True) + thread.start() + + thread.clear_remaining() + # it knows that there are no further messages because the thread sent an + # eof to main-thread, which is the last thing thread does before exiting + assert thread._saw_eof + thread.join() diff --git a/tests/core/test_unit/test_tools/test_tools_general.py b/tests/core/test_unit/test_tools/test_tools_general.py index d3609cb4..9a3c573e 100644 --- a/tests/core/test_unit/test_tools/test_tools_general.py +++ b/tests/core/test_unit/test_tools/test_tools_general.py @@ -1,5 +1,3 @@ -import random - import numpy as np import pytest @@ -18,16 +16,195 @@ ) -def test_get_max_possible_value(): - num = random.randint(0, 100) - assert 255 == tools.get_max_possible_value(np.array(num, dtype=np.uint8)) - assert 65535 == tools.get_max_possible_value( - np.array(num, dtype=np.uint16) - ) - with pytest.raises( - ValueError, match="must be a numpy array of integer data type" - ): - tools.get_max_possible_value(np.array(num, dtype=np.float32)) +def test_inference_wrapper(): + did_run = False + + @tools.inference_wrapper + def my_func(val, other=None): + import torch + + nonlocal did_run + + assert torch.is_inference_mode_enabled() + assert val == 1 + assert other == 5 + did_run = True + + my_func(1, other=5) + assert did_run + + +# for float, the values come from the mantissa and it's the largest int value +# representable without losing significant digits +@pytest.mark.parametrize( + "dtype,value", + [ + (np.uint8, 2**8 - 1), + (np.uint16, 2**16 - 1), + (np.uint32, 2**32 - 1), + (np.uint64, 2**64 - 1), + (np.int8, 2**7 - 1), + (np.int16, 2**15 - 1), + (np.int32, 2**31 - 1), + (np.int64, 2**63 - 1), + (np.float32, 2**23), + (np.float64, 2**52), + ], +) +def test_get_max_possible_int_value(dtype, value): + assert tools.get_max_possible_int_value(dtype) == value + + +def test_get_max_possible_int_value_bad_dtype(): + with pytest.raises(ValueError): + tools.get_max_possible_int_value(np.str_) + + +# for float, the values come from the mantissa and it's the largest int value +# representable without losing significant digits +@pytest.mark.parametrize( + "dtype,value", + [ + (np.uint8, 0), + (np.uint16, 0), + (np.uint32, 0), + (np.uint64, 0), + (np.int8, -(2**7)), + (np.int16, -(2**15)), + (np.int32, -(2**31)), + (np.int64, -(2**63)), + (np.float32, -(2**23)), + (np.float64, -(2**52)), + ], +) +def test_get_min_possible_int_value(dtype, value): + assert tools.get_min_possible_int_value(dtype) == value + + +def test_get_min_possible_int_value_bad_dtype(): + with pytest.raises(ValueError): + tools.get_min_possible_int_value(np.str_) + + +@pytest.mark.parametrize( + "src_dtype", + [ + np.uint8, + np.int8, + np.uint16, + np.int16, + np.uint32, + np.int32, + np.uint64, + np.int64, + np.float32, + np.float64, + ], +) +@pytest.mark.parametrize( + "dest_dtype", [np.uint8, np.uint16, np.uint32, np.uint64] +) +def test_get_data_converter_bad_dtype_target(src_dtype, dest_dtype): + with pytest.raises(ValueError): + tools.get_data_converter(src_dtype, dest_dtype) + + +@pytest.mark.parametrize( + "src_dtype,dest_dtype", + [ + (np.uint8, np.float32), + (np.int8, np.float32), + (np.uint16, np.float32), + (np.int16, np.float32), + (np.float32, np.float32), + (np.uint8, np.float64), + (np.int8, np.float64), + (np.uint16, np.float64), + (np.int16, np.float64), + (np.uint32, np.float64), + (np.int32, np.float64), + (np.float32, np.float64), + (np.float64, np.float64), + ], +) +def test_get_data_converter_no_scaling(src_dtype, dest_dtype): + # for these, the source is smaller than dest so no need to scale because + # it'll fit directly into the dest dtype + converter = tools.get_data_converter(src_dtype, dest_dtype) + + # min value + if np.issubdtype(src_dtype, np.integer): + src_min_val = np.iinfo(src_dtype).min + else: + assert np.issubdtype(src_dtype, np.floating) + src_min_val = np.finfo(src_dtype).min + src = np.full(5, src_min_val, dtype=src_dtype) + dest = converter(src) + assert np.array_equal(src, dest) + assert dest.dtype == dest_dtype + + # other value + src = np.full(5, 10, dtype=src_dtype) + dest = converter(src) + assert np.array_equal(src, dest) + assert dest.dtype == dest_dtype + + # max value + src_max_val = tools.get_max_possible_int_value(src_dtype) + src = np.full(3, src_max_val, dtype=src_dtype) + dest = converter(src) + assert np.array_equal(src, dest) + assert dest.dtype == dest_dtype + + +@pytest.mark.parametrize( + "src_dtype,dest_dtype,divisor", + [ + (np.uint32, np.float32, (2**32 - 1) / 2**23), + (np.int32, np.float32, (2**31) / 2**23), + (np.float64, np.float32, (2**52) / 2**23), + ], +) +def test_get_data_converter_with_scaling(src_dtype, dest_dtype, divisor): + # for these, the source is larger than dest type so we need to scale by max + # value of each type, so it'll fit into the dest dtype + converter = tools.get_data_converter(src_dtype, dest_dtype) + + # min value + src_min_val = tools.get_min_possible_int_value(src_dtype) + src = np.full(5, src_min_val, dtype=src_dtype) + dest = converter(src) + assert np.allclose(src / divisor, dest) + assert dest.dtype == dest_dtype + + # other value + src = np.full(5, 10, dtype=src_dtype) + dest = converter(src) + assert np.allclose(src / divisor, dest) + assert dest.dtype == dest_dtype + + # max value + src_max_val = tools.get_max_possible_int_value(src_dtype) + src = np.full(3, src_max_val, dtype=src_dtype) + dest = converter(src) + assert np.allclose(src / divisor, dest) + assert dest.dtype == dest_dtype + + +@pytest.mark.parametrize( + "src_dtype,dest_dtype", + [ + (np.uint64, np.float32), + (np.int64, np.float32), + (np.uint64, np.float64), + (np.int64, np.float64), + ], +) +def test_get_data_converter_with_bad_scaling(src_dtype, dest_dtype): + # for these, to scale we'd need to have a type that is at least 64 bits, + # so we can scale down. But float64 is too small + with pytest.raises(ValueError): + tools.get_data_converter(src_dtype, dest_dtype) def test_union(): diff --git a/tests/data/pooch_registry.txt b/tests/data/pooch_registry.txt new file mode 100644 index 00000000..63a04222 --- /dev/null +++ b/tests/data/pooch_registry.txt @@ -0,0 +1,129 @@ +bright_brain/2d_filter/signal0001.tif b224baa0c0a84dfbe180f6579c918ef3becabe73cae0b309ca1b55b448cf0ad1 +bright_brain/2d_filter/signal0002.tif 31d930f449131237ef606b506bc4bca20a1a3c44f0081ebd9d8a5437e5e471c8 +bright_brain/2d_filter/signal0003.tif 026bd60fc0d8823df6531f2e87266bc565a65ea64249c055a01c708e45e4345d +bright_brain/2d_filter/signal0004.tif 38180851830b0a560b3c8d1209abc6675de0975693d3e9ab1038b421db92e889 +bright_brain/2d_filter/signal0005.tif 3f4bbd3407121d89d1dc120bce3d444855d444c38e8e41949c7e76f3755fdafb +bright_brain/3d_filter/plane_0002.tif dd5a8f7799f7854ed8626a5b35cc7c9611ebf52c350b0474cbebe46b1c6f7433 +bright_brain/3d_filter/plane_0003.tif bf595ac9e760d27d1a7bf4da264a65d551f7d0918a3f2b9fba80bcef8a17a064 +bright_brain/3d_filter/plane_0004.tif 64ec4f9c131545753b9ddac667b2033602ec594a9fbb10e83299de91599067ac +bright_brain/detected_cells.xml 6f8e6a316576e4b38d3b5c04d58442288403b8a470011c16d3303c12fe60e7b3 +bright_brain/peak_enhanced/signal0001.tif 6a25266b695dfd8c3cf858bdb7db93d1ddaaf91cbcb8f82dd67b3243b1fce471 +bright_brain/peak_enhanced/signal0002.tif 34d5a8457987811101d921c2553ab8ba60a731b3d3caf81d1a6b5d1fbc42bb93 +bright_brain/peak_enhanced/signal0003.tif cca9ac6e038fe608f0cb57a9a0ef990290a9c1463b1ae358290af60427e7c11b +bright_brain/peak_enhanced/signal0004.tif 8e5a35a51e8120702eade5cd1bb9d409117f81ad22f95f4d54bc24d859ed25fc +bright_brain/peak_enhanced/signal0005.tif 9e749bc6e761799a63ff77094ff0a8c1d796e665991e1108b6f9c7442aff2004 +bright_brain/signal/signal0001.tif 3fe85dd8e154447b7296daa24f5e0b5d00310f58d40726939792bb8aa9f7f4fc +bright_brain/signal/signal0002.tif 9091e4c8ebcd5a4d1d7f3719983b33a5e804aff40b11714a66f16ecc385ffe04 +bright_brain/signal/signal0003.tif 08f6d28bc6ede525c5bc708405ae7227e0897c2808ae13c3526a52e9d75daec5 +bright_brain/signal/signal0004.tif 7927520de0ab2cbd255ff9a4721edbfa20a8135706ebd863ee9ddecc1c17eb29 +bright_brain/signal/signal0005.tif b908df40dc08899b1731ee6d0ffa7274f5faae88377594a1aba997a835d20dd9 +bright_brain/tiles/signal0001.tif bf3a5478b8a0a0e7d0625641340fa7a36ea95eceb77d6cad08ccd652e2565ada +bright_brain/tiles/signal0002.tif bf3a5478b8a0a0e7d0625641340fa7a36ea95eceb77d6cad08ccd652e2565ada +bright_brain/tiles/signal0003.tif bf3a5478b8a0a0e7d0625641340fa7a36ea95eceb77d6cad08ccd652e2565ada +bright_brain/tiles/signal0004.tif bf3a5478b8a0a0e7d0625641340fa7a36ea95eceb77d6cad08ccd652e2565ada +bright_brain/tiles/signal0005.tif bf3a5478b8a0a0e7d0625641340fa7a36ea95eceb77d6cad08ccd652e2565ada +cellfinder-test-data.zip b0ef53b1530e4fa3128fcc0a752d0751909eab129d701f384fc0ea5f138c5914 +edge_cells_brain/2d_filter/signal0000.tif 4803df36346838928c2cc9afa5041cdc7f57fcac21eda55e1c3bd2f70761bd8d +edge_cells_brain/2d_filter/signal0001.tif 39a008a23e3b91e10b0dd327a229550c85260cf57fd627a758877c668dd65cba +edge_cells_brain/2d_filter/signal0002.tif 501a706b007872353b9a638b952c4499e4b7e41a421092d33b99f3ad37432eff +edge_cells_brain/2d_filter/signal0003.tif 8c3f3ada6f6d7c108f778e8a0805a9d93dbd396d28b1e8a935a701b35e3797ef +edge_cells_brain/2d_filter/signal0004.tif 81d203c47e2e1752620cc80d79047683a5a3947973b840f4afef56ec77f5e61d +edge_cells_brain/2d_filter/signal0005.tif be0759a6601155d09f1698a67cb8b1c590a1deb51c9ece2d2fe52e9ff1eb2194 +edge_cells_brain/2d_filter/signal0006.tif 41acc77bbe3b4eb3379746939f659d6ec5b7b36f5cc9970427a9cc0cd59c2e25 +edge_cells_brain/2d_filter/signal0007.tif d8516e2a5c59b950536166525e9cefe2be81ed54527f92e9bf269431f471532a +edge_cells_brain/2d_filter/signal0008.tif d59a4ac8275882fbb8b1947a07ae45716616302e35cdeb731529d1a8f521c298 +edge_cells_brain/2d_filter/signal0009.tif 2715c53774793b395cd1ee340eb3438617414d769c5f1e293fd5698588125960 +edge_cells_brain/2d_filter/signal0010.tif 525b30be7f86ae957c8e2edae1ecc958c869d21979c8452455356020df112523 +edge_cells_brain/2d_filter/signal0011.tif 83a808ac1be98329a3767213f3a4396721a33aa13cef252bfa8906b496173101 +edge_cells_brain/2d_filter/signal0012.tif 110042dfedbc8f0189ff1e9a04a5e01ae1cc4d5a0d593dc4884bdd4a7c1dbdaa +edge_cells_brain/2d_filter/signal0013.tif c71930b8e6ed5d52b0c3699553faf75da728e343f766da174ee15057ff531e43 +edge_cells_brain/2d_filter/signal0014.tif d4b20626c940a35f5995be0b5ad36557634df3af947db10a24843ff325f94d1b +edge_cells_brain/2d_filter/signal0015.tif 1f6ed840d97320421cb9cb9e48487126e428cca4135036642292f9b6572d98b8 +edge_cells_brain/2d_filter/signal0016.tif 6e975a52d7eb282ecffa7e0ed90d6c814347b6be2fde77842f80da97d1ede01c +edge_cells_brain/2d_filter/signal0017.tif 4f3ef278746323db7cf585b4c40fead0925ce7a193af6a10ed84e330a9fd16a0 +edge_cells_brain/2d_filter/signal0018.tif f07233fe5134dff9f9935f3335b847aa1bd83e44ab379e45a48906fa4c1c7403 +edge_cells_brain/2d_filter/signal0019.tif c581dc2533e01f1e1c8d937c364f37c67af83ab3f85efb02eb9b1150a2e591dd +edge_cells_brain/2d_filter/signal0020.tif 25224c7965908fa3a6229f15ff0f79db4e76e4e50d50b3813830f844a11fa529 +edge_cells_brain/3d_filter/plane_0002.tif 26a15013be4b4120a045da6209f384dc9551a11b2726c9f702cbf6ef1046f3d5 +edge_cells_brain/3d_filter/plane_0003.tif 0fd764081ea03d6a24a9ff167c6f5c648612914c4f9451a8d5be246704d9dcce +edge_cells_brain/3d_filter/plane_0004.tif 6d09059ac4b4b67d02143223cb88b25925433a9bf9feea01826979cc697555a8 +edge_cells_brain/3d_filter/plane_0005.tif 3f71392ca5ee4c4f1164f8220ede6ffb1f6c15fa611e7ab258825929d9574601 +edge_cells_brain/3d_filter/plane_0006.tif 518320cd8b56a9f88adb647614672e8d02824125e89355facc49e6a98353bd8c +edge_cells_brain/3d_filter/plane_0007.tif e0184c6791393856a6bb2c4955891992db2f66d6c82ceaf0b1ba63511363a195 +edge_cells_brain/3d_filter/plane_0008.tif 05eca0b171879e6e837325d299b98d6cac7c2134cd456b85dbc7ba64e51d5ee7 +edge_cells_brain/3d_filter/plane_0009.tif 25c398550efc467b82e89ecca8e8188082c0dbfca3e630d47b4e7a8f9cf77141 +edge_cells_brain/3d_filter/plane_0010.tif 9b02d5a99544245c5367e5f54a0daafd3f55be43bb66220903c6b6766144070f +edge_cells_brain/3d_filter/plane_0011.tif 820b6064971a73ab5c346dda0710cf665ad87d9caf75a4d00440da56a0390e93 +edge_cells_brain/3d_filter/plane_0012.tif de1633ab4807bf61370d4ef7ff13d6cd73c8241030396013f977371a9291ade3 +edge_cells_brain/3d_filter/plane_0013.tif 819003634068868c17e6b9cca71a82607047e0e7aab01c4c61fa63df7355d7db +edge_cells_brain/3d_filter/plane_0014.tif deae555248b8b03c6b7b1cd2b02213f13255645e83737decf9a920bae30d1508 +edge_cells_brain/3d_filter/plane_0015.tif 063e8370dfdf80202a052d370d86357b6f07e263139f1f9cb115968d96ca1e76 +edge_cells_brain/3d_filter/plane_0016.tif dead984c00ef4d14582b125797d2743ce774eec46242e855a58342820e47063e +edge_cells_brain/3d_filter/plane_0017.tif d890497d0349c51379add2b188bded46ef528c56e016131da257fead119681ab +edge_cells_brain/3d_filter/plane_0018.tif d75f0a2e767dfa46f0d2cc3f63c367391eac47b3076427bce688b39305d20387 +edge_cells_brain/3d_filter/plane_0019.tif e7ee64795b3f07c4cff4f8a2700ba0c1055604ce50039fe0590c900c6ece43b8 +edge_cells_brain/3d_filter/plane_0020.tif 7f94a93aa45877be81732c5be4f4d1d67bb1d4059bb6de833d60c146be621efc +edge_cells_brain/detected_cells.xml 5519a25a60bfd0d15af86eab2d5edc3c249c0afd2c6d83a9a46838ffb6fe9543 +edge_cells_brain/peak_enhanced/signal0000.tif 11dc924b3f30def59f6ca0b31a428a8cb454c163f4e6417d620659028ac544d9 +edge_cells_brain/peak_enhanced/signal0001.tif ac3605624094838c229f2d4ca2af9df9df17ba742b11d7ca684c42ce8e49557e +edge_cells_brain/peak_enhanced/signal0002.tif a7759610b69fd0e9d023231488038482d8e810b9f4c096374ea267d2b7e2c5e7 +edge_cells_brain/peak_enhanced/signal0003.tif 25d712b49e7d97368c9f924876c23b132c0b0902f899ad5150c0b4bfe24d591b +edge_cells_brain/peak_enhanced/signal0004.tif e8607d850649ccd68d8bcd0af12c9cfb98f191bb5c092b81ed9b6c87837b986e +edge_cells_brain/peak_enhanced/signal0005.tif eff9cefc56c34f4307b9c94af90bcd6e41ef03d45855018eef9e40ab0bbdf116 +edge_cells_brain/peak_enhanced/signal0006.tif e5a058f759640b198069526772f972b9db0fb77d5502b5473a573d2f1a8fb746 +edge_cells_brain/peak_enhanced/signal0007.tif 44f7a1658c8c61f8e5b680ebb03d1f50779f0f22cf0066fb1b938764cd7353c2 +edge_cells_brain/peak_enhanced/signal0008.tif 5439cfeb6a6f17e6231bb532791a4d37c9fa00ea1c32b78184e8708938adf233 +edge_cells_brain/peak_enhanced/signal0009.tif 46c460f41e2ead3d694c9646ed4c21e6db57cae4fc3fd1fd4d411385877fdec7 +edge_cells_brain/peak_enhanced/signal0010.tif 0e74e27ef728f1c7b6db97172df8961917d9dba8c87cef045b7f2f523eec4da2 +edge_cells_brain/peak_enhanced/signal0011.tif 97495a61cfabec5df5a144305b394c306d56063307a911c7024d1b8e40089663 +edge_cells_brain/peak_enhanced/signal0012.tif 9809f14393cae19d7815c16370265e318ab13ea5188e075b73295a4968f071b5 +edge_cells_brain/peak_enhanced/signal0013.tif 136b1dd0b90be087fe87c1cf0caef02f136d54dc642aa91fc03a96944a113a0b +edge_cells_brain/peak_enhanced/signal0014.tif a74e9bed032952209bdffdd9c351dff8f0bb46b4272999dd14822c254e824f2c +edge_cells_brain/peak_enhanced/signal0015.tif eb675b9cabf241b2209473e62d84011349a997677a7e32b1d4a59b9f95229364 +edge_cells_brain/peak_enhanced/signal0016.tif 0df886c3fb7cc9a9dcd0c5803629ccd57125e5c9c574bca7445222cbb14d9be7 +edge_cells_brain/peak_enhanced/signal0017.tif 675125f17b0a4784aa5f56240abad9c155831f8fa3be0c6be13bd29cd1323a90 +edge_cells_brain/peak_enhanced/signal0018.tif be3df5edb2280eb69e05e94d7864d36635e929744667ae10e608f33b02f4363d +edge_cells_brain/peak_enhanced/signal0019.tif 90ea965719aa7ad0f4a6480ab1e860d58ddc1a6bd6441365f6edf6f1e76edfab +edge_cells_brain/peak_enhanced/signal0020.tif 7ba5bae6b3b8845cbbe0726da8d28138702e681167f6a99d050b063c4a271a23 +edge_cells_brain/signal/signal0000.tif 8214694de0e41dc6f9a456cc335cff60091129ebe700a1fe6f3a38ebbb02cc28 +edge_cells_brain/signal/signal0001.tif 6b7dec437f06c7a66f70d2c16dc8d5ec0f039a11b68a458e216574d281238d88 +edge_cells_brain/signal/signal0002.tif 4ebf6e8b96a970a64816210f9270fb19f5055cfe7fe6b2c3debd78485853c4b3 +edge_cells_brain/signal/signal0003.tif 920e5b95386d01766b94b357eb7e2906d2c6f1d27eee4e4765edb7455adcf91c +edge_cells_brain/signal/signal0004.tif 90b84ea67c92d086f993588649dd6570a2baa46de6ceaf5aa64213673c922bbd +edge_cells_brain/signal/signal0005.tif a6072c68f0636f4f7a34ada3ff3fad43105d2c7dbcf831aca6662a8b358a4d90 +edge_cells_brain/signal/signal0006.tif 87773e1f7bbbfad1324e5f83214e7339119b2e9b244a34d18f5f4211fd25978e +edge_cells_brain/signal/signal0007.tif 2a91b8ba0128314753542a7890438b0805b039c3b74de80f8d967e4aa45f31e3 +edge_cells_brain/signal/signal0008.tif e2883d175bebc48f1cf7a82690f709d379715e471ccab740205040a203c3e88e +edge_cells_brain/signal/signal0009.tif 854398ddc2de9f1f43256f935bb4bb2d3b36726ed615fedc045050adb618dda2 +edge_cells_brain/signal/signal0010.tif 8ccb209331784fe4aa8f43eb3174e68fee8abdc0d29acbb388ddf9b8236d8a31 +edge_cells_brain/signal/signal0011.tif 883722f089d5318881a2767ca4717857e52ccf0005e4535e09ed24de9ab9f73b +edge_cells_brain/signal/signal0012.tif 127d14bb8e1f7a721d23f02f05d949345d78a1471e41270c8ad0cf012454fb95 +edge_cells_brain/signal/signal0013.tif b4cd368115d17048c81934dcb310e43663cc43ea14535516e75496e4622cee1d +edge_cells_brain/signal/signal0014.tif 28c9d9dc3db609d48c69271f3755bcc5fbe3b870a8939b2c4685e7d8f3dfa8dd +edge_cells_brain/signal/signal0015.tif 2388045546ed504281a190eddbe1a362d0f4f5803cc4cd3bac92a8e10ee5c7d6 +edge_cells_brain/signal/signal0016.tif 96f834737b8be786db9aca7edbdbae04d11c8f6c4deea51616a8567f39d1d03b +edge_cells_brain/signal/signal0017.tif e79d979915c85598d1fc64e9d19c5a22f575a8c0ab7ae402f9da070027a19af9 +edge_cells_brain/signal/signal0018.tif af5d97c6365694fcff26bb04a37bab2c008b6d3ccacaaedf17ce4d003b289aec +edge_cells_brain/signal/signal0019.tif bb798c254f2627e26ec53b110ad572e0494eddb06ce79d48de7267cb6e629f89 +edge_cells_brain/signal/signal0020.tif edcc27ab82449b3d11202a8866096e885bacab2dd63471fdb8e142293a9a99e8 +edge_cells_brain/tiles/signal0000.tif ea8bd86e6c8ee62e7de84a1f6792ba7d2174d3dffe9d1b578fbbde9c71dacc8c +edge_cells_brain/tiles/signal0001.tif aa757c580996cd69264617ce3b2b7ac0a1173bac78fe5c5bdfbd7e61f633a764 +edge_cells_brain/tiles/signal0002.tif ea8bd86e6c8ee62e7de84a1f6792ba7d2174d3dffe9d1b578fbbde9c71dacc8c +edge_cells_brain/tiles/signal0003.tif aa757c580996cd69264617ce3b2b7ac0a1173bac78fe5c5bdfbd7e61f633a764 +edge_cells_brain/tiles/signal0004.tif e0aaa58adcf0f13402494ce34112f7b937bf9421a1e0064856f57946e0267a02 +edge_cells_brain/tiles/signal0005.tif e0aaa58adcf0f13402494ce34112f7b937bf9421a1e0064856f57946e0267a02 +edge_cells_brain/tiles/signal0006.tif e0aaa58adcf0f13402494ce34112f7b937bf9421a1e0064856f57946e0267a02 +edge_cells_brain/tiles/signal0007.tif f382330f73c2625e84bc36a3faee169a074ed8e59db836fcbbd236061f3888b4 +edge_cells_brain/tiles/signal0008.tif e0aaa58adcf0f13402494ce34112f7b937bf9421a1e0064856f57946e0267a02 +edge_cells_brain/tiles/signal0009.tif e0aaa58adcf0f13402494ce34112f7b937bf9421a1e0064856f57946e0267a02 +edge_cells_brain/tiles/signal0010.tif d3560fff32b42f8d7baa32b2746535137675ade1bb3e726d8ef890e321daf8cc +edge_cells_brain/tiles/signal0011.tif d3560fff32b42f8d7baa32b2746535137675ade1bb3e726d8ef890e321daf8cc +edge_cells_brain/tiles/signal0012.tif d3560fff32b42f8d7baa32b2746535137675ade1bb3e726d8ef890e321daf8cc +edge_cells_brain/tiles/signal0013.tif d3560fff32b42f8d7baa32b2746535137675ade1bb3e726d8ef890e321daf8cc +edge_cells_brain/tiles/signal0014.tif d3560fff32b42f8d7baa32b2746535137675ade1bb3e726d8ef890e321daf8cc +edge_cells_brain/tiles/signal0015.tif e0aaa58adcf0f13402494ce34112f7b937bf9421a1e0064856f57946e0267a02 +edge_cells_brain/tiles/signal0016.tif bc89f955fb25b3f536246912eaac757c7c3db4d9eddfdd5cb9b546800b782c2e +edge_cells_brain/tiles/signal0017.tif b0a9789d16458fcfc92166c4363fe48859311247939587acb1d04d2885a36476 +edge_cells_brain/tiles/signal0018.tif 5307038034e5d4ca98c33557bc5ed9ef5ccb12672934e5387ff7e1d236f5cac7 +edge_cells_brain/tiles/signal0019.tif 40c4474b4ec25a8c68ac20f6649e7296af058e1bd854585ef7d71facb11a37e0 +edge_cells_brain/tiles/signal0020.tif b0a9789d16458fcfc92166c4363fe48859311247939587acb1d04d2885a36476