From 32b8e8b7c3dacd6603054ba5b66e7572c259e633 Mon Sep 17 00:00:00 2001
From: James <12661555+jmsmkn@users.noreply.github.com>
Date: Tue, 23 Mar 2021 09:11:12 +0100
Subject: [PATCH] Return generators from builders (#19)

---
 HISTORY.md                              |   5 +
 README.md                               |  19 +++
 panimg/__init__.py                      |   2 +-
 panimg/exceptions.py                    |  16 ++
 panimg/image_builders/__init__.py       |   9 ++
 panimg/image_builders/dicom.py          |  73 ++++-----
 panimg/image_builders/fallback.py       |  40 ++---
 panimg/image_builders/metaio_mhd_mha.py |  72 ++++-----
 panimg/image_builders/nifti.py          |  47 +++---
 panimg/image_builders/tiff.py           | 181 +++++++++-------------
 panimg/image_builders/utils.py          |  85 -----------
 panimg/models.py                        | 191 +++++++++++++++++++++++-
 panimg/panimg.py                        |  43 +++++-
 panimg/post_processors/__init__.py      |   5 +
 panimg/settings.py                      |   3 -
 panimg/types.py                         |  17 ++-
 pyproject.toml                          |   2 +-
 tests/test_default_image_builders.py    |  60 ++++++++
 tests/test_dicom.py                     |  29 ++--
 tests/test_fallback.py                  |  14 +-
 tests/test_nifti.py                     |  11 +-
 tests/test_panimg.py                    |   2 +-
 tests/test_tiff.py                      |  41 ++---
 tests/test_utils.py                     |  23 ++-
 24 files changed, 580 insertions(+), 410 deletions(-)
 delete mode 100644 panimg/image_builders/utils.py
 create mode 100644 tests/test_default_image_builders.py

diff --git a/HISTORY.md b/HISTORY.md
index 67dc1bb..6ea516c 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,10 @@
 # History
 
+## 0.2.0 (2021-03-23)
+
+* Builders now return generators
+* Added post processors
+
 ## 0.1.0 (2021-03-09)
 
 * Initial version
diff --git a/README.md b/README.md
index 59f253e..57d19f4 100644
--- a/README.md
+++ b/README.md
@@ -69,3 +69,22 @@ To customise the post processors that run you can do this with
 ```python
 result = convert(..., post_processors=[...])
 ```
+
+#### Using Strategies Directly
+
+If you want to run a particular strategy directly, which returns a generator of images for a set of files, you can do this with
+
+```python
+files = {f for f in Path("/foo/").glob("*.dcm") if f.is_file()}
+
+try:
+    for result in image_builder_dicom(files=files):
+        sitk_image = result.image
+        process(sitk_image)  # etc. you can also look at result.name for the name of the file,
+                             # and result.consumed_files to see what files were used for this image
+except UnconsumedFilesException as e:
+    # e.file_errors is keyed with a Path to a file that could not be consumed,
+    # with a list of all the errors found with loading it,
+    # the user can then choose what to do with that information
+    ...
+```
diff --git a/panimg/__init__.py b/panimg/__init__.py
index 403f93c..fac3a81 100644
--- a/panimg/__init__.py
+++ b/panimg/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.1.0"
+__version__ = "0.2.0"
 
 from .panimg import convert
 
diff --git a/panimg/exceptions.py b/panimg/exceptions.py
index 15e676c..3000a88 100644
--- a/panimg/exceptions.py
+++ b/panimg/exceptions.py
@@ -1,2 +1,18 @@
+from pathlib import Path
+from typing import Dict, List
+
+
 class ValidationError(Exception):
     pass
+
+
+class UnconsumedFilesException(Exception):
+    """
+    Raised on completion of an image builder when there are unconsumed files.
+    Contains a dictionary mapping each unconsumed file to the list of errors
+    encountered when loading it.
+    """
+
+    def __init__(self, *args, file_errors: Dict[Path, List[str]]):
+        super().__init__(*args)
+        self.file_errors = file_errors
diff --git a/panimg/image_builders/__init__.py b/panimg/image_builders/__init__.py
index 7077977..a1ae883 100644
--- a/panimg/image_builders/__init__.py
+++ b/panimg/image_builders/__init__.py
@@ -14,3 +14,12 @@
     image_builder_tiff,
     image_builder_fallback,
 ]
+
+__all__ = [
+    "image_builder_mhd",
+    "image_builder_nifti",
+    "image_builder_dicom",
+    "image_builder_tiff",
+    "image_builder_fallback",
+    "DEFAULT_IMAGE_BUILDERS",
+]
diff --git a/panimg/image_builders/dicom.py b/panimg/image_builders/dicom.py
index 6ea73ea..9a839be 100644
--- a/panimg/image_builders/dicom.py
+++ b/panimg/image_builders/dicom.py
@@ -1,14 +1,14 @@
 from collections import defaultdict, namedtuple
 from math import isclose
 from pathlib import Path
-from typing import Set
+from typing import DefaultDict, Iterator, List, Set
 
 import SimpleITK
 import numpy as np
 import pydicom
 
-from panimg.image_builders.utils import convert_itk_to_internal
-from panimg.models import PanImgFile, PanImgResult
+from panimg.exceptions import UnconsumedFilesException
+from panimg.models import SimpleITKImage
 
 NUMPY_IMAGE_TYPES = {
     "character": SimpleITK.sitkUInt8,
@@ -46,7 +46,7 @@ def pixel_data_reached(tag, vr, length):
     return pydicom.datadict.keyword_for_tag(tag) == "PixelData"
 
 
-def _get_headers_by_study(files):
+def _get_headers_by_study(files, file_errors):
     """
     Gets all headers from dicom files found in path.
 
@@ -62,7 +62,6 @@ def _get_headers_by_study(files):
     grouped by study id.
     """
     studies = {}
-    errors = defaultdict(list)
     indices = {}
 
     for file in files:
@@ -91,20 +90,22 @@ def _get_headers_by_study(files):
                 studies[key]["index"] = index
                 studies[key]["headers"] = headers
             except Exception as e:
-                errors[file].append(format_error(str(e)))
+                file_errors[file].append(format_error(str(e)))
 
     for key in studies:
         studies[key]["headers"].sort(
             key=lambda x: int(x["data"].InstanceNumber)
         )
-    return studies, errors
+    return studies
 
 
 def format_error(message: str) -> str:
     return f"Dicom image builder: {message}"
 
 
-def _validate_dicom_files(files: Set[Path]):
+def _validate_dicom_files(
+    files: Set[Path], file_errors: DefaultDict[Path, List[str]]
+):
     """
     Gets the headers for all dicom files on path and validates them.
 
@@ -123,7 +124,7 @@ def _validate_dicom_files(files: Set[Path]):
 
     Any study with an inconsistent amount of slices per time point is discarded.
     """
-    studies, errors = _get_headers_by_study(files)
+    studies = _get_headers_by_study(files=files, file_errors=file_errors)
     result = []
     dicom_dataset = namedtuple(
         "dicom_dataset", ["headers", "n_time", "n_slices", "index"]
@@ -149,7 +150,7 @@ def _validate_dicom_files(files: Set[Path]):
             continue
         if len(headers) % n_time > 0:
             for d in headers:
-                errors[d["file"]].append(
+                file_errors[d["file"]].append(
                     format_error("Number of slices per time point differs")
                 )
             continue
@@ -160,7 +161,7 @@ def _validate_dicom_files(files: Set[Path]):
             )
         )
     del studies
-    return result, errors
+    return result
 
 
 def _extract_direction(dicom_ds, direction):
@@ -177,9 +178,7 @@ def _extract_direction(dicom_ds, direction):
     return direction
 
 
-def _process_dicom_file(  # noqa: C901
-    *, dicom_ds, created_image_prefix, output_directory
-):
+def _process_dicom_file(*, dicom_ds):  # noqa: C901
     ref_file = pydicom.dcmread(str(dicom_ds.headers[0]["file"]))
     ref_origin = tuple(
         float(i) for i in getattr(ref_file, "ImagePositionPatient", (0, 0, 0))
@@ -256,15 +255,13 @@ def _process_dicom_file(  # noqa: C901
         if getattr(ref_file, f, False):
             img.SetMetaData(f, str(getattr(ref_file, f)))
 
-    # Convert the SimpleITK image to our internal representation
-    return convert_itk_to_internal(
-        simple_itk_image=img,
+    return SimpleITKImage(
+        image=img,
         name=(
-            f"{created_image_prefix}"
-            f"-{dicom_ds.headers[0]['data'].StudyInstanceUID}"
-            f"-{dicom_ds.index}"
+            f"{dicom_ds.headers[0]['data'].StudyInstanceUID}-{dicom_ds.index}"
         ),
-        output_directory=output_directory,
+        consumed_files={d["file"] for d in dicom_ds.headers},
+        spacing_valid=True,
     )
 
 
@@ -337,13 +334,7 @@ def _create_itk_from_dcm(
     return img
 
 
-def image_builder_dicom(
-    *,
-    files: Set[Path],
-    output_directory: Path,
-    created_image_prefix: str = "",
-    **_,
-) -> PanImgResult:
+def image_builder_dicom(*, files: Set[Path]) -> Iterator[SimpleITKImage]:
     """
     Constructs image objects by inspecting files in a directory.
 
@@ -361,28 +352,16 @@ def image_builder_dicom(
      - a list files associated with the detected images
      - path->error message map describing what is wrong with a given file
     """
-    studies, file_errors = _validate_dicom_files(files)
-    new_images = set()
-    new_image_files: Set[PanImgFile] = set()
-    consumed_files: Set[Path] = set()
+    file_errors: DefaultDict[Path, List[str]] = defaultdict(list)
+
+    studies = _validate_dicom_files(files=files, file_errors=file_errors)
+
     for dicom_ds in studies:
         try:
-            n_image, n_image_files = _process_dicom_file(
-                dicom_ds=dicom_ds,
-                created_image_prefix=created_image_prefix,
-                output_directory=output_directory,
-            )
-            new_images.add(n_image)
-            new_image_files |= set(n_image_files)
-            consumed_files |= {d["file"] for d in dicom_ds.headers}
+            yield _process_dicom_file(dicom_ds=dicom_ds)
         except Exception as e:
             for d in dicom_ds.headers:
                 file_errors[d["file"]].append(format_error(str(e)))
 
-    return PanImgResult(
-        consumed_files=consumed_files,
-        file_errors=file_errors,
-        new_images=new_images,
-        new_image_files=new_image_files,
-        new_folders=set(),
-    )
+    if file_errors:
+        raise UnconsumedFilesException(file_errors=file_errors)
diff --git a/panimg/image_builders/fallback.py b/panimg/image_builders/fallback.py
index 9329070..a69b375 100644
--- a/panimg/image_builders/fallback.py
+++ b/panimg/image_builders/fallback.py
@@ -1,24 +1,21 @@
 from collections import defaultdict
 from pathlib import Path
-from typing import Dict, List, Set
+from typing import DefaultDict, Iterator, List, Set
 
 import SimpleITK
 import numpy as np
 from PIL import Image
 from PIL.Image import DecompressionBombError
 
-from panimg.exceptions import ValidationError
-from panimg.image_builders.utils import convert_itk_to_internal
-from panimg.models import PanImgFile, PanImgResult
+from panimg.exceptions import UnconsumedFilesException, ValidationError
+from panimg.models import SimpleITKImage
 
 
 def format_error(message: str) -> str:
     return f"Fallback image builder: {message}"
 
 
-def image_builder_fallback(
-    *, files: Set[Path], output_directory: Path, **_
-) -> PanImgResult:
+def image_builder_fallback(*, files: Set[Path]) -> Iterator[SimpleITKImage]:
     """
     Constructs image objects by inspecting files in a directory.
 
@@ -36,10 +33,8 @@ def image_builder_fallback(
      - a list files associated with the detected images
      - path->error message map describing what is wrong with a given file
     """
-    errors: Dict[Path, List[str]] = defaultdict(list)
-    new_images = set()
-    new_image_files: Set[PanImgFile] = set()
-    consumed_files = set()
+    file_errors: DefaultDict[Path, List[str]] = defaultdict(list)
+
     for file in files:
         try:
             img = Image.open(file)
@@ -52,22 +47,15 @@ def image_builder_fallback(
             img_array = np.array(img)
             is_vector = img.mode != "L"
             img = SimpleITK.GetImageFromArray(img_array, isVector=is_vector)
-            n_image, n_image_files = convert_itk_to_internal(
-                simple_itk_image=img,
+
+            yield SimpleITKImage(
+                image=img,
                 name=file.name,
-                use_spacing=False,
-                output_directory=output_directory,
+                consumed_files={file},
+                spacing_valid=False,
             )
-            new_images.add(n_image)
-            new_image_files |= set(n_image_files)
-            consumed_files.add(file)
         except (OSError, ValidationError, DecompressionBombError):
-            errors[file].append(format_error("Not a valid image file"))
+            file_errors[file].append(format_error("Not a valid image file"))
 
-    return PanImgResult(
-        consumed_files=consumed_files,
-        file_errors=errors,
-        new_images=new_images,
-        new_image_files=new_image_files,
-        new_folders=set(),
-    )
+    if file_errors:
+        raise UnconsumedFilesException(file_errors=file_errors)
diff --git a/panimg/image_builders/metaio_mhd_mha.py b/panimg/image_builders/metaio_mhd_mha.py
index b6398d0..dc5237a 100644
--- a/panimg/image_builders/metaio_mhd_mha.py
+++ b/panimg/image_builders/metaio_mhd_mha.py
@@ -5,19 +5,19 @@
 """
 from collections import defaultdict
 from pathlib import Path
-from typing import Dict, List, Mapping, Sequence, Set, Tuple, Union
+from typing import DefaultDict, Dict, Iterator, List, Mapping, Set, Union
 
+from panimg.exceptions import UnconsumedFilesException
 from panimg.image_builders.metaio_utils import (
     load_sitk_image,
     parse_mh_header,
 )
-from panimg.image_builders.utils import convert_itk_to_internal
-from panimg.models import PanImg, PanImgFile, PanImgResult
+from panimg.models import SimpleITKImage
 
 
 def image_builder_mhd(  # noqa: C901
-    *, files: Set[Path], output_directory: Path, **_
-) -> PanImgResult:
+    *, files: Set[Path]
+) -> Iterator[SimpleITKImage]:
     """
     Constructs image objects by inspecting files in a directory.
 
@@ -34,6 +34,8 @@ def image_builder_mhd(  # noqa: C901
      - files associated with the detected images
      - path->error message map describing what is wrong with a given file
     """
+    file_errors: DefaultDict[Path, List[str]] = defaultdict(list)
+
     element_data_file_key = "ElementDataFile"
 
     def detect_mhd_file(headers: Dict[str, str], path: Path) -> bool:
@@ -60,32 +62,16 @@ def detect_mha_file(headers: Mapping[str, Union[str, None]]) -> bool:
         data_file = headers.get(element_data_file_key, None)
         return data_file == "LOCAL"
 
-    def convert_itk_file(
-        *, filename: Path, output_dir: Path,
-    ) -> Tuple[PanImg, Sequence[PanImgFile]]:
-        try:
-            simple_itk_image = load_sitk_image(filename.absolute())
-        except RuntimeError:
-            raise ValueError("SimpleITK cannot open file")
-
-        return convert_itk_to_internal(
-            simple_itk_image=simple_itk_image,
-            name=filename.name,
-            output_directory=output_dir,
-        )
-
     def format_error(message: str) -> str:
         return f"Mhd image builder: {message}"
 
-    new_images = set()
-    new_image_files: Set[PanImgFile] = set()
-    consumed_files = set()
-    invalid_file_errors: Dict[Path, List[str]] = defaultdict(list)
     for file in files:
         try:
             parsed_headers = parse_mh_header(file)
         except ValueError:
-            # Maybe add .mhd and .mha files here as "processed" but with errors
+            file_errors[file].append(
+                format_error("Could not parse ITK header")
+            )
             continue
 
         try:
@@ -93,35 +79,43 @@ def format_error(message: str) -> str:
                 parsed_headers, file.parent
             ) or detect_mha_file(parsed_headers)
         except ValueError as e:
-            invalid_file_errors[file].append(format_error(str(e)))
+            file_errors[file].append(format_error(str(e)))
             continue
 
         if is_hd_or_mha:
             file_dependency = None
+
             if parsed_headers[element_data_file_key] != "LOCAL":
                 file_dependency = (
                     file.parent / parsed_headers[element_data_file_key]
                 )
                 if not file_dependency.is_file():
-                    invalid_file_errors[file].append(
+                    file_errors[file].append(
                         format_error("Cannot find data file")
                     )
                     continue
 
-            n_image, n_image_files = convert_itk_file(
-                filename=file, output_dir=output_directory
-            )
-            new_images.add(n_image)
-            new_image_files |= set(n_image_files)
+            try:
+                simple_itk_image = load_sitk_image(file.absolute())
+            except RuntimeError:
+                file_errors[file].append(
+                    format_error("SimpleITK cannot open file")
+                )
+                continue
 
-            consumed_files.add(file)
+            consumed_files = {file}
             if file_dependency is not None:
                 consumed_files.add(file_dependency)
 
-    return PanImgResult(
-        consumed_files=consumed_files,
-        file_errors=invalid_file_errors,
-        new_images=new_images,
-        new_image_files=new_image_files,
-        new_folders=set(),
-    )
+            yield SimpleITKImage(
+                image=simple_itk_image,
+                name=file.name,
+                consumed_files=consumed_files,
+                spacing_valid=True,
+            )
+        else:
+            file_errors[file].append(format_error("Not an ITK file"))
+            continue
+
+    if file_errors:
+        raise UnconsumedFilesException(file_errors=file_errors)
diff --git a/panimg/image_builders/nifti.py b/panimg/image_builders/nifti.py
index 6ad24fa..e92f935 100644
--- a/panimg/image_builders/nifti.py
+++ b/panimg/image_builders/nifti.py
@@ -1,20 +1,18 @@
 from collections import defaultdict
 from pathlib import Path
-from typing import Dict, List, Set
+from typing import DefaultDict, Iterator, List, Set
 
 import SimpleITK
 
-from panimg.image_builders.utils import convert_itk_to_internal
-from panimg.models import PanImg, PanImgFile, PanImgResult
+from panimg.exceptions import UnconsumedFilesException
+from panimg.models import SimpleITKImage
 
 
 def format_error(message: str) -> str:
     return f"NifTI image builder: {message}"
 
 
-def image_builder_nifti(
-    *, files: Set[Path], output_directory: Path, **_
-) -> PanImgResult:
+def image_builder_nifti(*, files: Set[Path],) -> Iterator[SimpleITKImage]:
     """
     Constructs image objects from files in NifTI format (nii/nii.gz)
 
@@ -31,12 +29,11 @@ def image_builder_nifti(
      - a list files associated with the detected images
      - path->error message map describing what is wrong with a given file
     """
-    errors: Dict[Path, List[str]] = defaultdict(list)
-    new_images: Set[PanImg] = set()
-    new_image_files: Set[PanImgFile] = set()
-    consumed_files = set()
+    file_errors: DefaultDict[Path, List[str]] = defaultdict(list)
+
     for file in files:
         if not (file.name.endswith(".nii") or file.name.endswith(".nii.gz")):
+            file_errors[file].append(format_error("Not a NifTI image file"))
             continue
 
         try:
@@ -45,25 +42,17 @@ def image_builder_nifti(
             reader.SetFileName(str(file.absolute()))
             img: SimpleITK.Image = reader.Execute()
         except RuntimeError:
-            errors[file].append(format_error("Not a valid NifTI image file"))
+            file_errors[file].append(
+                format_error("Not a valid NifTI image file")
+            )
             continue
 
-        try:
-            n_image, n_image_files = convert_itk_to_internal(
-                simple_itk_image=img,
-                name=file.name,
-                output_directory=output_directory,
-            )
-            new_images.add(n_image)
-            new_image_files |= set(n_image_files)
-            consumed_files.add(file)
-        except ValueError as e:
-            errors[file].append(format_error(str(e)))
+        yield SimpleITKImage(
+            image=img,
+            name=file.name,
+            consumed_files={file},
+            spacing_valid=True,
+        )
 
-    return PanImgResult(
-        consumed_files=consumed_files,
-        file_errors=errors,
-        new_images=new_images,
-        new_image_files=new_image_files,
-        new_folders=set(),
-    )
+    if file_errors:
+        raise UnconsumedFilesException(file_errors=file_errors)
diff --git a/panimg/image_builders/tiff.py b/panimg/image_builders/tiff.py
index a314ee6..782066b 100644
--- a/panimg/image_builders/tiff.py
+++ b/panimg/image_builders/tiff.py
@@ -1,24 +1,20 @@
 import os
 import re
-import shutil
 from collections import defaultdict
 from dataclasses import dataclass, field
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import Callable, Dict, List, Optional, Set
+from typing import Callable, DefaultDict, Dict, Iterator, List, Optional, Set
 from uuid import UUID, uuid4
 
 import openslide
 import pyvips
 import tifffile
 
-from panimg.exceptions import ValidationError
+from panimg.exceptions import UnconsumedFilesException, ValidationError
 from panimg.models import (
     ColorSpace,
-    ImageType,
-    PanImg,
-    PanImgFile,
-    PanImgResult,
+    TIFFImage,
 )
 
 
@@ -32,7 +28,6 @@ class GrandChallengeTiffFile:
     color_space: Optional[ColorSpace] = None
     voxel_width_mm: float = 0
     voxel_height_mm: float = 0
-    voxel_depth_mm: Optional[float] = None
     associated_files: List[Path] = field(default_factory=list)
 
     def validate(self) -> None:
@@ -175,8 +170,6 @@ def _extract_tags(
         gc_file.voxel_width_mm = _get_voxel_spacing_mm(tags, "XResolution")
         gc_file.voxel_height_mm = _get_voxel_spacing_mm(tags, "YResolution")
 
-    gc_file.voxel_depth_mm = None
-
     return gc_file
 
 
@@ -298,6 +291,7 @@ def _convert_to_tiff(
     *, path: Path, pk: UUID, converter, output_directory: Path
 ) -> Path:
     new_file_name = output_directory / path.name / f"{pk}.tif"
+    new_file_name.parent.mkdir()
 
     image = converter.Image.new_from_file(
         str(path.absolute()), access="sequential"
@@ -310,20 +304,14 @@ def _convert_to_tiff(
         y_res = 1000.0 / float(image.get("openslide.mpp-y"))
         image = image.copy(xres=x_res, yres=y_res)
 
-    with TemporaryDirectory() as d:
-        temp_file = Path(d) / new_file_name.name
-
-        image.write_to_file(
-            str(temp_file.absolute()),
-            tile=True,
-            pyramid=True,
-            bigtiff=True,
-            compression="jpeg",
-            Q=70,
-        )
-
-        new_file_name.parent.mkdir()
-        shutil.move(temp_file, new_file_name)
+    image.write_to_file(
+        str(new_file_name.absolute()),
+        tile=True,
+        pyramid=True,
+        bigtiff=True,
+        compression="jpeg",
+        Q=70,
+    )
 
     return new_file_name
 
@@ -367,102 +355,71 @@ def _load_gc_files(
             if g.associated_files is not None
         ):
             gc_file = GrandChallengeTiffFile(file)
-
-            out_file = (
-                output_directory
-                / str(gc_file.path.name)
-                / f"{gc_file.pk}{gc_file.path.suffix}"
-            )
-            out_file.parent.mkdir()
-
-            shutil.copy(
-                src=str(gc_file.path.resolve()), dst=str(out_file.resolve())
-            )
-
             loaded_files.append(gc_file)
 
     return loaded_files
 
 
 def image_builder_tiff(  # noqa: C901
-    *, files: Set[Path], output_directory: Path, **_
-) -> PanImgResult:
-    new_images = set()
-    new_image_files: Set[PanImgFile] = set()
-    consumed_files: Set[Path] = set()
-    file_errors: Dict[Path, List[str]] = defaultdict(list)
-
-    loaded_files = _load_gc_files(
-        files=files,
-        converter=pyvips,
-        output_directory=output_directory,
-        file_errors=file_errors,
-    )
-
-    for gc_file in loaded_files:
-        # try and load image with tiff file
-        try:
-            gc_file = _load_with_tiff(gc_file=gc_file)
-        except Exception as e:
-            file_errors[gc_file.path].append(f"TIFF load error: {e}.")
-
-        # try and load image with open slide
-        try:
-            gc_file = _load_with_openslide(gc_file=gc_file)
-        except Exception as e:
-            file_errors[gc_file.path].append(f"OpenSlide load error: {e}.")
-
-        # validate
-        try:
-            gc_file.validate()
-        except ValidationError as e:
-            file_errors[gc_file.path].append(f"Validation error: {e}.")
-            continue
+    *, files: Set[Path]
+) -> Iterator[TIFFImage]:
+    file_errors: DefaultDict[Path, List[str]] = defaultdict(list)
 
-        image = _create_tiff_image_entry(tiff_file=gc_file)
-        new_images.add(image)
+    with TemporaryDirectory() as output_directory:
 
-        new_image_files.add(
-            PanImgFile(
-                image_id=image.pk,
-                image_type=ImageType.TIFF,
-                file=gc_file.path.absolute(),
-            )
+        loaded_files = _load_gc_files(
+            files=files,
+            converter=pyvips,
+            output_directory=Path(output_directory),
+            file_errors=file_errors,
         )
 
-        if gc_file.associated_files:
-            consumed_files |= {f.absolute() for f in gc_file.associated_files}
-        else:
-            consumed_files.add(gc_file.path.absolute())
-
-    return PanImgResult(
-        consumed_files=consumed_files,
-        file_errors=file_errors,
-        new_images=new_images,
-        new_image_files=new_image_files,
-        new_folders=set(),
-    )
-
+        for gc_file in loaded_files:
+            # try and load image with tiff file
+            try:
+                gc_file = _load_with_tiff(gc_file=gc_file)
+            except Exception:
+                file_errors[gc_file.path].append(
+                    "Could not open file with tifffile."
+                )
+
+            # try and load image with open slide
+            try:
+                gc_file = _load_with_openslide(gc_file=gc_file)
+            except Exception:
+                file_errors[gc_file.path].append(
+                    "Could not open file with OpenSlide."
+                )
+
+            # validate
+            try:
+                gc_file.validate()
+                if gc_file.color_space is None:
+                    # TODO This needs to be solved by refactoring of
+                    # GrandChallengeTiffFile
+                    raise RuntimeError("Color space not found")
+            except ValidationError as e:
+                file_errors[gc_file.path].append(f"Validation error: {e}.")
+                continue
+
+            if gc_file.associated_files:
+                consumed_files = {
+                    f.absolute() for f in gc_file.associated_files
+                }
+            else:
+                consumed_files = {gc_file.path.absolute()}
+
+            yield TIFFImage(
+                file=gc_file.path,
+                name=gc_file.path.name,
+                consumed_files=consumed_files,
+                width=gc_file.image_width,
+                height=gc_file.image_height,
+                voxel_width_mm=gc_file.voxel_width_mm,
+                voxel_height_mm=gc_file.voxel_height_mm,
+                resolution_levels=gc_file.resolution_levels,
+                color_space=gc_file.color_space,
+            )
 
-def _create_tiff_image_entry(*, tiff_file: GrandChallengeTiffFile) -> PanImg:
-    # Builds a new Image model item
-
-    if tiff_file.color_space is None:
-        # TODO This needs to be solved by refactoring of GrandChallengeTiffFile
-        raise RuntimeError("Color space not found")
-
-    return PanImg(
-        pk=tiff_file.pk,
-        name=tiff_file.path.name,
-        width=tiff_file.image_width,
-        height=tiff_file.image_height,
-        depth=1,
-        resolution_levels=tiff_file.resolution_levels,
-        color_space=tiff_file.color_space,
-        voxel_width_mm=tiff_file.voxel_width_mm,
-        voxel_height_mm=tiff_file.voxel_height_mm,
-        voxel_depth_mm=tiff_file.voxel_depth_mm,
-        timepoints=None,
-        window_center=None,
-        window_width=None,
-    )
+    if file_errors:
+        raise UnconsumedFilesException(file_errors=file_errors)
diff --git a/panimg/image_builders/utils.py b/panimg/image_builders/utils.py
deleted file mode 100644
index ef0b18c..0000000
--- a/panimg/image_builders/utils.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from pathlib import Path
-from typing import Optional, Sequence, Tuple
-from uuid import uuid4
-
-import SimpleITK
-
-from panimg.models import ColorSpace, ImageType, PanImg, PanImgFile
-from panimg.settings import ITK_INTERNAL_FILE_FORMAT
-
-
-def convert_itk_to_internal(
-    *,
-    simple_itk_image: SimpleITK.Image,
-    output_directory: Path,
-    name: str,
-    use_spacing: bool = True,
-) -> Tuple[PanImg, Sequence[PanImgFile]]:
-    color_space = simple_itk_image.GetNumberOfComponentsPerPixel()
-    color_space = {
-        1: ColorSpace.GRAY,
-        3: ColorSpace.RGB,
-        4: ColorSpace.RGBA,
-    }.get(color_space, None)
-    if color_space is None:
-        raise ValueError("Unknown color space for MetaIO image.")
-
-    pk = uuid4()
-
-    work_dir = Path(output_directory) / name
-    work_dir.mkdir()
-
-    SimpleITK.WriteImage(
-        simple_itk_image,
-        str(work_dir / f"{pk}.{ITK_INTERNAL_FILE_FORMAT}"),
-        True,
-    )
-
-    if simple_itk_image.GetDimension() == 4:
-        timepoints = simple_itk_image.GetSize()[-1]
-    else:
-        timepoints = None
-    depth = simple_itk_image.GetDepth()
-
-    try:
-        window_center: Optional[float] = float(
-            simple_itk_image.GetMetaData("WindowCenter")
-        )
-    except (RuntimeError, ValueError):
-        window_center = None
-
-    try:
-        window_width: Optional[float] = float(
-            simple_itk_image.GetMetaData("WindowWidth")
-        )
-    except (RuntimeError, ValueError):
-        window_width = None
-
-    db_image = PanImg(
-        pk=pk,
-        name=name,
-        width=simple_itk_image.GetWidth(),
-        height=simple_itk_image.GetHeight(),
-        depth=depth if depth else None,
-        window_center=window_center,
-        window_width=window_width,
-        timepoints=timepoints,
-        resolution_levels=None,
-        color_space=color_space,
-        voxel_width_mm=simple_itk_image.GetSpacing()[0]
-        if use_spacing
-        else None,
-        voxel_height_mm=simple_itk_image.GetSpacing()[1]
-        if use_spacing
-        else None,
-        voxel_depth_mm=simple_itk_image.GetSpacing()[2] if depth else None,
-    )
-
-    db_image_files = []
-    for file in work_dir.iterdir():
-        db_image_file = PanImgFile(
-            image_id=db_image.pk, image_type=ImageType.MHD, file=file,
-        )
-        db_image_files.append(db_image_file)
-
-    return db_image, db_image_files
diff --git a/panimg/models.py b/panimg/models.py
index b72a5e0..b445b37 100644
--- a/panimg/models.py
+++ b/panimg/models.py
@@ -1,10 +1,16 @@
+import logging
+import shutil
 from enum import Enum
 from pathlib import Path
-from typing import Dict, List, Optional, Set
-from uuid import UUID
+from typing import Dict, List, Optional, Set, Tuple
+from uuid import UUID, uuid4
 
+from SimpleITK import Image, WriteImage
+from pydantic import BaseModel, validator
 from pydantic.dataclasses import dataclass
 
+logger = logging.getLogger(__name__)
+
 
 class ColorSpace(str, Enum):
     GRAY = "GRAY"
@@ -19,6 +25,13 @@ class ImageType(str, Enum):
     DZI = "DZI"
 
 
+ITK_COLOR_SPACE_MAP = {  # components per pixel -> ColorSpace
+    1: ColorSpace.GRAY,
+    3: ColorSpace.RGB,
+    4: ColorSpace.RGBA,
+}
+
+
 @dataclass(frozen=True)
 class PanImg:
     pk: UUID
@@ -62,3 +75,177 @@ class PanImgResult:
 class PostProcessorResult:
     new_image_files: Set[PanImgFile]
     new_folders: Set[PanImgFolder]
+
+
+class SimpleITKImage(BaseModel):  # builder result wrapping a SimpleITK image
+    image: Image
+
+    name: str
+    consumed_files: Set[Path]
+
+    spacing_valid: bool  # when False, the voxel_*_mm properties return None
+
+    class Config:
+        arbitrary_types_allowed = True  # needed for the SimpleITK Image field
+        allow_mutation = False
+
+    @property
+    def width(self) -> int:
+        return int(self.image.GetWidth())
+
+    @property
+    def height(self) -> int:
+        return int(self.image.GetHeight())
+
+    @property
+    def depth(self) -> Optional[int]:
+        try:
+            depth = int(self.image.GetDepth())
+        except (RuntimeError, ValueError):
+            return None
+
+        return depth or None  # a depth of 0 is reported as None
+
+    @property
+    def window_center(self) -> Optional[float]:
+        try:
+            return float(self.image.GetMetaData("WindowCenter"))
+        except (RuntimeError, ValueError):  # no usable value -> None
+            return None
+
+    @property
+    def window_width(self) -> Optional[float]:
+        try:
+            return float(self.image.GetMetaData("WindowWidth"))
+        except (RuntimeError, ValueError):  # no usable value -> None
+            return None
+
+    @property
+    def timepoints(self) -> Optional[int]:
+        if self.image.GetDimension() == 4:  # only 4D images have timepoints
+            return int(self.image.GetSize()[-1])  # size of the last axis
+        else:
+            return None
+
+    @validator("image")
+    def check_color_space(cls, image: Image):  # noqa: B902, N805
+        cs = image.GetNumberOfComponentsPerPixel()
+        if cs not in ITK_COLOR_SPACE_MAP:  # no ColorSpace for this count
+            raise ValueError(f"Unknown color space for MetaIO image: {cs}")
+        return image
+
+    @property
+    def color_space(self) -> ColorSpace:
+        return ITK_COLOR_SPACE_MAP[self.image.GetNumberOfComponentsPerPixel()]
+
+    @property
+    def voxel_width_mm(self) -> Optional[float]:
+        if self.spacing_valid:
+            return float(self.image.GetSpacing()[0])
+        else:
+            return None
+
+    @property
+    def voxel_height_mm(self) -> Optional[float]:
+        if self.spacing_valid:
+            return float(self.image.GetSpacing()[1])
+        else:
+            return None
+
+    @property
+    def voxel_depth_mm(self) -> Optional[float]:
+        if self.spacing_valid:
+            try:
+                return float(self.image.GetSpacing()[2])
+            except IndexError:  # spacing has fewer than three entries
+                return None
+        else:
+            return None
+
+    def save(self, output_directory: Path) -> Tuple[PanImg, Set[PanImgFile]]:
+        pk = uuid4()  # fresh id; also used as the output file stem
+
+        work_dir = Path(output_directory) / self.name
+        work_dir.mkdir()  # no exist_ok: raises if the directory exists
+
+        new_image = PanImg(
+            pk=pk,
+            name=self.name,
+            width=self.width,
+            height=self.height,
+            depth=self.depth,
+            window_center=self.window_center,
+            window_width=self.window_width,
+            timepoints=self.timepoints,
+            resolution_levels=None,
+            color_space=self.color_space,
+            voxel_width_mm=self.voxel_width_mm,
+            voxel_height_mm=self.voxel_height_mm,
+            voxel_depth_mm=self.voxel_depth_mm,
+        )
+
+        WriteImage(
+            image=self.image,
+            fileName=str(work_dir.absolute() / f"{pk}.mha"),
+            useCompression=True,
+        )
+
+        new_files = set()
+        for file in work_dir.iterdir():  # every file now in work_dir
+            new_file = PanImgFile(  # recorded with image type MHD
+                image_id=pk, image_type=ImageType.MHD, file=file,
+            )
+            new_files.add(new_file)
+
+        return new_image, new_files
+
+
+class TIFFImage(BaseModel):  # builder result for a TIFF file on disk
+    file: Path  # source file; save() copies it, the original is kept
+
+    name: str
+    consumed_files: Set[Path]
+
+    width: int
+    height: int
+    voxel_width_mm: float
+    voxel_height_mm: float
+    resolution_levels: int
+    color_space: ColorSpace
+
+    class Config:
+        allow_mutation = False
+
+    def save(self, output_directory: Path) -> Tuple[PanImg, Set[PanImgFile]]:
+        pk = uuid4()  # fresh id; also used as the output file stem
+        work_dir = Path(output_directory) / self.name  # accept str too
+        work_dir.mkdir()  # no exist_ok: raises if the directory exists
+        output_file = work_dir / f"{pk}{self.file.suffix}"
+
+        new_image = PanImg(
+            pk=pk,
+            name=self.name,
+            width=self.width,
+            height=self.height,
+            depth=1,
+            resolution_levels=self.resolution_levels,
+            color_space=self.color_space,
+            voxel_width_mm=self.voxel_width_mm,
+            voxel_height_mm=self.voxel_height_mm,
+            voxel_depth_mm=None,
+            timepoints=None,
+            window_center=None,
+            window_width=None,
+        )
+
+        shutil.copy(src=self.file, dst=output_file)
+
+        new_files = {
+            PanImgFile(
+                image_id=pk,
+                image_type=ImageType.TIFF,
+                file=output_file.absolute(),
+            )
+        }
+
+        return new_image, new_files
diff --git a/panimg/panimg.py b/panimg/panimg.py
index 4b22a8c..278c7be 100644
--- a/panimg/panimg.py
+++ b/panimg/panimg.py
@@ -1,7 +1,8 @@
 from collections import defaultdict
 from pathlib import Path
-from typing import DefaultDict, Iterable, List, Optional, Set
+from typing import DefaultDict, Dict, Iterable, List, Optional, Set
 
+from panimg.exceptions import UnconsumedFilesException
 from panimg.image_builders import DEFAULT_IMAGE_BUILDERS
 from panimg.models import (
     PanImg,
@@ -20,7 +21,6 @@ def convert(
     output_directory: Path,
     builders: Optional[Iterable[ImageBuilder]] = None,
     post_processors: Optional[Iterable[PostProcessor]] = None,
-    created_image_prefix: str = "",
 ) -> PanImgResult:
     new_images: Set[PanImg] = set()
     new_image_files: Set[PanImgFile] = set()
@@ -37,7 +37,6 @@ def convert(
         new_image_files=new_image_files,
         new_folders=new_folders,
         file_errors=file_errors,
-        created_image_prefix=created_image_prefix,
     )
 
     result = _post_process(
@@ -54,7 +53,7 @@ def convert(
         new_image_files=new_image_files,
         new_folders=new_folders,
         consumed_files=consumed_files,
-        file_errors={**file_errors},
+        file_errors=file_errors,
     )
 
 
@@ -68,7 +67,6 @@ def _convert_directory(
     new_image_files: Set[PanImgFile],
     new_folders: Set[PanImgFolder],
     file_errors: DefaultDict[Path, List[str]],
-    created_image_prefix: str = "",
 ):
     input_directory = Path(input_directory).resolve()
     output_directory = Path(output_directory).resolve()
@@ -87,16 +85,15 @@ def _convert_directory(
                 new_image_files=new_image_files,
                 new_folders=new_folders,
                 file_errors=file_errors,
-                created_image_prefix=created_image_prefix,
             )
         elif o.is_file():
             files.add(o)
 
     for builder in builders:
-        builder_result = builder(
+        builder_result = _build_files(
+            builder=builder,
             files=files - consumed_files,
             output_directory=output_directory,
-            created_image_prefix=created_image_prefix,
         )
 
         new_images |= builder_result.new_images
@@ -108,6 +105,36 @@ def _convert_directory(
             file_errors[filepath].extend(errors)
 
 
+def _build_files(
+    *, builder: ImageBuilder, files: Set[Path], output_directory: Path,
+) -> PanImgResult:  # run one builder and save every image it yields
+    new_images: Set[PanImg] = set()
+    new_image_files: Set[PanImgFile] = set()
+    consumed_files: Set[Path] = set()
+    file_errors: Dict[Path, List[str]] = {}
+
+    try:
+        for result in builder(files=files):
+            n_image, n_image_files = result.save(
+                output_directory=output_directory
+            )
+
+            new_images.add(n_image)
+            new_image_files |= n_image_files
+            consumed_files |= result.consumed_files
+
+    except UnconsumedFilesException as e:  # images saved so far are kept
+        file_errors = e.file_errors
+
+    return PanImgResult(
+        new_images=new_images,
+        new_image_files=new_image_files,
+        new_folders=set(),  # this step never reports new folders
+        consumed_files=consumed_files,
+        file_errors=file_errors,
+    )
+
+
 def _post_process(
     *, image_files: Set[PanImgFile], post_processors: Iterable[PostProcessor]
 ) -> PostProcessorResult:
diff --git a/panimg/post_processors/__init__.py b/panimg/post_processors/__init__.py
index 0447fc3..cd7fa98 100644
--- a/panimg/post_processors/__init__.py
+++ b/panimg/post_processors/__init__.py
@@ -1,3 +1,8 @@
 from panimg.post_processors.tiff_to_dzi import tiff_to_dzi
 
 DEFAULT_POST_PROCESSORS = [tiff_to_dzi]
+
+__all__ = [
+    "tiff_to_dzi",
+    "DEFAULT_POST_PROCESSORS",
+]
diff --git a/panimg/settings.py b/panimg/settings.py
index fed94dd..8d142c4 100644
--- a/panimg/settings.py
+++ b/panimg/settings.py
@@ -1,5 +1,2 @@
 # Tile size in pixels to be used when creating dzi for tif files
 DZI_TILE_SIZE = 2560
-
-# Internal format to use for metaimages
-ITK_INTERNAL_FILE_FORMAT = "mha"
diff --git a/panimg/types.py b/panimg/types.py
index da28a35..9747f9f 100644
--- a/panimg/types.py
+++ b/panimg/types.py
@@ -1,19 +1,20 @@
 from pathlib import Path
-from typing import Set
+from typing import Iterator, Set, Union
 
 from typing_extensions import Protocol  # for py37 support
 
-from panimg.models import PanImgFile, PanImgResult, PostProcessorResult
+from panimg.models import (
+    PanImgFile,
+    PostProcessorResult,
+    SimpleITKImage,
+    TIFFImage,
+)
 
 
 class ImageBuilder(Protocol):
     def __call__(
-        self,
-        *,
-        files: Set[Path],
-        output_directory: Path,
-        created_image_prefix: str,
-    ) -> PanImgResult:
+        self, *, files: Set[Path]
+    ) -> Union[Iterator[SimpleITKImage], Iterator[TIFFImage]]:
         ...
 
 
diff --git a/pyproject.toml b/pyproject.toml
index e738a25..72ee678 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,7 @@ commands =
 
 [tool.poetry]
 name = "panimg"
-version = "0.1.0"
+version = "0.2.0"
 description = "Conversion of medical images to MHA and TIFF."
 license = "Apache-2.0"
 authors = ["James Meakin <12661555+jmsmkn@users.noreply.github.com>"]
diff --git a/tests/test_default_image_builders.py b/tests/test_default_image_builders.py
new file mode 100644
index 0000000..65c86b1
--- /dev/null
+++ b/tests/test_default_image_builders.py
@@ -0,0 +1,60 @@
+import pytest
+
+from panimg.exceptions import UnconsumedFilesException
+from panimg.image_builders import DEFAULT_IMAGE_BUILDERS
+from panimg.panimg import _build_files
+from tests import RESOURCE_PATH
+
+EXPECTED_ERROR_MESSAGE = {  # keyed by the builder function's __name__
+    "image_builder_dicom": [
+        (
+            "Dicom image builder: File is missing DICOM File Meta "
+            "Information header or the 'DICM' prefix is missing from the "
+            "header. Use force=True to force reading."
+        )
+    ],
+    "image_builder_fallback": [
+        "Fallback image builder: Not a valid image file"
+    ],
+    "image_builder_mhd": ["Mhd image builder: Not an ITK file"],
+    "image_builder_nifti": ["NifTI image builder: Not a NifTI image file"],
+    "image_builder_tiff": [
+        "Could not open file with tifffile.",
+        "Could not open file with OpenSlide.",
+        "Validation error: Not a valid tif: Image width could not be determined.",
+    ],
+}
+
+
+@pytest.mark.parametrize("builder", DEFAULT_IMAGE_BUILDERS)
+def test_image_builder_raises_unconsumed_file_exception(tmp_path, builder):
+    f = tmp_path / "image.jpg"
+    f.write_bytes(b"")  # empty file: every builder is expected to reject it
+
+    with pytest.raises(UnconsumedFilesException) as e:
+        _ = [*builder(files={f})]  # iterate the builder's result fully
+
+    assert {**e.value.file_errors} == {  # copy into a plain dict to compare
+        f: EXPECTED_ERROR_MESSAGE[builder.__name__]
+    }
+
+
+@pytest.mark.parametrize("builder", DEFAULT_IMAGE_BUILDERS)
+def test_one_image_consumed_by_each_builder(tmp_path, builder):
+    # Each of these images should only be consumed by one builder
+    # The other files should error out
+    files = {
+        *(RESOURCE_PATH / "dicom").glob("*.dcm"),
+        RESOURCE_PATH / "test_rgb.png",
+        RESOURCE_PATH / "image10x10x10.mha",
+        RESOURCE_PATH / "image10x11x12.nii.gz",
+        RESOURCE_PATH / "valid_tiff.tif",
+    }
+
+    result = _build_files(
+        builder=builder, files=files, output_directory=tmp_path
+    )
+
+    assert len(result.new_images) == 1
+    assert len(result.consumed_files) == len(files) - len(result.file_errors)
+    assert result.consumed_files | {*result.file_errors} == files  # no gaps
diff --git a/tests/test_dicom.py b/tests/test_dicom.py
index 496e16c..8e0fa0d 100644
--- a/tests/test_dicom.py
+++ b/tests/test_dicom.py
@@ -14,6 +14,7 @@
     image_builder_dicom,
 )
 from panimg.image_builders.metaio_utils import parse_mh_header
+from panimg.panimg import _build_files
 from tests import RESOURCE_PATH
 
 DICOM_DIR = RESOURCE_PATH / "dicom"
@@ -21,7 +22,7 @@
 
 def test_get_headers_by_study():
     files = [Path(d[0]).joinpath(f) for d in os.walk(DICOM_DIR) for f in d[2]]
-    studies, _ = _get_headers_by_study(files)
+    studies = _get_headers_by_study(files, defaultdict(list))
     assert len(studies) == 1
     for key in studies:
         assert [str(x["file"]) for x in studies[key]["headers"]] == [
@@ -32,13 +33,13 @@ def test_get_headers_by_study():
         files = [Path(root).joinpath(f) for f in files]
         break
 
-    studies, _ = _get_headers_by_study(files)
+    studies = _get_headers_by_study(files, defaultdict(list))
     assert len(studies) == 0
 
 
 def test_validate_dicom_files():
     files = [Path(d[0]).joinpath(f) for d in os.walk(DICOM_DIR) for f in d[2]]
-    studies, _ = _validate_dicom_files(files)
+    studies = _validate_dicom_files(files, defaultdict(list))
     assert len(studies) == 1
     for study in studies:
         headers = study.headers
@@ -46,12 +47,12 @@ def test_validate_dicom_files():
         assert study.n_slices == 4
     with mock.patch(
         "panimg.image_builders.dicom._get_headers_by_study",
-        return_value=(
-            {"foo": {"headers": headers[1:], "file": "bar", "index": 1}},
-            defaultdict(list),
-        ),
+        return_value={
+            "foo": {"headers": headers[1:], "file": "bar", "index": 1}
+        },
     ):
-        studies, errors = _validate_dicom_files(files)
+        errors = defaultdict(list)
+        studies = _validate_dicom_files(files, errors)
         assert len(studies) == 0
         for header in headers[1:]:
             assert errors[header["file"]] == [
@@ -61,7 +62,9 @@ def test_validate_dicom_files():
 
 def test_image_builder_dicom_4dct(tmpdir):
     files = {Path(d[0]).joinpath(f) for d in os.walk(DICOM_DIR) for f in d[2]}
-    result = image_builder_dicom(files=files, output_directory=tmpdir)
+    result = _build_files(
+        builder=image_builder_dicom, files=files, output_directory=tmpdir
+    )
     assert result.consumed_files == {
         Path(DICOM_DIR).joinpath(f"{x}.dcm") for x in range(1, 77)
     }
@@ -124,7 +127,9 @@ def test_dicom_rescaling(folder, element_type, tmpdir):
         for d in os.walk(RESOURCE_PATH / folder)
         for f in d[2]
     ]
-    result = image_builder_dicom(files=files, output_directory=tmpdir)
+    result = _build_files(
+        builder=image_builder_dicom, files=files, output_directory=tmpdir
+    )
 
     assert len(result.new_image_files) == 1
     mha_file_obj = [
@@ -141,7 +146,9 @@ def test_dicom_window_level(tmpdir):
         for d in os.walk(RESOURCE_PATH / "dicom")
         for f in d[2]
     }
-    result = image_builder_dicom(files=files, output_directory=tmpdir)
+    result = _build_files(
+        builder=image_builder_dicom, files=files, output_directory=tmpdir
+    )
 
     assert len(result.new_image_files) == 1
     mha_file_obj = [
diff --git a/tests/test_fallback.py b/tests/test_fallback.py
index a5c4552..f2d5241 100644
--- a/tests/test_fallback.py
+++ b/tests/test_fallback.py
@@ -10,6 +10,7 @@
     image_builder_fallback,
 )
 from panimg.models import ColorSpace
+from panimg.panimg import _build_files
 from tests import RESOURCE_PATH
 
 
@@ -28,7 +29,12 @@ def test_image_builder_fallback(tmpdir, src, colorspace):
     shutil.copy(str(src), str(dest))
     files = {Path(d[0]).joinpath(f) for d in os.walk(tmpdir) for f in d[2]}
     with TemporaryDirectory() as output:
-        result = image_builder_fallback(files=files, output_directory=output)
+        result = _build_files(
+            builder=image_builder_fallback,
+            files=files,
+            output_directory=output,
+        )
+
     assert result.consumed_files == {dest}
     assert len(result.new_images) == 1
     image = result.new_images.pop()
@@ -45,7 +51,11 @@ def test_image_builder_fallback_corrupt_file(tmpdir):
 
     files = {Path(d[0]).joinpath(f) for d in os.walk(tmpdir) for f in d[2]}
     with TemporaryDirectory() as output:
-        result = image_builder_fallback(files=files, output_directory=output)
+        result = _build_files(
+            builder=image_builder_fallback,
+            files=files,
+            output_directory=output,
+        )
 
     assert result.file_errors == {
         dest: [format_error("Not a valid image file")],
diff --git a/tests/test_nifti.py b/tests/test_nifti.py
index f5dbd54..287315c 100644
--- a/tests/test_nifti.py
+++ b/tests/test_nifti.py
@@ -7,6 +7,7 @@
 
 from panimg.image_builders.nifti import image_builder_nifti
 from panimg.models import ColorSpace
+from panimg.panimg import _build_files
 from tests import RESOURCE_PATH
 
 
@@ -24,8 +25,10 @@ def test_image_builder_nifti(tmpdir_factory, src: Path):
 
     files = {*dest.glob("*")}
 
-    result = image_builder_nifti(
-        files=files, output_directory=tmpdir_factory.mktemp("output")
+    result = _build_files(
+        builder=image_builder_nifti,
+        files=files,
+        output_directory=tmpdir_factory.mktemp("output"),
     )
 
     assert result.consumed_files == files
@@ -46,6 +49,8 @@ def test_image_builder_with_other_file_extension(tmpdir):
     shutil.copy(RESOURCE_PATH / dest.name, dest)
     files = {Path(d[0]).joinpath(f) for d in os.walk(tmpdir) for f in d[2]}
     with TemporaryDirectory() as output:
-        result = image_builder_nifti(files=files, output_directory=output)
+        result = _build_files(
+            builder=image_builder_nifti, files=files, output_directory=output
+        )
     assert result.consumed_files == set()
     assert len(result.new_images) == 0
diff --git a/tests/test_panimg.py b/tests/test_panimg.py
index 29afbbd..de00100 100644
--- a/tests/test_panimg.py
+++ b/tests/test_panimg.py
@@ -2,4 +2,4 @@
 
 
 def test_version():
-    assert __version__ == "0.1.0"
+    assert __version__ == "0.2.0"
diff --git a/tests/test_tiff.py b/tests/test_tiff.py
index e8bc723..882f9c0 100644
--- a/tests/test_tiff.py
+++ b/tests/test_tiff.py
@@ -15,7 +15,6 @@
 from panimg.image_builders.tiff import (
     GrandChallengeTiffFile,
     _convert,
-    _create_tiff_image_entry,
     _extract_tags,
     _get_color_space,
     _get_mrxs_files,
@@ -23,6 +22,7 @@
     image_builder_tiff,
 )
 from panimg.models import ColorSpace
+from panimg.panimg import _build_files
 from tests import RESOURCE_PATH
 
 
@@ -193,18 +193,16 @@ def test_load_with_open_slide(source_dir, filename, tmpdir_factory):
 
 @pytest.mark.parametrize(
     "resource, expected_error_message, voxel_size",
-    [(RESOURCE_PATH / "valid_tiff.tif", "", [1, 1, None])],
+    [(RESOURCE_PATH / "valid_tiff.tif", "", [1, 1])],
 )
 def test_tiff_image_entry_creation(
     resource, expected_error_message, voxel_size
 ):
     error_message = ""
-    image_entry = None
     gc_file = GrandChallengeTiffFile(resource)
     try:
         tiff_file = tifffile.TiffFile(str(gc_file.path.absolute()))
         gc_file = _extract_tags(gc_file=gc_file, pages=tiff_file.pages)
-        image_entry = _create_tiff_image_entry(tiff_file=gc_file)
     except ValidationError as e:
         error_message = str(e)
 
@@ -218,20 +216,17 @@ def test_tiff_image_entry_creation(
         tiff_file = tiff_lib.TiffFile(str(resource.absolute()))
         tiff_tags = tiff_file.pages[0].tags
 
-        assert image_entry.name == resource.name
-        assert image_entry.width == tiff_tags["ImageWidth"].value
-        assert image_entry.height == tiff_tags["ImageLength"].value
-        assert image_entry.depth == 1
-        assert image_entry.resolution_levels == len(tiff_file.pages)
-        assert image_entry.color_space == _get_color_space(
+        assert gc_file.path.name == resource.name
+        assert gc_file.image_width == tiff_tags["ImageWidth"].value
+        assert gc_file.image_height == tiff_tags["ImageLength"].value
+        assert gc_file.resolution_levels == len(tiff_file.pages)
+        assert gc_file.color_space == _get_color_space(
             color_space_string=str(
                 tiff_tags["PhotometricInterpretation"].value
             )
         )
-        assert image_entry.voxel_width_mm == approx(voxel_size[0])
-        assert image_entry.voxel_height_mm == approx(voxel_size[1])
-        assert image_entry.voxel_depth_mm == voxel_size[2]
-        assert image_entry.pk == gc_file.pk
+        assert gc_file.voxel_width_mm == approx(voxel_size[0])
+        assert gc_file.voxel_height_mm == approx(voxel_size[1])
 
 
 # Integration test of all features being accessed through the image builder
@@ -247,8 +242,8 @@ def test_image_builder_tiff(tmpdir_factory,):
     )
     files = [Path(d[0]).joinpath(f) for d in os.walk(temp_dir) for f in d[2]]
 
-    image_builder_result = image_builder_tiff(
-        files=files, output_directory=output_dir
+    image_builder_result = _build_files(
+        builder=image_builder_tiff, files=files, output_directory=output_dir
     )
 
     expected_files = [
@@ -304,7 +299,7 @@ def test_handle_complex_files(tmpdir_factory):
     ), "Remove work-around calculation of xres and yres in _convert_to_tiff function."
 
 
-@pytest.mark.skip(
+@pytest.mark.xfail(
     reason="skip for now as we don't want to upload a large testset"
 )
 @pytest.mark.parametrize(
@@ -323,7 +318,11 @@ def test_convert_to_tiff(resource, tmpdir_factory):
 
     input_files = {f for f in resource.glob("*") if f.is_file()}
 
-    result = image_builder_tiff(files=input_files, output_directory=output_dir)
+    result = _build_files(
+        builder=image_builder_tiff,
+        files=input_files,
+        output_directory=output_dir,
+    )
 
     assert len(result.new_images) == 1
     assert len(result.new_image_files) == 1
@@ -338,8 +337,10 @@ def test_error_handling(tmpdir_factory):
     shutil.copytree(RESOURCE_PATH / "complex_tiff", temp_dir)
     files = {Path(d[0]).joinpath(f) for d in os.walk(temp_dir) for f in d[2]}
 
-    image_builder_result = image_builder_tiff(
-        files=files, output_directory=Path(tmpdir_factory.mktemp("output"))
+    image_builder_result = _build_files(
+        builder=image_builder_tiff,
+        files=files,
+        output_directory=Path(tmpdir_factory.mktemp("output")),
     )
 
     assert len(image_builder_result.file_errors) == 14
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 0aa80c6..c6c924d 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,13 +1,11 @@
 from pathlib import Path
-from tempfile import TemporaryDirectory
 
 import SimpleITK
 import pytest
 from pytest import approx
 
 from panimg.image_builders.metaio_utils import load_sitk_image
-from panimg.image_builders.utils import convert_itk_to_internal
-from panimg.models import ColorSpace, PanImg
+from panimg.models import ColorSpace, SimpleITKImage
 from tests import RESOURCE_PATH
 
 
@@ -29,7 +27,9 @@
     ),
 )
 def test_convert_itk_to_internal(image: Path):
-    def assert_img_properties(img: SimpleITK.Image, internal_image: PanImg):
+    def assert_img_properties(
+        img: SimpleITK.Image, internal_image: SimpleITKImage
+    ):
         color_space = {
             1: ColorSpace.GRAY,
             3: ColorSpace.RGB,
@@ -54,13 +54,12 @@ def assert_img_properties(img: SimpleITK.Image, internal_image: PanImg):
         assert internal_image.height == img.GetHeight()
         assert internal_image.voxel_width_mm == approx(img.GetSpacing()[0])
         assert internal_image.voxel_height_mm == approx(img.GetSpacing()[1])
-        assert internal_image.resolution_levels is None
 
     img_ref = load_sitk_image(image)
-    with TemporaryDirectory() as output:
-        internal_image = convert_itk_to_internal(
-            simple_itk_image=img_ref,
-            output_directory=Path(output),
-            name=image.name,
-        )
-    assert_img_properties(img_ref, internal_image[0])
+    internal_image = SimpleITKImage(
+        name=image.name,
+        image=img_ref,
+        consumed_files=set(),
+        spacing_valid=True,
+    )
+    assert_img_properties(img_ref, internal_image)