Skip to content

Commit

Permalink
Fix several problems with filename handling
Browse files Browse the repository at this point in the history
- Starting in 1.2.0, OpenSlide() and OpenSlide.detect_format() have failed
  to accept filename arguments formatted as bytes because str(b'abc') ==
  "b'abc'".  In addition, filename arguments with invalid types (such as
  None) have been stringified and passed to OpenSlide, rather than raising
  an exception during conversion; we even had tests for this (!).

- lowlevel has always encoded filename arguments to UTF-8, but on
  non-Windows it should have used the Python filesystem encoding instead
  (usually UTF-8 but not always).  On Windows, OpenSlide 4.0.0+ expects
  UTF-8 rather than arbitrary bytes.  (OpenSlide < 4.0.0 expects the system
  codepage, which isn't very useful in practice because of its limited
  character set, so we ignore that case for now.)

- Type hints did not allow filename arguments to be bytes, nor did they
  allow os.PathLike subclasses which were not pathlib.Path (such as
  pathlib.PurePath).

Accept str, bytes, or os.PathLike for all filename arguments, and properly
convert them to bytes for OpenSlide.

Fixes: 98c11bd ("Add support for pathlib.Path instances (#123)")
Fixes: 5644229 ("tests: test passing invalid types to OpenSlide constructor")
Signed-off-by: Benjamin Gilbert <[email protected]>
  • Loading branch information
bgilbert committed Oct 20, 2024
1 parent 6a75bbd commit 8888133
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 20 deletions.
17 changes: 8 additions & 9 deletions openslide/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from __future__ import annotations

from io import BytesIO
from pathlib import Path
from types import TracebackType
from typing import Iterator, Literal, Mapping, TypeVar

Expand Down Expand Up @@ -82,7 +81,7 @@ def __exit__(
return False

@classmethod
def detect_format(cls, filename: str | Path) -> str | None:
def detect_format(cls, filename: lowlevel.Filename) -> str | None:
"""Return a string describing the format of the specified file.
If the file format is not recognized, return None."""
Expand Down Expand Up @@ -189,23 +188,23 @@ class OpenSlide(AbstractSlide):
operations on the OpenSlide object, other than close(), will fail.
"""

def __init__(self, filename: str | Path):
def __init__(self, filename: lowlevel.Filename):
"""Open a whole-slide image."""
AbstractSlide.__init__(self)
self._filename = filename
self._osr = lowlevel.open(str(filename))
self._osr = lowlevel.open(filename)
if lowlevel.read_icc_profile.available:
self._profile = lowlevel.read_icc_profile(self._osr)

def __repr__(self) -> str:
return f'{self.__class__.__name__}({self._filename!r})'

@classmethod
def detect_format(cls, filename: str | Path) -> str | None:
def detect_format(cls, filename: lowlevel.Filename) -> str | None:
"""Return a string describing the format vendor of the specified file.
If the file format is not recognized, return None."""
return lowlevel.detect_vendor(str(filename))
return lowlevel.detect_vendor(filename)

def close(self) -> None:
"""Close the OpenSlide object."""
Expand Down Expand Up @@ -358,7 +357,7 @@ def __repr__(self) -> str:
class ImageSlide(AbstractSlide):
"""A wrapper for a PIL.Image that provides the OpenSlide interface."""

def __init__(self, file: str | Path | Image.Image):
def __init__(self, file: lowlevel.Filename | Image.Image):
"""Open an image file.
file can be a filename or a PIL.Image."""
Expand All @@ -376,7 +375,7 @@ def __repr__(self) -> str:
return f'{self.__class__.__name__}({self._file_arg!r})'

@classmethod
def detect_format(cls, filename: str | Path) -> str | None:
def detect_format(cls, filename: lowlevel.Filename) -> str | None:
"""Return a string describing the format of the specified file.
If the file format is not recognized, return None."""
Expand Down Expand Up @@ -484,7 +483,7 @@ def set_cache(self, cache: OpenSlideCache) -> None:
pass


def open_slide(filename: str | Path) -> OpenSlide | ImageSlide:
def open_slide(filename: lowlevel.Filename) -> OpenSlide | ImageSlide:
"""Open a whole-slide or regular image.
Return an OpenSlide object for whole-slide images and an ImageSlide
Expand Down
35 changes: 29 additions & 6 deletions openslide/lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# openslide-python - Python bindings for the OpenSlide library
#
# Copyright (c) 2010-2013 Carnegie Mellon University
# Copyright (c) 2016-2023 Benjamin Gilbert
# Copyright (c) 2016-2024 Benjamin Gilbert
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of version 2.1 of the GNU Lesser General Public License
Expand Down Expand Up @@ -48,6 +48,7 @@
cdll,
)
from itertools import count
import os
import platform
from typing import TYPE_CHECKING, Any, Callable, Protocol, TypeVar, cast

Expand All @@ -56,7 +57,7 @@
from . import _convert

if TYPE_CHECKING:
# Python 3.10+ for ParamSpec
# Python 3.10+
from typing import ParamSpec, TypeAlias

from _convert import _Buffer
Expand Down Expand Up @@ -196,6 +197,28 @@ def from_param(cls, obj: _OpenSlideCache) -> _OpenSlideCache:
return obj


if TYPE_CHECKING:
# Python 3.10+
Filename: TypeAlias = str | bytes | os.PathLike[Any]


class _filename_p:
    """Argument converter that turns a filename into bytes for OpenSlide."""

    @classmethod
    def from_param(cls, obj: Filename) -> bytes:
        """Return the filename obj as bytes.

        obj may be a str, bytes, or os.PathLike.  os.fspath() and
        os.fsencode() raise TypeError for any other type."""
        if platform.system() != 'Windows':
            # Everywhere else, let Python apply its filesystem encoding
            return os.fsencode(obj)
        # OpenSlide 4.0.0+ requires UTF-8 on Windows; bytes are passed
        # through unchanged
        path = os.fspath(obj)
        return path.encode('UTF-8') if isinstance(path, str) else path


class _utf8_p:
"""Wrapper class to convert string arguments to bytes."""

Expand Down Expand Up @@ -350,14 +373,14 @@ def decorator(fn: Callable[_P, _T]) -> _Func[_P, _T]:


try:
detect_vendor: _Func[[str], str] = _func(
'openslide_detect_vendor', c_char_p, [_utf8_p], _check_string
detect_vendor: _Func[[Filename], str] = _func(
'openslide_detect_vendor', c_char_p, [_filename_p], _check_string
)
except AttributeError:
raise OpenSlideVersionError('3.4.0')

open: _Func[[str], _OpenSlide] = _func(
'openslide_open', c_void_p, [_utf8_p], _check_open
open: _Func[[Filename], _OpenSlide] = _func(
'openslide_open', c_void_p, [_filename_p], _check_open
)

close: _Func[[_OpenSlide], None] = _func(
Expand Down
Binary file added tests/fixtures/😐.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/fixtures/😐.svs
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_lowlevel_available(self):
if getattr(attr, '__module__', None) == '__future__':
continue
# ignore random imports
if hasattr(ctypes, name) or name in ('count', 'platform'):
if hasattr(ctypes, name) or name in ('count', 'os', 'platform'):
continue
self.assertTrue(
hasattr(attr, 'available'),
Expand Down
18 changes: 17 additions & 1 deletion tests/test_imageslide.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# openslide-python - Python bindings for the OpenSlide library
#
# Copyright (c) 2016-2023 Benjamin Gilbert
# Copyright (c) 2016-2024 Benjamin Gilbert
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of version 2.1 of the GNU Lesser General Public License
Expand All @@ -19,6 +19,7 @@

from __future__ import annotations

import sys
import unittest

from PIL import Image
Expand All @@ -44,6 +45,21 @@ def test_open_image(self):
self.assertEqual(osr.dimensions, (300, 250))
self.assertEqual(repr(osr), 'ImageSlide(%r)' % img)

@unittest.skipUnless(
    sys.getfilesystemencoding() == 'utf-8',
    'Python filesystem encoding is not UTF-8',
)
def test_unicode_path(self):
    """Check that ImageSlide accepts a non-ASCII filename.

    The filename is tried both as returned by file_path() and as its
    str() form.  Skipped unless Python's filesystem encoding is UTF-8."""
    path = file_path('😐.png')
    for arg in path, str(path):
        # subTest reports which argument form failed and keeps going
        with self.subTest(arg=arg):
            self.assertEqual(ImageSlide.detect_format(arg), 'PNG')
            # Use the context manager so the slide is closed again
            with ImageSlide(arg) as osr:
                self.assertEqual(osr.dimensions, (300, 250))

def test_unicode_path_bytes(self):
    """Check that ImageSlide accepts a UTF-8-encoded bytes filename."""
    arg = str(file_path('😐.png')).encode('UTF-8')
    self.assertEqual(ImageSlide.detect_format(arg), 'PNG')
    # Use the context manager so the slide is closed again
    with ImageSlide(arg) as osr:
        self.assertEqual(osr.dimensions, (300, 250))

def test_operations_on_closed_handle(self):
with Image.open(file_path('boxes.png')) as img:
osr = ImageSlide(img)
Expand Down
21 changes: 18 additions & 3 deletions tests/test_openslide.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# openslide-python - Python bindings for the OpenSlide library
#
# Copyright (c) 2016-2023 Benjamin Gilbert
# Copyright (c) 2016-2024 Benjamin Gilbert
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of version 2.1 of the GNU Lesser General Public License
Expand Down Expand Up @@ -61,12 +61,27 @@ def test_open(self):
self.assertRaises(
OpenSlideUnsupportedFormatError, lambda: OpenSlide('setup.py')
)
self.assertRaises(OpenSlideUnsupportedFormatError, lambda: OpenSlide(None))
self.assertRaises(OpenSlideUnsupportedFormatError, lambda: OpenSlide(3))
self.assertRaises(ArgumentError, lambda: OpenSlide(None))
self.assertRaises(ArgumentError, lambda: OpenSlide(3))
self.assertRaises(
OpenSlideUnsupportedFormatError, lambda: OpenSlide('unopenable.tiff')
)

@unittest.skipUnless(
    sys.getfilesystemencoding() == 'utf-8',
    'Python filesystem encoding is not UTF-8',
)
def test_unicode_path(self):
    """Check that OpenSlide accepts a non-ASCII filename.

    The filename is tried both as returned by file_path() and as its
    str() form.  Skipped unless Python's filesystem encoding is UTF-8."""
    path = file_path('😐.svs')
    for arg in path, str(path):
        # subTest reports which argument form failed and keeps going
        with self.subTest(arg=arg):
            self.assertEqual(OpenSlide.detect_format(arg), 'aperio')
            # Use the context manager so the slide handle is closed
            with OpenSlide(arg) as osr:
                self.assertEqual(osr.dimensions, (16, 16))

def test_unicode_path_bytes(self):
    """Check that OpenSlide accepts a UTF-8-encoded bytes filename."""
    arg = str(file_path('😐.svs')).encode('UTF-8')
    self.assertEqual(OpenSlide.detect_format(arg), 'aperio')
    # Use the context manager so the slide handle is closed
    with OpenSlide(arg) as osr:
        self.assertEqual(osr.dimensions, (16, 16))

def test_operations_on_closed_handle(self):
osr = OpenSlide(file_path('boxes.tiff'))
props = osr.properties
Expand Down

0 comments on commit 8888133

Please sign in to comment.