Skip to content

Commit

Permalink
This might fix masks and boxes
Browse files Browse the repository at this point in the history
- Compute bboxes from the masks no matter what
- Don't claim to support masks-already-aligned-with-boxes
  • Loading branch information
viklofg committed Apr 19, 2024
1 parent 27bdbdb commit 6f0e069
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 23 deletions.
35 changes: 14 additions & 21 deletions src/htrflow_core/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
from typing import Any, Callable, Literal, Optional, Sequence, TypeAlias

import numpy as np

import logging
from htrflow_core.utils import draw, geometry, imgproc
from htrflow_core.utils.geometry import Bbox, Mask, Polygon


LabelType: TypeAlias = Literal["text", "class", "conf"] | None

logger = logging.getLogger(__name__)

class Segment:
"""Segment class
Expand Down Expand Up @@ -54,10 +55,9 @@ def __init__(
bbox: The segment's bounding box, as either a `geometry.Bbox`
instance or as a (xmin, ymin, xmax, ymax) tuple. Required
if `mask` and `polygon` are None. Defaults to None.
mask: The segment's mask, either relative to the bounding box
or relative to the original input image. If both `bbox`
and `polygon` are None, `mask` is required and must be the
same shape as the original input image. Defaults to None.
mask: The segment's mask relative to the original input image.
Required if both `polygon` and `bbox` are None. Defaults
to None.
score: Segment confidence score. Defaults to None.
class_label: Segment class label. Defaults to None.
polygon: A polygon defining the segment, relative to the input
Expand All @@ -81,13 +81,14 @@ def __init__(
# Only bbox is given: Leave the polygon and mask as None.
pass

case (None, _, None):
# Only mask is given: In this case, the mask is assumed to be aligned
# with the original image, i.e., it has the same height and width as
# the input image. The other attributes (bbox and polygon) can in such
# case be inferred from the mask. After computing them, the mask is
# converted to a local mask.
bbox = geometry.mask2bbox(mask)
case (_, _, None):
# Mask (and possibly bbox) is given: The mask is assumed to be aligned
# with the original image. The bounding box is discarded (if given) and
# recomputed from the mask. A polygon is also inferred from the mask.
# The mask is then converted to a local mask.
mask_bbox = geometry.mask2bbox(mask)
if bbox is not None and mask_bbox != bbox:
logger.warning(f"Resizing the given bounding box to match the given mask ({bbox} => {mask_bbox})")
polygon = geometry.mask2polygon(mask)
mask = imgproc.crop(mask, bbox)

Expand All @@ -96,14 +97,6 @@ def __init__(
# leave the mask as None.
bbox = geometry.Polygon(polygon).bbox()

case (_, _, None):
# Both bbox and mask are given: Create a polygon from the mask.
polygon = geometry.mask2polygon(mask)
if mask.shape[:2] == (bbox.height, bbox.width):
polygon = polygon.move(bbox.p1)
else:
mask = imgproc.crop(mask, bbox)

self.bbox = bbox
self.polygon = polygon
self.mask = mask
Expand Down Expand Up @@ -132,7 +125,7 @@ def global_mask(self, orig_shape: tuple[int, int] | None = None) -> Optional[Mas

x1, y1, x2, y2 = self.bbox
mask = np.zeros(orig_shape, dtype=np.uint8)
mask[y1 : y2 + 1, x1 : x2 + 1] = self.mask
mask[y1:y2, x1:x2] = self.mask
return mask

@property
Expand Down
2 changes: 1 addition & 1 deletion src/htrflow_core/utils/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def masks2polygons(masks: Iterable[Mask], epsilon=0.005) -> Iterable[Polygon]:
def mask2bbox(mask: Mask) -> Bbox:
"""Convert mask to bounding box

Builds a `Bbox` from the smallest and largest column (x) and row (y)
indices at which `mask` is non-zero. The `np.where` result is unpacked
into exactly two index arrays, so a 2-D mask is expected.
"""
# Row (y) and column (x) indices of every non-zero mask element.
y, x = np.where(mask != 0)
# NOTE(review): two `return` lines follow because this is a diff view that
# reports one deletion and one addition for this file; presumably the first
# is the removed line and the second its replacement -- confirm.
return Bbox(np.min(x).item(), np.min(y).item(), np.max(x).item(), np.max(y).item())
# This variant adds 1 to both maxima; presumably so the upper bounds are
# exclusive and line up with half-open slicing such as `image[y1:y2, x1:x2]`
# -- confirm against the callers.
return Bbox(np.min(x).item(), np.min(y).item(), np.max(x).item()+1, np.max(y).item()+1)


def polygons2masks(mask: Mask, polygons: Iterable[Polygon]) -> Iterable[Mask]:
Expand Down
2 changes: 1 addition & 1 deletion src/htrflow_core/utils/imgproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def crop(image: np.ndarray, bbox: Bbox) -> np.ndarray:
bbox: The bounding box
"""
x1, y1, x2, y2 = bbox
return image[y1 : y2 + 1, x1 : x2 + 1]
return image[y1:y2, x1:x2]


def mask(
Expand Down

0 comments on commit 6f0e069

Please sign in to comment.