From c17341e239885f4a71c65c9f22945f241d320655 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Thu, 1 Aug 2019 20:14:45 +0200 Subject: [PATCH 1/5] adapt to utils moved to core, #49 --- ocrd_tesserocr/binarize.py | 6 +- ocrd_tesserocr/common.py | 226 ++----------------------------- ocrd_tesserocr/crop.py | 4 +- ocrd_tesserocr/deskew.py | 2 +- ocrd_tesserocr/recognize.py | 17 ++- ocrd_tesserocr/segment_region.py | 18 +-- 6 files changed, 33 insertions(+), 240 deletions(-) diff --git a/ocrd_tesserocr/binarize.py b/ocrd_tesserocr/binarize.py index fcaaa0b..e6db3c8 100644 --- a/ocrd_tesserocr/binarize.py +++ b/ocrd_tesserocr/binarize.py @@ -8,7 +8,8 @@ from ocrd_utils import ( getLogger, concat_padded, - MIMETYPE_PAGE + MIMETYPE_PAGE, + membername ) from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import ( @@ -24,8 +25,7 @@ from .common import ( image_from_page, image_from_segment, - save_image_file, - membername + save_image_file ) TOOL = 'ocrd-tesserocr-binarize' diff --git a/ocrd_tesserocr/common.py b/ocrd_tesserocr/common.py index 734f52e..5cfa7ae 100644 --- a/ocrd_tesserocr/common.py +++ b/ocrd_tesserocr/common.py @@ -1,151 +1,24 @@ from __future__ import absolute_import import os.path -import sys import io import numpy as np -from PIL import Image, ImageDraw, ImageStat from ocrd_models import OcrdExif -from ocrd_utils import getLogger, xywh_from_points, polygon_from_points +from ocrd_utils import ( + getLogger, + coordinates_of_segment, + xywh_from_points, + polygon_from_points, + image_from_polygon, + crop_image, +) LOG = getLogger('') # to be refined by importer -# to be refactored into core (as function in ocrd_utils): -def polygon_mask(image, coordinates): - """"Create a mask image of a polygon. - - Given a PIL.Image `image` (merely for dimensions), and - a numpy array `polygon` of relative coordinates into the image, - create a new image of the same size with black background, and - fill everything inside the polygon hull with white. - - Return the new PIL.Image. - """ - mask = Image.new('L', image.size, 0) - if isinstance(coordinates, np.ndarray): - coordinates = list(map(tuple, coordinates)) - ImageDraw.Draw(mask).polygon(coordinates, outline=1, fill=255) - return mask - -# to be refactored into core (as function in ocrd_utils): -def image_from_polygon(image, polygon): - """"Mask an image with a polygon. - - Given a PIL.Image `image` and a numpy array `polygon` - of relative coordinates into the image, put everything - outside the polygon hull to the background. Since `image` - is not necessarily binarized yet, determine the background - from the median color (instead of white). - - Return a new PIL.Image. - """ - mask = polygon_mask(image, polygon) - # create a background image from its median color - # (in case it has not been binarized yet): - # array = np.asarray(image) - # background = np.median(array, axis=[0, 1], keepdims=True) - # array = np.broadcast_to(background.astype(np.uint8), array.shape) - background = ImageStat.Stat(image).median[0] - new_image = Image.new('L', image.size, background) - new_image.paste(image, mask=mask) - return new_image - -# to be refactored into core (as function in ocrd_utils): -def crop_image(image, box=None): - """"Crop an image to a rectangle, filling with background. - - Given a PIL.Image `image` and a list `box` of the bounding - rectangle relative to the image, crop at the box coordinates, - filling everything outside `image` with the background. - (This covers the case where `box` indexes are negative or - larger than `image` width/height. PIL.Image.crop would fill - with black.) Since `image` is not necessarily binarized yet, - determine the background from the median color (instead of - white). - - Return a new PIL.Image. - """ - # todo: perhaps we should issue a warning if we encounter this - # (It should be invalid in PAGE-XML to extend beyond parents.) - if not box: - box = (0, 0, image.width, image.height) - xywh = xywh_from_bbox(*box) - background = ImageStat.Stat(image).median[0] - new_image = Image.new(image.mode, (xywh['w'], xywh['h']), - background) # or 'white' - new_image.paste(image, (-xywh['x'], -xywh['y'])) - return new_image - -# to be refactored into core (as function in ocrd_utils): -def rotate_coordinates(polygon, angle, orig=np.array([0, 0])): - """Apply a passive rotation transformation to the given coordinates. - - Given a numpy array `polygon` of points and a rotation `angle`, - as well as a numpy array `orig` of the center of rotation, - calculate the coordinate transform corresponding to the rotation - of the underlying image by `angle` degrees at `center` by - applying translation to the center, inverse rotation, - and translation from the center. - - Return a numpy array of the resulting polygon. - """ - angle = np.deg2rad(angle) - cos = np.cos(angle) - sin = np.sin(angle) - # active rotation: [[cos, -sin], [sin, cos]] - # passive rotation: [[cos, sin], [-sin, cos]] (inverse) - return orig + np.dot(polygon - orig, np.array([[cos, sin], [-sin, cos]]).transpose()) # to be refactored into core (as method of ocrd.workspace.Workspace): -def coordinates_of_segment(segment, parent_image, parent_xywh): - """Extract the relative coordinates polygon of a PAGE segment element. - - Given a Region / TextLine / Word / Glyph `segment` and - the PIL.Image of its parent Page / Region / TextLine / Word - along with its bounding box, calculate the relative coordinates - of the segment within the image. That is, shift all points from - the offset of the parent, and (in case the parent was rotated,) - rotate all points with the center of the image as origin. - - Return the rounded numpy array of the resulting polygon. - """ - # get polygon: - polygon = np.array(polygon_from_points(segment.get_Coords().points)) - # offset correction (shift coordinates to base of segment): - polygon -= np.array([parent_xywh['x'], parent_xywh['y']]) - # angle correction (rotate coordinates if image has been rotated): - if 'angle' in parent_xywh: - polygon = rotate_coordinates( - polygon, parent_xywh['angle'], - orig=np.array([0.5 * parent_image.width, - 0.5 * parent_image.height])) - return np.round(polygon).astype(np.int32) - -# to be refactored into core (as method of ocrd.workspace.Workspace): -def coordinates_for_segment(polygon, parent_image, parent_xywh): - """Convert a relative coordinates polygon to absolute. - - Given a numpy array `polygon` of points, and a parent PIL.Image - along with its bounding box to which the coordinates are relative, - calculate the absolute coordinates within the page. - That is, (in case the parent was rotated,) rotate all points in - opposite direction with the center of the image as origin, then - shift all points to the offset of the parent. - - Return the rounded numpy array of the resulting polygon. - """ - # angle correction (unrotate coordinates if image has been rotated): - if 'angle' in parent_xywh: - polygon = rotate_coordinates( - polygon, -parent_xywh['angle'], - orig=np.array([0.5 * parent_image.width, - 0.5 * parent_image.height])) - # offset correction (shift coordinates from base of segment): - polygon += np.array([parent_xywh['x'], parent_xywh['y']]) - return np.round(polygon).astype(np.uint32) - # to be refactored into core (as method of ocrd.workspace.Workspace): def image_from_page(workspace, page, page_id): """Extract the Page image from the workspace. @@ -328,86 +201,3 @@ def save_image_file(workspace, image, LOG.info('created file ID: %s, file_grp: %s, path: %s', file_id, file_grp, out.local_filename) return file_path - -# to be refactored into core (as function in ocrd_utils): -def bbox_from_points(points): - """Construct a numeric list representing a bounding box from polygon coordinates in page representation.""" - xys = [[int(p) for p in pair.split(',')] for pair in points.split(' ')] - return bbox_from_polygon(xys) - -# to be refactored into core (as function in ocrd_utils): -def points_from_bbox(minx, miny, maxx, maxy): - """Construct polygon coordinates in page representation from a numeric list representing a bounding box.""" - return "%i,%i %i,%i %i,%i %i,%i" % ( - minx, miny, maxx, miny, maxx, maxy, minx, maxy) - -# to be refactored into core (as function in ocrd_utils): -def xywh_from_bbox(minx, miny, maxx, maxy): - """Convert a bounding box from a numeric list to a numeric dict representation.""" - return { - 'x': minx, - 'y': miny, - 'w': maxx - minx, - 'h': maxy - miny, - } - -# to be refactored into core (as function in ocrd_utils): -def bbox_from_xywh(xywh): - """Convert a bounding box from a numeric dict to a numeric list representation.""" - return ( - xywh['x'], - xywh['y'], - xywh['x'] + xywh['w'], - xywh['y'] + xywh['h'] - ) - -# to be refactored into core (as function in ocrd_utils): -def points_from_polygon(polygon): - """Convert polygon coordinates from a numeric list representation to a page representation.""" - return " ".join("%i,%i" % (x, y) for x, y in polygon) - -# to be refactored into core (as function in ocrd_utils): -def xywh_from_polygon(polygon): - """Construct a numeric dict representing a bounding box from polygon coordinates in numeric list representation.""" - return xywh_from_bbox(*bbox_from_polygon(polygon)) - -# to be refactored into core (as function in ocrd_utils): -def polygon_from_xywh(xywh): - """Construct polygon coordinates in numeric list representation from numeric dict representing a bounding box.""" - return polygon_from_bbox(*bbox_from_xywh(xywh)) - -# to be refactored into core (as function in ocrd_utils): -def bbox_from_polygon(polygon): - """Construct a numeric list representing a bounding box from polygon coordinates in numeric list representation.""" - minx = sys.maxsize - miny = sys.maxsize - maxx = 0 - maxy = 0 - for xy in polygon: - if xy[0] < minx: - minx = xy[0] - if xy[0] > maxx: - maxx = xy[0] - if xy[1] < miny: - miny = xy[1] - if xy[1] > maxy: - maxy = xy[1] - return minx, miny, maxx, maxy - -# to be refactored into core (as function in ocrd_utils): -def polygon_from_bbox(minx, miny, maxx, maxy): - """Construct polygon coordinates in numeric list representation from a numeric list representing a bounding box.""" - return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]] - -# to be refactored into core (as function in ocrd_utils): -def polygon_from_x0y0x1y1(x0y0x1y1): - """Construct polygon coordinates in numeric list representation from a string list representing a bounding box.""" - minx = int(x0y0x1y1[0]) - miny = int(x0y0x1y1[1]) - maxx = int(x0y0x1y1[2]) - maxy = int(x0y0x1y1[3]) - return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]] - -def membername(class_, val): - """Convert a member variable/constant into a member name string.""" - return next((k for k, v in class_.__dict__.items() if v == val), str(val)) diff --git a/ocrd_tesserocr/crop.py b/ocrd_tesserocr/crop.py index 80bd20e..9961bd9 100644 --- a/ocrd_tesserocr/crop.py +++ b/ocrd_tesserocr/crop.py @@ -4,6 +4,7 @@ import tesserocr from ocrd_utils import ( getLogger, concat_padded, + bbox_from_points, points_from_bbox, bbox_from_xywh, MIMETYPE_PAGE ) from ocrd_modelfactory import page_from_file @@ -19,8 +20,7 @@ from .config import TESSDATA_PREFIX, OCRD_TOOL from .common import ( - bbox_from_points, points_from_bbox, - bbox_from_xywh, save_image_file + save_image_file ) TOOL = 'ocrd-tesserocr-crop' diff --git a/ocrd_tesserocr/deskew.py b/ocrd_tesserocr/deskew.py index e93a0a8..ccbe5c6 100644 --- a/ocrd_tesserocr/deskew.py +++ b/ocrd_tesserocr/deskew.py @@ -12,6 +12,7 @@ from ocrd_utils import ( getLogger, concat_padded, + membername, MIMETYPE_PAGE ) from ocrd_modelfactory import page_from_file @@ -29,7 +30,6 @@ image_from_page, image_from_segment, save_image_file, - membername ) TOOL = 'ocrd-tesserocr-deskew' diff --git a/ocrd_tesserocr/recognize.py b/ocrd_tesserocr/recognize.py index 7af7c81..cf82ec5 100644 --- a/ocrd_tesserocr/recognize.py +++ b/ocrd_tesserocr/recognize.py @@ -6,10 +6,17 @@ PyTessBaseAPI, get_languages) from ocrd_utils import ( - getLogger, concat_padded, + getLogger, + concat_padded, points_from_x0y0x1y1, - xywh_from_points, points_from_xywh, - MIMETYPE_PAGE) + xywh_from_points, + points_from_xywh, + points_from_polygon, + xywh_from_polygon, + polygon_from_x0y0x1y1, + coordinates_for_segment, + MIMETYPE_PAGE +) from ocrd_models.ocrd_page import ( CoordsType, GlyphType, WordType, @@ -22,10 +29,6 @@ from .config import TESSDATA_PREFIX, OCRD_TOOL from .common import ( - points_from_polygon, - xywh_from_polygon, - polygon_from_x0y0x1y1, - coordinates_for_segment, image_from_page, image_from_segment ) diff --git a/ocrd_tesserocr/segment_region.py b/ocrd_tesserocr/segment_region.py index 3dfd789..c2a099d 100644 --- a/ocrd_tesserocr/segment_region.py +++ b/ocrd_tesserocr/segment_region.py @@ -7,10 +7,15 @@ ) from ocrd_utils import ( - getLogger, concat_padded, + getLogger, + concat_padded, points_from_x0y0x1y1, - points_from_xywh, xywh_from_points, - MIMETYPE_PAGE) + points_from_xywh, + xywh_from_points, + MIMETYPE_PAGE, + points_from_polygon, + membername +) from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import ( MetadataItemType, @@ -29,12 +34,7 @@ from ocrd import Processor from .config import TESSDATA_PREFIX, OCRD_TOOL -from .common import ( - image_from_page, - save_image_file, - points_from_polygon, - membername -) +from .common import save_image_file, image_from_page TOOL = 'ocrd-tesserocr-segment-region' LOG = getLogger('processor.TesserocrSegmentRegion') From 5d4a3ca2cd5892bd69521a7940837eebeb23b879 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 7 Aug 2019 17:15:25 +0200 Subject: [PATCH 2/5] :fire: remove commons, adapt to OCR-D/core#268 --- .pylintrc | 3 +- ocrd_tesserocr/binarize.py | 23 ++-- ocrd_tesserocr/common.py | 183 ------------------------------- ocrd_tesserocr/crop.py | 7 +- ocrd_tesserocr/deskew.py | 23 ++-- ocrd_tesserocr/recognize.py | 28 ++--- ocrd_tesserocr/segment_line.py | 12 +- ocrd_tesserocr/segment_region.py | 11 +- ocrd_tesserocr/segment_word.py | 18 ++- 9 files changed, 48 insertions(+), 260 deletions(-) diff --git a/.pylintrc b/.pylintrc index 710b8b2..fb4be43 100644 --- a/.pylintrc +++ b/.pylintrc @@ -17,7 +17,8 @@ disable = too-many-locals, too-few-public-methods, wrong-import-order, - duplicate-code + duplicate-code, + fixme # allow indented whitespace (as required by interpreter): no-space-check=empty-line diff --git a/ocrd_tesserocr/binarize.py b/ocrd_tesserocr/binarize.py index e6db3c8..685554d 100644 --- a/ocrd_tesserocr/binarize.py +++ b/ocrd_tesserocr/binarize.py @@ -8,8 +8,7 @@ from ocrd_utils import ( getLogger, concat_padded, - MIMETYPE_PAGE, - membername + MIMETYPE_PAGE ) from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import ( @@ -22,11 +21,6 @@ from ocrd import Processor from .config import TESSDATA_PREFIX, OCRD_TOOL -from .common import ( - image_from_page, - image_from_segment, - save_image_file -) TOOL = 'ocrd-tesserocr-binarize' LOG = getLogger('processor.TesserocrBinarize') @@ -52,6 +46,7 @@ def process(self): Produce a new output file by serialising the resulting hierarchy. """ + # pylint: disable=attribute-defined-outside-init try: self.page_grp, self.image_grp = self.output_file_grp.split(',') except ValueError: @@ -77,16 +72,16 @@ def process(self): value=self.parameter[name]) for name in self.parameter.keys()])])) page = pcgts.get_Page() - page_image, page_xywh, _ = image_from_page( - self.workspace, page, page_id) + page_image, page_xywh, _ = self.workspace.image_from_page( + page, page_id) LOG.info("Binarizing on '%s' level in page '%s'", oplevel, page_id) regions = page.get_TextRegion() + page.get_TableRegion() if not regions: LOG.warning("Page '%s' contains no text regions", page_id) for region in regions: - region_image, region_xywh = image_from_segment( - self.workspace, region, page_image, page_xywh) + region_image, region_xywh = self.workspace.image_from_segment( + region, page_image, page_xywh) if oplevel == 'region': tessapi.SetPageSegMode(PSM.SINGLE_BLOCK) self._process_segment(tessapi, RIL.BLOCK, region, region_image, region_xywh, @@ -98,8 +93,8 @@ def process(self): LOG.warning("Page '%s' region '%s' contains no text lines", page_id, region.id) for line in lines: - line_image, line_xywh = image_from_segment( - self.workspace, line, region_image, region_xywh) + line_image, line_xywh = self.workspace.image_from_segment( + line, region_image, region_xywh) tessapi.SetPageSegMode(PSM.SINGLE_LINE) self._process_segment(tessapi, RIL.TEXTLINE, line, line_image, line_xywh, "line '%s'" % line.id, input_file.pageId, @@ -129,7 +124,7 @@ def _process_segment(self, tessapi, ril, segment, image, xywh, where, page_id, f LOG.error('Cannot binarize %s', where) return # update METS (add the image file): - file_path = save_image_file(self.workspace, image_bin, + file_path = self.workspace.save_image_file(image_bin, file_id, page_id=page_id, file_grp=self.image_grp) diff --git a/ocrd_tesserocr/common.py b/ocrd_tesserocr/common.py index 5cfa7ae..c05722a 100644 --- a/ocrd_tesserocr/common.py +++ b/ocrd_tesserocr/common.py @@ -18,186 +18,3 @@ LOG = getLogger('') # to be refined by importer -# to be refactored into core (as method of ocrd.workspace.Workspace): -# to be refactored into core (as method of ocrd.workspace.Workspace): -def image_from_page(workspace, page, page_id): - """Extract the Page image from the workspace. - - Given a PageType object, `page`, extract its PIL.Image from - AlternativeImage if it exists. Otherwise extract the PIL.Image - from imageFilename and crop it if a Border exists. Otherwise - just return it. - - When cropping, respect any orientation angle annotated for - the page (from page-level deskewing) by rotating the - cropped image, respectively. - - If the resulting page image is larger than the bounding box of - `page`, pass down the page's box coordinates with an offset of - half the width/height difference. - - Return the extracted image, and the absolute coordinates of - the page's bounding box / border (for passing down), and - an OcrdExif instance associated with the original image. - """ - page_image = workspace.resolve_image_as_pil(page.imageFilename) - page_image_info = OcrdExif(page_image) - page_xywh = {'x': 0, - 'y': 0, - 'w': page_image.width, - 'h': page_image.height} - # region angle: PAGE orientation is defined clockwise, - # whereas PIL/ndimage rotation is in mathematical direction: - page_xywh['angle'] = -(page.get_orientation() or 0) - # FIXME: remove PrintSpace here as soon as GT abides by the PAGE standard: - border = page.get_Border() or page.get_PrintSpace() - if border: - page_points = border.get_Coords().points - LOG.debug("Using explictly set page border '%s' for page '%s'", - page_points, page_id) - page_xywh = xywh_from_points(page_points) - - alternative_image = page.get_AlternativeImage() - if alternative_image: - # (e.g. from page-level cropping, binarization, deskewing or despeckling) - # assumes implicit cropping (i.e. page_xywh has been applied already) - LOG.debug("Using AlternativeImage %d (%s) for page '%s'", - len(alternative_image), alternative_image[-1].get_comments(), - page_id) - page_image = workspace.resolve_image_as_pil( - alternative_image[-1].get_filename()) - elif border: - # get polygon outline of page border: - page_polygon = np.array(polygon_from_points(page_points)) - # create a mask from the page polygon: - page_image = image_from_polygon(page_image, page_polygon) - # recrop into page rectangle: - page_image = crop_image(page_image, - box=(page_xywh['x'], - page_xywh['y'], - page_xywh['x'] + page_xywh['w'], - page_xywh['y'] + page_xywh['h'])) - if 'angle' in page_xywh and page_xywh['angle']: - LOG.info("About to rotate page '%s' by %.2f°", - page_id, page_xywh['angle']) - page_image = page_image.rotate(page_xywh['angle'], - expand=True, - #resample=Image.BILINEAR, - fillcolor='white') - # subtract offset from any increase in binary region size over source: - page_xywh['x'] -= round(0.5 * max(0, page_image.width - page_xywh['w'])) - page_xywh['y'] -= round(0.5 * max(0, page_image.height - page_xywh['h'])) - return page_image, page_xywh, page_image_info - -# to be refactored into core (as method of ocrd.workspace.Workspace): -def image_from_segment(workspace, segment, parent_image, parent_xywh): - """Extract a segment image from its parent's image. - - Given a PIL.Image of the parent, `parent_image`, and - its absolute coordinates, `parent_xywh`, and a PAGE - segment (TextRegion / TextLine / Word / Glyph) object - logically contained in it, `segment`, extract its PIL.Image - from AlternativeImage (if it exists), or via cropping from - `parent_image`. - - When cropping, respect any orientation angle annotated for - the parent (from parent-level deskewing) by compensating the - segment coordinates in an inverse transformation (translation - to center, rotation, re-translation). - Also, mind the difference between annotated and actual size - of the parent (usually from deskewing), by a respective offset - into the image. Cropping uses a polygon mask (not just the - rectangle). - - When cropping, respect any orientation angle annotated for - the segment (from segment-level deskewing) by rotating the - cropped image, respectively. - - If the resulting segment image is larger than the bounding box of - `segment`, pass down the segment's box coordinates with an offset - of half the width/height difference. - - Return the extracted image, and the absolute coordinates of - the segment's bounding box (for passing down). - """ - segment_xywh = xywh_from_points(segment.get_Coords().points) - if 'orientation' in segment.__dict__: - # angle: PAGE orientation is defined clockwise, - # whereas PIL/ndimage rotation is in mathematical direction: - segment_xywh['angle'] = -(segment.get_orientation() or 0) - alternative_image = segment.get_AlternativeImage() - if alternative_image: - # (e.g. from segment-level cropping, binarization, deskewing or despeckling) - LOG.debug("Using AlternativeImage %d (%s) for segment '%s'", - len(alternative_image), alternative_image[-1].get_comments(), - segment.id) - segment_image = workspace.resolve_image_as_pil( - alternative_image[-1].get_filename()) - else: - # get polygon outline of segment relative to parent image: - segment_polygon = coordinates_of_segment(segment, parent_image, parent_xywh) - # create a mask from the segment polygon: - segment_image = image_from_polygon(parent_image, segment_polygon) - # recrop into segment rectangle: - segment_image = crop_image(segment_image, - box=(segment_xywh['x'] - parent_xywh['x'], - segment_xywh['y'] - parent_xywh['y'], - segment_xywh['x'] - parent_xywh['x'] + segment_xywh['w'], - segment_xywh['y'] - parent_xywh['y'] + segment_xywh['h'])) - # note: We should mask overlapping neighbouring segments here, - # but finding the right clipping rules can be difficult if operating - # on the raw (non-binary) image data alone: for each intersection, it - # must be decided which one of either segment or neighbour to assign, - # e.g. an ImageRegion which properly contains our TextRegion should be - # completely ignored, but an ImageRegion which is properly contained - # in our TextRegion should be completely masked, while partial overlap - # may be more difficult to decide. On the other hand, on the binary image, - # we can use connected component analysis to mask foreground areas which - # originate in the neighbouring regions. But that would introduce either - # the assumption that the input has already been binarized, or a dependency - # on some ad-hoc binarization method. Thus, it is preferable to use - # a dedicated processor for this (which produces clipped AlternativeImage - # or reduced polygon coordinates). - if 'angle' in segment_xywh and segment_xywh['angle']: - LOG.info("About to rotate segment '%s' by %.2f°", - segment.id, segment_xywh['angle']) - segment_image = segment_image.rotate(segment_xywh['angle'], - expand=True, - #resample=Image.BILINEAR, - fillcolor='white') - # subtract offset from any increase in binary region size over source: - segment_xywh['x'] -= round(0.5 * max(0, segment_image.width - segment_xywh['w'])) - segment_xywh['y'] -= round(0.5 * max(0, segment_image.height - segment_xywh['h'])) - return segment_image, segment_xywh - -# to be refactored into core (as method of ocrd.workspace.Workspace): -def save_image_file(workspace, image, - file_id, - page_id=None, - file_grp='OCR-D-IMG', # or -BIN? - format='PNG', - force=True): - """Store and reference an image as file into the workspace. - - Given a PIL.Image `image`, and an ID `file_id` to use in METS, - store the image under the fileGrp `file_grp` and physical page - `page_id` into the workspace (in a file name based on - the `file_grp`, `file_id` and `format` extension). - - Return the (absolute) path of the created file. - """ - image_bytes = io.BytesIO() - image.save(image_bytes, format=format) - file_path = os.path.join(file_grp, - file_id + '.' + format.lower()) - out = workspace.add_file( - ID=file_id, - file_grp=file_grp, - pageId=page_id, - local_filename=file_path, - mimetype='image/' + format.lower(), - content=image_bytes.getvalue(), - force=force) - LOG.info('created file ID: %s, file_grp: %s, path: %s', - file_id, file_grp, out.local_filename) - return file_path diff --git a/ocrd_tesserocr/crop.py b/ocrd_tesserocr/crop.py index 9961bd9..d17e275 100644 --- a/ocrd_tesserocr/crop.py +++ b/ocrd_tesserocr/crop.py @@ -19,9 +19,6 @@ from ocrd import Processor from .config import TESSDATA_PREFIX, OCRD_TOOL -from .common import ( - save_image_file -) TOOL = 'ocrd-tesserocr-crop' LOG = getLogger('processor.TesserocrCrop') @@ -113,7 +110,7 @@ def process(self): # iterate over all text blocks and compare their # bbox extent to the running min and max values for component in tessapi.GetComponentImages(tesserocr.RIL.BLOCK, True): - image, xywh, index, para = component + image, xywh, index, _ = component # # the region reference in the reading order element # @@ -163,7 +160,7 @@ def process(self): file_id = input_file.ID.replace(self.input_file_grp, FILEGRP_IMG) if file_id == input_file.ID: file_id = concat_padded(FILEGRP_IMG, n) - file_path = save_image_file(self.workspace, page_image, + file_path = self.workspace.save_image_file(page_image, file_id, page_id=page_id, file_grp=FILEGRP_IMG) diff --git a/ocrd_tesserocr/deskew.py b/ocrd_tesserocr/deskew.py index ccbe5c6..e454c1a 100644 --- a/ocrd_tesserocr/deskew.py +++ b/ocrd_tesserocr/deskew.py @@ -26,11 +26,6 @@ from ocrd import Processor from .config import TESSDATA_PREFIX, OCRD_TOOL -from .common import ( - image_from_page, - image_from_segment, - save_image_file, -) TOOL = 'ocrd-tesserocr-deskew' LOG = getLogger('processor.TesserocrDeskew') @@ -45,19 +40,19 @@ def __init__(self, *args, **kwargs): def process(self): """Performs deskewing of the page / region with Tesseract on the workspace. - + Open and deserialise PAGE input files and their respective images, then iterate over the element hierarchy down to the region level for all text and table regions. - + Set up Tesseract to recognise the region image's orientation, skew and script (with both OSD and AnalyseLayout). Rotate the image accordingly, and annotate the angle, readingDirection and textlineOrder. - + Create a corresponding image file, and reference it as AlternativeImage in the region element and as file with a fileGrp USE `OCR-D-IMG-DESKEW` in the workspace. - + Produce a new output file by serialising the resulting hierarchy. """ oplevel = self.parameter['operation_level'] @@ -84,8 +79,8 @@ def process(self): value=self.parameter[name]) for name in self.parameter.keys()])])) page = pcgts.get_Page() - page_image, page_xywh, page_image_info = image_from_page( - self.workspace, page, page_id) + page_image, page_xywh, page_image_info = self.workspace.image_from_page( + page, page_id) if page_image_info.xResolution != 1: dpi = page_image_info.xResolution if page_image_info.resolutionUnit == 'cm': @@ -102,8 +97,8 @@ def process(self): if not regions: LOG.warning("Page '%s' contains no text regions", page_id) for region in regions: - region_image, region_xywh = image_from_segment( - self.workspace, region, page_image, page_xywh) + region_image, region_xywh = self.workspace.image_from_segment( + region, page_image, page_xywh) self._process_segment(tessapi, region, region_image, region_xywh, "region '%s'" % region.id, input_file.pageId, file_id + '_' + region.id) @@ -269,7 +264,7 @@ def _process_segment(self, tessapi, segment, image, xywh, where, page_id, file_i # points = points_from_x0y0x1y1(list(baseline[0]) + list(baseline[1])) # segment.add_Baseline(BaselineType(points=points)) # update METS (add the image file): - file_path = save_image_file(self.workspace, image, + file_path = self.workspace.save_image_file(image, file_id, page_id=page_id, file_grp=FILEGRP_IMG) diff --git a/ocrd_tesserocr/recognize.py b/ocrd_tesserocr/recognize.py index cf82ec5..309ca2e 100644 --- a/ocrd_tesserocr/recognize.py +++ b/ocrd_tesserocr/recognize.py @@ -8,11 +8,7 @@ from ocrd_utils import ( getLogger, concat_padded, - points_from_x0y0x1y1, - xywh_from_points, - points_from_xywh, points_from_polygon, - xywh_from_polygon, polygon_from_x0y0x1y1, coordinates_for_segment, MIMETYPE_PAGE @@ -28,10 +24,6 @@ from ocrd import Processor from .config import TESSDATA_PREFIX, OCRD_TOOL -from .common import ( - image_from_page, - image_from_segment -) TOOL = 'ocrd-tesserocr-recognize' LOG = getLogger('processor.TesserocrRecognize') @@ -136,8 +128,8 @@ def process(self): value=self.parameter[name]) for name in self.parameter.keys()])])) page = pcgts.get_Page() - page_image, page_xywh, page_image_info = image_from_page( - self.workspace, page, page_id) + page_image, page_xywh, page_image_info = self.workspace.image_from_page( + page, page_id) if page_image_info.xResolution != 1: dpi = page_image_info.xResolution if page_image_info.resolutionUnit == 'cm': @@ -168,8 +160,8 @@ def process(self): def _process_regions(self, tessapi, regions, page_image, page_xywh): for region in regions: - region_image, region_xywh = image_from_segment( - self.workspace, region, page_image, page_xywh) + region_image, region_xywh = self.workspace.image_from_segment( + region, page_image, page_xywh) if self.parameter['textequiv_level'] == 'region': tessapi.SetImage(region_image) tessapi.SetPageSegMode(PSM.SINGLE_BLOCK) @@ -194,8 +186,8 @@ def _process_lines(self, tessapi, textlines, region_image, region_xywh): for line in textlines: if self.parameter['overwrite_words']: line.set_Word([]) - line_image, line_xywh = image_from_segment( - self.workspace, line, region_image, region_xywh) + line_image, line_xywh = self.workspace.image_from_segment( + line, region_image, region_xywh) # todo: Tesseract works better if the line images have a 5px margin everywhere tessapi.SetImage(line_image) # RAW_LINE fails with pre-LSTM models, but sometimes better with LSTM models @@ -271,8 +263,8 @@ def _process_words_in_line(self, result_it, line, line_xywh): def _process_existing_words(self, tessapi, words, line_image, line_xywh): for word in words: - word_image, word_xywh = image_from_segment( - self.workspace, word, line_image, line_xywh) + word_image, word_xywh = self.workspace.image_from_segment( + word, line_image, line_xywh) tessapi.SetImage(word_image) tessapi.SetPageSegMode(PSM.SINGLE_WORD) if self.parameter['textequiv_level'] == 'word': @@ -299,8 +291,8 @@ def _process_existing_words(self, tessapi, words, line_image, line_xywh): def _process_existing_glyphs(self, tessapi, glyphs, word_image, word_xywh): for glyph in glyphs: - glyph_image, glyph_xywh = image_from_segment( - self.workspace, glyph, word_image, word_xywh) + glyph_image, _ = self.workspace.image_from_segment( + glyph, word_image, word_xywh) tessapi.SetImage(glyph_image) tessapi.SetPageSegMode(PSM.SINGLE_CHAR) LOG.debug("Recognizing text in glyph '%s'", glyph.id) diff --git a/ocrd_tesserocr/segment_line.py b/ocrd_tesserocr/segment_line.py index db2493f..d692c30 100644 --- a/ocrd_tesserocr/segment_line.py +++ b/ocrd_tesserocr/segment_line.py @@ -19,10 +19,6 @@ ) from .config import TESSDATA_PREFIX, OCRD_TOOL -from .common import ( - image_from_page, - image_from_segment -) TOOL = 'ocrd-tesserocr-segment-line' LOG = getLogger('processor.TesserocrSegmentLine') @@ -70,8 +66,8 @@ def process(self): value=self.parameter[name]) for name in self.parameter.keys()])])) page = pcgts.get_Page() - page_image, page_xywh, page_image_info = image_from_page( - self.workspace, page, page_id) + page_image, page_xywh, page_image_info = self.workspace.image_from_page( + page, page_id) if page_image_info.xResolution != 1: dpi = page_image_info.xResolution if page_image_info.resolutionUnit == 'cm': @@ -86,8 +82,8 @@ def process(self): else: LOG.warning('keeping existing TextLines in region "%s"', region.id) LOG.debug("Detecting lines in region '%s'", region.id) - region_image, region_xywh = image_from_segment( - self.workspace, region, page_image, page_xywh) + region_image, region_xywh = self.workspace.image_from_segment( + region, page_image, page_xywh) tessapi.SetImage(region_image) for line_no, component in enumerate(tessapi.GetComponentImages(RIL.TEXTLINE, True, raw_image=True)): line_id = '%s_line%04d' % (region.id, line_no) diff --git a/ocrd_tesserocr/segment_region.py b/ocrd_tesserocr/segment_region.py index c2a099d..c9996f5 100644 --- a/ocrd_tesserocr/segment_region.py +++ b/ocrd_tesserocr/segment_region.py @@ -34,7 +34,6 @@ from ocrd import Processor from .config import TESSDATA_PREFIX, OCRD_TOOL -from .common import save_image_file, image_from_page TOOL = 'ocrd-tesserocr-segment-region' LOG = getLogger('processor.TesserocrSegmentRegion') @@ -106,7 +105,7 @@ def process(self): page.set_TextRegion([]) else: LOG.warning('keeping existing TextRegions') - # todo: also make non-text regions protected? + # TODO: also make non-text regions protected? page.set_AdvertRegion([]) page.set_ChartRegion([]) page.set_ChemRegion([]) @@ -126,8 +125,8 @@ def process(self): page.set_ReadingOrder([]) else: LOG.warning('keeping existing ReadingOrder') - page_image, page_xywh, page_image_info = image_from_page( - self.workspace, page, page_id) + page_image, page_xywh, page_image_info = self.workspace.image_from_page( + page, page_id) if page_image_info.xResolution != 1: dpi = page_image_info.xResolution if page_image_info.resolutionUnit == 'cm': @@ -259,9 +258,9 @@ def _process_page(self, it, page, page_image, page_xywh, page_id, file_id): # GetBinaryImage). # You have been warned! # get the raw image (masked by white space along the block polygon): - region_image, top, left = it.GetImage(RIL.BLOCK, self.parameter['padding'], page_image) + region_image, _, _ = it.GetImage(RIL.BLOCK, self.parameter['padding'], page_image) # update METS (add the image file): - file_path = save_image_file(self.workspace, region_image, + file_path = self.workspace.save_image_file(region_image, file_id + '_' + ID, page_id=page_id, file_grp=FILEGRP_IMG) diff --git a/ocrd_tesserocr/segment_word.py b/ocrd_tesserocr/segment_word.py index b37a8f9..a4db5fe 100644 --- a/ocrd_tesserocr/segment_word.py +++ b/ocrd_tesserocr/segment_word.py @@ -15,14 +15,10 @@ LabelType, LabelsType, MetadataItemType, WordType, - to_xml + to_xml, ) from ocrd_tesserocr.config import TESSDATA_PREFIX, OCRD_TOOL -from .common import ( - image_from_page, - image_from_segment -) TOOL = 'ocrd-tesserocr-segment-word' LOG = getLogger('processor.TesserocrSegmentWord') @@ -69,8 +65,8 @@ def process(self): value=self.parameter[name]) for name in self.parameter.keys()])])) page = pcgts.get_Page() - page_image, page_xywh, page_image_info = image_from_page( - self.workspace, page, page_id) + page_image, page_xywh, page_image_info = self.workspace.image_from_page( + page, page_id) if page_image_info.xResolution != 1: dpi = page_image_info.xResolution if page_image_info.resolutionUnit == 'cm': @@ -78,8 +74,8 @@ def process(self): tessapi.SetVariable('user_defined_dpi', str(dpi)) for region in page.get_TextRegion(): - region_image, region_xywh = image_from_segment( - self.workspace, region, page_image, page_xywh) + region_image, region_xywh = self.workspace.image_from_segment( + region, page_image, page_xywh) for line in region.get_TextLine(): if line.get_Word(): if overwrite_words: @@ -88,8 +84,8 @@ def process(self): else: LOG.warning('keeping existing Words in line "%s"', line.id) LOG.debug("Detecting words in line '%s'", line.id) - line_image, line_xywh = image_from_segment( - self.workspace, line, region_image, region_xywh) + line_image, line_xywh = self.workspace.image_from_segment( + line, region_image, region_xywh) tessapi.SetImage(line_image) for word_no, component in enumerate(tessapi.GetComponentImages(RIL.WORD, True, raw_image=True)): word_id = '%s_word%04d' % (line.id, word_no) From 597f9e1ab8625ed94595e08f0217af4219df808d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 20 Aug 2019 20:12:56 +0200 Subject: [PATCH 3/5] remove obsolete common.py, fix pylintrc (ht @bertsky) --- .pylintrc | 3 +-- ocrd_tesserocr/common.py | 20 -------------------- 2 files changed, 1 insertion(+), 22 deletions(-) delete mode 100644 ocrd_tesserocr/common.py diff --git a/.pylintrc b/.pylintrc index fb4be43..710b8b2 100644 --- a/.pylintrc +++ b/.pylintrc @@ -17,8 +17,7 @@ disable = too-many-locals, too-few-public-methods, wrong-import-order, - duplicate-code, - fixme + duplicate-code # allow indented whitespace (as required by interpreter): no-space-check=empty-line diff --git a/ocrd_tesserocr/common.py b/ocrd_tesserocr/common.py deleted file mode 100644 index c05722a..0000000 --- a/ocrd_tesserocr/common.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import absolute_import - -import os.path -import io - -import numpy as np - -from ocrd_models import OcrdExif -from ocrd_utils import ( - getLogger, - coordinates_of_segment, - xywh_from_points, - polygon_from_points, - image_from_polygon, - crop_image, -) - -LOG = getLogger('') # to be refined by importer - - From a26a37573b9e6f092d2151ba365ca68fc1904b30 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 21 Aug 2019 12:02:05 +0200 Subject: [PATCH 4/5] require core >= 1.0.0b16 with common.py refactoring --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index dbb563f..9a194cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -ocrd >= 1.0.0b11 +ocrd >= 1.0.0b16 click ocrd-fork-tesserocr==3.0.0rc2 From 0cdc02090c9a3f838a2804b4b6e5f9d7ae6a3364 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 21 Aug 2019 12:54:28 +0200 Subject: [PATCH 5/5] require core >= 1.0.0b17 with Pillow version harmonized --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9a194cf..d59aef4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -ocrd >= 1.0.0b16 +ocrd >= 1.0.0b17 click ocrd-fork-tesserocr==3.0.0rc2