From 2ff941b9fbf01d5428f9382ec3d6cf7affd43f9f Mon Sep 17 00:00:00 2001 From: weikunt Date: Tue, 5 Nov 2019 11:09:00 +0800 Subject: [PATCH] Porting some Python 2 Code to Python 3 --- README.md | 4 +- page_dewarp.py | 166 +++++++++++++++++++---------------------------- requirements.txt | 3 +- 3 files changed, 70 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index 7ea5f8c..34e8593 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,14 @@ page_dewarp =========== +_Porting some Python 2 Code to Python 3_ + Page dewarping and thresholding using a "cubic sheet" model - see full writeup at Requirements: - scipy - - OpenCV 3.0 or greater + - OpenCV 4.1 or greater - Image module from PIL or Pillow Usage: diff --git a/page_dewarp.py b/page_dewarp.py index 6ef5b33..2dbbcc3 100755 --- a/page_dewarp.py +++ b/page_dewarp.py @@ -20,37 +20,37 @@ # for some reason pylint complains about cv2 members being undefined :( # pylint: disable=E1101 -PAGE_MARGIN_X = 50 # reduced px to ignore near L/R edge -PAGE_MARGIN_Y = 20 # reduced px to ignore near T/B edge +PAGE_MARGIN_X = 50 # reduced px to ignore near L/R edge +PAGE_MARGIN_Y = 20 # reduced px to ignore near T/B edge -OUTPUT_ZOOM = 1.0 # how much to zoom output relative to *original* image -OUTPUT_DPI = 300 # just affects stated DPI of PNG, not appearance -REMAP_DECIMATE = 16 # downscaling factor for remapping image +OUTPUT_ZOOM = 1.0 # how much to zoom output relative to *original* image +OUTPUT_DPI = 300 # just affects stated DPI of PNG, not appearance +REMAP_DECIMATE = 16 # downscaling factor for remapping image -ADAPTIVE_WINSZ = 55 # window size for adaptive threshold in reduced px +ADAPTIVE_WINSZ = 55 # window size for adaptive threshold in reduced px -TEXT_MIN_WIDTH = 15 # min reduced px width of detected text contour -TEXT_MIN_HEIGHT = 2 # min reduced px height of detected text contour -TEXT_MIN_ASPECT = 1.5 # filter out text contours below this w/h ratio +TEXT_MIN_WIDTH = 15 # min reduced px width of 
detected text contour +TEXT_MIN_HEIGHT = 2 # min reduced px height of detected text contour +TEXT_MIN_ASPECT = 1.5 # filter out text contours below this w/h ratio TEXT_MAX_THICKNESS = 10 # max reduced px thickness of detected text contour -EDGE_MAX_OVERLAP = 1.0 # max reduced px horiz. overlap of contours in span +EDGE_MAX_OVERLAP = 1.0 # max reduced px horiz. overlap of contours in span EDGE_MAX_LENGTH = 100.0 # max reduced px length of edge connecting contours -EDGE_ANGLE_COST = 10.0 # cost of angles in edges (tradeoff vs. length) -EDGE_MAX_ANGLE = 7.5 # maximum change in angle allowed between contours +EDGE_ANGLE_COST = 10.0 # cost of angles in edges (tradeoff vs. length) +EDGE_MAX_ANGLE = 7.5 # maximum change in angle allowed between contours -RVEC_IDX = slice(0, 3) # index of rvec in params vector -TVEC_IDX = slice(3, 6) # index of tvec in params vector +RVEC_IDX = slice(0, 3) # index of rvec in params vector +TVEC_IDX = slice(3, 6) # index of tvec in params vector CUBIC_IDX = slice(6, 8) # index of cubic slopes in params vector -SPAN_MIN_WIDTH = 30 # minimum reduced px width for span -SPAN_PX_PER_STEP = 20 # reduced px spacing for sampling along spans -FOCAL_LENGTH = 1.2 # normalized focal length of camera +SPAN_MIN_WIDTH = 30 # minimum reduced px width for span +SPAN_PX_PER_STEP = 20 # reduced px spacing for sampling along spans +FOCAL_LENGTH = 1.2 # normalized focal length of camera -DEBUG_LEVEL = 0 # 0=none, 1=some, 2=lots, 3=all -DEBUG_OUTPUT = 'file' # file, screen, both +DEBUG_LEVEL = 0 # 0=none, 1=some, 2=lots, 3=all +DEBUG_OUTPUT = 'file' # file, screen, both -WINDOW_NAME = 'Dewarp' # Window name for visualization +WINDOW_NAME = 'Dewarp' # Window name for visualization # nice color palette for visualizing contours, etc. 
CCOLORS = [ @@ -88,7 +88,6 @@ def debug_show(name, step, text, display): - if DEBUG_OUTPUT != 'screen': filetext = text.replace(' ', '_') outfile = name + '_debug_' + str(step) + '_' + filetext + '.png' @@ -99,11 +98,11 @@ def debug_show(name, step, text, display): image = display.copy() height = image.shape[0] - cv2.putText(image, text, (16, height-16), + cv2.putText(image, text, (16, height - 16), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 3, cv2.LINE_AA) - cv2.putText(image, text, (16, height-16), + cv2.putText(image, text, (16, height - 16), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 1, cv2.LINE_AA) @@ -124,15 +123,15 @@ def round_nearest_multiple(i, factor): def pix2norm(shape, pts): height, width = shape[:2] - scl = 2.0/(max(height, width)) - offset = np.array([width, height], dtype=pts.dtype).reshape((-1, 1, 2))*0.5 + scl = 2.0 / (max(height, width)) + offset = np.array([width, height], dtype=pts.dtype).reshape((-1, 1, 2)) * 0.5 return (pts - offset) * scl def norm2pix(shape, pts, as_integer): height, width = shape[:2] - scl = max(height, width)*0.5 - offset = np.array([0.5*width, 0.5*height], + scl = max(height, width) * 0.5 + offset = np.array([0.5 * width, 0.5 * height], dtype=pts.dtype).reshape((-1, 1, 2)) rval = pts * scl + offset if as_integer: @@ -146,7 +145,6 @@ def fltp(point): def draw_correspondences(img, dstpoints, projpts): - display = img.copy() dstpoints = norm2pix(img.shape, dstpoints, True) projpts = norm2pix(img.shape, projpts, True) @@ -165,7 +163,6 @@ def draw_correspondences(img, dstpoints, projpts): def get_default_params(corners, ycoords, xcoords): - # page width and height page_width = np.linalg.norm(corners[1] - corners[0]) page_height = np.linalg.norm(corners[-1] - corners[0]) @@ -198,7 +195,6 @@ def get_default_params(corners, ycoords, xcoords): def project_xy(xy_coords, pvec): - # get cubic polynomial coefficients given # # f(0) = 0, f'(0) = alpha @@ -208,7 +204,7 @@ def project_xy(xy_coords, pvec): poly = np.array([ alpha + 
beta, - -2*alpha - beta, + -2 * alpha - beta, alpha, 0]) @@ -226,7 +222,6 @@ def project_xy(xy_coords, pvec): def project_keypoints(pvec, keypoint_index): - xy_coords = pvec[keypoint_index] xy_coords[0, :] = 0 @@ -234,16 +229,15 @@ def project_keypoints(pvec, keypoint_index): def resize_to_screen(src, maxw=1280, maxh=700, copy=False): - height, width = src.shape[:2] - scl_x = float(width)/maxw - scl_y = float(height)/maxh + scl_x = float(width) / maxw + scl_y = float(height) / maxh scl = int(np.ceil(max(scl_x, scl_y))) if scl > 1.0: - inv_scl = 1.0/scl + inv_scl = 1.0 / scl img = cv2.resize(src, (0, 0), None, inv_scl, inv_scl, cv2.INTER_AREA) elif copy: img = src.copy() @@ -258,13 +252,12 @@ def box(width, height): def get_page_extents(small): - height, width = small.shape[:2] xmin = PAGE_MARGIN_X ymin = PAGE_MARGIN_Y - xmax = width-PAGE_MARGIN_X - ymax = height-PAGE_MARGIN_Y + xmax = width - PAGE_MARGIN_X + ymax = height - PAGE_MARGIN_Y page = np.zeros((height, width), dtype=np.uint8) cv2.rectangle(page, (xmin, ymin), (xmax, ymax), (255, 255, 255), -1) @@ -279,7 +272,6 @@ def get_page_extents(small): def get_mask(name, small, pagemask, masktype): - sgray = cv2.cvtColor(small, cv2.COLOR_RGB2GRAY) if masktype == 'text': @@ -330,20 +322,18 @@ def interval_measure_overlap(int_a, int_b): def angle_dist(angle_b, angle_a): - diff = angle_b - angle_a while diff > np.pi: - diff -= 2*np.pi + diff -= 2 * np.pi while diff < -np.pi: - diff += 2*np.pi + diff += 2 * np.pi return np.abs(diff) def blob_mean_and_tangent(contour): - moments = cv2.moments(contour) area = moments['m00'] @@ -367,7 +357,6 @@ def blob_mean_and_tangent(contour): class ContourInfo(object): def __init__(self, contour, rect, mask): - self.contour = contour self.rect = rect self.mask = mask @@ -390,7 +379,7 @@ def __init__(self, contour, rect, mask): self.succ = None def proj_x(self, point): - return np.dot(self.tangent, point.flatten()-self.center) + return np.dot(self.tangent, point.flatten() - self.center) 
def local_overlap(self, other): xmin = self.proj_x(other.point0) @@ -399,7 +388,6 @@ def local_overlap(self, other): def generate_candidate_edge(cinfo_a, cinfo_b): - # we want a left of b (so a's successor will be b and b's # predecessor will be a) make sure right endpoint of b is to the # right of left endpoint of a. @@ -415,7 +403,7 @@ def generate_candidate_edge(cinfo_a, cinfo_b): overall_angle = np.arctan2(overall_tangent[1], overall_tangent[0]) delta_angle = max(angle_dist(cinfo_a.angle, overall_angle), - angle_dist(cinfo_b.angle, overall_angle)) * 180/np.pi + angle_dist(cinfo_b.angle, overall_angle)) * 180 / np.pi # we want the largest overlap in x to be small x_overlap = max(x_overlap_a, x_overlap_b) @@ -427,12 +415,11 @@ def generate_candidate_edge(cinfo_a, cinfo_b): delta_angle > EDGE_MAX_ANGLE): return None else: - score = dist + delta_angle*EDGE_ANGLE_COST + score = dist + delta_angle * EDGE_ANGLE_COST return (score, cinfo_a, cinfo_b) def make_tight_mask(contour, xmin, ymin, width, height): - tight_mask = np.zeros((height, width), dtype=np.uint8) tight_contour = contour - np.array((xmin, ymin)).reshape((-1, 1, 2)) @@ -443,11 +430,10 @@ def make_tight_mask(contour, xmin, ymin, width, height): def get_contours(name, small, pagemask, masktype): - mask = get_mask(name, small, pagemask, masktype) - _, contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, - cv2.CHAIN_APPROX_NONE) + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_NONE) contours_out = [] @@ -458,7 +444,7 @@ def get_contours(name, small, pagemask, masktype): if (width < TEXT_MIN_WIDTH or height < TEXT_MIN_HEIGHT or - width < TEXT_MIN_ASPECT*height): + width < TEXT_MIN_ASPECT * height): continue tight_mask = make_tight_mask(contour, xmin, ymin, width, height) @@ -475,7 +461,6 @@ def get_contours(name, small, pagemask, masktype): def assemble_spans(name, small, pagemask, cinfo_list): - # sort list cinfo_list = sorted(cinfo_list, key=lambda cinfo: cinfo.rect[1]) @@ 
-538,7 +523,6 @@ def assemble_spans(name, small, pagemask, cinfo_list): def sample_spans(shape, spans): - span_points = [] for span in spans: @@ -546,7 +530,6 @@ def sample_spans(shape, spans): contour_points = [] for cinfo in span: - yvals = np.arange(cinfo.mask.shape[0]).reshape((-1, 1)) totals = (yvals * cinfo.mask).sum(axis=0) means = totals / cinfo.mask.sum(axis=0) @@ -554,9 +537,9 @@ def sample_spans(shape, spans): xmin, ymin = cinfo.rect[:2] step = SPAN_PX_PER_STEP - start = ((len(means)-1) % step) / 2 + start = ((len(means) - 1) % step) // 2 - contour_points += [(x+xmin, means[x]+ymin) + contour_points += [(x + xmin, means[x] + ymin) for x in range(start, len(means), step)] contour_points = np.array(contour_points, @@ -571,12 +554,10 @@ def sample_spans(shape, spans): def keypoints_from_samples(name, small, pagemask, page_outline, span_points): - all_evecs = np.array([[0.0, 0.0]]) all_weights = 0 for points in span_points: - _, evec = cv2.PCACompute(points.reshape((-1, 2)), None, maxComponents=1) @@ -631,22 +612,20 @@ def keypoints_from_samples(name, small, pagemask, page_outline, def visualize_contours(name, small, cinfo_list): - regions = np.zeros_like(small) for j, cinfo in enumerate(cinfo_list): - cv2.drawContours(regions, [cinfo.contour], 0, CCOLORS[j % len(CCOLORS)], -1) mask = (regions.max(axis=2) != 0) display = small.copy() - display[mask] = (display[mask]/2) + (regions[mask]/2) + display[mask] = (display[mask] / 2) + (regions[mask] / 2) for j, cinfo in enumerate(cinfo_list): color = CCOLORS[j % len(CCOLORS)] - color = tuple([c/4 for c in color]) + color = tuple([c / 4 for c in color]) cv2.circle(display, fltp(cinfo.center), 3, (255, 255, 255), 1, cv2.LINE_AA) @@ -658,25 +637,23 @@ def visualize_contours(name, small, cinfo_list): def visualize_spans(name, small, pagemask, spans): - regions = np.zeros_like(small) for i, span in enumerate(spans): contours = [cinfo.contour for cinfo in span] cv2.drawContours(regions, contours, -1, - CCOLORS[i*3 % 
len(CCOLORS)], -1) + CCOLORS[i * 3 % len(CCOLORS)], -1) mask = (regions.max(axis=2) != 0) display = small.copy() - display[mask] = (display[mask]/2) + (regions[mask]/2) + display[mask] = (display[mask] / 2) + (regions[mask] / 2) display[pagemask == 0] /= 4 debug_show(name, 2, 'spans', display) def visualize_span_points(name, small, span_points, corners): - display = small.copy() for i, points in enumerate(span_points): @@ -690,8 +667,8 @@ def visualize_span_points(name, small, span_points, corners): dps = np.dot(points.reshape((-1, 2)), small_evec.reshape((2, 1))) dpm = np.dot(mean.flatten(), small_evec.flatten()) - point0 = mean + small_evec * (dps.min()-dpm) - point1 = mean + small_evec * (dps.max()-dpm) + point0 = mean + small_evec * (dps.min() - dpm) + point1 = mean + small_evec * (dps.max() - dpm) for point in points: cv2.circle(display, fltp(point), 3, @@ -712,15 +689,14 @@ def imgsize(img): def make_keypoint_index(span_counts): - nspans = len(span_counts) npts = sum(span_counts) - keypoint_index = np.zeros((npts+1, 2), dtype=int) + keypoint_index = np.zeros((npts + 1, 2), dtype=int) start = 1 for i, count in enumerate(span_counts): end = start + count - keypoint_index[start:start+end, 1] = 8+i + keypoint_index[start:start + end, 1] = 8 + i start = end keypoint_index[1:, 0] = np.arange(npts) + 8 + nspans @@ -729,27 +705,22 @@ def make_keypoint_index(span_counts): def optimize_params(name, small, dstpoints, span_counts, params): - keypoint_index = make_keypoint_index(span_counts) def objective(pvec): ppts = project_keypoints(pvec, keypoint_index) - return np.sum((dstpoints - ppts)**2) - - print ' initial objective is', objective(params) + return np.sum((dstpoints - ppts) ** 2) if DEBUG_LEVEL >= 1: projpts = project_keypoints(params, keypoint_index) display = draw_correspondences(small, dstpoints, projpts) debug_show(name, 4, 'keypoints before', display) - print ' optimizing', len(params), 'parameters...' 
start = datetime.datetime.now() res = scipy.optimize.minimize(objective, params, method='Powell') end = datetime.datetime.now() - print ' optimization took', round((end-start).total_seconds(), 2), 'sec.' - print ' final objective is', res.fun + params = res.x if DEBUG_LEVEL >= 1: @@ -761,32 +732,30 @@ def objective(pvec): def get_page_dims(corners, rough_dims, params): - dst_br = corners[2].flatten() dims = np.array(rough_dims) def objective(dims): proj_br = project_xy(dims, params) - return np.sum((dst_br - proj_br.flatten())**2) + return np.sum((dst_br - proj_br.flatten()) ** 2) res = scipy.optimize.minimize(objective, dims, method='Powell') dims = res.x - print ' got page dims', dims[0], 'x', dims[1] + print(' got page dims', dims[0], 'x', dims[1]) return dims def remap_image(name, img, small, page_dims, params): - height = 0.5 * page_dims[1] * OUTPUT_ZOOM * img.shape[0] height = round_nearest_multiple(height, REMAP_DECIMATE) width = round_nearest_multiple(height * page_dims[0] / page_dims[1], REMAP_DECIMATE) - print ' output will be {}x{}'.format(width, height) + print(' output will be {}x{}'.format(width, height)) height_small = height / REMAP_DECIMATE width_small = width / REMAP_DECIMATE @@ -830,7 +799,7 @@ def remap_image(name, img, small, page_dims, params): if DEBUG_LEVEL >= 1: height = small.shape[0] - width = int(round(height * float(thresh.shape[1])/thresh.shape[0])) + width = int(round(height * float(thresh.shape[1]) / thresh.shape[0])) display = cv2.resize(thresh, (width, height), interpolation=cv2.INTER_AREA) debug_show(name, 6, 'output', display) @@ -839,9 +808,8 @@ def remap_image(name, img, small, page_dims, params): def main(): - if len(sys.argv) < 2: - print 'usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]' + print('usage:', sys.argv[0], 'IMAGE1 [IMAGE2 ...]') sys.exit(0) if DEBUG_LEVEL > 0 and DEBUG_OUTPUT != 'file': @@ -856,8 +824,8 @@ def main(): basename = os.path.basename(imgfile) name, _ = os.path.splitext(basename) - print 'loaded', basename, 
'with size', imgsize(img), - print 'and resized to', imgsize(small) + print('loaded', basename, 'with size', imgsize(img)) + print('and resized to', imgsize(small)) if DEBUG_LEVEL >= 3: debug_show(name, 0.0, 'original', small) @@ -868,20 +836,19 @@ def main(): spans = assemble_spans(name, small, pagemask, cinfo_list) if len(spans) < 3: - print ' detecting lines because only', len(spans), 'text spans' + print(' detecting lines because only', len(spans), 'text spans') cinfo_list = get_contours(name, small, pagemask, 'line') spans2 = assemble_spans(name, small, pagemask, cinfo_list) if len(spans2) > len(spans): spans = spans2 if len(spans) < 1: - print 'skipping', name, 'because only', len(spans), 'spans' - continue + print('skipping', name, 'because only', len(spans), 'spans') span_points = sample_spans(small.shape, spans) - print ' got', len(spans), 'spans', - print 'with', sum([len(pts) for pts in span_points]), 'points.' + print(' got', len(spans), 'spans') + print('with', sum([len(pts) for pts in span_points]), 'points.') corners, ycoords, xcoords = keypoints_from_samples(name, small, pagemask, @@ -904,11 +871,10 @@ def main(): outfiles.append(outfile) - print ' wrote', outfile - print + print(' wrote', outfile) - print 'to convert to PDF (requires ImageMagick):' - print ' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf' + print('to convert to PDF (requires ImageMagick):') + print(' convert -compress Group4 ' + ' '.join(outfiles) + ' output.pdf') if __name__ == '__main__': diff --git a/requirements.txt b/requirements.txt index c03f9c9..a9e8059 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ -page_dewarp numpy scipy Image -cv2>=3.0 +opencv-python