From f232169d1ddc0a7ae766bf1a599c86c7cbea2d0f Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 16 Aug 2022 08:04:08 +0200 Subject: [PATCH 1/7] release.sh: Update help text Signed-off-by: Stefan Weil --- release.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/release.sh b/release.sh index a8f9db96..9e464690 100755 --- a/release.sh +++ b/release.sh @@ -22,7 +22,7 @@ usage () { echo "" echo "Commands:" echo "" - echo " update Update all submodules to most recent master/dev branch" + echo " update Update all submodules to most recent default branch" echo " changelog Generate a changelog for all modified submodules" echo " release-github Release to GitHub as $version" echo " release-dockerhub Release ocrd/all:maximum as ocrd/all:${version#v} to DockerHub" @@ -79,7 +79,7 @@ update_one_submodule () { cd $sm local branch=$(git remote show origin | sed -n '/HEAD branch/s/.*: //p') loginfo "Updating submodule $sm / branch $branch" - git pull -q --rebase origin "$branch" + git pull -q --rebase origin "$branch" git pull -q --rebase origin "$branch" --tags git submodule update --init ) From 892050775b69b1fb37a5a1c8c9daf3bdaa5bf44a Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 13 Jul 2022 22:33:48 +0200 Subject: [PATCH 2/7] Update GitHub actions/checkout@v3 Signed-off-by: Stefan Weil --- .github/workflows/makeall-linux.yaml | 2 +- .github/workflows/makedocker.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/makeall-linux.yaml b/.github/workflows/makeall-linux.yaml index 6c7286a3..64cb29c3 100644 --- a/.github/workflows/makeall-linux.yaml +++ b/.github/workflows/makeall-linux.yaml @@ -34,7 +34,7 @@ jobs: PYTHON_VERSION: ${{ matrix.python-version }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v2 with: python-version: ${{ env.PYTHON_VERSION }} diff --git a/.github/workflows/makedocker.yml b/.github/workflows/makedocker.yml index 53b263c0..8da94633 100644 --- a/.github/workflows/makedocker.yml +++ b/.github/workflows/makedocker.yml @@ -57,7 +57,7 @@ jobs: PYTHON_VERSION: ${{ github.event.inputs.python-version }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v2 with: python-version: ${{ env.PYTHON_VERSION }} From 21a7b60b6b70e6e1c3e7985e6e481d06b8031073 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 13 Jul 2022 22:35:58 +0200 Subject: [PATCH 3/7] Update GitHub actions/setup-python@v4 Signed-off-by: Stefan Weil --- .github/workflows/makeall-linux.yaml | 2 +- .github/workflows/makedocker.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/makeall-linux.yaml b/.github/workflows/makeall-linux.yaml index 64cb29c3..d0dba96b 100644 --- a/.github/workflows/makeall-linux.yaml +++ b/.github/workflows/makeall-linux.yaml @@ -35,7 +35,7 @@ jobs: steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} # architecture: x64 diff --git a/.github/workflows/makedocker.yml b/.github/workflows/makedocker.yml index 8da94633..c7397a55 100644 --- a/.github/workflows/makedocker.yml +++ b/.github/workflows/makedocker.yml @@ -58,7 +58,7 @@ jobs: steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} - name: Show Python3 version From 6890f7af48da3729b748d77cee5cb28f03b0f19b Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 25 Oct 2022 12:07:40 +0200 Subject: [PATCH 4/7] tessdata: only use prefix/module dir --- Makefile | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index eaae3545..a9010757 100644 --- a/Makefile +++ b/Makefile @@ -139,7 +139,6 @@ Variables: PIP_OPTIONS: extra options for the `pip install` command like `-q` or `-v` or `-e` TESSERACT_MODELS: list of additional models/languages to download for Tesseract. Default: "$(ALL_TESSERACT_MODELS)" TESSERACT_CONFIG: command line options for Tesseract `configure`. Default: "$(TESSERACT_CONFIG)" - TESSDATA: directory path where to install Tesseract models. Default (based on XDG_DATA_HOME): "$(TESSDATA)" EOF endef export HELP @@ -253,6 +252,8 @@ OCRD_EXECUTABLES += $(OCRD_COR_ASV_ANN) OCRD_COR_ASV_ANN := $(BIN)/ocrd-cor-asv-ann-evaluate OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-process OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-align +OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-join +OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-mark OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-train OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-proc OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-eval @@ -411,6 +412,7 @@ OCRD_TESSEROCR := $(BIN)/ocrd-tesserocr-binarize OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-crop OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-deskew OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-recognize +OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-line OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-region OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-word @@ -517,6 +519,7 @@ install-models-sbb-binarization: OCRD_EXECUTABLES += $(SBB_BINARIZATION) SBB_BINARIZATION := $(BIN)/ocrd-sbb-binarize +SBB_BINARIZATION += $(BIN)/sbb_binarize $(SBB_BINARIZATION): sbb_binarization $(pip_install) endif @@ -528,6 +531,7 @@ install-models-eynollah: . $(ACTIVATE_VENV) && ocrd resmgr download ocrd-eynollah-segment '*' OCRD_EXECUTABLES += $(EYNOLLAH_SEGMENT) EYNOLLAH_SEGMENT := $(BIN)/ocrd-eynollah-segment +EYNOLLAH_SEGMENT += $(BIN)/eynollah $(EYNOLLAH_SEGMENT): eynollah $(pip_install) endif @@ -689,11 +693,10 @@ CUSTOM_DEPS += libpango1.0-dev XDG_DATA_HOME ?= $(if $(HOME),$(HOME)/.local/share,/usr/local/share) DEFAULT_RESLOC ?= $(XDG_DATA_HOME)/ocrd-resources -TESSDATA ?= $(DEFAULT_RESLOC)/ocrd-tesserocr-recognize +TESSDATA = $(VIRTUAL_ENV)/share/tessdata/ TESSDATA_RELEASE = 4.1.0 TESSDATA_URL := https://github.com/tesseract-ocr/tessdata_fast/raw/$(TESSDATA_RELEASE) TESSERACT_TRAINEDDATA = $(ALL_TESSERACT_MODELS:%=$(TESSDATA)/%.traineddata) -TESSERACT_TRAINEDDATA += $(ALL_TESSERACT_MODELS:%=$(VIRTUAL_ENV)/share/tessdata/%.traineddata) stripdir = $(patsubst %/,%,$(dir $(1))) @@ -715,10 +718,6 @@ $(TESSDATA)/%.traineddata: $(call WGET,$@,$(TESSDATA_URL)/$(notdir $(call stripdir,$@))/$(notdir $@)) || \ { $(RM) $@; false; } -$(VIRTUAL_ENV)/share/tessdata/%.traineddata: $(TESSDATA)/%.traineddata - @mkdir -p $(dir $@) - cp $< $@ - tesseract/Makefile.in: tesseract cd tesseract && ./autogen.sh From 6cd565fe0fdbb33cd8d134f9f73a520a5cbfac03 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 25 Oct 2022 12:17:56 +0200 Subject: [PATCH 5/7] forgot to switch to multirule --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index a9010757..a16aca7d 100644 --- a/Makefile +++ b/Makefile @@ -464,7 +464,7 @@ install-models-calamari: $(BIN)/ocrd . $(ACTIVATE_VENV) && ocrd resmgr download ocrd-calamari-recognize '*' OCRD_EXECUTABLES += $(OCRD_CALAMARI) OCRD_CALAMARI := $(BIN)/ocrd-calamari-recognize -$(OCRD_CALAMARI): ocrd_calamari +$(OCRD_CALAMARI): ocrd_calamari $(BIN)/ocrd $(pip_install) endif @@ -492,7 +492,7 @@ OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-dewarp OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-tiseg OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-textline OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-layout-analysis -$(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr +$(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr $(BIN)/ocrd $(pip_install) endif @@ -520,7 +520,7 @@ install-models-sbb-binarization: OCRD_EXECUTABLES += $(SBB_BINARIZATION) SBB_BINARIZATION := $(BIN)/ocrd-sbb-binarize SBB_BINARIZATION += $(BIN)/sbb_binarize -$(SBB_BINARIZATION): sbb_binarization +$(call multirule,$(SBB_BINARIZATION)): sbb_binarization $(BIN)/ocrd $(pip_install) endif @@ -532,7 +532,7 @@ install-models-eynollah: OCRD_EXECUTABLES += $(EYNOLLAH_SEGMENT) EYNOLLAH_SEGMENT := $(BIN)/ocrd-eynollah-segment EYNOLLAH_SEGMENT += $(BIN)/eynollah -$(EYNOLLAH_SEGMENT): eynollah +$(call multirule,$(EYNOLLAH_SEGMENT)): eynollah $(BIN)/ocrd $(pip_install) endif From 82d8006190f3edd316e8831de47ab6bff343f35a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 25 Oct 2022 12:28:15 +0200 Subject: [PATCH 6/7] Readme: update module lists --- README.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7db91bf2..183efd34 100644 --- a/README.md +++ b/README.md @@ -333,14 +333,16 @@ This table lists which tag contains which module: | format-converters | - | ☑ | ☑ | | ocrd_calamari | - | ☑ | ☑ | | ocrd_keraslm | - | ☑ | ☑ | -| ocrd_olahd_client | - | ☑ | ☑ | +| ocrd_olahd_client | ☑ | ☑ | ☑ | | ocrd_olena | - | ☑ | ☑ | | ocrd_segment | - | ☑ | ☑ | | tesseract | - | ☑ | ☑ | | ocrd_anybaseocr | - | - | ☑ | -| ocrd_kraken | - | - | - | +| ocrd_detectron2 | - | - | ☑ | +| ocrd_doxa | - | - | ☑ | +| ocrd_kraken | - | - | ☑ | | ocrd_ocropy | - | - | - | -| ocrd_pc_segmentation | - | - | ☑ | +| ocrd_pc_segmentation | - | - | - | | ocrd_typegroups_classifier | - | - | ☑ | | sbb_binarization | - | - | ☑ | | cor-asv-fst | - | - | - | @@ -350,8 +352,6 @@ enabled by explicitly setting `OCRD_MODULES` or `DISABLED_MODULES`: * cor-asv-fst (runtime issues) * ocrd_ocropy (better implementation in ocrd_cis available) -* ocrd_kraken (currently unmaintained) -* clstm (required only for ocrd_kraken) ### Uninstall @@ -373,7 +373,6 @@ This repo offers solutions to the following problems with OCR-D integration. The following Python modules need an installation from code for different reasons: -- clstm (needs modified code for Python3) - cor-asv-ann (not available in PyPI) - cor-asv-fst (not available in PyPI) - dinglehopper (not available in PyPI) @@ -417,7 +416,6 @@ _(Solved by managing and delegating to different subsets of venvs.)_ Not all modules advertise their system package requirements via `make deps-ubuntu`. -- `clstm`: depends on `scons libprotobuf-dev protobuf-compiler libpng-dev libeigen3-dev swig` - `tesseract` (when installing from source not PPA): depends on `libleptonica-dev` etc _(Solved by maintaining these requirements under `deps-ubuntu` here.)_ From 4b557798a190b3250faed067918ea17ba95b524c Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 25 Oct 2022 12:37:06 +0200 Subject: [PATCH 7/7] remove eynollah standalone CLI target due to clash with module name --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a16aca7d..3193a84d 100644 --- a/Makefile +++ b/Makefile @@ -531,8 +531,7 @@ install-models-eynollah: . $(ACTIVATE_VENV) && ocrd resmgr download ocrd-eynollah-segment '*' OCRD_EXECUTABLES += $(EYNOLLAH_SEGMENT) EYNOLLAH_SEGMENT := $(BIN)/ocrd-eynollah-segment -EYNOLLAH_SEGMENT += $(BIN)/eynollah -$(call multirule,$(EYNOLLAH_SEGMENT)): eynollah $(BIN)/ocrd +$(EYNOLLAH_SEGMENT): eynollah $(BIN)/ocrd $(pip_install) endif