diff --git a/.github/workflows/makeall-linux.yaml b/.github/workflows/makeall-linux.yaml index 6c7286a3..d0dba96b 100644 --- a/.github/workflows/makeall-linux.yaml +++ b/.github/workflows/makeall-linux.yaml @@ -34,8 +34,8 @@ jobs: PYTHON_VERSION: ${{ matrix.python-version }} steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} # architecture: x64 diff --git a/.github/workflows/makedocker.yml b/.github/workflows/makedocker.yml index 53b263c0..c7397a55 100644 --- a/.github/workflows/makedocker.yml +++ b/.github/workflows/makedocker.yml @@ -57,8 +57,8 @@ jobs: PYTHON_VERSION: ${{ github.event.inputs.python-version }} steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} - name: Show Python3 version diff --git a/Makefile b/Makefile index eaae3545..3193a84d 100644 --- a/Makefile +++ b/Makefile @@ -139,7 +139,6 @@ Variables: PIP_OPTIONS: extra options for the `pip install` command like `-q` or `-v` or `-e` TESSERACT_MODELS: list of additional models/languages to download for Tesseract. Default: "$(ALL_TESSERACT_MODELS)" TESSERACT_CONFIG: command line options for Tesseract `configure`. Default: "$(TESSERACT_CONFIG)" - TESSDATA: directory path where to install Tesseract models. Default (based on XDG_DATA_HOME): "$(TESSDATA)" EOF endef export HELP @@ -253,6 +252,8 @@ OCRD_EXECUTABLES += $(OCRD_COR_ASV_ANN) OCRD_COR_ASV_ANN := $(BIN)/ocrd-cor-asv-ann-evaluate OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-process OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-align +OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-join +OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-mark OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-train OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-proc OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-eval @@ -411,6 +412,7 @@ OCRD_TESSEROCR := $(BIN)/ocrd-tesserocr-binarize OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-crop OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-deskew OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-recognize +OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-line OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-region OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-word @@ -462,7 +464,7 @@ install-models-calamari: $(BIN)/ocrd . $(ACTIVATE_VENV) && ocrd resmgr download ocrd-calamari-recognize '*' OCRD_EXECUTABLES += $(OCRD_CALAMARI) OCRD_CALAMARI := $(BIN)/ocrd-calamari-recognize -$(OCRD_CALAMARI): ocrd_calamari +$(OCRD_CALAMARI): ocrd_calamari $(BIN)/ocrd $(pip_install) endif @@ -490,7 +492,7 @@ OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-dewarp OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-tiseg OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-textline OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-layout-analysis -$(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr +$(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr $(BIN)/ocrd $(pip_install) endif @@ -517,7 +519,8 @@ install-models-sbb-binarization: OCRD_EXECUTABLES += $(SBB_BINARIZATION) SBB_BINARIZATION := $(BIN)/ocrd-sbb-binarize -$(SBB_BINARIZATION): sbb_binarization +SBB_BINARIZATION += $(BIN)/sbb_binarize +$(call multirule,$(SBB_BINARIZATION)): sbb_binarization $(BIN)/ocrd $(pip_install) endif @@ -528,7 +531,7 @@ install-models-eynollah: . $(ACTIVATE_VENV) && ocrd resmgr download ocrd-eynollah-segment '*' OCRD_EXECUTABLES += $(EYNOLLAH_SEGMENT) EYNOLLAH_SEGMENT := $(BIN)/ocrd-eynollah-segment -$(EYNOLLAH_SEGMENT): eynollah +$(EYNOLLAH_SEGMENT): eynollah $(BIN)/ocrd $(pip_install) endif @@ -689,11 +692,10 @@ CUSTOM_DEPS += libpango1.0-dev XDG_DATA_HOME ?= $(if $(HOME),$(HOME)/.local/share,/usr/local/share) DEFAULT_RESLOC ?= $(XDG_DATA_HOME)/ocrd-resources -TESSDATA ?= $(DEFAULT_RESLOC)/ocrd-tesserocr-recognize +TESSDATA = $(VIRTUAL_ENV)/share/tessdata/ TESSDATA_RELEASE = 4.1.0 TESSDATA_URL := https://github.com/tesseract-ocr/tessdata_fast/raw/$(TESSDATA_RELEASE) TESSERACT_TRAINEDDATA = $(ALL_TESSERACT_MODELS:%=$(TESSDATA)/%.traineddata) -TESSERACT_TRAINEDDATA += $(ALL_TESSERACT_MODELS:%=$(VIRTUAL_ENV)/share/tessdata/%.traineddata) stripdir = $(patsubst %/,%,$(dir $(1))) @@ -715,10 +717,6 @@ $(TESSDATA)/%.traineddata: $(call WGET,$@,$(TESSDATA_URL)/$(notdir $(call stripdir,$@))/$(notdir $@)) || \ { $(RM) $@; false; } -$(VIRTUAL_ENV)/share/tessdata/%.traineddata: $(TESSDATA)/%.traineddata - @mkdir -p $(dir $@) - cp $< $@ - tesseract/Makefile.in: tesseract cd tesseract && ./autogen.sh diff --git a/README.md b/README.md index 7db91bf2..183efd34 100644 --- a/README.md +++ b/README.md @@ -333,14 +333,16 @@ This table lists which tag contains which module: | format-converters | - | ☑ | ☑ | | ocrd_calamari | - | ☑ | ☑ | | ocrd_keraslm | - | ☑ | ☑ | -| ocrd_olahd_client | - | ☑ | ☑ | +| ocrd_olahd_client | ☑ | ☑ | ☑ | | ocrd_olena | - | ☑ | ☑ | | ocrd_segment | - | ☑ | ☑ | | tesseract | - | ☑ | ☑ | | ocrd_anybaseocr | - | - | ☑ | -| ocrd_kraken | - | - | - | +| ocrd_detectron2 | - | - | ☑ | +| ocrd_doxa | - | - | ☑ | +| ocrd_kraken | - | - | ☑ | | ocrd_ocropy | - | - | - | -| ocrd_pc_segmentation | - | - | ☑ | +| ocrd_pc_segmentation | - | - | - | | ocrd_typegroups_classifier | - | - | ☑ | | sbb_binarization | - | - | ☑ | | cor-asv-fst | - | - | - | @@ -350,8 +352,6 @@ enabled by explicitly setting `OCRD_MODULES` or `DISABLED_MODULES`: * cor-asv-fst (runtime issues) * ocrd_ocropy (better implementation in ocrd_cis available) -* ocrd_kraken (currently unmaintained) -* clstm (required only for ocrd_kraken) ### Uninstall @@ -373,7 +373,6 @@ This repo offers solutions to the following problems with OCR-D integration. The following Python modules need an installation from code for different reasons: -- clstm (needs modified code for Python3) - cor-asv-ann (not available in PyPI) - cor-asv-fst (not available in PyPI) - dinglehopper (not available in PyPI) @@ -417,7 +416,6 @@ _(Solved by managing and delegating to different subsets of venvs.)_ Not all modules advertise their system package requirements via `make deps-ubuntu`. -- `clstm`: depends on `scons libprotobuf-dev protobuf-compiler libpng-dev libeigen3-dev swig` - `tesseract` (when installing from source not PPA): depends on `libleptonica-dev` etc _(Solved by maintaining these requirements under `deps-ubuntu` here.)_ diff --git a/release.sh b/release.sh index a8f9db96..9e464690 100755 --- a/release.sh +++ b/release.sh @@ -22,7 +22,7 @@ usage () { echo "" echo "Commands:" echo "" - echo " update Update all submodules to most recent master/dev branch" + echo " update Update all submodules to most recent default branch" echo " changelog Generate a changelog for all modified submodules" echo " release-github Release to GitHub as $version" echo " release-dockerhub Release ocrd/all:maximum as ocrd/all:${version#v} to DockerHub" @@ -79,7 +79,7 @@ update_one_submodule () { cd $sm local branch=$(git remote show origin | sed -n '/HEAD branch/s/.*: //p') loginfo "Updating submodule $sm / branch $branch" - git pull -q --rebase origin "$branch" + git pull -q --rebase origin "$branch" git pull -q --rebase origin "$branch" --tags git submodule update --init )