Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/adapt-tessdata' into update-2022-10-25
Browse files (browse the repository at this point in the history)
  • Loading branch information
kba committed Oct 25, 2022
2 parents a63f9ec + 4b55779 commit 8623675
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 24 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/makeall-linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ jobs:
PYTHON_VERSION: ${{ matrix.python-version }}

steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
# architecture: x64
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/makedocker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ jobs:
PYTHON_VERSION: ${{ github.event.inputs.python-version }}

steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Show Python3 version
Expand Down
20 changes: 9 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ Variables:
PIP_OPTIONS: extra options for the `pip install` command like `-q` or `-v` or `-e`
TESSERACT_MODELS: list of additional models/languages to download for Tesseract. Default: "$(ALL_TESSERACT_MODELS)"
TESSERACT_CONFIG: command line options for Tesseract `configure`. Default: "$(TESSERACT_CONFIG)"
TESSDATA: directory path where to install Tesseract models. Default (based on XDG_DATA_HOME): "$(TESSDATA)"
EOF
endef
export HELP
Expand Down Expand Up @@ -253,6 +252,8 @@ OCRD_EXECUTABLES += $(OCRD_COR_ASV_ANN)
OCRD_COR_ASV_ANN := $(BIN)/ocrd-cor-asv-ann-evaluate
OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-process
OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-align
OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-join
OCRD_COR_ASV_ANN += $(BIN)/ocrd-cor-asv-ann-mark
OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-train
OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-proc
OCRD_COR_ASV_ANN += $(BIN)/cor-asv-ann-eval
Expand Down Expand Up @@ -411,6 +412,7 @@ OCRD_TESSEROCR := $(BIN)/ocrd-tesserocr-binarize
OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-crop
OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-deskew
OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-recognize
OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment
OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-line
OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-region
OCRD_TESSEROCR += $(BIN)/ocrd-tesserocr-segment-word
Expand Down Expand Up @@ -462,7 +464,7 @@ install-models-calamari: $(BIN)/ocrd
. $(ACTIVATE_VENV) && ocrd resmgr download ocrd-calamari-recognize '*'
OCRD_EXECUTABLES += $(OCRD_CALAMARI)
OCRD_CALAMARI := $(BIN)/ocrd-calamari-recognize
$(OCRD_CALAMARI): ocrd_calamari
$(OCRD_CALAMARI): ocrd_calamari $(BIN)/ocrd
$(pip_install)
endif

Expand Down Expand Up @@ -490,7 +492,7 @@ OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-dewarp
OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-tiseg
OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-textline
OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-layout-analysis
$(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr
$(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr $(BIN)/ocrd
$(pip_install)
endif

Expand All @@ -517,7 +519,8 @@ install-models-sbb-binarization:

OCRD_EXECUTABLES += $(SBB_BINARIZATION)
SBB_BINARIZATION := $(BIN)/ocrd-sbb-binarize
$(SBB_BINARIZATION): sbb_binarization
SBB_BINARIZATION += $(BIN)/sbb_binarize
$(call multirule,$(SBB_BINARIZATION)): sbb_binarization $(BIN)/ocrd
$(pip_install)
endif

Expand All @@ -528,7 +531,7 @@ install-models-eynollah:
. $(ACTIVATE_VENV) && ocrd resmgr download ocrd-eynollah-segment '*'
OCRD_EXECUTABLES += $(EYNOLLAH_SEGMENT)
EYNOLLAH_SEGMENT := $(BIN)/ocrd-eynollah-segment
$(EYNOLLAH_SEGMENT): eynollah
$(EYNOLLAH_SEGMENT): eynollah $(BIN)/ocrd
$(pip_install)
endif

Expand Down Expand Up @@ -689,11 +692,10 @@ CUSTOM_DEPS += libpango1.0-dev

XDG_DATA_HOME ?= $(if $(HOME),$(HOME)/.local/share,/usr/local/share)
DEFAULT_RESLOC ?= $(XDG_DATA_HOME)/ocrd-resources
TESSDATA ?= $(DEFAULT_RESLOC)/ocrd-tesserocr-recognize
TESSDATA = $(VIRTUAL_ENV)/share/tessdata/
TESSDATA_RELEASE = 4.1.0
TESSDATA_URL := https://github.com/tesseract-ocr/tessdata_fast/raw/$(TESSDATA_RELEASE)
TESSERACT_TRAINEDDATA = $(ALL_TESSERACT_MODELS:%=$(TESSDATA)/%.traineddata)
TESSERACT_TRAINEDDATA += $(ALL_TESSERACT_MODELS:%=$(VIRTUAL_ENV)/share/tessdata/%.traineddata)

stripdir = $(patsubst %/,%,$(dir $(1)))

Expand All @@ -715,10 +717,6 @@ $(TESSDATA)/%.traineddata:
$(call WGET,$@,$(TESSDATA_URL)/$(notdir $(call stripdir,$@))/$(notdir $@)) || \
{ $(RM) $@; false; }

$(VIRTUAL_ENV)/share/tessdata/%.traineddata: $(TESSDATA)/%.traineddata
@mkdir -p $(dir $@)
cp $< $@

tesseract/Makefile.in: tesseract
cd tesseract && ./autogen.sh

Expand Down
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -333,14 +333,16 @@ This table lists which tag contains which module:
| format-converters | - | ☑ | ☑ |
| ocrd_calamari | - | ☑ | ☑ |
| ocrd_keraslm | - | ☑ | ☑ |
| ocrd_olahd_client | - | ☑ | ☑ |
| ocrd_olahd_client | | ☑ | ☑ |
| ocrd_olena | - | ☑ | ☑ |
| ocrd_segment | - | ☑ | ☑ |
| tesseract | - | ☑ | ☑ |
| ocrd_anybaseocr | - | - | ☑ |
| ocrd_kraken | - | - | - |
| ocrd_detectron2 | - | - | ☑ |
| ocrd_doxa | - | - | ☑ |
| ocrd_kraken | - | - | ☑ |
| ocrd_ocropy | - | - | - |
| ocrd_pc_segmentation | - | - | |
| ocrd_pc_segmentation | - | - | - |
| ocrd_typegroups_classifier | - | - | ☑ |
| sbb_binarization | - | - | ☑ |
| cor-asv-fst | - | - | - |
Expand All @@ -350,8 +352,6 @@ enabled by explicitly setting `OCRD_MODULES` or `DISABLED_MODULES`:

* cor-asv-fst (runtime issues)
* ocrd_ocropy (better implementation in ocrd_cis available)
* ocrd_kraken (currently unmaintained)
* clstm (required only for ocrd_kraken)

### Uninstall

Expand All @@ -373,7 +373,6 @@ This repo offers solutions to the following problems with OCR-D integration.

The following Python modules need an installation from code for different reasons:

- clstm (needs modified code for Python3)
- cor-asv-ann (not available in PyPI)
- cor-asv-fst (not available in PyPI)
- dinglehopper (not available in PyPI)
Expand Down Expand Up @@ -417,7 +416,6 @@ _(Solved by managing and delegating to different subsets of venvs.)_

Not all modules advertise their system package requirements via `make deps-ubuntu`.

- `clstm`: depends on `scons libprotobuf-dev protobuf-compiler libpng-dev libeigen3-dev swig`
- `tesseract` (when installing from source not PPA): depends on `libleptonica-dev` etc

_(Solved by maintaining these requirements under `deps-ubuntu` here.)_
Expand Down
4 changes: 2 additions & 2 deletions release.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ usage () {
echo ""
echo "Commands:"
echo ""
echo " update Update all submodules to most recent master/dev branch"
echo " update Update all submodules to most recent default branch"
echo " changelog Generate a changelog for all modified submodules"
echo " release-github Release to GitHub as $version"
echo " release-dockerhub Release ocrd/all:maximum as ocrd/all:${version#v} to DockerHub"
Expand Down Expand Up @@ -79,7 +79,7 @@ update_one_submodule () {
cd $sm
local branch=$(git remote show origin | sed -n '/HEAD branch/s/.*: //p')
loginfo "Updating submodule $sm / branch $branch"
git pull -q --rebase origin "$branch"
git pull -q --rebase origin "$branch"
git pull -q --rebase origin "$branch" --tags
git submodule update --init
)
Expand Down

0 comments on commit 8623675

Please sign in to comment.