
Commit

Fixed Dockerfile, tested tox for different environments and OpenMMLab AB#1006
borg committed Oct 14, 2024
1 parent 1d2d391 commit cb933c5
Showing 13 changed files with 373 additions and 127 deletions.
38 changes: 0 additions & 38 deletions .azure-pipelines/tox.yaml

This file was deleted.

11 changes: 11 additions & 0 deletions Makefile
@@ -0,0 +1,11 @@
docker-build: ## make docker-build SERVICE=htrflow TAG=v0.1.0
@docker build -t airiksarkivet/$(SERVICE):$(if $(TAG),$(TAG),latest) -f docker/$(SERVICE).dockerfile .

docker-tag: ## make docker-tag SERVICE=htrflow TAG=v0.1.0 REGISTRY=registry.ra.se:5002
@docker tag airiksarkivet/$(SERVICE):$(if $(TAG),$(TAG),latest) $(REGISTRY)/airiksarkivet/$(SERVICE):$(if $(TAG),$(TAG),latest)

docker-push: ## make docker-push SERVICE=htrflow TAG=v0.1.0 REGISTRY=registry.ra.se:5002
@docker push $(REGISTRY)/airiksarkivet/$(SERVICE):$(if $(TAG),$(TAG),latest)

docker-release: docker-build docker-tag docker-push ## make docker-release SERVICE=htrflow TAG=v0.1.0 REGISTRY=registry.ra.se:5002
@echo "Docker image built, tagged, and pushed successfully!"
23 changes: 23 additions & 0 deletions docker/htrflow_openmmlab.dockerfile
@@ -0,0 +1,23 @@
FROM huggingface/transformers-pytorch-gpu:4.41.2
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/


WORKDIR /app

RUN uv venv --python 3.10.14


ADD uv.lock /app/uv.lock
ADD pyproject.toml /app/pyproject.toml
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-install-project

COPY src LICENSE README.md examples /app/

RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen

RUN uv pip install -U https://github.com/Swedish-National-Archives-AI-lab/openmim_install/raw/main/mmcv-2.0.0-cp310-cp310-manylinux1_x86_64.whl && \
uv pip install -U mmdet==3.1.0 mmengine==0.7.2 mmocr==1.0.1 yapf==0.40.1

ENV PATH="/app/.venv/bin:$PATH"
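With the Makefile above, this image is addressed by the dockerfile's basename under docker/. A minimal sketch of building and smoke-testing it, assuming the tag is illustrative and that the venv placed on PATH exposes the htrflow CLI:

```sh
# SERVICE must match docker/htrflow_openmmlab.dockerfile; --gpus all requires the NVIDIA container runtime
make docker-build SERVICE=htrflow_openmmlab TAG=v0.1.0
docker run --rm --gpus all airiksarkivet/htrflow_openmmlab:v0.1.0 htrflow --help
```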
10 changes: 7 additions & 3 deletions docs/getting_started/installation.md
@@ -13,6 +13,11 @@ Install HTRflow with [pip](https://pypi.org/project/htrflow):
pip install htrflow
```

Requirements:

- Python >=3.10 and <3.13 (Python 3.10 is required for OpenMMLab)
- With GPU: CUDA >=11.8 (required by PyTorch 2.0; HTRflow can still run on CPU)

Verify the installation of HTRflow with `htrflow --help`. If the installation was successful, the following message is shown:

<!-- termynal -->
@@ -53,12 +58,11 @@ Requirements:

- [uv](https://docs.astral.sh/uv/) or pip
- Python 3.10
- With GPU: CUDA >=11.8 (can still run on CPU)
- With GPU: CUDA >=11.8 (required by PyTorch 2.0; HTRflow can still run on CPU)

Clone this repository and run:
```sh
uv pip install -e .

uv pip install -e .  # or: uv sync
```
This will install the HTRflow package in a virtual environment.

15 changes: 4 additions & 11 deletions pyproject.toml
@@ -18,18 +18,19 @@ requires-python = ">=3.10"

dependencies = [
"jinja2 >= 3.1.3",
"numpy>= 2.1.2",
"numpy",
"opencv-python >=4.6.0",
"tqdm >=4.66.2,<5",
"xmlschema >=3.0.2,<4",
"typer>=0.12.0",
"rich >=13.7.1",
"jiwer >=3.0.4",
"pandas >=2.2.2",
"pandas",
"pagexml-tools >=0.5.0",
"transformers[torch] >=4.44.1",
"huggingface-hub[cli] >=0.24.6",
"ultralytics >=8.0.225",
"pydantic>=2.9.2",
]


@@ -83,7 +84,7 @@ markers = [
"gpu: marks tests as gpu (deselect with '-m \"not gpu\"')",
]
pythonpath = "src"
testpaths = ["tests/unit"]
testpaths = ["tests/unit", "tests/integration"]

[tool.coverage.run]
source = ["src/htrflow"]
@@ -133,11 +134,3 @@ warn_redundant_casts = true
warn_return_any = true
warn_unreachable = true
warn_unused_configs = true

[tool.tox]
env_list = ["3.12", "3.11","3.10"]

[tool.tox.env_run_base]
runner = "uv-venv-lock-runner"
description = "Run tests using uv with locked dependencies"
commands = [["pytest"]]
Empty file removed tests/integration/.gitkeep
Empty file.
14 changes: 14 additions & 0 deletions tests/integration/Dockerfile
@@ -0,0 +1,14 @@
# Dockerfile
FROM python:3.11

# Install Hera and Pydantic
RUN pip install hera-workflows pydantic

# Set the working directory
WORKDIR /app

# Copy only the test_hera.py file into the /app directory inside the container
COPY test_hera.py /app/test_hera.py

# Make sure /app is on the Python path
ENV PYTHONPATH=/app
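A hedged sketch of building and using this helper image locally, assuming a test_hera.py script sits in tests/integration/ next to the Dockerfile (the hera-test tag is illustrative):

```sh
# Build from the tests/integration context and run the copied script inside the container
docker build -t hera-test -f tests/integration/Dockerfile tests/integration
docker run --rm hera-test python /app/test_hera.py
```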
21 changes: 21 additions & 0 deletions tests/integration/data/test_gpu_htr_model_pipeline.yaml
@@ -0,0 +1,21 @@
# HTR pipeline

steps:

- step: TextRecognition
settings:
model: Satrn
model_settings:
model: Riksarkivet/satrn_htr
generation_settings:
batch_size: 1
num_beams: 1

- step: TextRecognition
settings:
model: TrOCR
model_settings:
model: Riksarkivet/trocr-base-handwritten-hist-swe-2
generation_settings:
batch_size: 1
num_beams: 1
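This YAML is the pipeline the GPU integration test below feeds to the CLI; invoked by hand with the same arguments as that test, the call looks like:

```sh
# Flags mirror test_gpu_htr_model_pipeline.py: batched output and a log file for the tox run
htrflow pipeline tests/integration/data/test_gpu_htr_model_pipeline.yaml \
    tests/integration/data/trocr_example.png \
    --batch-output 1 --logfile tox-test.log
```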
Binary file added tests/integration/data/trocr_example.png
33 changes: 33 additions & 0 deletions tests/integration/test_gpu_availability.py
@@ -0,0 +1,33 @@
import pytest
import torch
from hera.workflows import DAG, Task, Workflow, script, WorkflowsService


SELECTOR_ARGO_SERVER_URL = "http://localhost:2746"
SELECTOR_SERVICE_ACCOUNT = "htrflow-service-account"


@pytest.mark.gpu
def test_gpu_availability():
assert torch.cuda.is_available(), "CUDA GPU is not available"
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Number of GPUs: {torch.cuda.device_count()}")


@script(image="python:3.12")
def echo(message):
print(message)


with Workflow(
generate_name="dag-diamond-",
service_account_name=SELECTOR_SERVICE_ACCOUNT,
workflows_service=WorkflowsService(host=SELECTOR_ARGO_SERVER_URL),
entrypoint="diamond",
) as w:
with DAG(name="diamond"):
A = Task(name="A", template=echo, arguments={"message": "A"})
B = Task(name="B", template=echo, arguments={"message": "B"})
A >> B

w.submit()
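Note that the Hera workflow is built and submitted at import time, so collecting this module already requires the Argo server at SELECTOR_ARGO_SERVER_URL to be reachable. The CUDA check itself is gated behind the gpu marker declared in pyproject.toml and selected like this:

```sh
# Run only gpu-marked tests; needs a CUDA device and a reachable Argo server on localhost:2746
pytest -m gpu tests/integration/test_gpu_availability.py
```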
37 changes: 37 additions & 0 deletions tests/integration/test_gpu_htr_model_pipeline.py
@@ -0,0 +1,37 @@
import os
import pytest
from typer.testing import CliRunner
from htrflow.cli import app

runner = CliRunner()

image_path = "tests/integration/data/trocr_example.png"
pipeline_path = "tests/integration/data/test_gpu_htr_model_pipeline.yaml"


@pytest.fixture(scope="module")
def check_test_files():
assert os.path.exists(image_path), f"Test image not found: {image_path}"
assert os.path.exists(pipeline_path), f"Test pipeline YAML not found: {pipeline_path}"


@pytest.mark.gpu
def test_run_htr_pipeline(check_test_files):
result = runner.invoke(
app,
[
"pipeline",
pipeline_path,
image_path,
"--batch-output",
"1",
"--logfile",
"tox-test.log",
],
)

assert result.exit_code == 0, f"Pipeline did not exit successfully (exit code {result.exit_code})"
32 changes: 32 additions & 0 deletions tox.ini
@@ -0,0 +1,32 @@
[tox]
envlist = py312, py311, py310, openmmlab

[testenv]
description = "Run tests in base environments"
deps =
pytest
lorem
pytest-cov
commands = pytest -m "not gpu"

[testenv:openmmlab]
description = "Run tests with OpenMMLab packages"
deps =
mmcv @ https://github.com/Swedish-National-Archives-AI-lab/openmim_install/raw/main/mmcv-2.0.0-cp310-cp310-manylinux1_x86_64.whl
mmdet==3.1.0
mmengine==0.7.2
mmocr==1.0.1
yapf==0.40.1

commands =
uv run pytest -m gpu {posargs}

[testenv:py312]
basepython = python3.12

[testenv:py311]
basepython = python3.11

[testenv:py310]
basepython = python3.10
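This tox.ini supersedes the [tool.tox] table removed from pyproject.toml above. A typical local invocation, with extra pytest arguments forwarded to the openmmlab environment through {posargs}:

```sh
# CPU matrix first, then the GPU/OpenMMLab environment with an explicit test path passed to pytest
tox -e py312,py311,py310
tox -e openmmlab -- tests/integration/test_gpu_htr_model_pipeline.py
```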

