Skip to content

Commit

Permalink
add more oton internal tests
Browse files Browse the repository at this point in the history
  • Loading branch information
MehmedGIT committed Nov 1, 2024
1 parent b52f98a commit b5391bc
Show file tree
Hide file tree
Showing 36 changed files with 778 additions and 398 deletions.
2 changes: 2 additions & 0 deletions src/server/operandi_server/constants.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# Names exported by a star-import of this module.
__all__ = [
    "DEFAULT_FILE_GRP",
    "DEFAULT_METS_BASENAME",
    "SERVER_OTON_CONVERSIONS",
    "SERVER_WORKFLOW_JOBS_ROUTER",
    "SERVER_WORKFLOWS_ROUTER",
    "SERVER_WORKSPACES_ROUTER",
]

# Default values for the file group and the METS file basename.
DEFAULT_FILE_GRP: str = "DEFAULT"
DEFAULT_METS_BASENAME: str = "mets.xml"

# Identifiers of the server-side resource categories / routers.
SERVER_OTON_CONVERSIONS: str = "oton_conversions"
SERVER_WORKFLOW_JOBS_ROUTER: str = "workflow_jobs"
SERVER_WORKFLOWS_ROUTER: str = "workflows"
SERVER_WORKSPACES_ROUTER: str = "workspaces"
60 changes: 37 additions & 23 deletions src/server/operandi_server/routers/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,18 @@
from operandi_utils.oton import OTONConverter
from operandi_utils.rabbitmq import (
get_connection_publisher, RABBITMQ_QUEUE_JOB_STATUSES, RABBITMQ_QUEUE_HARVESTER, RABBITMQ_QUEUE_USERS)
from operandi_server.constants import SERVER_WORKFLOWS_ROUTER, SERVER_WORKFLOW_JOBS_ROUTER, SERVER_WORKSPACES_ROUTER
from operandi_server.constants import (
SERVER_OTON_CONVERSIONS, SERVER_WORKFLOWS_ROUTER, SERVER_WORKFLOW_JOBS_ROUTER, SERVER_WORKSPACES_ROUTER)
from operandi_server.files_manager import (
create_resource_dir, delete_resource_dir, get_all_resources_url, get_resource_local, get_resource_url,
receive_resource)
from operandi_server.models import SbatchArguments, WorkflowArguments, WorkflowRsrc, WorkflowJobRsrc
from .workflow_utils import (
get_db_workflow_job_with_handling, get_db_workflow_with_handling, nf_script_uses_mets_server_with_handling)
convert_oton_with_handling,
get_db_workflow_job_with_handling,
get_db_workflow_with_handling,
nf_script_uses_mets_server_with_handling
)
from .workspace_utils import check_if_file_group_exists_with_handling, get_db_workspace_with_handling
from .user import RouterUser

Expand Down Expand Up @@ -105,8 +110,11 @@ def __init__(self):
path="/convert_workflow",
endpoint=self.convert_txt_to_nextflow,
methods=["POST"],
status_code=status.HTTP_200_OK,
summary="Upload a text file containing a workflow in ocrd process format and convert it to a Nextflow script in the desired format (local/docker)"
status_code=status.HTTP_201_CREATED,
summary="""
Upload a text file containing a workflow in ocrd process format and
convert it to a Nextflow script in the desired format (local/docker)
"""
)

def __del__(self):
Expand Down Expand Up @@ -434,36 +442,42 @@ def _push_job_to_rabbitmq(
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=message)

# Added by Faizan
async def convert_txt_to_nextflow(
    self, txt_file: UploadFile, environment: str, auth: HTTPBasicCredentials = Depends(HTTPBasic())
):
    """
    Convert an uploaded ocrd process text workflow into a Nextflow script.

    The upload is stored inside a freshly created resource directory of the
    `SERVER_OTON_CONVERSIONS` category, so each request works on its own
    input/output files instead of a shared temporary path.

    :param txt_file: uploaded text file containing the ocrd process workflow
    :param environment: target environment of the produced script; one of
        "local", "docker" or "apptainer"
    :param auth: HTTP basic credentials, passed to the user authenticator
    :return: a `FileResponse` serving the produced Nextflow script under the
        download name `<oton_id>.nf`
    :raises HTTPException: with status 400 when the environment value is
        unknown, when the upload cannot be received, or when the converter
        rejects the workflow with a `ValueError`
    """
    # Authenticate the user
    await self.user_authenticator.user_login(auth)

    # Reject unsupported targets before anything is written to disk
    environments = ["local", "docker", "apptainer"]
    if environment not in environments:
        message = f"Unknown environment value: {environment}. Must be one of: {environments}"
        self.logger.error(message)
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message)

    oton_id, oton_dir = create_resource_dir(SERVER_OTON_CONVERSIONS, resource_id=None)
    ocrd_process_txt = join(oton_dir, "ocrd_process_input.txt")
    nf_script_dest = join(oton_dir, "nextflow_output.nf")

    try:
        await receive_resource(file=txt_file, resource_dst=ocrd_process_txt)
    except Exception as error:
        message = "Failed to receive the workflow resource"
        self.logger.error(f"{message}, error: {error}")
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message) from error

    # NOTE(review): the resource directory is kept when the conversion fails;
    # confirm whether it should be removed in that case.
    converter = OTONConverter()
    try:
        # The converter raises ValueError for workflows it cannot accept
        if environment == "local":
            converter.convert_oton_env_local(str(ocrd_process_txt), str(nf_script_dest))
        elif environment == "docker":
            converter.convert_oton_env_docker(str(ocrd_process_txt), str(nf_script_dest))
        elif environment == "apptainer":
            converter.convert_oton_env_apptainer(str(ocrd_process_txt), str(nf_script_dest))
    except ValueError as error:
        self.logger.error(f"Failed to convert the ocrd process workflow, error: {error}")
        # The converter's own message is returned to the client unchanged
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(error)) from error

    # Return the generated Nextflow (.nf) file as a response
    return FileResponse(nf_script_dest, filename=f'{oton_id}.nf')
4 changes: 4 additions & 0 deletions src/server/operandi_server/routers/workflow_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,7 @@ async def nf_script_uses_mets_server_with_handling(
message = "Failed to identify whether a mets server is used or not in the provided Nextflow workflow."
logger.error(f"{message}, error: {error}")
raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=message)


# Placeholder coroutine: it takes no arguments, performs no work and
# returns None when awaited.
# NOTE(review): presumably meant to wrap the OTON conversion with HTTP error
# handling like the other *_with_handling helpers - confirm before relying on it.
async def convert_oton_with_handling():
pass
3 changes: 2 additions & 1 deletion src/utils/operandi_utils/oton/ocrd_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ def validate_processor_params(
processor_args.parameters = deepcopy(backup_curr_params)
return report

def validate_ocrd_process_command(self, line: str) -> bool:
    """
    Check that the given line is exactly the `ocrd process` command.

    The comparison is an exact string match; the line is not stripped or
    otherwise normalized before comparing.

    :param line: the line to validate (reported as the "first line" on failure)
    :return: True when the line matches
    :raises ValueError: when the line differs from `ocrd process`; the same
        message is logged as an error before raising
    """
    expected = 'ocrd process'
    if line != expected:
        # Build the message once so the log entry and the exception agree
        message = f"Invalid first line. Expected: '{expected}', got: '{line}'"
        self.logger.error(message)
        raise ValueError(message)
    return True
89 changes: 89 additions & 0 deletions tests/assets/oton/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Directory holding the OtoN test assets; every path below is built
# from it with a forward slash separator.
OTON_RESOURCES_DIR = "tests/assets/oton"

# Input workflow text files.
IN_TXT_WF1 = OTON_RESOURCES_DIR + "/workflow1.txt"
IN_TXT_WF2 = OTON_RESOURCES_DIR + "/workflow2.txt"
IN_TXT_WF3 = OTON_RESOURCES_DIR + "/workflow3.txt"
IN_TXT_WF4 = OTON_RESOURCES_DIR + "/workflow4.txt"

# Output paths of the Nextflow scripts, per workflow and environment.
OUT_NF_WF1_APPTAINER = OTON_RESOURCES_DIR + "/test_output_nextflow1_apptainer.nf"
OUT_NF_WF1_DOCKER = OTON_RESOURCES_DIR + "/test_output_nextflow1_docker.nf"
OUT_NF_WF1_LOCAL = OTON_RESOURCES_DIR + "/test_output_nextflow1.nf"
OUT_NF_WF2_LOCAL = OTON_RESOURCES_DIR + "/test_output_nextflow2.nf"
OUT_NF_WF3_LOCAL = OTON_RESOURCES_DIR + "/test_output_nextflow3.nf"
OUT_NF_WF4_LOCAL = OTON_RESOURCES_DIR + "/test_output_nextflow4.nf"

# Workflow text files that are invalid on purpose.
INVALID_WF1 = OTON_RESOURCES_DIR + "/invalid_workflow1.txt"
INVALID_WF2 = OTON_RESOURCES_DIR + "/invalid_workflow2.txt"
INVALID_WF3 = OTON_RESOURCES_DIR + "/invalid_workflow3.txt"


# Nextflow `workflow` block with 8 chained processor calls (indices 0-7).
# The first call reads params.mets_path / params.input_file_group; every
# later call consumes the `.out` of the call before it.
# NOTE(review): presumably the expected output for workflow1.txt - confirm
# against the tests that use it. The literal starts and ends with a newline.
EXPECTED_WF1 = """
workflow {
main:
ocrd_cis_ocropy_binarize_0(params.mets_path, params.input_file_group, "OCR-D-BIN")
ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out, "OCR-D-BIN", "OCR-D-CROP")
ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out, "OCR-D-CROP", "OCR-D-BIN2")
ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out, "OCR-D-BIN2", "OCR-D-BIN-DENOISE")
ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out, "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW")
ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out, "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG")
ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out, "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP")
ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out, "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR")
}
"""

# Nextflow `workflow` block with 7 chained processor calls (indices 0-6),
# built the same way: each call takes the previous call's `.out`.
# NOTE(review): presumably the expected output for workflow2.txt - confirm.
EXPECTED_WF2 = """
workflow {
main:
ocrd_cis_ocropy_binarize_0(params.mets_path, params.input_file_group, "OCR-D-BIN")
ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out, "OCR-D-BIN", "OCR-D-CROP")
ocrd_skimage_denoise_2(ocrd_anybaseocr_crop_1.out, "OCR-D-CROP", "OCR-D-BIN-DENOISE")
ocrd_tesserocr_deskew_3(ocrd_skimage_denoise_2.out, "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW")
ocrd_tesserocr_segment_4(ocrd_tesserocr_deskew_3.out, "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG")
ocrd_cis_ocropy_dewarp_5(ocrd_tesserocr_segment_4.out, "OCR-D-SEG", "OCR-D-SEG-DEWARP")
ocrd_tesserocr_recognize_6(ocrd_cis_ocropy_dewarp_5.out, "OCR-D-SEG-DEWARP", "OCR-D-OCR")
}
"""

# Nextflow `workflow` block with 3 dinglehopper calls; the second and third
# calls pass comma-separated pairs of input file groups.
# NOTE(review): presumably the expected output for workflow3.txt - confirm.
EXPECTED_WF3 = """
workflow {
main:
ocrd_dinglehopper_0(params.mets_path, params.input_file_group, "OCR-D-EVAL-SEG-BLOCK")
ocrd_dinglehopper_1(ocrd_dinglehopper_0.out, "OCR-D-GT-SEG-LINE,OCR-D-OCR", "OCR-D-EVAL-SEG-LINE")
ocrd_dinglehopper_2(ocrd_dinglehopper_1.out, "OCR-D-GT-SEG-PAGE,OCR-D-OCR", "OCR-D-EVAL-SEG-PAGE")
}
"""

# Nextflow `workflow` block with 13 chained processor calls (indices 0-12);
# some processors occur more than once and are told apart by their index
# suffix (e.g. ocrd_olena_binarize_0 and ocrd_olena_binarize_2).
# NOTE(review): presumably the expected output for workflow4.txt - confirm.
EXPECTED_WF4 = """
workflow {
main:
ocrd_olena_binarize_0(params.mets_path, params.input_file_group, "OCR-D-BIN")
ocrd_anybaseocr_crop_1(ocrd_olena_binarize_0.out, "OCR-D-BIN", "OCR-D-CROP")
ocrd_olena_binarize_2(ocrd_anybaseocr_crop_1.out, "OCR-D-CROP", "OCR-D-BIN2")
ocrd_cis_ocropy_denoise_3(ocrd_olena_binarize_2.out, "OCR-D-BIN2", "OCR-D-BIN-DENOISE")
ocrd_cis_ocropy_deskew_4(ocrd_cis_ocropy_denoise_3.out, "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW")
ocrd_tesserocr_segment_region_5(ocrd_cis_ocropy_deskew_4.out, "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG-REG")
ocrd_segment_repair_6(ocrd_tesserocr_segment_region_5.out, "OCR-D-SEG-REG", "OCR-D-SEG-REPAIR")
ocrd_cis_ocropy_deskew_7(ocrd_segment_repair_6.out, "OCR-D-SEG-REPAIR", "OCR-D-SEG-REG-DESKEW")
ocrd_cis_ocropy_clip_8(ocrd_cis_ocropy_deskew_7.out, "OCR-D-SEG-REG-DESKEW", "OCR-D-SEG-REG-DESKEW-CLIP")
ocrd_tesserocr_segment_line_9(ocrd_cis_ocropy_clip_8.out, "OCR-D-SEG-REG-DESKEW-CLIP", "OCR-D-SEG-LINE")
ocrd_segment_repair_10(ocrd_tesserocr_segment_line_9.out, "OCR-D-SEG-LINE", "OCR-D-SEG-REPAIR-LINE")
ocrd_cis_ocropy_dewarp_11(ocrd_segment_repair_10.out, "OCR-D-SEG-REPAIR-LINE", "OCR-D-SEG-LINE-RESEG-DEWARP")
ocrd_calamari_recognize_12(ocrd_cis_ocropy_dewarp_11.out, "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR")
}
"""

# Nextflow parameter lines shared by all environments.
PARAMETERS_COMMON = [
    'nextflow.enable.dsl = 2',
    'params.mets_path = "null"',
    'params.workspace_dir = "null"',
]

# The local environment has no additional parameter lines.
PARAMETERS_LOCAL = list()

# The docker and apptainer environments each add the env_wrapper parameter;
# the two lists are kept as separate objects.
PARAMETERS_DOCKER = ['params.env_wrapper = "null"']
PARAMETERS_APPTAINER = ['params.env_wrapper = "null"']
1 change: 1 addition & 0 deletions tests/assets/oton/invalid_workflow1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
invalid ocrd process text
4 changes: 4 additions & 0 deletions tests/assets/oton/invalid_workflow2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ocrd process \
"cis-ocropy-binarize -I OCR-D-IMG -O OCR-D-BIN" \
"anybaseocr-crop -I OCR-D-NON-EXISTING -O OCR-D-CROP" \
"skimage-denoise -I OCR-D-CROP -O OCR-D-BIN-DENOISE -P level-of-operation page"
4 changes: 4 additions & 0 deletions tests/assets/oton/invalid_workflow3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ocrd process \
cis-ocropy-binarize -I OCR-D-IMG -O OCR-D-BIN \
anybaseocr-crop -I OCR-D-BIN -O \
skimage-denoise -I OCR-D-CROP -O OCR-D-BIN-DENOISE -P level-of-operation page
File renamed without changes.
Loading

0 comments on commit b5391bc

Please sign in to comment.