Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-623 test: dynamically find hdf5 plugin path #15135

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/tests/ftest/deployment/basic_checkout.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,4 @@ mdtest:
dfuse:
disable_caching: true
hdf5_vol:
plugin_path: /usr/lib64/mpich/lib
plugin_name: libhdf5_vol_daos.so
2 changes: 1 addition & 1 deletion src/tests/ftest/deployment/io_sys_admin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ dcp:
client_processes:
np: 16
hdf5_vol:
plugin_path: /usr/lib64/mpich/lib
plugin_name: libhdf5_vol_daos.so

io_sys_admin:
steps_to_run:
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/interoperability/diff_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ior:
write_flg: "-w -W -k -G 1 -i 1"
read_flg: "-C -k -e -r -R -g -G 1 -Q 1 -vv"
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
plugin_name: libhdf5_vol_daos.so
interop:
# Example of upgrade/downgrade RPMs from local tar file
# upgrade_rpms: ["/home/dinghwah/RPM/2.1.104/daos-2.1.104-1.el8.x86_64.rpm"]
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/interoperability/down_grade.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ior:
write_flg: "-w -W -k -G 1 -i 1"
read_flg: "-C -k -e -r -R -g -G 1 -Q 1 -vv"
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
plugin_name: libhdf5_vol_daos.so
interop:
# Example of upgrade/downgrade RPMs from local tar file
# upgrade_rpms: ["/home/dinghwah/RPM/2.1.104/daos-2.1.104-1.el8.x86_64.rpm"]
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/interoperability/updown_grade.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ior:
write_flg: "-w -W -k -G 1 -i 1"
read_flg: "-C -k -e -r -R -g -G 1 -Q 1 -vv"
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
plugin_name: libhdf5_vol_daos.so
interop:
# Example of upgrade/downgrade RPMs from local tar file
# upgrade_rpms: ["/home/dinghwah/RPM/2.1.104/daos-2.1.104-1.el8.x86_64.rpm"]
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/interoperability/updown_grade_8svr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ior:
write_flg: "-w -W -k -G 1 -i 1"
read_flg: "-C -k -e -r -R -g -G 1 -Q 1 -vv"
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
plugin_name: libhdf5_vol_daos.so
interop:
# Example of upgrade/downgrade RPMs from local tar file
# upgrade_rpms: ["/home/dinghwah/RPM/2.1.104/daos-2.1.104-1.el8.x86_64.rpm"]
Expand Down
5 changes: 3 additions & 2 deletions src/tests/ftest/interoperability/upgrade_downgrade_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'''
(C) Copyright 2023 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
'''
Expand All @@ -11,7 +12,7 @@

from agent_utils import include_local_host
from command_utils_base import CommandFailure
from general_utils import get_random_bytes, pcmd, run_pcmd
from general_utils import find_library, get_random_bytes, pcmd, run_pcmd
from ior_test_base import IorTestBase
from pydaos.raw import DaosApiError

Expand Down Expand Up @@ -574,7 +575,7 @@ def upgrade_and_downgrade(self, fault_on_pool_upgrade=False):
# (3.b)ior hdf5
elif ior_api == "HDF5":
self.log.info("(3.b)==Run IOR HDF5 write and read.")
hdf5_plugin_path = self.params.get("plugin_path", '/run/hdf5_vol/')
hdf5_plugin_path = find_library(self.params.get("plugin_name", '/run/hdf5_vol/'))
self.ior_cmd.flags.update(iorflags_write)
self.run_ior_with_pool(
plugin_path=hdf5_plugin_path, mount_dir=mount_dir,
Expand Down
20 changes: 14 additions & 6 deletions src/tests/ftest/io/macsio_test.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
"""
(C) Copyright 2020-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""

from apricot import TestWithServers
from command_utils_base import CommandFailure
from dfuse_utils import get_dfuse, start_dfuse
from general_utils import get_log_file, list_to_str
from general_utils import find_library, get_log_file, list_to_str
from job_manager_utils import get_job_manager
from macsio_util import MacsioCommand

Expand Down Expand Up @@ -44,13 +45,15 @@ def _get_macsio_command(self, pool, cont):

return macsio

def run_macsio(self, macsio, hosts, processes, plugin=None, slots=None, working_dir=None):
def run_macsio(self, job_manager, macsio, hosts, processes, plugin=None, slots=None,
working_dir=None):
"""Run the macsio test.

Parameters for the macsio command are obtained from the test yaml file,
including the path to the macsio executable.

Args:
job_manager (JobManager): Orterun, Mpirun, Srun, etc.
macsio (MacsioCommand): object defining the macsio command
hosts (NodeSet): hosts on which to run macsio
processes (int): total number of processes to use to run macsio
Expand All @@ -68,7 +71,6 @@ def run_macsio(self, macsio, hosts, processes, plugin=None, slots=None, working_
# Include DAOS VOL environment settings
env["HDF5_VOL_CONNECTOR"] = "daos"
env["HDF5_PLUGIN_PATH"] = str(plugin)
job_manager = get_job_manager(self)
job_manager.job = macsio
job_manager.assign_hosts(hosts, self.workdir, slots)
job_manager.assign_processes(processes)
Expand Down Expand Up @@ -115,7 +117,8 @@ def test_macsio(self):
# Run macsio
self.log_step("Running MACSio")
macsio = self._get_macsio_command(pool, container)
result = self.run_macsio(macsio, self.hostlist_clients, processes)
job_manager = get_job_manager(self)
result = self.run_macsio(job_manager, macsio, self.hostlist_clients, processes)
if not macsio.check_results(result, self.hostlist_clients):
self.fail("MACSio failed")
self.log.info("Test passed")
Expand All @@ -136,7 +139,12 @@ def test_macsio_daos_vol(self):
:avocado: tags=MacsioTest,test_macsio_daos_vol
:avocado: tags=DAOS_5610
"""
plugin_path = self.params.get("plugin_path", "/run/job_manager/*")
# Setup job_manager and find plugin_path
job_manager = get_job_manager(self)
plugin_name = self.params.get("plugin_name", "/run/job_manager/*")
plugin_path = find_library(plugin_name)
if not plugin_path:
self.fail(f"Failed to find {plugin_name}")
processes = self.params.get("processes", "/run/macsio/*", len(self.hostlist_clients))

# Create a pool
Expand All @@ -160,7 +168,7 @@ def test_macsio_daos_vol(self):
self.log_step("Running MACSio with DAOS VOL connector")
macsio = self._get_macsio_command(pool, container)
result = self.run_macsio(
macsio, self.hostlist_clients, processes, plugin_path,
job_manager, macsio, self.hostlist_clients, processes, plugin_path,
working_dir=dfuse.mount_dir.value)
if not macsio.check_results(result, self.hostlist_clients):
self.fail("MACSio failed")
Expand Down
4 changes: 2 additions & 2 deletions src/tests/ftest/io/macsio_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ job_manager: !mux
class_name: Mpirun
mpi_type: mpich
macsio_path: /usr/lib64/mpich/bin
plugin_path: /usr/lib64/mpich/lib
plugin_name: libhdf5_vol_daos.so
timeout:
test_macsio: 10
test_macsio_daos_vol: 20
openmpi:
class_name: Orterun
mpi_type: openmpi
macsio_path: /usr/lib64/openmpi3/bin
plugin_path: /usr/lib64/openmpi3/lib
plugin_name: libhdf5_vol_daos.so
timeout:
test_macsio: 10
test_macsio_daos_vol: 20
2 changes: 1 addition & 1 deletion src/tests/ftest/ior/small.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,4 @@ ior:
dfuse:
disable_caching: true
hdf5_vol:
plugin_path: /usr/lib64/mpich/lib
plugin_name: libhdf5_vol_daos.so
2 changes: 1 addition & 1 deletion src/tests/ftest/soak/faults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ ior_faults:
mount_dir: "/tmp/soak_dfuse_ior/"
disable_caching: true
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
plugin_name: libhdf5_vol_daos.so
events:
- "mce: [Hardware Error]: Machine check events logged"
- "Package temperature above threshold"
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/soak/harassers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ mdtest_harasser:
mount_dir: "/tmp/soak_dfuse_mdtest/"
disable_caching: true
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
plugin_name: libhdf5_vol_daos.so
events:
- "mce: [Hardware Error]: Machine check events logged"
- "Package temperature above threshold"
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/soak/smoke.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ datamover_smoke:
test_file: "daos:/testFile"
dfs_destroy: false
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
plugin_name: libhdf5_vol_daos.so
events:
- "mce: [Hardware Error]: Machine check events logged"
- "Package temperature above threshold"
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/soak/soak-extra-suse.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
hdf5_vol:
plugin_path: /usr/lib64/mpi/gcc/mpich/lib
plugin_name: libhdf5_vol_daos.so
mpi_module: "gnu-mpich"
2 changes: 1 addition & 1 deletion src/tests/ftest/soak/stress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ datamover_stress:
test_file: "daos:/testFile"
dfs_destroy: false
hdf5_vol:
plugin_path: "/usr/lib64/mpich/lib"
plugin_name: libhdf5_vol_daos.so
events:
- "mce: [Hardware Error]: Machine check events logged"
- "Package temperature above threshold"
Expand Down
7 changes: 6 additions & 1 deletion src/tests/ftest/util/file_count_test_base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""
(C) Copyright 2020-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import os

from avocado.core.exceptions import TestFail
from general_utils import find_library
from ior_test_base import IorTestBase
from mdtest_test_base import MdtestBase
from oclass_utils import extract_redundancy_factor
Expand Down Expand Up @@ -68,7 +70,10 @@ def run_file_count(self):
results = []
dir_oclass = None
apis = self.params.get("api", "/run/largefilecount/*")
hdf5_plugin_path = self.params.get("plugin_path", '/run/hdf5_vol/*')
hdf5_plugin_name = self.params.get("plugin_name", '/run/hdf5_vol/*')
hdf5_plugin_path = find_library(hdf5_plugin_name)
if not hdf5_plugin_path:
self.fail(f"Failed to find {hdf5_plugin_name}")
ior_np = self.params.get("np", '/run/ior/client_processes/*', 1)
ior_ppn = self.params.get("ppn", '/run/ior/client_processes/*', None)
mdtest_np = self.params.get("np", '/run/mdtest/client_processes/*', 1)
Expand Down
29 changes: 29 additions & 0 deletions src/tests/ftest/util/general_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
(C) Copyright 2018-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -1207,3 +1208,31 @@ def check_ssh(log, hosts, cmd_timeout=60, verbose=True):
"""
result = run_remote(log, hosts, "uname", timeout=cmd_timeout, verbose=verbose)
return result.passed


def find_library(name):
    """Find a library by a given name.

    In order of preference, searches the directories listed in
        LD_LIBRARY_PATH
        MPI_LIB   (only set after 'module load mpi', which is where the
                   library is located in CI)
        /usr/lib
        /usr/lib64

    Args:
        name (str): library file name to find, e.g. 'libhdf5_vol_daos.so'

    Returns:
        str: directory path containing the library. None if not found
    """
    paths = []
    for env_name in ("LD_LIBRARY_PATH", "MPI_LIB"):
        env_val = os.environ.get(env_name)
        if env_val:
            # Skip empty entries (e.g. from 'a::b' or a trailing ':'); an
            # empty entry would make os.path.join('', name) resolve relative
            # to the current working directory, which is not intended here.
            paths.extend(entry for entry in env_val.split(":") if entry)
    paths.append(os.path.join(os.sep, "usr", "lib"))
    paths.append(os.path.join(os.sep, "usr", "lib64"))
    for path in paths:
        if os.path.exists(os.path.join(path, name)):
            return path
    return None
9 changes: 7 additions & 2 deletions src/tests/ftest/util/ior_test_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
(C) Copyright 2018-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand All @@ -8,7 +9,7 @@
from apricot import TestWithServers
from dfuse_utils import get_dfuse, start_dfuse
from exception_utils import CommandFailure
from general_utils import get_random_string
from general_utils import find_library, get_random_string
from host_utils import get_local_host
from ior_utils import IorCommand
from job_manager_utils import get_job_manager
Expand Down Expand Up @@ -316,7 +317,11 @@ def run_ior_multiple_variants(self, obj_class, apis, transfer_block_size, flags)
flags_w = flags[0]
if api == "HDF5-VOL":
api = "HDF5"
hdf5_plugin_path = self.params.get("plugin_path", '/run/hdf5_vol/*')
hdf5_plugin_name = self.params.get("plugin_name", '/run/hdf5_vol/*')
hdf5_plugin_path = find_library(hdf5_plugin_name)
if not hdf5_plugin_path:
results.append(["FAIL", f"Failed to find {hdf5_plugin_name}"])
continue
flags_w += " -k"
elif api == "POSIX+IL":
api = "POSIX"
Expand Down
13 changes: 10 additions & 3 deletions src/tests/ftest/util/soak_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
(C) Copyright 2019-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -27,9 +28,9 @@
from duns_utils import format_path
from exception_utils import CommandFailure
from fio_utils import FioCommand
from general_utils import (DaosTestError, check_ping, check_ssh, get_journalctl, get_log_file,
get_random_bytes, get_random_string, list_to_str, pcmd, run_command,
run_pcmd, wait_for_result)
from general_utils import (DaosTestError, check_ping, check_ssh, find_library, get_journalctl,
get_log_file, get_random_bytes, get_random_string, list_to_str, pcmd,
run_command, run_pcmd, wait_for_result)
from ior_utils import IorCommand
from job_manager_utils import Mpirun
from macsio_util import MacsioCommand
Expand Down Expand Up @@ -1094,7 +1095,7 @@
self.log.info("Dfuse mount points not deleted Error")


def create_ior_cmdline(self, job_spec, pool, ppn, nodesperjob, oclass_list=None, cont=None):

Check warning on line 1098 in src/tests/ftest/util/soak_utils.py

View workflow job for this annotation

GitHub Actions / Pylint check

too-many-locals, Too many local variables (31/30)
"""Create an IOR cmdline to run in slurm batch.

Args:
Expand Down Expand Up @@ -1122,6 +1123,9 @@
if not oclass_list:
oclass_list = self.params.get("dfs_oclass", ior_params)
plugin_path = self.params.get("plugin_path", "/run/hdf5_vol/")
plugin_name = self.params.get("plugin_name", "/run/hdf5_vol/")
if plugin_name and not plugin_path:
plugin_path = find_library(plugin_name)
# update IOR cmdline for each additional IOR obj
for api in api_list:
if not self.enable_il and api in ["POSIX-LIBIOIL", "POSIX-LIBPIL4DFS"]:
Expand Down Expand Up @@ -1222,6 +1226,9 @@
oclass_list = self.params.get("oclass", macsio_params)
api_list = self.params.get("api", macsio_params)
plugin_path = self.params.get("plugin_path", "/run/hdf5_vol/")
plugin_name = self.params.get("plugin_name", "/run/hdf5_vol/")
if plugin_name and not plugin_path:
plugin_path = find_library(plugin_name)
# update macsio cmdline for each additional MACsio obj
for api in api_list:
for file_oclass, dir_oclass in oclass_list:
Expand Down
Loading