diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..6740f060 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,20 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF +formats: + - pdf + +# Optionally set the version of Python and requirements required to build your docs +python: + version: "3.7" + install: + - requirements: docs/requirements.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a467b43..b0277e28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,5 +87,11 @@ if(${ENABLE_TESTS}) add_subdirectory(tests) endif(${ENABLE_TESTS}) if(${ENABLE_EXAMPLES}) - add_subdirectory(examples) + configure_file( + ${PROJECT_SOURCE_DIR}/examples/opts.mk.in + ${PROJECT_BINARY_DIR}/examples/opts.mk + @ONLY + ) + file(GLOB EXAMPLE_DIRS ${PROJECT_SOURCE_DIR}/examples/ex*) + file(COPY ${PROJECT_SOURCE_DIR}/examples/Makefile ${EXAMPLE_DIRS} DESTINATION ${PROJECT_BINARY_DIR}/examples) endif(${ENABLE_EXAMPLES}) diff --git a/bindings/python/dspaces.py b/bindings/python/dspaces.py index 771620a8..277c2666 100644 --- a/bindings/python/dspaces.py +++ b/bindings/python/dspaces.py @@ -1,4 +1,4 @@ -from dspaces_wrapper import * +from dspaces.dspaces_wrapper import * import numpy as np class dspaces: diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..b942bb70 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = . +BUILDDIR = .build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 00000000..f2fb1be0 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,4 @@ +DataSpaces API +============== + + diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..78c454c1 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = u'DataSpaces' +copyright = u'2021, ' +author = u'' + +# The short X.Y version +version = u'' +# The full version, including alpha/beta/rc tags +release = u'' + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.ifconfig', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['.templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [u'.build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = None + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'renku' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['.static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. 
Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'DataSpacesdoc' + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'DataSpaces.tex', u'DataSpaces Documentation', + u'Philip Davis', 'manual'), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'dataspaces', u'DataSpaces Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'DataSpaces', u'DataSpaces Documentation', + author, 'DataSpaces', 'One line description of project.', + 'Miscellaneous'), +] + + +# -- Options for Epub output ------------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = project + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. 
+# +# epub_identifier = '' + +# A unique identification for the text. +# +# epub_uid = '' + +# A list of files that should not be packed into the epub file. +epub_exclude_files = ['search.html'] + + +# -- Extension configuration ------------------------------------------------- + +# -- Options for todo extension ---------------------------------------------- + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 00000000..72c88880 --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,2 @@ +DataSpaces Examples +=================== diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..4cc57911 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,32 @@ +.. DataSpaces documentation master file, created by + sphinx-quickstart on Tue Sep 28 14:04:59 2021. + +Welcome to DataSpaces +===================== + +DataSpaces is a communication library aimed at supporting interactions between large-scale scientific simulation, analysis, and visualization programs. +DataSpaces enables programs to write to and read from shared N-dimensional arrays without centralized query processing or indexing using low-latency RDMA transfers. +The result is highly-scalable data access between components of an HPC workflow. +DataSpaces can be used to transfer data in *in-situ* workflows, such as coupled simulations and in-situ analysis workflows, +moving data through shared memory and RDMA transfers, rather than using the file system. +Like a shared file system, DataSpaces allows data readers to be decoupled from writers in both space and time. +In other words, no synchronization of writers and readers is required, and readers may access data written by any process. + + +Contents +======== + +.. 
toctree:: + + installation + usage + running + api + examples + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 00000000..1d3e055f --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,42 @@ +Installing +========== + +The easiest way to install DataSpaces is using `Spack `_. Spack is a package manager aimed at HPC and scientific computing. + Using Spack simplifies the installation of DataSpaces and its dependencies. + +Installing Spack +---------------- + +To install Spack, follow the getting started instructions found `here `_. +This will install the package manager, and make a large variety of packages available. + +Installing the DataSpaces repository +------------------------------------ + +The DataSpaces group maintains a repository for the DataSpaces spack package (and any relevant ancillary packages). This can be found `here `_. +In order to use this package, you will need to first install Spack using the above instructions. +Once you have done this, you can load the DataSpaces package repository by doing the following: + +.. code-block:: console + + git clone https://github.com/rdi2dspaces/dspaces-spack.git + spack repo add dspaces-spack + +Installing DataSpaces +--------------------- + +Once the DataSpaces repository has been loaded, the dataspaces package can be installed with: + +.. code-block:: console + + spack install dataspaces + +This will automatically install all DataSpaces dependencies and the dataspaces package itself. +Once the package has been installed the command: + +.. code-block:: console + + spack load dataspaces + +Configures the environment to use DataSpaces, adding the server binary's directory to `PATH`, any shared library paths to `LD_LIBRARY_PATH`, etc. +This simplifies building and running programs that use DataSpaces. 
diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..8543872b --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +renku-sphinx-theme diff --git a/docs/running.rst b/docs/running.rst new file mode 100644 index 00000000..5e561d75 --- /dev/null +++ b/docs/running.rst @@ -0,0 +1,57 @@ +How to Run +========== + +Running the Server +------------------ +The DataSpaces server expects to find the `dataspaces.conf` file in its working directory. +The format of this file is a list of configuration values, one per line. +The possible values are as follows: + +` = `, e.g. +`num_apps = 1` + +`num_apps`: this value is the number of `dspaces_kill()` calls from clients that are needed to kill the server binary. + +`ndim`: number of dimensions for the default global data domain. + +`dims`: size of each dimension for the default global data domain. + +`max_versions`: maximum number of versions of a data object to be cached in DataSpaces servers. + +`hash_version`: the type of distributed hash table used. A value of `1` means that a Hilbert SFC is partitioned into continuous segments and distributed across the servers. + A value of `2` means the space is partitioned by repeating bisection along the longest domain. + +*NOTES* on what values to use + +The global dimensions have implications for performance. Data indexing will be partitioned evenly across the global dimensions, +and so if data is only being written to a subset of the global dimensions there is a risk of unbalanced indexing load. +Ideally, the data domain being written to will match the global dimensions as closely as possible. The default value set in +`dataspaces.conf` is for convenience. The application can set this per variable with `dspaces_define_gdim()`. 
+ +`hash_version = 1` has better locality in the most general case, and should be preferred unless the dimensions of the data +domain are not a power of two or the ratio of longest to shortest dimension is greater than two. + +`num_apps` should be set in conjunction with how `dspaces_kill()` is used in the application(s) using dataspaces. Generally, one rank +of each application should call `dspaces_kill()`, and the number of process groups using dataspaces will be the same as `num_apps`. +Occasionally, it is not practical to have a client call `dspaces_kill()`, and the dataspaces repo provides a standalone binary `terminator` +to send a single `dspaces_kill()` and then exit. + +Bootstrapping communication +--------------------------- +The server produces a bootstrap file during its init phase, `conf.ds`. This file must be read by the clients (or rank zero of the clients +if `dspaces_init_mpi()` is being used). This file provides the clients with enough information to make initial contact with the server and +perform wire-up. In order to find this file, the server and client application must be run in the same working directory, or at least a symlink to `conf.ds` should be present. + +Environment variables +--------------------- +There are a few environment variables that can be used to influence DataSpaces. + +`DSPACES_DEBUG` - enables substantial debug output for both clients and server. + +`DSPACES_DEFAULT_NUM_HANDLERS` - the number of request handling threads launched by the server (in addition to the main thread). Default: 4. + This value should be changed if it is likely to oversubscribe or underutilize the node the server is running on. + +Running the server +------------------ + +The server binary, `dspaces_server`, takes a single argument: the listen_address. This is a Mercury-specific connection string (see Mercury documentation for details.) 
Common values are: `sockets` to use TCP for communication, `sm` for shared memory (if all client and server processes are on the same node) and `ofi+X` for RDMA, where `X` is `verbs`, `psm2`, or `cray` as is appropriate for the system fabric. diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 00000000..4384ecff --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,58 @@ +How to Use DataSpaces +===================== + +DataSpaces consists of two components: a client library and a server library. +Additionally, the DataSpaces package comes packaged with a standalone, MPI-based server binary. +The typical usage of DataSpaces is to run the server binary along-side the user's application, +and use the DataSpaces calls provided by the client library to store and access data from the server. +It is also possible to run the server in a subset of application processes, if it is not desired to run +the server as an independent binary. + +DataSpaces provides a full set of bindings for C/C++, and a subset of the API for Fortran and Python. +This makes it possible to share data between applications written in different programming languages via the common put/get abstraction. + +Building a C/C++ program with DataSpaces +---------------------------------------- + +Flags necessary for compiling a program that uses DataSpaces can be found from the pkg-config file installed by DataSpaces in `/lib/pkgconfig`. +If installing using spack, the appropriate directory will be added to `PKG_CONFIG_PATH` when the dataspaces module is loaded. +`pkg-config` can provide useful information that depends on which flag is provided: + + +Provides compilation flags for building a program that uses the dataspaces API: + +.. code-block:: console + + pkg-config --cflags dspaces + +Provides linking flags for building a program that uses the dataspaces API: + +.. code-block:: console + + pkg-config --libs dspaces + +Provides the path to the dspaces_server binary: + +.. 
code-block:: console + + pkg-config --variable=exec_prefix dspaces + +Alternatively, dataspaces installs a CMake targets file that makes it easy to include dspaces in a CMake project. +If dataspaces was installed with Spack, `CMAKE_PREFIX_PATH` will be updated when the dataspaces package is loaded. +Recent versions of CMake will also be able to find dspaces if `/bin` is in the user's `PATH` environment variable. + +To include dspaces in a CMake project, simply add `find_package(dspaces)` to the project's CMakeLists.txt file and include `dspaces::dspaces` +in the target_link_libraries for whatever target is using dspaces. + +Building a Fortran program with DataSpaces +------------------------------------------ + +Flags for Fortran compilation cannot be obtained through pkg-config. However, a CMake project can be configured to automatically configure +compilation for dataspaces with Fortran. To do this, add `find_package(dspaces)` to the project's CMakeLists.txt file and include `dspaces::fortran` +in the target_link_libraries for whatever target is using dspaces. + +Using DataSpaces with Python +---------------------------- + +In order to use the DataSpaces Python bindings, `/lib//dist-packages` must be added to `PYTHONPATH`. +Spack will do this automatically when the dataspaces package is loaded. 
To use the Python bindings, import the `dspaces` module. diff --git a/examples/ex1_putget/dataspaces.conf b/examples/ex1_putget/dataspaces.conf index 83deb020..c312c46d 100644 --- a/examples/ex1_putget/dataspaces.conf +++ b/examples/ex1_putget/dataspaces.conf @@ -2,6 +2,5 @@ ndim = 1 dims = 4 max_versions = 10 -max_readers = 8 num_apps = 2 diff --git a/examples/ex1_putget/get.c b/examples/ex1_putget/get.c index 965e92f6..7be02804 100644 --- a/examples/ex1_putget/get.c +++ b/examples/ex1_putget/get.c @@ -11,9 +11,9 @@ int main(int argc, char **argv) { dspaces_client_t client; - // DataSpaces: Initalize and identify application - // Usage: dspaces_init(num_peers, appid, Ptr to MPI comm, parameters) - // Note: appid for get.c is 2 [for put.c, it was 1] + // Initalize DataSpaces + // # peer number (usually MPI rank) + // # handle to initialize dspaces_init(0, &client); int timestep = 0; @@ -41,7 +41,7 @@ int main(int argc, char **argv) // Usage: dspaces_get(Name of variable, version num, // size (in bytes of each element), dimensions for bounding box, // lower bound coordinates, upper bound coordinates, - // ptr to data buffer + // ptr to data buffer, flag value (-1) means wait for data indefinitely dspaces_get(client, var_name, timestep, sizeof(int), ndim, &lb, &ub, data, -1); diff --git a/examples/ex1_putget/put.c b/examples/ex1_putget/put.c index f0fedeb0..060651f8 100644 --- a/examples/ex1_putget/put.c +++ b/examples/ex1_putget/put.c @@ -15,12 +15,8 @@ int main(int argc, char **argv) dspaces_client_t client; // Initalize DataSpaces - // # of Peers, Application ID, ptr MPI comm, additional parameters - // # Peers: Number of connecting clients to the DS server - // Application ID: Unique idenitifier (integer) for application - // Pointer to the MPI Communicator: - // when NOT NULL, allows DS Layer to use MPI barrier func - // Addt'l parameters: Placeholder for future arguments, currently NULL. 
+ // # peer number (usually MPI rank) + // # handle to initialize dspaces_init(0, &client); int timestep = 0; diff --git a/examples/ex2_boundingBox/dataspaces.conf b/examples/ex2_boundingBox/dataspaces.conf index 89466daf..30399362 100644 --- a/examples/ex2_boundingBox/dataspaces.conf +++ b/examples/ex2_boundingBox/dataspaces.conf @@ -2,6 +2,5 @@ ndim = 1 dims = 64 max_versions = 10 -max_readers = 8 num_apps = 2 diff --git a/examples/ex2_boundingBox/get.c b/examples/ex2_boundingBox/get.c index bef62a25..24e95733 100644 --- a/examples/ex2_boundingBox/get.c +++ b/examples/ex2_boundingBox/get.c @@ -20,9 +20,9 @@ int main(int argc, char **argv) MPI_Barrier(MPI_COMM_WORLD); gcomm = MPI_COMM_WORLD; - // DataSpaces: Initalize and identify application - // Usage: dspaces_init(num_peers, appid, Ptr to MPI comm, parameters) - // Note: appid for get.c is 2 [for put.c, it was 1] + // Initalize DataSpaces + // # MPI communicator for collective bootstrapping + // # handle to initialize dspaces_init_mpi(gcomm, &client); int timestep = 0; diff --git a/examples/ex2_boundingBox/put.c b/examples/ex2_boundingBox/put.c index 1d10d0b1..45b281f0 100644 --- a/examples/ex2_boundingBox/put.c +++ b/examples/ex2_boundingBox/put.c @@ -25,11 +25,8 @@ int main(int argc, char **argv) gcomm = MPI_COMM_WORLD; // Initalize DataSpaces - // # of Peers, Application ID, ptr MPI comm, additional parameters - // # Peers: Number of connecting clients to the DS server - // Application ID: Unique idenitifier (integer) for application - // Pointer to the MPI Communicator, allows DS Layer to use MPI barrier func - // Addt'l parameters: Placeholder for future arguments, currently NULL. 
+ // # MPI communicator for collective bootstrapping + // # handle to initialize dspaces_init_mpi(gcomm, &client); int timestep = 0; diff --git a/examples/ex3_minmax/dataspaces.conf b/examples/ex3_minmax/dataspaces.conf index d8fb24af..0b3dd70d 100644 --- a/examples/ex3_minmax/dataspaces.conf +++ b/examples/ex3_minmax/dataspaces.conf @@ -2,6 +2,5 @@ ndim = 1 dims = 128 max_versions = 10 -max_readers = 8 num_apps = 2 diff --git a/examples/ex3_minmax/minmaxavg_reader.c b/examples/ex3_minmax/minmaxavg_reader.c index 454210dc..bb813f66 100644 --- a/examples/ex3_minmax/minmaxavg_reader.c +++ b/examples/ex3_minmax/minmaxavg_reader.c @@ -26,9 +26,10 @@ int main(int argc, char **argv) MPI_Barrier(MPI_COMM_WORLD); gcomm = MPI_COMM_WORLD; - // DataSpaces: Initalize and identify application - // Usage: dspaces_init(num_peers, appid, Ptr to MPI comm, parameters) - // Note: appid for get.c is 2 [for put.c, it was 1] + + // Initalize DataSpaces + // # MPI communicator for collective bootstrapping + // # handle to initialize dspaces_init_mpi(gcomm, &client); // Name our data. diff --git a/examples/ex3_minmax/minmaxavg_writer.c b/examples/ex3_minmax/minmaxavg_writer.c index 64211569..16f79938 100644 --- a/examples/ex3_minmax/minmaxavg_writer.c +++ b/examples/ex3_minmax/minmaxavg_writer.c @@ -27,11 +27,8 @@ int main(int argc, char **argv) gcomm = MPI_COMM_WORLD; // Initalize DataSpaces - // # of Peers, Application ID, ptr MPI comm, additional parameters - // # Peers: Number of connecting clients to the DS server - // Application ID: Unique idenitifier (integer) for application - // Pointer to the MPI Communicator, allows DS Layer to use MPI barrier func - // Addt'l parameters: Placeholder for future arguments, currently NULL. 
+ // # MPI communicator for collective bootstrapping + // # handle to initialize dspaces_init_mpi(gcomm, &client); // Timestep notation left in to demonstrate how this can be adjusted diff --git a/examples/opts.mk.in b/examples/opts.mk.in new file mode 100644 index 00000000..92b6cc51 --- /dev/null +++ b/examples/opts.mk.in @@ -0,0 +1,6 @@ +CC=@CMAKE_C_COMPILER@ +OPTS=-g +DSPACES_INC=$(shell pkg-config --cflags dspaces) +DSPACES_LIBS=$(shell pkg-config --libs dspaces) +CFLAGS=$(OPTS) $(DSPACES_INC) +LDFLAGS=$(DSPACES_LIBS) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a99baad8..c1121156 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -75,7 +75,7 @@ configure_file ("dspaces.pc.in" "dspaces.pc" @ONLY) install (TARGETS dspaces dspaces-server EXPORT dspaces-targets ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) -install (EXPORT dspaces-targets +install (EXPORT dspaces-targets NAMESPACE dspaces:: DESTINATION ${dspaces-pkg} FILE "dspaces-targets.cmake") install (FILES "${CMAKE_CURRENT_BINARY_DIR}/dspaces-config.cmake"