Merge branch 'dev'
rdfriese committed Nov 15, 2024
2 parents 56ae0a3 + 22f25a4 commit 617d7dd
Showing 234 changed files with 28,330 additions and 17,347 deletions.
66 changes: 36 additions & 30 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "lamellar"
version = "0.6.1"
version = "0.7.0-rc.1"
authors = ["Ryan D. Friese <[email protected]>", "Roberto Gioiosa <[email protected]>", "Joseph Cottam <[email protected]>","Greg Roek <[email protected]>","Erdal Mutlu <[email protected]>"]
edition = "2021"
description = "Lamellar is an asynchronous tasking runtime for HPC systems developed in RUST."
@@ -12,48 +12,42 @@ keywords = ["hpc","runtime","pgas","distributed","asynchronous"]
categories = ["asynchronous","concurrency", "network-programming","science"]

[dependencies]
lamellar-impl = { version = "0.6.0", path = "impl" }
lamellar-impl = { version = "0.7.0-rc.1", path = "impl" }
rofisys = { version ="0.3", optional = true }
#rofisys = {git = "https://github.com/pnnl/rofi-sys.git", branch = "master", optional = true}
inventory = "0.3"
inventory = "0.3"
serde = { version = "1.0.147", features = ["derive"] }
serde_bytes = "0.11.7"
serde_with = "3.0.0"
bincode = "1.3.3"
anyhow = "1.0.66"
futures = "0.3.25"
futures-lite= "1.12.0"
futures-util = "0.3.30"
pin-project = "1.1.4"
pin-weak = "1.1.0"
lazy_static = "1.4.0"
crossbeam = "0.8.2"
rand = "0.8.5"
parking_lot = {version = "0.12.1", features = ["arc_lock", "send_guard", "serde"] }
indexmap = "1.9.1" #lamellar_alloc
core_affinity = "0.5.10"
#log = "0.4.19"
#simple_logger = "4.0.0"
async-task = "4.3.0"
async-trait = "0.1.58"
async-std = "1.12.0"
async-recursion = "1.0.0"
libc = { version = "0.2.137", optional = true }
async-lock = "2.8.0"
enum_dispatch = "0.3.8"
memoffset = "0.7.1"
shared_memory = "0.12.4"
#raw_sync = "0.1.5"
paste = "1.0.9"
newtype_derive = "0.1.6"
custom_derive = "0.1.7"
glob = "0.3.0"
thread_local = "1.1.4"
#tracing = "0.1.37"
#tracing-futures = "0.2.5"
#tracing-flame = "0.2.0"
pin-project = "1.0.12"
#enum-as-inner = "0.5.1"
#itertools = "0.10.5"
serde_with = "3.0.0"
pin-weak = "1.1.0"
async-lock = "2.8.0"
itertools = "0.12.1"
tokio = { version = "1.35.1", features = ["full"] , optional = true}
libc = { version = "0.2.137", optional = true }
async-global-executor = "2.4.1"
envy = "0.4.2"



[dev-dependencies]
@@ -70,20 +64,27 @@ tracing-subscriber = "0.3"
[workspace]
members = ["impl"]


# Settings for build scripts and proc-macros.
[profile.dev.build-override]
opt-level = 3

# features are strictly additive... they cannot be mutually exclusive
[features]
enable-rofi=["rofisys", "libc"]
enable-rofi=["rofi","rofisys", "libc"]
enable-rofi-shared=["rofi","rofisys/shared","libc"]
rofi=[]
tokio-executor=["tokio"]
slurm-test=[]
disable-runtime-warnings=[]
runtime-warnings-panic=[]
default=[]


[profile.release]
opt-level=3
lto=true
lto=false
codegen-units=1
debug = true
debug = true


[lib]
@@ -169,10 +170,6 @@ path = "tests/array/atomic_ops/swap_test.rs"
name = "compare_exchange_test"
path = "tests/array/atomic_ops/compare_exchange_test.rs"

[[example]]
name = "array_into_test"
path = "tests/array/array_into_test.rs"

##------------ Bandwidth Examples -----------------##

[[example]]
@@ -367,6 +364,10 @@ path="examples/array_examples/global_lock_array.rs"
name="histo"
path="examples/array_examples/histo.rs"

#[[example]]
#name="single_pe_array"
#path="examples/array_examples/single_pe_array.rs"

##------------ RDMA Examples -----------------##
[[example]]
name="rdma_put"
@@ -441,6 +442,14 @@ path="examples/misc/simple_ptp.rs"
name="lamellar_env"
path="examples/misc/lamellar_env.rs"

[[example]]
name="ping_pong"
path="examples/misc/ping_pong.rs"

[[example]]
name="dist_hashmap"
path="examples/misc/dist_hashmap.rs"


##------------ Darc examples ------------------##
[[example]]
@@ -467,6 +476,3 @@ path="examples/hello_world/hello_world_array.rs"
[[example]]
name="hello_world_array_iteration"
path="examples/hello_world/hello_world_array_iteration.rs"



87 changes: 53 additions & 34 deletions README.md
@@ -61,8 +61,28 @@ Currently the inverse is true, if it compiles and runs using `rofi` it will comp

Additional information on using each of the lamellae backends can be found below in the `Running Lamellar Applications` section

# Environment Variables

Please see [env_var.rs] for a description of available environment variables.

Commonly used variables include the following (a short usage sketch follows this list):
- `LAMELLAR_THREADS` - The number of worker threads used within a Lamellar PE; defaults to [std::thread::available_parallelism] if available, otherwise 4
- `LAMELLAR_BACKEND` - the backend used during execution. Note that if a backend is explicitly set in the world builder, this variable is ignored.
  - possible values
    - `local` -- default (if the `enable-rofi` feature is not active)
    - `shmem`
    - `rofi` -- only available with the `enable-rofi` feature, in which case it is the default backend
- `LAMELLAR_EXECUTOR` - the executor used during execution. Note that if an executor is explicitly set in the world builder, this variable is ignored.
  - possible values
    - `lamellar` -- default, work-stealing executor
    - `async_std` -- alternative executor provided by async-std
    - `tokio` -- only available with the `tokio-executor` feature, in which case it is the default executor
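
A minimal usage sketch (illustrative only, not taken from the documentation): it assumes the runtime picks these variables up when the world is constructed, and the values shown are arbitrary examples. In practice they would normally just be exported in the launch environment (e.g. a job script) before the application starts.

```rust
fn main() {
    // Illustrative only: exporting these in the shell before launching the
    // application has the same effect as setting them here.
    std::env::set_var("LAMELLAR_THREADS", "10");        // worker threads per PE
    std::env::set_var("LAMELLAR_EXECUTOR", "lamellar"); // default work-stealing executor

    let world = lamellar::LamellarWorldBuilder::new().build();
    println!("PE {} of {}", world.my_pe(), world.num_pes());
}
```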


Examples
--------
All of the examples in the [documentation](https://docs.rs/lamellar/latest/lamellar) should also be valid Lamellar programs (please open an issue if you encounter a problem).

Our repository also provides numerous examples highlighting various features of the runtime: <https://github.com/pnnl/lamellar-runtime/tree/master/examples>

Additionally, we are compiling a set of benchmarks (some with multiple implementations) that may be helpful to look at as well: <https://github.com/pnnl/lamellar-benchmarks/>
@@ -112,12 +132,12 @@ fn main(){
    let num_pes = world.num_pes();
    let am = HelloWorld { my_pe: my_pe };
    for pe in 0..num_pes{
        world.exec_am_pe(pe,am.clone()); // explicitly launch on each PE
        world.exec_am_pe(pe,am.clone()).spawn(); // explicitly launch on each PE
    }
    world.wait_all(); // wait for all active messages to finish
    world.barrier(); // synchronize with other PEs
    let request = world.exec_am_all(am.clone()); //also possible to execute on every PE with a single call
    world.block_on(request); //both exec_am_all and exec_am_pe return futures that can be used to wait for completion and access any returned result
    request.block(); //both exec_am_all and exec_am_pe return futures that can be used to wait for completion and access any returned result
}
```
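
The `.spawn()` and `request.block()` calls above use the handle-based API introduced in 0.7.0 (see HISTORY below): remote operations return handles that can be spawned, blocked on, or awaited. Below is a minimal sketch, not taken from the documentation, of those three patterns. It reuses the `HelloWorld` active message defined at the top of this example, and it assumes `world.block_on` (used in the pre-0.7 version of this snippet) is still available.

```rust
use lamellar::active_messaging::prelude::*;

// Assumes the `HelloWorld` active message defined in the example above.
fn main() {
    let world = lamellar::LamellarWorldBuilder::new().build();
    let am = HelloWorld { my_pe: world.my_pe() };

    // spawn(): start executing immediately, synchronize later
    world.exec_am_pe(0, am.clone()).spawn();
    world.wait_all(); // wait for anything spawned but not yet finished

    // block(): drive a single request to completion on the calling thread
    world.exec_am_all(am.clone()).block();

    // await: handles are futures, so they compose with async code
    let request = world.exec_am_all(am.clone());
    world.block_on(async move { request.await });

    world.barrier();
}
```

Roughly: `spawn()` suits fire-and-forget batches that are later synchronized with `wait_all()`, `block()` suits simple synchronous sections, and `await` fits naturally inside async code.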

@@ -129,12 +149,11 @@ use lamellar::array::prelude::*;
fn main(){
    let world = lamellar::LamellarWorldBuilder::new().build();
    let my_pe = world.my_pe();
    let block_array = AtomicArray::<usize>::new(&world, 1000, Distribution::Block); //we also support Cyclic distribution.
    block_array.dist_iter_mut().enumerate().for_each(move |(i,elem)| elem.store(i) ); //simultaneosuly initialize array accross all pes, each pe only updates its local data
    block_array.wait_all();
    let block_array = AtomicArray::<usize>::new(&world, 1000, Distribution::Block).block(); //we also support Cyclic distribution.
    block_array.dist_iter_mut().enumerate().for_each(move |(i,elem)| elem.store(i) ).block(); //simultaneously initialize the array across all PEs, each PE only updates its local data
    block_array.barrier();
    if my_pe == 0{
        for (i,elem) in block_array.onesided_iter().into_iter().enumerate(){
        for (i,elem) in block_array.onesided_iter().into_iter().enumerate(){ //iterate through the entire array on pe 0 (automatically transferring remote data)
            println!("i: {} = {}",i,elem);
        }
    }
@@ -163,11 +182,11 @@ fn main(){
    let mut world = lamellar::LamellarWorldBuilder::new().build();
    let my_pe = world.my_pe();
    let num_pes = world.num_pes();
    let cnt = Darc::new(&world, AtomicUsize::new());
    let cnt = Darc::new(&world, AtomicUsize::new(0)).block().expect("calling pe is in the world");
    for pe in 0..num_pes{
        world.exec_am_pe(pe,DarcAm{cnt: cnt.clone()}); // explicitly launch on each PE
        world.exec_am_pe(pe,DarcAm{cnt: cnt.clone()}).spawn(); // explicitly launch on each PE
    }
    world.exec_am_all(am.clone()); //also possible to execute on every PE with a single call
    world.exec_am_all(DarcAm{cnt: cnt.clone()}).spawn(); //also possible to execute on every PE with a single call
    cnt.fetch_add(1,Ordering::SeqCst); //this is valid as well!
    world.wait_all(); // wait for all active messages to finish
    world.barrier(); // synchronize with other PEs
@@ -178,11 +197,11 @@ fn main(){
Lamellar is capable of running on single-node workstations as well as distributed HPC systems.
For a workstation, simply copy the following to the dependency section of your Cargo.toml file:

``` lamellar = "0.6.1" ```
``` lamellar = "0.7.0-rc.1" ```

If you plan to use Lamellar on a distributed HPC system, copy the following to your Cargo.toml file:

```lamellar = { version = "0.6.1", features = ["enable-rofi"]}```
```lamellar = { version = "0.7.0-rc.1", features = ["enable-rofi"]}```

NOTE: as of Lamellar 0.6.1 it is no longer necessary to manually install Libfabric; the build process will now try to build libfabric for you automatically.
If this process fails, it is still possible to point the build at an existing libfabric installation via the OFI_DIR environment variable.
@@ -209,23 +228,17 @@ There are a number of ways to run Lamellar applications, mostly dictated by the
- ```srun -N 2 --mpi=pmi2 ./target/release/<appname>```
- `pmi2` library is required to grab info about the allocated nodes and helps set up initial handshakes

# Environment Variables
Lamellar exposes a number of environment variables that can be used to control application execution at runtime
- `LAMELLAR_THREADS` - The number of worker threads used within a lamellar PE
  - `export LAMELLAR_THREADS=10`
- `LAMELLAE_BACKEND` - the backend used during execution. Note that if a backend is explicitly set in the world builder, this variable is ignored.
  - possible values
    - `local`
    - `shmem`
    - `rofi`
- `LAMELLAR_MEM_SIZE` - Specify the initial size of the Runtime "RDMAable" memory pool. Defaults to 1GB
  - `export LAMELLAR_MEM_SIZE=$((20*1024*1024*1024))` 20GB memory pool
  - Internally, Lamellar utilizes memory pools of RDMAable memory for Runtime data structures (e.g. [Darcs][crate::Darc], [OneSidedMemoryRegion][crate::memregion::OneSidedMemoryRegion], etc.), aggregation buffers, and message queues. Additional memory pools are dynamically allocated across the system as needed. This can be a fairly expensive operation (as the operation is synchronous across all PEs) so the runtime will print a message at the end of execution with how many additional pools were allocated.
    - if you find you are dynamically allocating new memory pools, try setting `LAMELLAR_MEM_SIZE` to a larger value
  - Note: when running multiple PEs on a single system, the total allocated memory for the pools would be equal to `LAMELLAR_MEM_SIZE * number of processes`

Repository Organization
-----------------------

Generally the 'master' branch corresponds to the latest stable release at [https://crates.io/crates/lamellar] and [https://docs.rs/lamellar/latest/lamellar/].
The 'dev' branch will contain the most recent 'working' features, where working means all the examples compile and execute properly (but the documentation may not yet be up-to-date).
All other branches are active feature branches and may or may not be in a working state.

NEWS
----
* November 2024: Alpha release -- v0.7.0
* February 2024: Alpha release -- v0.6.1
* November 2023: Alpha release -- v0.6
* January 2023: Alpha release -- v0.5
@@ -286,6 +299,14 @@ Note: we do an explicit build instead of `cargo run --examples` as they are inte

HISTORY
-------
- version 0.7.0
  - Add support for integration with various async executor backends, including tokio and async-std
  - Handle-based API, allowing remote operations to be `spawn()`ed, `block()`ed on, or `await`ed
  - Conversion from `Pin<Box<dyn Future>>` to concrete types for most remote operations
  - Improved runtime warning framework for potential deadlocks, unexecuted remote operations, blocking calls in async code, etc.
    - warnings can be completely disabled
    - or can panic instead of printing a warning
  - Various optimizations and bug fixes
- version 0.6.1
  - Clean up APIs for lock-based data structures
  - N-way dissemination barrier
@@ -378,15 +399,13 @@ CONTACTS

Current Team Members

Ryan Friese - [email protected]
Roberto Gioiosa - [email protected]
Erdal Mutlu - [email protected]
Joseph Cottam - [email protected]
Greg Roek - [email protected]

Past Team Members

Mark Raugas - [email protected]
Ryan Friese - [email protected]
Roberto Gioiosa - [email protected]
Polykarpos Thomadakis - [email protected]
Erdal Mutlu - [email protected]
Joseph Cottam - [email protected]
Greg Roek - [email protected]
Mark Raugas - [email protected]

## License

21 changes: 21 additions & 0 deletions build.rs
@@ -0,0 +1,21 @@
#[cfg(feature = "enable-rofi-shared")]
use std::env;
#[cfg(feature = "enable-rofi-shared")]
use std::path::PathBuf;

fn main() {
    println!("cargo:rerun-if-env-changed=DEP_ROFI_ROOT");
    #[cfg(feature = "enable-rofi-shared")]
    {
        // When linking against a shared rofi build, DEP_ROFI_ROOT (exported by the
        // rofi-sys build script) points at the rofi install; add its lib directory
        // to the link search path and embed it as an rpath.
        if let Ok(rofi_lib_dir) = env::var("DEP_ROFI_ROOT") {
            let lib_path = PathBuf::from(rofi_lib_dir).join("lib");
            println!("cargo:rustc-link-search=native={}", lib_path.display());
            println!("cargo:rustc-link-arg=-Wl,-rpath,{}", lib_path.display());
        } else {
            panic!(
                "unable to set rofi backend, recompile with 'enable-rofi' feature {:?}",
                env::vars()
            )
        }
    }
}