diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 914a0b990..8104efa42 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,7 @@ jobs: - name: Install VSL and dependencies run: | - sudo apt-get install --quiet -y --no-install-recommends gfortran liblapacke-dev libopenblas-dev libgc-dev + sudo apt-get install --quiet -y --no-install-recommends gfortran liblapacke-dev libopenblas-dev libgc-dev libopenmpi-dev - name: Copy VSL source code to V Modules run: cp -rf ./vsl ~/.vmodules @@ -71,7 +71,7 @@ jobs: - name: Install VSL and dependencies run: | - sudo apt-get install --quiet -y --no-install-recommends gfortran liblapacke-dev libopenblas-dev libgc-dev + sudo apt-get install --quiet -y --no-install-recommends gfortran liblapacke-dev libopenblas-dev libgc-dev libopenmpi-dev - name: Move VSL source code to V Modules run: mv ./vsl ~/.vmodules diff --git a/float/float32/f32_test_util.v b/float/float32/f32_test_util.v index 3dde466a9..9b2aa6c1c 100644 --- a/float/float32/f32_test_util.v +++ b/float/float32/f32_test_util.v @@ -47,7 +47,7 @@ fn new_inc_to_set(inc ...int) []IncToSet { return inc_to_set } -// s returns true when the inputs have the same value, allowing NaN equality. +// same returns true when the inputs have the same value, allowing NaN equality. fn same(a f32, b f32) bool { return a == b || (math.is_nan(f64(a)) && math.is_nan(f64(b))) } @@ -75,6 +75,18 @@ fn tolerance(a f32, b f32, tol f32) bool { return d < e_ } +pub fn arrays_tolerance(data1 []f32, data2 []f32, tol f32) bool { + if data1.len != data2.len { + return false + } + for i := 0; i < data1.len; i++ { + if !tolerance(data1[i], data2[i], tol) { + return false + } + } + return true +} + // new_guarded_vector allocates a new slice and returns it as three subslices. // v is a strided vector that contains elements of data at indices i*inc and // nan elsewhere. frontGuard and backGuard are filled with nan values, and diff --git a/float/float64/f64_test_util.v b/float/float64/f64_test_util.v index 7526a4970..3192b3112 100644 --- a/float/float64/f64_test_util.v +++ b/float/float64/f64_test_util.v @@ -47,7 +47,7 @@ pub fn new_inc_to_set(inc ...int) []IncToSet { return inc_to_set } -// s returns true when the inputs have the same value, allowing NaN equality. +// same returns true when the inputs have the same value, allowing NaN equality. 
pub fn same(a f64, b f64) bool { return a == b || (math.is_nan(a) && math.is_nan(b)) } @@ -75,6 +75,18 @@ pub fn tolerance(a f64, b f64, tol f64) bool { return d < e_ } +pub fn arrays_tolerance(data1 []f64, data2 []f64, tol f64) bool { + if data1.len != data2.len { + return false + } + for i := 0; i < data1.len; i++ { + if !tolerance(data1[i], data2[i], tol) { + return false + } + } + return true +} + pub fn close(a f64, b f64) bool { return tolerance(a, b, 1e-14) } diff --git a/la/densesol_test.v b/la/densesol_test.v index a2504a8c2..673d0c0ab 100644 --- a/la/densesol_test.v +++ b/la/densesol_test.v @@ -6,18 +6,6 @@ const ( tol = 1e-12 ) -fn tolerance_equal(data1 []f64, data2 []f64) bool { - if data1.len != data2.len { - return false - } - for i := 0; i < data1.len; i++ { - if !float64.tolerance(data1[i], data2[i], la.tol) { - return false - } - } - return true -} - fn test_den_solve() { // case 1 mat1 := matrix_deep2([ @@ -27,7 +15,7 @@ fn test_den_solve() { b1 := [1.0, 1] mut x1 := []f64{len: mat1.m} den_solve(mut x1, mat1, b1, false) - assert tolerance_equal(x1, [1.0, 0.5]) + assert float64.arrays_tolerance(x1, [1.0, 0.5], la.tol) // case 2 mat2 := matrix_deep2([ [2.0, 0, 0, -5.6], @@ -38,10 +26,10 @@ fn test_den_solve() { b2 := [1.0, 2.0, 3.0, 4.0] mut x2 := []f64{len: mat2.m} den_solve(mut x2, mat2, b2, false) - assert tolerance_equal(x2, [ + assert float64.arrays_tolerance(x2, [ 2.867389875082183, 0.32846811308349777, -0.20118343195266275, 0.8454963839579225, - ]) + ], la.tol) } diff --git a/la/sparse_config.v b/la/sparse_config.v index d25061982..a85a9baaf 100644 --- a/la/sparse_config.v +++ b/la/sparse_config.v @@ -1,12 +1,14 @@ module la import vsl.errors +// import vsl.mpi // The SparseConfig structure holds configuration arguments for sparse solvers pub struct SparseConfig { mut: mumps_ordering int // ICNTL(7) default = "" == "auto" mumps_scaling int // Scaling type (check MUMPS solver) [may be empty] + // communicator &mpi.Communicator = 0 // MPI communicator for parallel solvers [may be nil] // internal symmetric bool // indicates symmetric system. NOTE: when using MUMPS, only the upper or lower part of the matrix must be provided sym_pos_def bool // indicates symmetric-positive-defined system. NOTE: when using MUMPS, only the upper or lower part of the matrix must be provided @@ -31,6 +33,20 @@ pub fn new_sparse_config() SparseConfig { return o } +// new_sparse_config_with_comm returns a new SparseConfig +// Input: +// comm -- may be nil +// pub fn new_sparse_config_with_comm(comm &mpi.Communicator) SparseConfig { +// mut o := SparseConfig{ +// mumps_increase_of_working_space_pct: 100 +// mumps_max_memory_per_processor: 2000 +// communicator: unsafe{ comm } +// } +// o.set_mumps_ordering('') +// o.set_mumps_scaling('') +// return o +// } + // set_mumps_symmetry sets symmetry options for MUMPS solver pub fn (mut o SparseConfig) set_mumps_symmetry(only_upper_or_lower_given bool, positive_defined bool) { if !only_upper_or_lower_given { diff --git a/mpi/README.md b/mpi/README.md new file mode 100644 index 000000000..b5c0ffca8 --- /dev/null +++ b/mpi/README.md @@ -0,0 +1,6 @@ +# Message Passing Interface for parallel computing + +The `mpi` package is a light wrapper to the [OpenMPI](https://www.open-mpi.org) C++ library designed +to develop algorithms for parallel computing. + +This package allows parallel computations over the network. 
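Reviewer note: the README above explains what the new `mpi` module is for but shows no usage. The sketch below is a minimal, hedged example distilled from `mpi/examples/example1_not_ci.v` further down in this change set; it only calls functions this diff introduces (`mpi.start`, `mpi.stop`, `mpi.world_rank`, `mpi.world_size`, `mpi.new_communicator`, `Communicator.reduce_sum`). File layout and the launch command mentioned afterwards are assumptions, not part of this change.

```v
module main

import vsl.mpi

fn main() {
	// Initialise MPI; stop() must run on every exit path.
	mpi.start() or { panic(err) }
	defer {
		mpi.stop()
	}

	rank := mpi.world_rank()
	size := mpi.world_size()
	println('process $rank of $size')

	// An empty rank list returns the World Communicator.
	comm := mpi.new_communicator([]) or { panic(err) }

	// Each process contributes its own rank; root (rank 0) receives the sum.
	orig := [f64(rank)]
	mut dest := []f64{len: 1}
	comm.reduce_sum(mut dest, orig)
	if rank == 0 {
		println('sum of ranks: ${dest[0]}')
	}
}
```

Since the bundled example is suffixed `_not_ci` and is not exercised by CI, running it presumably requires an MPI launcher; something along the lines of `v -o example mpi/examples/example1_not_ci.v && mpirun -np 2 ./example` should work with the Open MPI packages installed by the updated workflow, but that invocation is illustrative rather than prescribed by this diff.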
diff --git a/mpi/_cflags.c.v b/mpi/_cflags.c.v new file mode 100644 index 000000000..e08591da6 --- /dev/null +++ b/mpi/_cflags.c.v @@ -0,0 +1,10 @@ +module mpi + +#flag linux -I/usr/lib/x86_64-linux-gnu/openmpi/include/openmpi -I/usr/lib/x86_64-linux-gnu/openmpi/include -pthread -I@VMODROOT +#flag linux -pthread -L/usr/lib/x86_64-linux-gnu/openmpi/lib -lmpi +#flag darwin -I/usr/local/Cellar/open-mpi/4.0.1_2/include -I@VMODROOT +#flag darwin -L/usr/local/opt/libevent/lib -L/usr/local/Cellar/open-mpi/4.0.1_2/lib -lmpi +#flag freebsd -I/usr/local/include -I@VMODROOT +#flag freebsd -L/usr/local/lib -lmpi + +#include diff --git a/mpi/cmpi.h b/mpi/cmpi.h new file mode 100644 index 000000000..abdf5759a --- /dev/null +++ b/mpi/cmpi.h @@ -0,0 +1,6 @@ +#ifndef V_MPI_H +#define V_MPI_H + +#include "mpi.h" + +#endif diff --git a/mpi/examples/example1_not_ci.v b/mpi/examples/example1_not_ci.v new file mode 100644 index 000000000..a020c19d9 --- /dev/null +++ b/mpi/examples/example1_not_ci.v @@ -0,0 +1,53 @@ +module main + +import vsl.float.float64 +import vsl.mpi + +fn example() ? { + mpi.start()? + + defer { + mpi.stop() + } + + if mpi.world_rank() == 0 { + println('Test MPI 01') + } + + println('Hello from rank $mpi.world_rank()') + println('The world has $mpi.world_size() processes') + + n := 11 + mut x := []f64{len: n} + id, sz := mpi.world_rank(), mpi.world_size() + start, endp1 := (id * n) / sz, ((id + 1) * n) / sz + for i := start; i < endp1; i++ { + x[i] = f64(i) + } + + // Communicator + comm := mpi.new_communicator([])? + + // Barrier + comm.barrier() + + // sum to root + mut r := []f64{len: n} + comm.reduce_sum(mut r, x) + if id == 0 { + assertion := float64.arrays_tolerance(r, []f64{len: n, init: it}, 1e-17) + println('ID: $id - Assertion: $assertion') + } else { + assertion := float64.arrays_tolerance(r, []f64{len: n}, 1e-17) + println('ID: $id - Assertion: $assertion') + } + + r[0] = 123.0 + comm.bcast_from_root(r) + assertion := float64.arrays_tolerance(r, [123.0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 1e-17) + println('ID: $id - Assertion: $assertion') +} + +fn main() { + example() or { panic(err) } +} diff --git a/mpi/mpi_default.c.v b/mpi/mpi_default.c.v new file mode 100644 index 000000000..a247ad99d --- /dev/null +++ b/mpi/mpi_default.c.v @@ -0,0 +1,147 @@ +module mpi + +import vsl.errors +import math.complex + +// is_on tells whether MPI is on or not +// note: this returns true even after stop +pub fn is_on() bool { + return false +} + +// start initialises MPI +pub fn start() ? { + return errors.error('MPI is not supported on this platform', .efailed) +} + +// stop finalises MPI +pub fn stop() { +} + +// world_rank returns the processor rank/ID within the World Communicator +pub fn world_rank() int { + return 0 +} + +// world_size returns the number of processors in the World Communicator +pub fn world_size() int { + return 0 +} + +// Communicator holds the World Communicator or a subset Communicator +pub struct Communicator {} + +// new_communicator creates a new communicator or returns the World Communicator +// ranks -- World indices of processors in this Communicator. 
+// use nil or empty to get the World Communicator +pub fn new_communicator(ranks []int) ?&Communicator { + return errors.error('MPI is not supported on this platform', .efailed) +} + +// rank returns the processor rank/ID +pub fn (o &Communicator) rank() int { + return 0 +} + +// size returns the number of processors +pub fn (o &Communicator) size() int { + return 0 +} + +// abort aborts MPI +pub fn (o &Communicator) abort() { +} + +// barrier forces synchronisation +pub fn (o &Communicator) barrier() { +} + +// bcast_from_root broadcasts slice from root (Rank == 0) to all other processors +pub fn (o &Communicator) bcast_from_root(x []f64) { +} + +// bcast_from_root_c broadcasts slice from root (Rank == 0) to all other processors (complex version) +pub fn (o &Communicator) bcast_from_root_c(x []complex.Complex) { +} + +// reduce_sum sums all values in 'orig' to 'dest' in root (Rank == 0) processor +// note (important): orig and dest must be different slices +pub fn (o &Communicator) reduce_sum(mut dest []f64, orig []f64) { +} + +// reduce_sum_c sums all values in 'orig' to 'dest' in root (Rank == 0) processor (complex version) +// note (important): orig and dest must be different slices +pub fn (o &Communicator) reduce_sum_c(mut dest []complex.Complex, orig []complex.Complex) { +} + +// all_reduce_sum combines all values from orig into dest summing values +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_sum(mut dest []f64, orig []f64) { +} + +// all_reduce_sum_c combines all values from orig into dest summing values (complex version) +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_sum_c(mut dest []complex.Complex, orig []complex.Complex) { +} + +// all_reduce_min combines all values from orig into dest picking minimum values +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_min(mut dest []f64, orig []f64) { +} + +// all_reduce_max combines all values from orig into dest picking minimum values +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_max(mut dest []f64, orig []f64) { +} + +// all_reduce_min_i combines all values from orig into dest picking minimum values (integer version) +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_min_i(mut dest []int, orig []int) { +} + +// all_reduce_max_i combines all values from orig into dest picking minimum values (integer version) +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_max_i(mut dest []int, orig []int) { +} + +// send sends values to processor toID +pub fn (o &Communicator) send(vals []f64, to_id int) { +} + +// recv receives values from processor fromId +pub fn (o &Communicator) recv(vals []f64, from_id int) { +} + +// send_c sends values to processor toID (complex version) +pub fn (o &Communicator) send_c(vals []complex.Complex, to_id int) { +} + +// recv_c receives values from processor fromId (complex version) +pub fn (o &Communicator) recv_c(vals []complex.Complex, from_id int) { +} + +// send_i sends values to processor toID (integer version) +pub fn (o &Communicator) send_i(vals []int, to_id int) { +} + +// recv_i receives values from processor fromId (integer version) +pub fn (o &Communicator) recv_i(vals []int, from_id int) { +} + +// send_one sends one value to processor toID +pub fn (o &Communicator) send_one(val f64, to_id int) { +} + +// 
recv_one receives one value from processor fromId +pub fn (o &Communicator) recv_one(from_id int) f64 { + return 0 +} + +// send_one_i sends one value to processor toID (integer version) +pub fn (o &Communicator) send_one_i(val int, to_id int) { +} + +// recv_one_i receives one value from processor fromId (integer version) +pub fn (o &Communicator) recv_one_i(from_id int) int { + return 0 +} diff --git a/mpi/mpi_linux.c.v b/mpi/mpi_linux.c.v new file mode 100644 index 000000000..3d6600c68 --- /dev/null +++ b/mpi/mpi_linux.c.v @@ -0,0 +1,246 @@ +module mpi + +import vsl.errors +import math.complex + +fn C.MPI_Initialized(flag &int) int +fn C.MPI_Init(argc int, argv &charptr) int +fn C.MPI_Init_thread(argc int, argv &charptr, required int, provided &int) int + +type MPI_Comm = int +type MPI_Datatype = voidptr +type MPI_Group = voidptr +type MPI_Status = voidptr +type MPI_Op = voidptr + +fn C.MPI_Comm_rank(comm MPI_Comm, rank &int) int +fn C.MPI_Comm_size(comm MPI_Comm, size &int) int +fn C.MPI_Comm_group(comm MPI_Comm, group &MPI_Group) int +fn C.MPI_Group_incl(group MPI_Group, n int, ranks &int, newgroup &MPI_Group) int +fn C.MPI_Comm_create(comm MPI_Comm, group MPI_Group, newcomm &MPI_Comm) int + +fn C.MPI_Abort(comm MPI_Comm, errorcode int) int +fn C.MPI_Finalize() int +fn C.MPI_Finalized(flag &int) int +fn C.MPI_Barrier(comm MPI_Comm) int +fn C.MPI_Bcast(buffer &voidptr, count int, datatype MPI_Datatype, root int, comm MPI_Comm) int +fn C.MPI_Reduce(sendbuf &voidptr, recvbuf &voidptr, count int, datatype MPI_Datatype, op MPI_Op, root int, comm MPI_Comm) int +fn C.MPI_Allreduce(sendbuf &voidptr, recvbuf &voidptr, count int, datatype MPI_Datatype, op MPI_Op, comm MPI_Comm) int +fn C.MPI_Send(buf &voidptr, count int, datatype MPI_Datatype, dest int, tag int, comm MPI_Comm) int +fn C.MPI_Recv(buf &voidptr, count int, datatype MPI_Datatype, source int, tag int, comm MPI_Comm, status &MPI_Status) int + +// is_on tells whether MPI is on or not +// note: this returns true even after stop +pub fn is_on() bool { + flag := 0 + C.MPI_Initialized(&flag) + return flag != 0 +} + +// start initialises MPI +pub fn start() ? { + C.MPI_Init(0, voidptr(0)) +} + +// start_thread_safe initialises MPI in a thread safe way +pub fn start_thread_safe() ? { + r := 0 + C.MPI_Init_thread(0, voidptr(0), C.MPI_THREAD_MULTIPLE, &r) + if r != C.MPI_THREAD_MULTIPLE { + return errors.error("MPI_THREAD_MULTIPLE can't be set: got $r", .efailed) + } +} + +// stop finalises MPI +pub fn stop() { + C.MPI_Finalize() +} + +// world_rank returns the processor rank/ID within the World Communicator +pub fn world_rank() int { + r := 0 + C.MPI_Comm_rank(C.MPI_COMM_WORLD, &r) + return r +} + +// world_size returns the number of processors in the World Communicator +pub fn world_size() int { + r := 0 + C.MPI_Comm_size(C.MPI_COMM_WORLD, &r) + return r +} + +// Communicator holds the World Communicator or a subset Communicator +pub struct Communicator { +mut: + comm MPI_Comm + group MPI_Group +} + +// new_communicator creates a new communicator or returns the World Communicator +// ranks -- World indices of processors in this Communicator. 
+// use nil or empty to get the World Communicator +pub fn new_communicator(ranks []int) ?&Communicator { + mut o := &Communicator{ + comm: MPI_Comm(C.MPI_COMM_WORLD) + group: voidptr(0) + } + if ranks.len == 0 { + C.MPI_Comm_group(C.MPI_COMM_WORLD, &o.group) + return o + } + + rs := ranks.clone() + r := unsafe { &rs[0] } + wgroup := MPI_Group(0) + C.MPI_Comm_group(C.MPI_COMM_WORLD, &wgroup) + C.MPI_Group_incl(wgroup, ranks.len, r, &o.group) + C.MPI_Comm_create(C.MPI_COMM_WORLD, o.group, &o.comm) + return o +} + +// rank returns the processor rank/ID +pub fn (o &Communicator) rank() int { + r := 0 + C.MPI_Comm_rank(o.comm, &r) + return r +} + +// size returns the number of processors +pub fn (o &Communicator) size() int { + r := 0 + C.MPI_Comm_size(o.comm, &r) + return r +} + +// abort aborts MPI +pub fn (o &Communicator) abort() { + C.MPI_Abort(o.comm, 0) +} + +// barrier forces synchronisation +pub fn (o &Communicator) barrier() { + C.MPI_Barrier(o.comm) +} + +// bcast_from_root broadcasts slice from root (Rank == 0) to all other processors +pub fn (o &Communicator) bcast_from_root(x []f64) { + C.MPI_Bcast(unsafe { &x[0] }, x.len, C.MPI_DOUBLE, 0, o.comm) +} + +// bcast_from_root_c broadcasts slice from root (Rank == 0) to all other processors (complex version) +pub fn (o &Communicator) bcast_from_root_c(x []complex.Complex) { + C.MPI_Bcast(unsafe { &x[0] }, x.len, C.MPI_DOUBLE, 0, o.comm) +} + +// reduce_sum sums all values in 'orig' to 'dest' in root (Rank == 0) processor +// note (important): orig and dest must be different slices +pub fn (o &Communicator) reduce_sum(mut dest []f64, orig []f64) { + C.MPI_Reduce(unsafe { &orig[0] }, unsafe { &dest[0] }, orig.len, C.MPI_DOUBLE, C.MPI_SUM, + 0, o.comm) +} + +// reduce_sum_c sums all values in 'orig' to 'dest' in root (Rank == 0) processor (complex version) +// note (important): orig and dest must be different slices +pub fn (o &Communicator) reduce_sum_c(mut dest []complex.Complex, orig []complex.Complex) { + C.MPI_Reduce(unsafe { &orig[0] }, unsafe { &dest[0] }, orig.len, C.MPI_DOUBLE, C.MPI_SUM, + 0, o.comm) +} + +// all_reduce_sum combines all values from orig into dest summing values +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_sum(mut dest []f64, orig []f64) { + C.MPI_Allreduce(unsafe { &orig[0] }, unsafe { &dest[0] }, orig.len, C.MPI_DOUBLE, + C.MPI_SUM, o.comm) +} + +// all_reduce_sum_c combines all values from orig into dest summing values (complex version) +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_sum_c(mut dest []complex.Complex, orig []complex.Complex) { + C.MPI_Allreduce(unsafe { &orig[0] }, unsafe { &dest[0] }, orig.len, C.MPI_DOUBLE, + C.MPI_SUM, o.comm) +} + +// all_reduce_min combines all values from orig into dest picking minimum values +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_min(mut dest []f64, orig []f64) { + C.MPI_Allreduce(unsafe { &orig[0] }, unsafe { &dest[0] }, orig.len, C.MPI_DOUBLE, + C.MPI_MIN, o.comm) +} + +// all_reduce_max combines all values from orig into dest picking minimum values +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_max(mut dest []f64, orig []f64) { + C.MPI_Allreduce(unsafe { &orig[0] }, unsafe { &dest[0] }, orig.len, C.MPI_DOUBLE, + C.MPI_MAX, o.comm) +} + +// all_reduce_min_i combines all values from orig into dest picking minimum values (integer version) +// note (important): 
orig and dest must be different slices +pub fn (o &Communicator) all_reduce_min_i(mut dest []int, orig []int) { + C.MPI_Allreduce(unsafe { &orig[0] }, unsafe { &dest[0] }, orig.len, C.MPI_INT, C.MPI_MIN, + o.comm) +} + +// all_reduce_max_i combines all values from orig into dest picking minimum values (integer version) +// note (important): orig and dest must be different slices +pub fn (o &Communicator) all_reduce_max_i(mut dest []int, orig []int) { + C.MPI_Allreduce(unsafe { &orig[0] }, unsafe { &dest[0] }, orig.len, C.MPI_INT, C.MPI_MAX, + o.comm) +} + +// send sends values to processor toID +pub fn (o &Communicator) send(vals []f64, to_id int) { + C.MPI_Send(unsafe { &vals[0] }, vals.len, C.MPI_DOUBLE, to_id, 0, o.comm) +} + +// recv receives values from processor fromId +pub fn (o &Communicator) recv(vals []f64, from_id int) { + C.MPI_Recv(unsafe { &vals[0] }, vals.len, C.MPI_DOUBLE, from_id, 0, o.comm, voidptr(0)) +} + +// send_c sends values to processor toID (complex version) +pub fn (o &Communicator) send_c(vals []complex.Complex, to_id int) { + C.MPI_Send(unsafe { &vals[0] }, vals.len, C.MPI_DOUBLE, to_id, 0, o.comm) +} + +// recv_c receives values from processor fromId (complex version) +pub fn (o &Communicator) recv_c(vals []complex.Complex, from_id int) { + C.MPI_Recv(unsafe { &vals[0] }, vals.len, C.MPI_DOUBLE, from_id, 0, o.comm, voidptr(0)) +} + +// send_i sends values to processor toID (integer version) +pub fn (o &Communicator) send_i(vals []int, to_id int) { + C.MPI_Send(unsafe { &vals[0] }, vals.len, C.MPI_INT, to_id, 0, o.comm) +} + +// recv_i receives values from processor fromId (integer version) +pub fn (o &Communicator) recv_i(vals []int, from_id int) { + C.MPI_Recv(unsafe { &vals[0] }, vals.len, C.MPI_INT, from_id, 0, o.comm, voidptr(0)) +} + +// send_one sends one value to processor toID +pub fn (o &Communicator) send_one(val f64, to_id int) { + vals := [val] + C.MPI_Send(unsafe { &vals[0] }, 1, C.MPI_DOUBLE, to_id, 0, o.comm) +} + +// recv_one receives one value from processor fromId +pub fn (o &Communicator) recv_one(from_id int) f64 { + vals := [0.0] + C.MPI_Recv(unsafe { &vals[0] }, 1, C.MPI_DOUBLE, from_id, 0, o.comm, voidptr(0)) + return vals[0] +} + +// send_one_i sends one value to processor toID (integer version) +pub fn (o &Communicator) send_one_i(val int, to_id int) { + vals := [val] + C.MPI_Send(unsafe { &vals[0] }, 1, C.MPI_INT, to_id, 0, o.comm) +} + +// recv_one_i receives one value from processor fromId (integer version) +pub fn (o &Communicator) recv_one_i(from_id int) int { + vals := [0] + C.MPI_Recv(unsafe { &vals[0] }, 1, C.MPI_INT, from_id, 0, o.comm, voidptr(0)) + return vals[0] +} diff --git a/mpi/v.mod b/mpi/v.mod new file mode 100644 index 000000000..104fac39c --- /dev/null +++ b/mpi/v.mod @@ -0,0 +1,8 @@ +Module { + name: 'mpi' + description: 'Message Passing Interface for parallel computing' + version: '0.1.0' + license: 'MIT' + repo_url: 'https://github.com/vlang/vsl' + dependencies: [] +} diff --git a/vcl/_ctypes.c.v b/vcl/_ctypes.c.v index 1ed84ba76..127aae498 100644 --- a/vcl/_ctypes.c.v +++ b/vcl/_ctypes.c.v @@ -44,21 +44,21 @@ fn C.clCreateContext(properties &ClContextProperties, num_devices u32, devices & // ImageChannelOrder represents available image types pub enum ImageChannelOrder { - intensity = C.CL_INTENSITY - rgba = C.CL_RGBA + intensity = C.CL_INTENSITY + rgba = C.CL_RGBA } // ImageChannelDataType describes the size of the channel data type pub enum ImageChannelDataType { - unorm_int8 = C.CL_UNORM_INT8 + unorm_int8 = 
C.CL_UNORM_INT8 } type ClMemObjectType = int type ClImageDesc = voidptr pub struct ClImageFormat { - image_channel_order ImageChannelOrder - image_channel_data_type ImageChannelDataType + image_channel_order ImageChannelOrder + image_channel_data_type ImageChannelDataType } fn C.clCreateImage(context ClContext, flags ClMemFlags, format &ClImageFormat, desc ClImageDesc, data voidptr, errcode_ret &int) ClMem diff --git a/vcl/kernel.c.v b/vcl/kernel.c.v index 326664bb9..f1cb730c4 100644 --- a/vcl/kernel.c.v +++ b/vcl/kernel.c.v @@ -229,7 +229,7 @@ fn (k &Kernel) call(work_sizes []int, lokal_sizes []int) chan IError { C.clReleaseEvent(event) } res := C.clWaitForEvents(1, unsafe { &event }) - ch <- vcl_error(res) + ch <- vcl_error(res) }(ch, event) return ch } diff --git a/vcl/vcl.c.v b/vcl/vcl.c.v index b00582c9b..69a823a09 100644 --- a/vcl/vcl.c.v +++ b/vcl/vcl.c.v @@ -29,8 +29,9 @@ pub fn get_devices(device_type DeviceType) ?[]&Device { // get_default_device ... pub fn get_default_device() ?&Device { mut id := ClDeviceId(0) - platform_ids := get_platforms()? - ret := C.clGetDeviceIDs(unsafe { &platform_ids[0] }, ClDeviceType(device_default_device), 1, &id, voidptr(0)) + platform_ids := get_platforms()? + ret := C.clGetDeviceIDs(unsafe { &platform_ids[0] }, ClDeviceType(device_default_device), + 1, &id, voidptr(0)) if ret != success { return vcl_error(ret) }
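Closing reviewer note on the point-to-point API added in `mpi/mpi_linux.c.v`: the bundled example only exercises the collective operations (barrier, reduce, broadcast), so here is a small, hypothetical sketch of how the paired `send_one`/`recv_one` wrappers could be used between two ranks. It assumes at least two processes are launched and relies on the blocking semantics of the underlying `MPI_Send`/`MPI_Recv`; everything other than the wrapper functions themselves (module name, values, printed text) is illustrative only.

```v
module main

import vsl.mpi

fn main() {
	mpi.start() or { panic(err) }
	defer {
		mpi.stop()
	}

	// World Communicator (empty rank list).
	comm := mpi.new_communicator([]) or { panic(err) }

	// Rank 0 sends a single value to rank 1; rank 1 blocks until it arrives.
	// Both calls are blocking, so every send must have a matching receive.
	if comm.rank() == 0 {
		comm.send_one(3.14, 1)
	} else if comm.rank() == 1 {
		val := comm.recv_one(0)
		println('rank 1 received $val')
	}
}
```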