Skip to content

MPI library

lucamar edited this page Dec 5, 2019 · 1 revision

This page shows the importance of using an MPI library that is ABI-compatible with the host's native MPI, as explained in the Sarus documentation: https://sarus.readthedocs.io/en/latest/config/mpi-hook.html#native-mpi-hook-mpich-based

osu_bw

Running the container

mpich/314 (ABI)

srun -Cmc -N1 -n2 -t10 sarus run --mpi \
load/ethcscs/mpich:ub1804_cuda101_mpi314_mpip \
/usr/local/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_bw

mpich/33a (not ABI)

srun -Cmc -N1 -n2 -t10 --mpi=pmi2 sarus run \
load/ethcscs/mpich:ub1804_cuda101_mpi33a_mpip \
/usr/local/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_bw

cray-mpich/7.7.10 (native, gcc/8.3.0)

srun -Cmc -N1 -n2 osu_bw
Results

# OSU MPI Bandwidth Test v5.6.2
# Size  Bandwidth (MB/s)
#        mpich/314    mpich/33a     cray
1             2.50         4.30     2.51   plt
2             4.90         8.65     5.17   plt
4            10.26        16.63    10.37   plt
8            20.54        34.36    19.98   plt
16           40.39        69.84    41.59   plt
32           79.38       130.74    83.72   plt
64          157.66       247.30   141.04   plt
128         303.40       451.09   275.36   plt
256         606.15       942.14   528.16   plt
512        1081.86      1700.35  1132.30   plt
1024       1944.66      3143.26  1968.62   plt
2048       2944.91      4886.28  2998.72   plt
4096       4110.78      7025.18  4099.29   plt
8192       9186.71      9676.67  9602.30   plt
16384     10888.95     11200.93 11161.14   plt
32768     15273.65     10738.07 15840.25   plt
65536     19134.42      8254.40 19191.82   plt
131072    16772.03      9516.88 17316.00   plt
262144    11711.83      9386.44 12133.27   plt
524288    11959.39      9359.01 12457.07   plt
1048576   12109.58      9855.95 12612.07   plt
2097152   12181.78     10073.88 12656.90   plt
4194304   12380.03      5064.79 12772.70   plt
                             osu_bw bandwidth (MB/sec)

  20000 +------------------------------------------------------------------+
        |A     +       +      +       +      +       +      +       +      |
  18000 |**                                              mpich/314 ***A***-|
  16000 |-A                                              mpich/33a ###B###-|
        |  *                                                               |
  14000 |-+*                                                             +-|
        |   *                                          ***************A    |
  12000 |-+ A***A*******A**************A***************                  +-|
  10000 |-+                     #######B#########                        +-|
        |#B#B###B#######B#######                 ######                    |
   8000 |B+                                            ######            +-|
        |                                                    ######        |
   6000 |-+                                                        ###   +-|
        |                                                             B    |
   4000 |-+                                                              +-|
   2000 |-+                                                              +-|
        |      +       +      +       +      +       +      +       +      |
      0 +------------------------------------------------------------------+
        0    500000  1e+06 1.5e+06  2e+06 2.5e+06  3e+06 3.5e+06  4e+06 4.5e+06
                                   Message size
Analysis

mpich/314 (ABI)

srun -Cmc -N1 -n2 -t10 sarus run --mpi \
--mount=type=bind,source=$SCRATCH/NEMO,destination=$SCRATCH/NEMO \
load/ethcscs/mpich:ub1804_cuda101_mpi314_mpip bash -c \
'cd $SCRATCH/NEMO ;/tmp/OSU/osu-micro-benchmarks-5.6.2/mpi/pt2pt/osu_bw'

mpich/33a (not ABI)

srun -Cmc -N1 -n2 -t10 --mpi=pmi2 sarus run \
--mount=type=bind,source=$SCRATCH/NEMO,destination=$SCRATCH/NEMO \
load/ethcscs/mpich:ub1804_cuda101_mpi33a_mpip bash -c \
'cd $SCRATCH/NEMO ;/tmp/OSU/osu-micro-benchmarks-5.6.2/mpi/pt2pt/osu_bw'

cray-mpich/7.7.10 (native, gcc/8.3.0)

make osu_bw LDFLAGS="$MPIPLD"
srun -Cmc -N1 -n2 osu_bw

performance report

----------------------------------- ------------------------------------
@--- MPI Time (seconds) ----------- @--- MPI Time (seconds) ------------
----------------------------------- ------------------------------------
Task    AppTime    MPITime     MPI% Task    AppTime    MPITime     MPI%
   0       1.48       1.47    99.04   0       1.26       1.24    98.81
   1       1.48       1.47    99.07   1       1.26       1.24    98.93
   *       2.96       2.93    99.06   *       2.51       2.48    98.87
osu_bw.2.528.1.mpiP.mpich33a        osu_bw.2.548.1.mpiP.mpich314

---------------------------------------------------------------------------
@--- MPI Time (seconds) ---------------------------------------------------
---------------------------------------------------------------------------
Task    AppTime    MPITime     MPI%
   0       1.36       1.35    99.08
   1       1.36       1.35    99.18
   *       2.72        2.7    99.13
osu_bw.2.1520.1.mpiP.cray
osu_latency

performance report

------------------------------------ ------------------------------------
@--- MPI Time (seconds) ------------ @--- MPI Time (seconds) ------------
------------------------------------ ------------------------------------
Task    AppTime    MPITime     MPI%  Task    AppTime    MPITime     MPI%
   0       4.76       4.71    98.94     0       2.33       2.27    97.37
   1       4.76        4.7    98.56     1       2.33       2.27    97.13
   *       9.53       9.41    98.75     *       4.67       4.54    97.25
osu_latency.2.528.1.mpiP.mpich33a    osu_latency.2.548.1.mpiP.mpich314