Skip to content

Commit

Permalink
Lassen regression test update (#1094)
Browse files Browse the repository at this point in the history
This PR updates the regression test on lassen because the default CUDA version has been upgraded.
  • Loading branch information
liruipeng authored May 3, 2024
1 parent d475cdf commit 94a9b16
Show file tree
Hide file tree
Showing 11 changed files with 145 additions and 141 deletions.
32 changes: 17 additions & 15 deletions AUTOTEST/machine-lassen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,57 +47,57 @@ save="lassen"

######################
## DEFAULT CUDA ##
## (cuda/10.1.243) ##
## (cuda/11.2.0) ##
######################

module -q load cuda
module -q load xl

# CUDA with UM in debug mode [ij, ams, struct, sstruct]
co="--with-cuda --enable-unified-memory --enable-persistent --enable-debug --with-gpu-arch=70 --with-memory-tracker --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
co="--with-cuda --enable-unified-memory --enable-persistent --enable-debug --with-gpu-arch=70 --with-memory-tracker --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\'"
ro="-ij-gpu -ams -struct -sstruct -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
eo="-gpu -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro -eo: $eo
./renametest.sh basic $output_dir/basic-cuda-um

# CUDA with UM in debug mode with error printing [error]
co="--with-cuda --enable-unified-memory --enable-persistent --enable-debug --with-print-errors --with-gpu-arch=70 --with-memory-tracker --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
co="--with-cuda --enable-unified-memory --enable-persistent --enable-debug --with-print-errors --with-gpu-arch=70 --with-memory-tracker --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\'"
ro="-error -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda-um-with-errors

# CUDA with UM and mixed-int
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
co="--with-cuda --enable-unified-memory --enable-mixedint --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\'"
ro="-ij-mixed -ams -struct -sstruct-mixed -rt -mpibind -save ${save} -rtol ${rtol} -atol ${atol}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda-um-mixedint

# CUDA with UM with shared library
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
co="--with-cuda --enable-unified-memory --with-openmp --enable-hopscotch --enable-shared --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\'"
ro="-gpumemcheck -rt -mpibind -cudamemcheck -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda-um-shared

# CUDA with UM and single precision
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
co="--with-cuda --enable-unified-memory --enable-single --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\'"
ro="-single -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: ${ro}
./renametest.sh basic $output_dir/basic-cuda-um-single

# CUDA with UM without MPI [no run]
#co="--with-cuda --enable-unified-memory --without-MPI --with-gpu-arch=70 --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
#co="--with-cuda --enable-unified-memory --without-MPI --with-gpu-arch=70 --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
#./renametest.sh basic $output_dir/basic-cuda-um-without-MPI

# CUDA without UM with device memory pool [struct]
co="--with-cuda --enable-device-memory-pool --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
co="--with-cuda --enable-device-memory-pool --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\'"
ro="-struct -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda-nonum

# CUDA without UM with umpire [benchmark]
UMPIRE_DIR=/usr/workspace/hypre/ext-libs/Umpire/2022.03.1-nvcc10.1.243-sm_70-xl2021.09.22
co="--with-cuda --with-gpu-arch=70 --with-umpire --with-umpire-include=${UMPIRE_DIR}/include --with-umpire-lib-dirs=${UMPIRE_DIR}/lib --with-umpire-libs=umpire --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
UMPIRE_DIR=/usr/workspace/hypre/ext-libs/Umpire/2022.03.1-nvcc11.2-sm_70-xl2023.06.28-cuda-11.2.0-gcc-8.3.1
co="--with-cuda --with-gpu-arch=70 --with-umpire --with-umpire-include=${UMPIRE_DIR}/include --with-umpire-lib-dirs=${UMPIRE_DIR}/lib --with-umpire-libs=umpire --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\'"
ro="-bench -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-cuda-bench
Expand All @@ -113,34 +113,36 @@ ro="-ij-noilu -ams -struct -sstruct -rt -mpibind -save lassen_cpu"
############

# OMP 4.5 with UM with shared library [no run]
#co="--with-device-openmp --enable-unified-memory --enable-shared --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029:1500-030:1501-308\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029:1500-030:1501-308\\'"
#co="--with-device-openmp --enable-unified-memory --enable-shared --with-extra-CFLAGS=\\'-qsuppress=1500-029:1500-030:1501-308\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029:1500-030:1501-308\\'"
#./test.sh basic.sh $src_dir -co: $co -mo: $mo
#./renametest.sh basic $output_dir/basic-deviceomp-um-shared

# OMP 4.5 without UM in debug mode [struct]
co="--with-device-openmp --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qmaxmem=-1 -qsuppress=1500-029\\'"
co="--with-device-openmp --enable-debug --with-gpu-arch=70 --with-extra-CFLAGS=\\'-qsuppress=1500-029\\' --with-extra-CXXFLAGS=\\'-qsuppress=1500-029\\'"
ro="-struct -rt -mpibind -save ${save}"
./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro
./renametest.sh basic $output_dir/basic-deviceomp-nonum-debug-struct

#####################################
## CUDA + CMake build (only) tests ##
#####################################
module -q load cmake/3.16.8
module list cmake/3.16.8 |& grep "None found"

mo="-j"

# CUDA with UM + CMake
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_ENABLE_PERSISTENT_COMM=ON -DHYPRE_ENABLE_DEVICE_POOL=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
./renametest.sh cmake $output_dir/cmake-cuda-um-ij

# CUDA with UM [shared library] + CMake
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_WITH_OPENMP=ON -DHYPRE_ENABLE_HOPSCOTCH=ON -DHYPRE_ENABLE_SHARED=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029 "\'" -DHYPRE_CUDA_SM=70"
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_WITH_OPENMP=ON -DHYPRE_ENABLE_HOPSCOTCH=ON -DHYPRE_ENABLE_SHARED=ON -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qsuppress=1500-029 "\'" -DHYPRE_CUDA_SM=70"
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
./renametest.sh cmake $output_dir/cmake-cuda-um-shared

# CUDA without UM + CMake
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qmaxmem=-1 -qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
co="-DCMAKE_C_COMPILER=$(which xlc) -DCMAKE_CXX_COMPILER=$(which xlc++) -DCMAKE_CUDA_COMPILER=$(which nvcc) -DMPI_C_COMPILER=$(which mpicc) -DMPI_CXX_COMPILER=$(which mpicxx) -DHYPRE_WITH_CUDA=ON -DCMAKE_BUILD_TYPE=Debug -DHYPRE_WITH_EXTRA_CFLAGS="\'"-qsuppress=1500-029"\'" -DHYPRE_WITH_EXTRA_CXXFLAGS="\'"-qsuppress=1500-029"\'" -DHYPRE_CUDA_SM=70"
./test.sh cmake.sh $src_dir -co: $co -mo: $mo
./renametest.sh cmake $output_dir/cmake-cuda-nonum-struct

Expand Down
126 changes: 63 additions & 63 deletions src/test/TEST_ams/solvers.saved.lassen
Original file line number Diff line number Diff line change
@@ -1,117 +1,117 @@
# Output file: solvers.out.0
Cycle 58 2.180962e-05 0.801100 1.624257e-06
Cycle 59 1.747511e-05 0.801257 1.301447e-06
Cycle 60 1.399773e-05 0.801009 1.042471e-06
Cycle 61 1.122074e-05 0.801612 8.356570e-07
Cycle 58 2.180961e-05 0.801052 1.624256e-06
Cycle 59 1.747620e-05 0.801308 1.301528e-06
Cycle 60 1.400242e-05 0.801228 1.042821e-06
Cycle 61 1.121910e-05 0.801226 8.355349e-07


Average Convergence Factor = 0.794989
Average Convergence Factor = 0.794987

# Output file: solvers.out.1
Cycle 58 2.180962e-05 0.801100 1.624257e-06
Cycle 59 1.747511e-05 0.801257 1.301447e-06
Cycle 60 1.399773e-05 0.801009 1.042471e-06
Cycle 61 1.122074e-05 0.801612 8.356570e-07
Cycle 58 2.180961e-05 0.801052 1.624256e-06
Cycle 59 1.747620e-05 0.801308 1.301528e-06
Cycle 60 1.400242e-05 0.801228 1.042821e-06
Cycle 61 1.121910e-05 0.801226 8.355349e-07


Average Convergence Factor = 0.794989
Average Convergence Factor = 0.794987

# Output file: solvers.out.2
Cycle 97 1.783837e-05 0.873468 1.328500e-06
Cycle 98 1.558153e-05 0.873484 1.160424e-06
Cycle 99 1.361003e-05 0.873472 1.013597e-06
Cycle 100 1.188958e-05 0.873590 8.854686e-07
Cycle 97 1.784019e-05 0.873467 1.328636e-06
Cycle 98 1.557979e-05 0.873297 1.160294e-06
Cycle 99 1.360847e-05 0.873470 1.013482e-06
Cycle 100 1.188950e-05 0.873684 8.854624e-07


Average Convergence Factor = 0.869905

# Output file: solvers.out.3
Cycle 97 1.783837e-05 0.873468 1.328500e-06
Cycle 98 1.558153e-05 0.873484 1.160424e-06
Cycle 99 1.361003e-05 0.873472 1.013597e-06
Cycle 100 1.188958e-05 0.873590 8.854686e-07
Cycle 97 1.784019e-05 0.873467 1.328636e-06
Cycle 98 1.557979e-05 0.873297 1.160294e-06
Cycle 99 1.360847e-05 0.873470 1.013482e-06
Cycle 100 1.188950e-05 0.873684 8.854624e-07


Average Convergence Factor = 0.869905

# Output file: solvers.out.4

Iterations = 13
Final Relative Residual Norm = 4.939012e-07
Final Relative Residual Norm = 4.939034e-07

# Output file: solvers.out.5

Iterations = 13
Final Relative Residual Norm = 4.939012e-07
Final Relative Residual Norm = 4.939034e-07

# Output file: solvers.out.6

Iterations = 16
Final Relative Residual Norm = 6.410688e-07
Final Relative Residual Norm = 6.410653e-07

# Output file: solvers.out.7

Iterations = 16
Final Relative Residual Norm = 6.410688e-07
Final Relative Residual Norm = 6.410653e-07

# Output file: solvers.out.12

Iterations = 18
Final Relative Residual Norm = 2.433909e+01
Final Relative Residual Norm = 2.433983e+01

# Output file: solvers.out.8

Eigenvalue lambda 3.02357653920195e+01
Eigenvalue lambda 3.03135374702301e+01
Eigenvalue lambda 3.85013899430578e+01
Eigenvalue lambda 5.14395940112140e+01
Eigenvalue lambda 5.15742481824047e+01
Residual 7.45352589377429e-05
Residual 7.43880033246762e-05
Residual 1.26594022210671e-04
Residual 8.99904407865315e-05
Residual 9.30548924090266e-05
Eigenvalue lambda 3.02357653920079e+01
Eigenvalue lambda 3.03135374702236e+01
Eigenvalue lambda 3.85013899430559e+01
Eigenvalue lambda 5.14395940112081e+01
Eigenvalue lambda 5.15742481823856e+01
Residual 7.45352588578718e-05
Residual 7.43880032620007e-05
Residual 1.26594022215394e-04
Residual 8.99904406834779e-05
Residual 9.30548921427010e-05

24 iterations
# Output file: solvers.out.9

Eigenvalue lambda 3.02357653920195e+01
Eigenvalue lambda 3.03135374702301e+01
Eigenvalue lambda 3.85013899430578e+01
Eigenvalue lambda 5.14395940112140e+01
Eigenvalue lambda 5.15742481824047e+01
Residual 7.45352589377429e-05
Residual 7.43880033246762e-05
Residual 1.26594022210671e-04
Residual 8.99904407865315e-05
Residual 9.30548924090266e-05
Eigenvalue lambda 3.02357653920079e+01
Eigenvalue lambda 3.03135374702236e+01
Eigenvalue lambda 3.85013899430559e+01
Eigenvalue lambda 5.14395940112081e+01
Eigenvalue lambda 5.15742481823856e+01
Residual 7.45352588578718e-05
Residual 7.43880032620007e-05
Residual 1.26594022215394e-04
Residual 8.99904406834779e-05
Residual 9.30548921427010e-05

24 iterations
# Output file: solvers.out.10

Eigenvalue lambda 3.02357653945254e+01
Eigenvalue lambda 3.03135374737707e+01
Eigenvalue lambda 3.85013899480154e+01
Eigenvalue lambda 5.14395940359891e+01
Eigenvalue lambda 5.15742483525312e+01
Residual 8.77563343325342e-05
Residual 1.06541588442403e-04
Residual 8.94426356809917e-05
Residual 1.08234820725642e-04
Residual 1.17206146877284e-04
Eigenvalue lambda 3.02357653967707e+01
Eigenvalue lambda 3.03135374800728e+01
Eigenvalue lambda 3.85013899397799e+01
Eigenvalue lambda 5.14395940101665e+01
Eigenvalue lambda 5.15742480845151e+01
Residual 8.77562666069003e-05
Residual 1.06541449357735e-04
Residual 8.94425865221178e-05
Residual 1.08234881163452e-04
Residual 1.17205182508686e-04

35 iterations
# Output file: solvers.out.11

Eigenvalue lambda 3.02357653945254e+01
Eigenvalue lambda 3.03135374737707e+01
Eigenvalue lambda 3.85013899480154e+01
Eigenvalue lambda 5.14395940359891e+01
Eigenvalue lambda 5.15742483525312e+01
Residual 8.77563343325342e-05
Residual 1.06541588442403e-04
Residual 8.94426356809917e-05
Residual 1.08234820725642e-04
Residual 1.17206146877284e-04
Eigenvalue lambda 3.02357653967707e+01
Eigenvalue lambda 3.03135374800728e+01
Eigenvalue lambda 3.85013899397799e+01
Eigenvalue lambda 5.14395940101665e+01
Eigenvalue lambda 5.15742480845151e+01
Residual 8.77562666069003e-05
Residual 1.06541449357735e-04
Residual 8.94425865221178e-05
Residual 1.08234881163452e-04
Residual 1.17205182508686e-04

35 iterations
2 changes: 1 addition & 1 deletion src/test/TEST_bench/benchmark_spgemm.jobs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ mpirun -np 1 ./ij_mm -n 1024 1024 1 -5pt -verify 1 -spgemm_binned 0 -spgemmalg
mpirun -np 1 ./ij_mm -n 1024 1024 1 -9pt -verify 1 -spgemm_binned 0 -spgemmalg 3 -rep 10 > benchmark_spgemm.out.12

mpirun -np 1 ./ij_mm -n 128 128 128 -7pt -verify 1 -vendor 1 -rep 10 > benchmark_spgemm.out.13
mpirun -np 1 ./ij_mm -n 128 128 128 -27pt -verify 1 -vendor 1 -rep 10 > benchmark_spgemm.out.14
mpirun -np 1 ./ij_mm -n 80 80 80 -27pt -verify 1 -vendor 1 -rep 10 > benchmark_spgemm.out.14
mpirun -np 1 ./ij_mm -n 1024 1024 1 -5pt -verify 1 -vendor 1 -rep 10 > benchmark_spgemm.out.15
mpirun -np 1 ./ij_mm -n 1024 1024 1 -9pt -verify 1 -vendor 1 -rep 10 > benchmark_spgemm.out.16

Expand Down
14 changes: 7 additions & 7 deletions src/test/TEST_bench/benchmark_spgemm.perf.saved.lassen
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,23 @@ Device Parcsr Matrix-by-Matrix wall clock time = 0.008427 seconds
# Output file: benchmark_spgemm.out.12
Device Parcsr Matrix-by-Matrix wall clock time = 0.011918 seconds
# Output file: benchmark_spgemm.out.13
Device Parcsr Matrix-by-Matrix wall clock time = 0.122758 seconds
Device Parcsr Matrix-by-Matrix wall clock time = 0.027048 seconds
# Output file: benchmark_spgemm.out.14
Device Parcsr Matrix-by-Matrix wall clock time = 0.654239 seconds
Device Parcsr Matrix-by-Matrix wall clock time = 0.064765 seconds
# Output file: benchmark_spgemm.out.15
Device Parcsr Matrix-by-Matrix wall clock time = 0.041012 seconds
Device Parcsr Matrix-by-Matrix wall clock time = 0.006528 seconds
# Output file: benchmark_spgemm.out.16
Device Parcsr Matrix-by-Matrix wall clock time = 0.075197 seconds
Device Parcsr Matrix-by-Matrix wall clock time = 0.018399 seconds
# Output file: benchmark_spgemm.out.17
Device Parcsr Matrix-by-Matrix wall clock time = 0.001930 seconds
Device Parcsr Matrix-by-Matrix wall clock time = 0.001937 seconds
# Output file: benchmark_spgemm.out.18
Device Parcsr Matrix-by-Matrix wall clock time = 0.005978 seconds
Device Parcsr Matrix-by-Matrix wall clock time = 0.006223 seconds
# Output file: benchmark_spgemm.out.19
Device Parcsr Matrix-by-Matrix wall clock time = 0.001117 seconds
# Output file: benchmark_spgemm.out.20
Device Parcsr Matrix-by-Matrix wall clock time = 0.001318 seconds
# Output file: benchmark_spgemm.out.21
Device Parcsr Matrix-by-Matrix wall clock time = 0.031280 seconds
Device Parcsr Matrix-by-Matrix wall clock time = 0.026565 seconds
# Output file: benchmark_spgemm.out.22
Device Parcsr Matrix-by-Matrix wall clock time = 0.138814 seconds
# Output file: benchmark_spgemm.out.23
Expand Down
2 changes: 1 addition & 1 deletion src/test/TEST_bench/benchmark_spgemm.perf.saved.tioga
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Device Parcsr Matrix-by-Matrix wall clock time = 0.019611 seconds
# Output file: benchmark_spgemm.out.13
Device Parcsr Matrix-by-Matrix wall clock time = 0.018017 seconds
# Output file: benchmark_spgemm.out.14
Device Parcsr Matrix-by-Matrix wall clock time = 0.130224 seconds
Device Parcsr Matrix-by-Matrix wall clock time = 0.014804 seconds
# Output file: benchmark_spgemm.out.15
Device Parcsr Matrix-by-Matrix wall clock time = 0.006545 seconds
# Output file: benchmark_spgemm.out.16
Expand Down
4 changes: 2 additions & 2 deletions src/test/TEST_bench/benchmark_spgemm.saved.lassen
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ B 1048576 x 1048576, NNZ 26152996, RNZ 24
A^2: 2097152 x 2097152, nnz [CPU 51742208, GPU 51742208], CPU-GPU err 0.000000e+00
B 2097152 x 2097152, NNZ 51742208, RNZ 24
# Output file: benchmark_spgemm.out.14
A^2: 2097152 x 2097152, nnz [CPU 254840104, GPU 254840104], CPU-GPU err 0.000000e+00
B 2097152 x 2097152, NNZ 254840104, RNZ 121
A^2: 512000 x 512000, nnz [CPU 61162984, GPU 61162984], CPU-GPU err 0.000000e+00
B 512000 x 512000, NNZ 61162984, RNZ 119
# Output file: benchmark_spgemm.out.15
A^2: 1048576 x 1048576, nnz [CPU 13611012, GPU 13611012], CPU-GPU err 0.000000e+00
B 1048576 x 1048576, NNZ 13611012, RNZ 12
Expand Down
4 changes: 2 additions & 2 deletions src/test/TEST_bench/benchmark_spgemm.saved.tioga
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ B 1048576 x 1048576, NNZ 26152996, RNZ 24
A^2: 2097152 x 2097152, nnz [CPU 51742208, GPU 51742208], CPU-GPU err 0.000000e+00
B 2097152 x 2097152, NNZ 51742208, RNZ 24
# Output file: benchmark_spgemm.out.14
A^2: 2097152 x 2097152, nnz [CPU 254840104, GPU 254840104], CPU-GPU err 0.000000e+00
B 2097152 x 2097152, NNZ 254840104, RNZ 121
A^2: 512000 x 512000, nnz [CPU 61162984, GPU 61162984], CPU-GPU err 0.000000e+00
B 512000 x 512000, NNZ 61162984, RNZ 119
# Output file: benchmark_spgemm.out.15
A^2: 1048576 x 1048576, nnz [CPU 13611012, GPU 13611012], CPU-GPU err 0.000000e+00
B 1048576 x 1048576, NNZ 13611012, RNZ 12
Expand Down
Loading

0 comments on commit 94a9b16

Please sign in to comment.