From e8ec356d271fc34d7116de4adc62f59ab98aeace Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Tue, 27 Aug 2019 09:28:08 -0400 Subject: [PATCH 1/3] Benchmarking forcing functions Former-commit-id: feeb59a83ddee778259a67055ac84fdadfda3d03 --- benchmark/benchmark_forcing_functions.jl | 63 ++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 benchmark/benchmark_forcing_functions.jl diff --git a/benchmark/benchmark_forcing_functions.jl b/benchmark/benchmark_forcing_functions.jl new file mode 100644 index 0000000000..a59885457e --- /dev/null +++ b/benchmark/benchmark_forcing_functions.jl @@ -0,0 +1,63 @@ +using TimerOutputs, Printf + +using Oceananigans + +const timer = TimerOutput() + +Ni = 2 # Number of iterations before benchmarking starts. +Nt = 10 # Number of iterations to use for benchmarking time stepping. + +# Run benchmark across these parameters. + Ns = [(64, 64, 64)] + float_types = [Float64] # Float types to benchmark. + archs = [CPU()] # Architectures to benchmark on. +@hascuda archs = [CPU(), GPU()] # Benchmark GPU on systems with CUDA-enabled GPUs. + +arch_name(::String) = "" +arch_name(::CPU) = "CPU" +arch_name(::GPU) = "GPU" + +function benchmark_name(N, id, arch, FT; npad=2) + Nx, Ny, Nz = N + + bn = "" + bn *= lpad(Nx, npad, " ") * "×" * lpad(Ny, npad, " ") * "×" * lpad(Nz, npad, " ") + bn *= " $id" + + arch = arch_name(arch) + bn *= " ($arch, $FT)" + + return bn +end + +benchmark_name(N, id) = benchmark_name(N, id, "", "", "") + +for arch in archs, float_type in float_types, N in Ns + Nx, Ny, Nz = N + Lx, Ly, Lz = 1, 1, 1 + + model = Model(N=(Nx, Ny, Nz), L=(Lx, Ly, Lz), arch=arch, float_type=float_type) + time_step!(model, Ni, 1) # First 1-2 iterations usually slower. + + bn = benchmark_name(N, " no forcing", arch, float_type) + @printf("Running benchmark: %s...\n", bn) + for i in 1:Nt + @timeit timer bn time_step!(model, 1, 1) + end + + @inline FT(grid, U, Φ, i, j, k) = ifelse(k == 1, -1e-4 * (Φ.T[i, j, 1] - 0), 0) + forcing = Forcing(FT=FT) + + model = Model(N=(Nx, Ny, Nz), L=(Lx, Ly, Lz), arch=arch, float_type=float_type, forcing=forcing) + time_step!(model, Ni, 1) # First 1-2 iterations usually slower. + + bn = benchmark_name(N, "with forcing", arch, float_type) + @printf("Running benchmark: %s...\n", bn) + for i in 1:Nt + @timeit timer bn time_step!(model, 1, 1) + end +end + +print_timer(timer, title="Forcing function benchmarks") + +println("") From 502fffa15795dbd9c37a30d867b5ae7d1bce62ef Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Tue, 27 Aug 2019 13:58:54 +0000 Subject: [PATCH 2/3] Cleanup benchmarking script Former-commit-id: fd864d2f1b3f9300af97e67ccff34a0330819e1c --- benchmark/benchmark_forcing_functions.jl | 42 +++++++++++++++--------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/benchmark/benchmark_forcing_functions.jl b/benchmark/benchmark_forcing_functions.jl index a59885457e..78f8bf1c7a 100644 --- a/benchmark/benchmark_forcing_functions.jl +++ b/benchmark/benchmark_forcing_functions.jl @@ -8,10 +8,10 @@ Ni = 2 # Number of iterations before benchmarking starts. Nt = 10 # Number of iterations to use for benchmarking time stepping. # Run benchmark across these parameters. - Ns = [(64, 64, 64)] + Ns = [(128, 128, 128)] float_types = [Float64] # Float types to benchmark. - archs = [CPU()] # Architectures to benchmark on. -@hascuda archs = [CPU(), GPU()] # Benchmark GPU on systems with CUDA-enabled GPUs. + archs = [CPU()] # Architectures to benchmark on. +@hascuda archs = [GPU()] # Benchmark GPU on systems with CUDA-enabled GPUs. arch_name(::String) = "" arch_name(::CPU) = "CPU" @@ -32,32 +32,42 @@ end benchmark_name(N, id) = benchmark_name(N, id, "", "", "") +@inline function Fu(grid, U, Φ, i, j, k) + if k == 1 + return -2*0.1/grid.Δz^2 * (U.u[i, j, 1] - 0) + elseif k == grid.Nz + return -2*0.1/grid.Δz^2 * (U.u[i, j, grid.Nz] - 0) + else + return 0 + end +end + +@inline FT(grid, U, Φ, i, j, k) = ifelse(k == 1, -1e-4 * (Φ.T[i, j, 1] - 0), 0) +forcing = Forcing(Fu=Fu, FT=FT) + for arch in archs, float_type in float_types, N in Ns Nx, Ny, Nz = N Lx, Ly, Lz = 1, 1, 1 + + forced_model = Model(N=(Nx, Ny, Nz), L=(Lx, Ly, Lz), arch=arch, float_type=float_type, forcing=forcing) + time_step!(forced_model, Ni, 1) # First 1-2 iterations usually slower. - model = Model(N=(Nx, Ny, Nz), L=(Lx, Ly, Lz), arch=arch, float_type=float_type) - time_step!(model, Ni, 1) # First 1-2 iterations usually slower. - - bn = benchmark_name(N, " no forcing", arch, float_type) + bn = benchmark_name(N, "with forcing", arch, float_type) @printf("Running benchmark: %s...\n", bn) for i in 1:Nt - @timeit timer bn time_step!(model, 1, 1) + @timeit timer bn time_step!(forced_model, 1, 1) end - @inline FT(grid, U, Φ, i, j, k) = ifelse(k == 1, -1e-4 * (Φ.T[i, j, 1] - 0), 0) - forcing = Forcing(FT=FT) - - model = Model(N=(Nx, Ny, Nz), L=(Lx, Ly, Lz), arch=arch, float_type=float_type, forcing=forcing) - time_step!(model, Ni, 1) # First 1-2 iterations usually slower. + unforced_model = Model(N=(Nx, Ny, Nz), L=(Lx, Ly, Lz), arch=arch, float_type=float_type) + time_step!(unforced_model, Ni, 1) # First 1-2 iterations usually slower. - bn = benchmark_name(N, "with forcing", arch, float_type) + bn = benchmark_name(N, " no forcing", arch, float_type) @printf("Running benchmark: %s...\n", bn) for i in 1:Nt - @timeit timer bn time_step!(model, 1, 1) + @timeit timer bn time_step!(unforced_model, 1, 1) end end print_timer(timer, title="Forcing function benchmarks") - println("") + From db41dcf29329edcedda2c6206733378ea7f0eeea Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Tue, 27 Aug 2019 14:26:49 +0000 Subject: [PATCH 3/3] Let's go with @inbounds Former-commit-id: f870e5722f06fa21e535f4fb88420a86a40ad1c8 --- benchmark/benchmark_forcing_functions.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/benchmark_forcing_functions.jl b/benchmark/benchmark_forcing_functions.jl index 78f8bf1c7a..5568e4d93a 100644 --- a/benchmark/benchmark_forcing_functions.jl +++ b/benchmark/benchmark_forcing_functions.jl @@ -34,15 +34,15 @@ benchmark_name(N, id) = benchmark_name(N, id, "", "", "") @inline function Fu(grid, U, Φ, i, j, k) if k == 1 - return -2*0.1/grid.Δz^2 * (U.u[i, j, 1] - 0) + return @inbounds -2*0.1/grid.Δz^2 * (U.u[i, j, 1] - 0) elseif k == grid.Nz - return -2*0.1/grid.Δz^2 * (U.u[i, j, grid.Nz] - 0) + return @inbounds -2*0.1/grid.Δz^2 * (U.u[i, j, grid.Nz] - 0) else return 0 end end -@inline FT(grid, U, Φ, i, j, k) = ifelse(k == 1, -1e-4 * (Φ.T[i, j, 1] - 0), 0) +@inline FT(grid, U, Φ, i, j, k) = @inbounds ifelse(k == 1, -1e-4 * (Φ.T[i, j, 1] - 0), 0) forcing = Forcing(Fu=Fu, FT=FT) for arch in archs, float_type in float_types, N in Ns