From 41adacf1a5d83dd15ee823c436a4e6f99725eab3 Mon Sep 17 00:00:00 2001 From: Daniel Peter Date: Tue, 23 Feb 2021 10:59:08 +0300 Subject: [PATCH] updates config.fh.in; updates comment and formatting --- setup/config.fh.in | 21 +++++++++++++++++++ src/gpu/kernels/Kernel_2_acoustic_impl.cu | 12 +++++++++++ .../assemble_boundary_potential_on_device.cu | 1 + .../compute_add_sources_acoustic_kernel.cu | 10 ++++----- src/gpu/kernels/get_maximum_field_kernel.cu | 11 +++++----- src/gpu/kernels/get_maximum_vector_kernel.cu | 10 ++++----- .../create_specfem3D_gpu_cuda_kernel_proto.pl | 10 ++++----- 7 files changed, 55 insertions(+), 20 deletions(-) diff --git a/setup/config.fh.in b/setup/config.fh.in index 79598f2e2..d14da1d80 100644 --- a/setup/config.fh.in +++ b/setup/config.fh.in @@ -47,6 +47,8 @@ !! x being the variable name inside the code. #ifdef __INTEL_COMPILER #define STRINGIFY_VAR(a) #a, a +#elif __PGI +#define STRINGIFY_VAR(a) #a, a #else #define STRINGIFY_VAR(a) "a", a #endif @@ -60,6 +62,25 @@ #define STRINGIFY_VAR_TYPE(t,a) "a", t%a #endif +! for debugging +#define DEBUG_ADIOS 0 +#if DEBUG_ADIOS == 1 /* low-level tracing */ +#define TRACE_ADIOS(x) print *,'***debug ADIOS: ',x,' ***' +#define TRACE_ADIOS_ARG(x,y) print *,'***debug ADIOS: ',x,y,' ***' +#define TRACE_ADIOS_L2(x) +#define TRACE_ADIOS_L2_ARG(x,y) +#elif DEBUG_ADIOS == 2 /* high-level tracing */ +#define TRACE_ADIOS(x) print *,'***debug ADIOS: ',x,' ***' +#define TRACE_ADIOS_ARG(x,y) print *,'***debug ADIOS: ',x,y,' ***' +#define TRACE_ADIOS_L2(x) print *,'***debug ADIOS: ',x,' ***' +#define TRACE_ADIOS_L2_ARG(x,y) print *,'***debug ADIOS: ',x,y,' ***' +#else +#define TRACE_ADIOS(x) +#define TRACE_ADIOS_ARG(x,y) +#define TRACE_ADIOS_L2(x) +#define TRACE_ADIOS_L2_ARG(x,y) +#endif + !----------------------------------------------------------------------- ! ! 
Force vectorization diff --git a/src/gpu/kernels/Kernel_2_acoustic_impl.cu b/src/gpu/kernels/Kernel_2_acoustic_impl.cu index 36092b4f5..8c8b10c5a 100644 --- a/src/gpu/kernels/Kernel_2_acoustic_impl.cu +++ b/src/gpu/kernels/Kernel_2_acoustic_impl.cu @@ -490,6 +490,18 @@ Kernel_2_acoustic_impl(const int nb_blocks_to_compute, } +// note: in the past, we used templating to be able to call the same kernel_2 twice, for both the +// forward and backward wavefields. that is, calling it by +// Kernel_2_acoustic_impl<1> +// and +// Kernel_2_acoustic_impl<3> +// the templating helped to use textures for forward/backward fields. +// +// most of this has become obsolete: textures are hardly needed for speedup anymore +// and the Kernel_2 has become more and more specialized for different cases to +// reduce register pressure and increase occupancy for better performance. +// thus, in the future we might re-evaluate and remove this template feature. +// // "forced" template instantiation // see: https://isocpp.org/wiki/faq/templates#separate-template-fn-defn-from-decl // https://stackoverflow.com/questions/31705764/cuda-c-using-a-template-function-which-calls-a-template-kernel diff --git a/src/gpu/kernels/assemble_boundary_potential_on_device.cu b/src/gpu/kernels/assemble_boundary_potential_on_device.cu index fcc81ec5d..ad912041c 100644 --- a/src/gpu/kernels/assemble_boundary_potential_on_device.cu +++ b/src/gpu/kernels/assemble_boundary_potential_on_device.cu @@ -27,6 +27,7 @@ !===================================================================== */ + __global__ void assemble_boundary_potential_on_device(field* d_potential_dot_dot_acoustic, field* d_send_potential_dot_dot_buffer, const int num_interfaces_ext_mesh, diff --git a/src/gpu/kernels/compute_add_sources_acoustic_kernel.cu b/src/gpu/kernels/compute_add_sources_acoustic_kernel.cu index 3c8d2d100..7a9981beb 100644 --- a/src/gpu/kernels/compute_add_sources_acoustic_kernel.cu +++ 
b/src/gpu/kernels/compute_add_sources_acoustic_kernel.cu @@ -4,11 +4,11 @@ ! S p e c f e m 3 D V e r s i o n 3 . 0 ! --------------------------------------- ! -! Main historical authors: Dimitri Komatitsch and Jeroen Tromp -! CNRS, France -! and Princeton University, USA -! (there are currently many more authors!) -! (c) October 2017 +! Main historical authors: Dimitri Komatitsch and Jeroen Tromp +! CNRS, France +! and Princeton University, USA +! (there are currently many more authors!) +! (c) October 2017 ! ! This program is free software; you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by diff --git a/src/gpu/kernels/get_maximum_field_kernel.cu b/src/gpu/kernels/get_maximum_field_kernel.cu index 61eec1412..15d50a111 100644 --- a/src/gpu/kernels/get_maximum_field_kernel.cu +++ b/src/gpu/kernels/get_maximum_field_kernel.cu @@ -4,11 +4,11 @@ ! S p e c f e m 3 D V e r s i o n 3 . 0 ! --------------------------------------- ! -! Main historical authors: Dimitri Komatitsch and Jeroen Tromp -! CNRS, France -! and Princeton University, USA -! (there are currently many more authors!) -! (c) October 2017 +! Main historical authors: Dimitri Komatitsch and Jeroen Tromp +! CNRS, France +! and Princeton University, USA +! (there are currently many more authors!) +! (c) October 2017 ! ! This program is free software; you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by @@ -27,6 +27,7 @@ !===================================================================== */ + __global__ void get_maximum_field_kernel(field* array, int size, realw* d_max){ /* simplest version: uses only 1 thread diff --git a/src/gpu/kernels/get_maximum_vector_kernel.cu b/src/gpu/kernels/get_maximum_vector_kernel.cu index aff692e88..672311fdd 100644 --- a/src/gpu/kernels/get_maximum_vector_kernel.cu +++ b/src/gpu/kernels/get_maximum_vector_kernel.cu @@ -4,11 +4,11 @@ ! 
S p e c f e m 3 D V e r s i o n 3 . 0 ! --------------------------------------- ! -! Main historical authors: Dimitri Komatitsch and Jeroen Tromp -! CNRS, France -! and Princeton University, USA -! (there are currently many more authors!) -! (c) October 2017 +! Main historical authors: Dimitri Komatitsch and Jeroen Tromp +! CNRS, France +! and Princeton University, USA +! (there are currently many more authors!) +! (c) October 2017 ! ! This program is free software; you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by diff --git a/utils/create_specfem3D_gpu_cuda_kernel_proto.pl b/utils/create_specfem3D_gpu_cuda_kernel_proto.pl index 19c777e5f..e6a3c4110 100755 --- a/utils/create_specfem3D_gpu_cuda_kernel_proto.pl +++ b/utils/create_specfem3D_gpu_cuda_kernel_proto.pl @@ -20,11 +20,11 @@ ! S p e c f e m 3 D V e r s i o n 3 . 0 ! --------------------------------------- ! -! Main historical authors: Dimitri Komatitsch and Jeroen Tromp -! CNRS, France -! and Princeton University, USA -! (there are currently many more authors!) -! (c) October 2017 +! Main historical authors: Dimitri Komatitsch and Jeroen Tromp +! CNRS, France +! and Princeton University, USA +! (there are currently many more authors!) +! (c) October 2017 ! ! This program is free software; you can redistribute it and/or modify ! it under the terms of the GNU General Public License as published by