From 7380064469938e65feb0b1857974ae18a9311db9 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Sat, 16 Nov 2024 20:43:44 -0600 Subject: [PATCH] add printing gauge field for debugging initialize dslash_type_precondition fix CMake to use QUDA_DIRAC_LAPLACE instead of old QUDA_LAPLACE --- include/gauge_field.h | 27 +++++++++++-- include/gauge_field_order.h | 23 +++++++----- lib/check_params.h | 1 + lib/color_spinor_util.in.cu | 4 +- lib/gauge_field.cpp | 9 ++++- lib/gauge_norm.in.cu | 75 ++++++++++++++++++++++++++++++++++++- tests/CMakeLists.txt | 6 +-- 7 files changed, 123 insertions(+), 22 deletions(-) diff --git a/include/gauge_field.h b/include/gauge_field.h index a62b6a684f..776681e44b 100644 --- a/include/gauge_field.h +++ b/include/gauge_field.h @@ -250,12 +250,12 @@ namespace quda { */ void setTuningString(); + public: /** @brief Initialize the padded region to 0 */ void zeroPad(); - public: /** @brief Default constructor */ @@ -455,7 +455,7 @@ namespace quda { std::enable_if_t && !std::is_pointer_v::type>, T> data() const { if (is_pointer_array(order)) errorQuda("Non dim-array ordered field requested but order is %d", order); - return reinterpret_cast(gauge.data()); + return static_cast(gauge.data()); } /** @@ -473,7 +473,7 @@ namespace quda { "data() requires a pointer cast type"); if (d >= (unsigned)geometry) errorQuda("Invalid array index %d for geometry %d field", d, geometry); if (!is_pointer_array(order)) errorQuda("Dim-array ordered field requested but order is %d", order); - return reinterpret_cast(gauge_array[d].data()); + return static_cast(gauge_array[d].data()); } void *raw_pointer() const @@ -500,7 +500,7 @@ namespace quda { { if (!is_pointer_array(order)) errorQuda("Dim-array ordered field requested but order is %d", order); array u = {}; - for (auto d = 0; d < geometry; d++) u[d] = static_cast(gauge_array[d]); + for (auto d = 0; d < geometry; d++) u[d] = static_cast(gauge_array[d].data()); return u; } @@ -651,9 +651,28 @@ namespace quda { } } + /** + * @brief Print the site data + * @param[in] parity Parity index + * @param[in] dim The dimension in which we are printing + * @param[in] x_cb Checkerboard space-time index + * @param[in] rank The rank we are requesting from (default is rank = 0) + */ + void PrintMatrix(int dim, int parity, unsigned int x_cb, int rank = 0) const; + friend struct GaugeFieldParam; }; + /** + @brief Print the value of the field at the requested coordinates + @param[in] a The field we are printing from + @param[in] dim The dimension in which we are printing + @param[in] parity Parity index + @param[in] x_cb Checkerboard space-time index + @param[in] rank The rank we are requesting from (default is rank = 0) + */ + void genericPrintMatrix(const GaugeField &a, int dim, int parity, unsigned int x_cb, int rank = 0); + /** @brief This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm. diff --git a/include/gauge_field_order.h b/include/gauge_field_order.h index 38079c3cbb..e3d777565e 100644 --- a/include/gauge_field_order.h +++ b/include/gauge_field_order.h @@ -153,8 +153,11 @@ namespace quda { template __host__ __device__ inline constexpr bool fixed_point() { return false; } template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } - template<> __host__ __device__ inline constexpr bool fixed_point() { return true; } - template<> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } + template <> __host__ __device__ inline constexpr bool fixed_point() { return true; } template __host__ __device__ inline constexpr bool match() { return false; } template<> __host__ __device__ inline constexpr bool match() { return true; } @@ -377,7 +380,7 @@ namespace quda { { for (int d = 0; d < U.Geometry(); d++) u[d] = gauge_ ? static_cast **>(gauge_)[d] : U.data *>(d); - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -459,7 +462,7 @@ namespace quda { ghostOffset[d + 4] = U.Nface() * U.SurfaceCB(d) * U.Ncolor() * U.Ncolor(); } - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -494,7 +497,7 @@ namespace quda { scale(static_cast(1.0)), scale_inv(static_cast(1.0)) { - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -583,7 +586,7 @@ namespace quda { ghostOffset[d + 4] = U.Nface() * U.SurfaceCB(d) * U.Ncolor() * U.Ncolor(); } - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -636,7 +639,7 @@ namespace quda { scale(static_cast(1.0)), scale_inv(static_cast(1.0)) { - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -715,7 +718,7 @@ namespace quda { nullptr; ghostVolumeCB[d + 4] = U.Nface() * U.SurfaceCB(d); } - resetScale(U.Scale()); + resetScale(U.Scale() * (U.LinkMax() == 0.0 ? 1.0 : U.LinkMax())); } void resetScale(Float max) @@ -745,8 +748,8 @@ namespace quda { @tparam nSpinCoarse Number of "spin degrees of freedom" (for coarse-link fields only) @tparam order Storage order of the field @tparam native_ghost Whether to use native ghosts (inlined into + the padded area for internal-order fields or use a separate array if false) @tparam storeFloat_ Underlying storage type for the field - the padded area for internal-order fields or use a separate array if false) */ template @@ -1478,7 +1481,7 @@ namespace quda { z *= scale; #pragma unroll for (int i = 0; i < 9; i++) out[i] = cmul(z, out[i]); - } else { // stagic phase + } else { // static phase #pragma unroll for (int i = 0; i < 9; i++) { out[i] *= phase; } } diff --git a/lib/check_params.h b/lib/check_params.h index f227ee4716..639db3eb3d 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -578,6 +578,7 @@ void printQudaInvertParam(QudaInvertParam *param) { // domain decomposition parameters //P(inv_type_sloppy, QUDA_INVALID_INVERTER); // disable since invalid means no preconditioner #if defined INIT_PARAM + P(dslash_type_precondition, QUDA_INVALID_DSLASH); P(inv_type_precondition, QUDA_INVALID_INVERTER); P(preconditioner, 0); P(tol_precondition, INVALID_DOUBLE); diff --git a/lib/color_spinor_util.in.cu b/lib/color_spinor_util.in.cu index ef370eda18..324184153f 100644 --- a/lib/color_spinor_util.in.cu +++ b/lib/color_spinor_util.in.cu @@ -329,7 +329,7 @@ namespace quda { printf("rank = %d x = %u, s = %d, { ", comm_rank(), x_cb, s); for (int c = 0; c < o.Ncolor(); c++) { auto value = complex(o(parity, x_cb, s, c)); - printf("(%f,%f) ", value.real(), value.imag()); + printf("(%g,%g) ", value.real(), value.imag()); } printf("}\n"); } @@ -340,7 +340,7 @@ namespace quda { { if (a.isNative()) { constexpr auto order = colorspinor::getNative(nSpin); - print_vector(FieldOrderCB(a), parity, x_cb); + print_vector(FieldOrderCB(a), parity, x_cb); } else if (a.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) { constexpr auto order = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER; print_vector(FieldOrderCB(a), parity, x_cb); diff --git a/lib/gauge_field.cpp b/lib/gauge_field.cpp index 20cf7621ac..40a1351656 100644 --- a/lib/gauge_field.cpp +++ b/lib/gauge_field.cpp @@ -1355,7 +1355,7 @@ namespace quda { qudaMemcpy(buffer, data(), Bytes(), qudaMemcpyDeviceToHost); } else { if (is_pointer_array(order)) { - char *dst_buffer = reinterpret_cast(buffer); + char *dst_buffer = static_cast(buffer); for (int d = 0; d < site_dim; d++) { std::memcpy(&dst_buffer[d * bytes / site_dim], gauge_array[d].data(), bytes / site_dim); } @@ -1375,7 +1375,7 @@ namespace quda { qudaMemcpy(data(), buffer, Bytes(), qudaMemcpyHostToDevice); } else { if (is_pointer_array(order)) { - const char *dst_buffer = reinterpret_cast(buffer); + const char *dst_buffer = static_cast(buffer); for (int d = 0; d < site_dim; d++) { std::memcpy(gauge_array[d].data(), &dst_buffer[d * bytes / site_dim], bytes / site_dim); } @@ -1389,4 +1389,9 @@ namespace quda { } } + void GaugeField::PrintMatrix(int dim, int parity, unsigned int x_cb, int rank) const + { + genericPrintMatrix(*this, dim, parity, x_cb, rank); + } + } // namespace quda diff --git a/lib/gauge_norm.in.cu b/lib/gauge_norm.in.cu index 0da4412d17..1b1f3c6882 100644 --- a/lib/gauge_norm.in.cu +++ b/lib/gauge_norm.in.cu @@ -1,5 +1,6 @@ #include #include +#include namespace quda { @@ -61,7 +62,7 @@ namespace quda { norm_ = norm(u, d, type); // factor of two to account for spin with MG fields } else { if constexpr (sizeof...(N) > 0) { - norm_ = norm(u, d, type, IntList()); + norm_ = norm(u, d, type, IntList()); } else { errorQuda("Nc = %d has not been instantiated", u.Ncolor()); } @@ -108,4 +109,76 @@ namespace quda { return nrm; } + template void print_matrix(const Order &o, int d, int parity, unsigned int x_cb) + { + for (int r = 0; r < o.Ncolor(); r++) { + printf("rank %d parity %d x %u row %d", comm_rank(), parity, x_cb, r); + for (int c = 0; c < o.Ncolor(); c++) { + auto value = complex(o(d, parity, x_cb, r, c)); + printf(" (%g,%g)", value.real(), value.imag()); + } + printf("\n"); + } + } + + template + void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb) + { + switch (a.FieldOrder()) { + case QUDA_FLOAT2_GAUGE_ORDER: + print_matrix(FieldOrder(a), d, parity, x_cb); + break; + case QUDA_QDP_GAUGE_ORDER: + print_matrix(FieldOrder(a), d, parity, x_cb); + break; + case QUDA_MILC_GAUGE_ORDER: + print_matrix(FieldOrder(a), d, parity, x_cb); + break; + default: errorQuda("Unsupported field order %d", a.FieldOrder()); + } + } + + template + void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb, IntList) + { + if (a.Ncolor() == nColor) { + genericPrintMatrix(a, d, parity, x_cb); + } else { + if constexpr (sizeof...(N) > 0) { + genericPrintMatrix(a, d, parity, x_cb, IntList()); + } else { + errorQuda("Not supported Ncolor = %d", a.Ncolor()); + } + } + } + + template void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb) + { + genericPrintMatrix(a, d, parity, x_cb, IntList<@QUDA_MULTIGRID_NC_NVEC_LIST @>()); + } + + void genericPrintMatrix(const GaugeField &a, int d, int parity, unsigned int x_cb, int rank) + { + if (rank != comm_rank()) return; + + GaugeFieldParam param(a); + param.field = const_cast(&a); + param.location = QUDA_CPU_FIELD_LOCATION; + param.create = QUDA_COPY_FIELD_CREATE; + // if field is a pinned device field then we need to clone it on the host + bool host_clone + = (a.Location() == QUDA_CUDA_FIELD_LOCATION && a.MemType() == QUDA_MEMORY_DEVICE && !use_managed_memory()) ? true : + false; + std::unique_ptr clone_a = !host_clone ? nullptr : std::make_unique(param); + const GaugeField &a_ = !host_clone ? a : *clone_a.get(); + + switch (a.Precision()) { + case QUDA_DOUBLE_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break; + case QUDA_SINGLE_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break; + case QUDA_HALF_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break; + case QUDA_QUARTER_PRECISION: genericPrintMatrix(a_, d, parity, x_cb); break; + default: errorQuda("Precision %d not implemented", a.Precision()); + } + } + } // namespace quda diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b295e00d9c..2beb90ceae 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -963,7 +963,7 @@ endif() set_tests_properties(dslash_${DIRAC_NAME}_build_policy${pol2} PROPERTIES ENVIRONMENT QUDA_ENABLE_DSLASH_POLICY=${pol2}) endif() - if(QUDA_LAPLACE) + if(QUDA_DIRAC_LAPLACE) set(DIRAC_NAME laplace) add_test(NAME dslash_${DIRAC_NAME}_mat_policy${pol2} COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} @@ -1221,7 +1221,7 @@ foreach(prec IN LISTS TEST_PRECS) endif() endif() - if (QUDA_LAPLACE) + if (QUDA_DIRAC_LAPLACE) add_test(NAME invert_test_laplace_${prec} COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} --dslash-type laplace --ngcrkrylov 8 --compute-fat-long true @@ -1469,7 +1469,7 @@ foreach(prec IN LISTS TEST_PRECS) --gtest_output=xml:staggered_eigensolve_test_staggered_${prec}.xml) endif() - if (QUDA_LAPLACE) + if (QUDA_DIRAC_LAPLACE) add_test(NAME eigensolve_test_laplace_${prec} COMMAND ${QUDA_CTEST_LAUNCH} $ ${MPIEXEC_POSTFLAGS} --dslash-type laplace --compute-fat-long true