From d46e0761caa7ed7629d55f2c5c2995fbb3861d92 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 14 Aug 2024 17:47:12 +0000 Subject: [PATCH] Revert "[11/N] Fix clang-tidy warnings in aten/src/ATen (#133298)" This reverts commit 35785984013a74469de8c1d29eaecb25aa0c141e. Reverted https://github.com/pytorch/pytorch/pull/133298 on behalf of https://github.com/izaitsevfb due to causes build time regression in aten/src/ATen/native/cpu/ReduceOpsKernel.cpp ([comment](https://github.com/pytorch/pytorch/pull/133298#issuecomment-2289453440)) --- aten/src/ATen/native/cpu/CatKernel.h | 4 ++-- .../ATen/native/cpu/ChannelShuffleKernel.h | 4 ++-- aten/src/ATen/native/cpu/CopyKernel.h | 2 -- .../ATen/native/cpu/DistributionTemplates.h | 19 +++++++++-------- aten/src/ATen/native/cpu/GridSamplerKernel.h | 4 ++-- aten/src/ATen/native/cpu/IndexKernelUtils.h | 9 +++++--- aten/src/ATen/native/cpu/IsContiguous.h | 4 ++-- aten/src/ATen/native/cpu/LogAddExp.h | 4 ++-- aten/src/ATen/native/cpu/PixelShuffleKernel.h | 4 ++-- aten/src/ATen/native/cpu/Reduce.h | 21 ++++++++++--------- aten/src/ATen/native/cpu/ReduceUtils.h | 8 +++---- aten/src/ATen/native/cpu/SampledAddmmKernel.h | 4 ++-- aten/src/ATen/native/cpu/SerialStackImpl.h | 4 ++-- aten/src/ATen/native/cpu/StackKernel.h | 4 ++-- aten/src/ATen/native/cpu/WeightNormKernel.h | 4 ++-- aten/src/ATen/native/cpu/mixed_data_type.h | 4 ++-- aten/src/ATen/native/cpu/moments_utils.h | 12 +++++++---- aten/src/ATen/native/cpu/utils.h | 7 ++++--- aten/src/ATen/native/cpu/zmath.h | 4 ++-- 19 files changed, 67 insertions(+), 59 deletions(-) diff --git a/aten/src/ATen/native/cpu/CatKernel.h b/aten/src/ATen/native/cpu/CatKernel.h index 5afa1add4da3f8..aedb4aec4f5747 100644 --- a/aten/src/ATen/native/cpu/CatKernel.h +++ b/aten/src/ATen/native/cpu/CatKernel.h @@ -4,9 +4,9 @@ #include #include -namespace at::native { +namespace at { namespace native { using cat_serial_fn = void(*)(const Tensor &, const MaterializedITensorListRef&, int64_t); DECLARE_DISPATCH(cat_serial_fn, cat_serial_stub); -} // namespace at::native +}} // namespace at::native diff --git a/aten/src/ATen/native/cpu/ChannelShuffleKernel.h b/aten/src/ATen/native/cpu/ChannelShuffleKernel.h index 387c301c25f030..10e592cf59eb75 100644 --- a/aten/src/ATen/native/cpu/ChannelShuffleKernel.h +++ b/aten/src/ATen/native/cpu/ChannelShuffleKernel.h @@ -6,9 +6,9 @@ namespace at { class TensorBase; } -namespace at::native { +namespace at { namespace native { using channel_shuffle_fn = void(*)(TensorBase&, const TensorBase&, int64_t); DECLARE_DISPATCH(channel_shuffle_fn, channel_shuffle_kernel); -} // at::native +}} // at::native diff --git a/aten/src/ATen/native/cpu/CopyKernel.h b/aten/src/ATen/native/cpu/CopyKernel.h index 3378e16f93d23e..9d2affd6101ab9 100644 --- a/aten/src/ATen/native/cpu/CopyKernel.h +++ b/aten/src/ATen/native/cpu/CopyKernel.h @@ -1,7 +1,5 @@ #pragma once -#include - namespace at { struct TensorIteratorBase; diff --git a/aten/src/ATen/native/cpu/DistributionTemplates.h b/aten/src/ATen/native/cpu/DistributionTemplates.h index 8171ae8e79ad2a..961c0a3811ec15 100644 --- a/aten/src/ATen/native/cpu/DistributionTemplates.h +++ b/aten/src/ATen/native/cpu/DistributionTemplates.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #ifdef CPU_CAPABILITY_AVX2 @@ -14,10 +15,10 @@ #include #endif - - - -namespace at::native::templates::cpu { +namespace at { +namespace native { +namespace templates { +namespace cpu { namespace { // ==================================================== Random 
======================================================== @@ -39,10 +40,10 @@ void random_from_to_kernel(TensorIteratorBase& iter, uint64_t range, int64_t bas template void random_full_64_bits_range_kernel(TensorIteratorBase& iter, RNG generator) { AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::BFloat16, iter.dtype(), "random_full_64_bits_range_kernel_cpu", [&] { - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) { std::lock_guard lock(generator->mutex_); cpu_serial_kernel(iter, [generator]() -> scalar_t { uniform_int_full_range_distribution random; @@ -422,4 +423,4 @@ struct BernoulliKernel { } }; -}} +}}}}} diff --git a/aten/src/ATen/native/cpu/GridSamplerKernel.h b/aten/src/ATen/native/cpu/GridSamplerKernel.h index 3d332f88fc7cbd..b1830fcd3911ec 100644 --- a/aten/src/ATen/native/cpu/GridSamplerKernel.h +++ b/aten/src/ATen/native/cpu/GridSamplerKernel.h @@ -9,7 +9,7 @@ namespace at { class TensorBase; } -namespace at::native { +namespace at { namespace native { using forward_2d_fn = void (*) ( const TensorBase &output, @@ -31,4 +31,4 @@ using backward_2d_fn = void (*) ( DECLARE_DISPATCH(forward_2d_fn, grid_sampler_2d_cpu_kernel); DECLARE_DISPATCH(backward_2d_fn, grid_sampler_2d_backward_cpu_kernel); -} // namespace at::native +}} // namespace at::native diff --git a/aten/src/ATen/native/cpu/IndexKernelUtils.h b/aten/src/ATen/native/cpu/IndexKernelUtils.h index c513d128e23421..cc19ce995da4a7 100644 --- a/aten/src/ATen/native/cpu/IndexKernelUtils.h +++ b/aten/src/ATen/native/cpu/IndexKernelUtils.h @@ -2,9 +2,11 @@ #include #include -namespace at::native { +namespace at { +namespace native { -inline bool is_constant_index(int ntensor, const int64_t* strides) { +namespace { +static bool is_constant_index(int ntensor, const int64_t* strides) { AT_ASSERT(ntensor >= 3); for (const auto arg : c10::irange(2, ntensor)) { if (strides[arg] != 0) { @@ -48,6 +50,7 @@ struct Indexer { return offset; } }; +} // anonymous namespace template void cpu_index_kernel(TensorIteratorBase& iter, IntArrayRef index_size, IntArrayRef index_stride, @@ -82,4 +85,4 @@ void cpu_index_kernel(TensorIteratorBase& iter, IntArrayRef index_size, IntArray } } } // at -// native +} // native diff --git a/aten/src/ATen/native/cpu/IsContiguous.h b/aten/src/ATen/native/cpu/IsContiguous.h index ddbbb6fb8f5afc..192177cc9bcfb0 100644 --- a/aten/src/ATen/native/cpu/IsContiguous.h +++ b/aten/src/ATen/native/cpu/IsContiguous.h @@ -1,6 +1,6 @@ #pragma once -namespace at::native { inline namespace CPU_CAPABILITY { +namespace at { namespace native { inline namespace CPU_CAPABILITY { // n: number of function arguments (arity) // traits: function_traits (see FunctionTraits.h) @@ -59,4 +59,4 @@ static inline bool is_contiguous_scalar(const int64_t* strides) { return IsContiguous::eval(strides); } -}} +}}} diff --git a/aten/src/ATen/native/cpu/LogAddExp.h b/aten/src/ATen/native/cpu/LogAddExp.h index e2b80a648df6b1..c03cbebafaffbe 100644 --- a/aten/src/ATen/native/cpu/LogAddExp.h +++ b/aten/src/ATen/native/cpu/LogAddExp.h @@ -3,7 +3,7 @@ #include #include -namespace at::native { +namespace at { namespace native { inline namespace CPU_CAPABILITY { // custom min and max to be used in logcumsumexp for complex arguments @@ -58,4 +58,4 @@ c10::complex _log_add_exp_helper(const c10::complex& x, cons } } // end namespace -} //end at::native +}} //end at::native diff --git 
a/aten/src/ATen/native/cpu/PixelShuffleKernel.h b/aten/src/ATen/native/cpu/PixelShuffleKernel.h index d5eee58c1ab151..c015e674a24c59 100644 --- a/aten/src/ATen/native/cpu/PixelShuffleKernel.h +++ b/aten/src/ATen/native/cpu/PixelShuffleKernel.h @@ -5,10 +5,10 @@ namespace at { class TensorBase; } -namespace at::native { +namespace at { namespace native { using pixel_shuffle_fn = void(*)(TensorBase&, const TensorBase&, int64_t); DECLARE_DISPATCH(pixel_shuffle_fn, pixel_shuffle_kernel); DECLARE_DISPATCH(pixel_shuffle_fn, pixel_unshuffle_kernel); -} // at::native +}} // at::native diff --git a/aten/src/ATen/native/cpu/Reduce.h b/aten/src/ATen/native/cpu/Reduce.h index 62d1ee1c3ec45b..37bd32d1c4c13a 100644 --- a/aten/src/ATen/native/cpu/Reduce.h +++ b/aten/src/ATen/native/cpu/Reduce.h @@ -6,9 +6,10 @@ #include #include +#include #include -namespace at::native { inline namespace CPU_CAPABILITY { +namespace at { namespace native { inline namespace CPU_CAPABILITY { using namespace vec; @@ -33,9 +34,9 @@ inline bool is_outer_reduction(const int64_t* strides) { strides[3] == sizeof(typename traits::arg2_t); } -template +template inline void vectorized_reduction(char** data, int64_t n, int64_t stride, - func_t op [[maybe_unused]], vec_func_t vop) { + func_t op, vec_func_t vop, bool reduce) { VEC_LOOP_HEADER(func_t, data) const char* in1_ptr = data[1]; Vec acc[4]; @@ -49,7 +50,7 @@ inline void vectorized_reduction(char** data, int64_t n, int64_t stride, acc[2] = vop(acc[2], Vec::loadu(ptr + (2 * Vec::size() * sizeof(scalar_t)))); acc[3] = vop(acc[3], Vec::loadu(ptr + (3 * Vec::size() * sizeof(scalar_t)))); } - if constexpr (reduce) { + if (reduce) { scalar_t buffer[Vec::size()]; acc[0] = vop(vop(acc[0], acc[1]), vop(acc[2], acc[3])); acc[0].store(buffer); @@ -80,10 +81,10 @@ inline void UNARY_OUTER_LOOP(char* data[2], const int64_t strides[2], int64_t n, template inline void vectorized_inner_reduction(char** data, int64_t n, func_t op, vec_func_t vop) { VEC_LOOP_HEADER(func_t, data) - constexpr int64_t vector_stride = 4 * Vec::size() * sizeof(scalar_t); + int64_t vector_stride = 4 * Vec::size() * sizeof(scalar_t); int64_t count = n / (4 * Vec::size()); if (count > 0) { - vectorized_reduction(data, count, vector_stride, op, vop); + vectorized_reduction(data, count, vector_stride, op, vop, /*reduce=*/true); } char* ptrs[3] = { data[0], data[0], data[1] }; int64_t strides[] = { 0, 0, sizeof(scalar_t) }; @@ -102,7 +103,7 @@ inline void vectorized_outer_reduction(char** data, int64_t inner_stride, int64_ int64_t outer_stride[2] = { 128, 128 }; #endif UNARY_OUTER_LOOP(data, outer_stride, size1 / (4 * Vec::size()), [&] { - vectorized_reduction(data, size0, inner_stride, op, vop); + vectorized_reduction(data, size0, inner_stride, op, vop, /*reduce=*/false); }); // reduce down the remaining columns @@ -131,13 +132,13 @@ static void set_results(const res_t result, const TensorIteratorBase &iter, cons } template -inline std::enable_if_t +inline typename std::enable_if::type for_each_in_tuple(const std::tuple& /*t*/, const TensorIteratorBase& /*iter*/, const int /*num_outputs*/) { return i; } template -inline std::enable_if_t +inline typename std::enable_if::type for_each_in_tuple(const std::tuple& t, const TensorIteratorBase &iter, const int num_outputs) { if (i < (size_t)num_outputs) { set_result(i, std::get(t), iter, num_outputs); @@ -310,4 +311,4 @@ void binary_kernel_reduce_lastdim(TensorIteratorBase& iter, reduce_func_t reduce sub_iter.for_each(loop, grain_size); } -}} // namespace at::native:: +}}} // 
namespace at::native:: diff --git a/aten/src/ATen/native/cpu/ReduceUtils.h b/aten/src/ATen/native/cpu/ReduceUtils.h index fd7c4a2750a6c9..8c6424f8b0eac8 100644 --- a/aten/src/ATen/native/cpu/ReduceUtils.h +++ b/aten/src/ATen/native/cpu/ReduceUtils.h @@ -106,7 +106,7 @@ inline void _init(scalar_t* self_ptr, at::opmath_type* buffer_ptr, int } template -inline std::enable_if_t, scalar_t> +inline typename std::enable_if::value, scalar_t>::type _max(const scalar_t& x, const scalar_t& y) { return at::_isnan(y) ? y : std::max(x, y); } @@ -118,14 +118,14 @@ inline Vectorized _max(const Vectorized& x, const Vectorized } template -inline std::enable_if_t, Vec2> +inline typename std::enable_if::value, Vec2>::type _max(const vec_t& x, const vec_t& y) { // vec::maximum propagates NaN return maximum(x, y); } template -inline std::enable_if_t, scalar_t> +inline typename std::enable_if::value, scalar_t>::type _min(const scalar_t& x, const scalar_t& y) { return at::_isnan(y) ? y : std::min(x, y); } @@ -137,7 +137,7 @@ inline Vectorized _min(const Vectorized& x, const Vectorized } template -inline std::enable_if_t, Vec2> +inline typename std::enable_if::value, Vec2>::type _min(const vec_t& x, const vec_t& y) { // vec::minimum propagates NaN return minimum(x, y); diff --git a/aten/src/ATen/native/cpu/SampledAddmmKernel.h b/aten/src/ATen/native/cpu/SampledAddmmKernel.h index e1d75b17698c2e..04dba4b9b61ced 100644 --- a/aten/src/ATen/native/cpu/SampledAddmmKernel.h +++ b/aten/src/ATen/native/cpu/SampledAddmmKernel.h @@ -3,10 +3,10 @@ #include #include -namespace at::native { +namespace at { namespace native { using sampled_addmm_sparse_csr_fn = void(*)(const Tensor&, const Tensor&, const Scalar&, const Scalar&, const Tensor&); DECLARE_DISPATCH(sampled_addmm_sparse_csr_fn, sampled_addmm_sparse_csr_stub); -} // at::native +}} // at::native diff --git a/aten/src/ATen/native/cpu/SerialStackImpl.h b/aten/src/ATen/native/cpu/SerialStackImpl.h index 88ba1c91b6c8cb..57d0dd73daf4b8 100644 --- a/aten/src/ATen/native/cpu/SerialStackImpl.h +++ b/aten/src/ATen/native/cpu/SerialStackImpl.h @@ -10,7 +10,7 @@ #include #include -namespace at::native::detail { +namespace at { namespace native { namespace detail { struct InputMeta { void* data_ptr; @@ -143,4 +143,4 @@ struct CanUseNativeSerialStack { } }; -} // namespace at::native::detail +}}} // namespace at::native::detail diff --git a/aten/src/ATen/native/cpu/StackKernel.h b/aten/src/ATen/native/cpu/StackKernel.h index 6c96d83b9eaa03..4e9a45e4dd12ba 100644 --- a/aten/src/ATen/native/cpu/StackKernel.h +++ b/aten/src/ATen/native/cpu/StackKernel.h @@ -4,9 +4,9 @@ #include #include -namespace at::native { +namespace at { namespace native { using stack_serial_fn = void(*)(Tensor &, TensorList, int64_t); DECLARE_DISPATCH(stack_serial_fn, stack_serial_stub); -} // namespace at::native +}} // namespace at::native diff --git a/aten/src/ATen/native/cpu/WeightNormKernel.h b/aten/src/ATen/native/cpu/WeightNormKernel.h index 1fd8c75cc73b30..6e1f3ec3b02917 100644 --- a/aten/src/ATen/native/cpu/WeightNormKernel.h +++ b/aten/src/ATen/native/cpu/WeightNormKernel.h @@ -6,7 +6,7 @@ namespace at { class TensorBase; } -namespace at::native { +namespace at { namespace native { using weight_norm_fn = void(*)( TensorBase&, TensorBase&, const TensorBase&, const TensorBase&, int64_t); @@ -17,4 +17,4 @@ using weight_norm_backward_fn = void(*)( DECLARE_DISPATCH(weight_norm_fn, weight_norm_stub); DECLARE_DISPATCH(weight_norm_backward_fn, weight_norm_backward_stub); -} // namespace at::native +}} // 
namespace at::native diff --git a/aten/src/ATen/native/cpu/mixed_data_type.h b/aten/src/ATen/native/cpu/mixed_data_type.h index 13244af3b34a0f..ef598b281a905d 100644 --- a/aten/src/ATen/native/cpu/mixed_data_type.h +++ b/aten/src/ATen/native/cpu/mixed_data_type.h @@ -2,7 +2,7 @@ #include -namespace at::native { +namespace at { namespace native { inline ScalarType first_type() { return ScalarType::Undefined; @@ -38,4 +38,4 @@ inline ScalarType param_scalar_type(const Tensor& t, bool is_mixed_type) { return is_mixed_type ? ScalarType::Float : t.scalar_type(); } -} // namespace at::native +}} // namespace at::native diff --git a/aten/src/ATen/native/cpu/moments_utils.h b/aten/src/ATen/native/cpu/moments_utils.h index 6f403d60ea7c09..f5337f5ff4ebe4 100644 --- a/aten/src/ATen/native/cpu/moments_utils.h +++ b/aten/src/ATen/native/cpu/moments_utils.h @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include #include @@ -11,7 +13,8 @@ #include #include -namespace at::native { +namespace at { +namespace native { inline namespace CPU_CAPABILITY { template using opmath_t = at::opmath_type; @@ -53,7 +56,7 @@ C10_ALWAYS_INLINE void AddMomentsVec( } template -inline std::enable_if_t>, void> +inline typename std::enable_if>::value, void>::type UpdateMomentsVec( int64_t m0, const T* X_ptr, @@ -76,7 +79,7 @@ UpdateMomentsVec( // each bfloat16/half vector will be converted to two float vectors, // and accumulated successively on m1_stk0/m2_stk0. template -inline std::enable_if_t>, void> +inline typename std::enable_if>::value, void>::type UpdateMomentsVec( int64_t m0, const T* X_ptr, @@ -199,4 +202,5 @@ std::pair, opmath_t> RowwiseMoments(const T* X, int64_t N, int64_ } } // namespace CPU_CAPABILITY -} // namespace at::native +} // namespace native +} // namespace at diff --git a/aten/src/ATen/native/cpu/utils.h b/aten/src/ATen/native/cpu/utils.h index 9fa62a3a5aaeaa..641ac0cd061254 100644 --- a/aten/src/ATen/native/cpu/utils.h +++ b/aten/src/ATen/native/cpu/utils.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -9,7 +8,8 @@ #include #endif -namespace at::native { +namespace at { +namespace native { template inline void _store(T* dst, at::vec::Vectorized src) { @@ -194,4 +194,5 @@ inline void parallel_sparse_csr( } // namespace utils -} // namespace at::native +} // namespace native +} // namespace at diff --git a/aten/src/ATen/native/cpu/zmath.h b/aten/src/ATen/native/cpu/zmath.h index 2b4f44db085c99..9b52039e84f918 100644 --- a/aten/src/ATen/native/cpu/zmath.h +++ b/aten/src/ATen/native/cpu/zmath.h @@ -5,7 +5,7 @@ #include #include -namespace at::native { +namespace at { namespace native { inline namespace CPU_CAPABILITY { template @@ -247,4 +247,4 @@ inline TYPE min_impl (TYPE a, TYPE b) { } } // end namespace -} //end at::native +}} //end at::native
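
Note for readers skimming the hunks above: most of this revert swaps C++17 spellings (nested namespace definitions, `std::is_same_v`, `std::enable_if_t`) back to their pre-C++17 equivalents (nested namespace braces, `std::is_same<...>::value`, `typename std::enable_if<...>::type`). The standalone sketch below is not code from this patch — the `demo::twice` helpers are hypothetical — it only shows the two spellings side by side; both compile to the same thing, only the source form differs.

// Hypothetical example, not part of the patch: the same trivial helper written
// in the style introduced by the reverted PR and in the style this revert restores.
#include <iostream>
#include <type_traits>

// --- Style introduced by the reverted PR (C++17) ---------------------------
namespace demo::modern {                        // nested namespace definition

template <typename T>
std::enable_if_t<std::is_integral_v<T>, T>      // enable_if_t / is_integral_v
twice(T x) {
  return x + x;
}

} // namespace demo::modern

// --- Style restored by this revert (pre-C++17 spellings) -------------------
namespace demo { namespace legacy {             // nested braces

template <typename T>
typename std::enable_if<std::is_integral<T>::value, T>::type
twice(T x) {
  return x + x;
}

}} // namespace demo::legacy

int main() {
  // Both forms behave identically; prints "42 42".
  std::cout << demo::modern::twice(21) << ' ' << demo::legacy::twice(21) << '\n';
  return 0;
}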