From d46e0761caa7ed7629d55f2c5c2995fbb3861d92 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 14 Aug 2024 17:47:12 +0000 Subject: [PATCH] Revert "[11/N] Fix clang-tidy warnings in aten/src/ATen (#133298)" This reverts commit 35785984013a74469de8c1d29eaecb25aa0c141e. Reverted https://github.com/pytorch/pytorch/pull/133298 on behalf of https://github.com/izaitsevfb due to causes build time regression in aten/src/ATen/native/cpu/ReduceOpsKernel.cpp ([comment](https://github.com/pytorch/pytorch/pull/133298#issuecomment-2289453440)) --- aten/src/ATen/native/cpu/CatKernel.h | 4 ++-- .../ATen/native/cpu/ChannelShuffleKernel.h | 4 ++-- aten/src/ATen/native/cpu/CopyKernel.h | 2 -- .../ATen/native/cpu/DistributionTemplates.h | 19 +++++++++-------- aten/src/ATen/native/cpu/GridSamplerKernel.h | 4 ++-- aten/src/ATen/native/cpu/IndexKernelUtils.h | 9 +++++--- aten/src/ATen/native/cpu/IsContiguous.h | 4 ++-- aten/src/ATen/native/cpu/LogAddExp.h | 4 ++-- aten/src/ATen/native/cpu/PixelShuffleKernel.h | 4 ++-- aten/src/ATen/native/cpu/Reduce.h | 21 ++++++++++--------- aten/src/ATen/native/cpu/ReduceUtils.h | 8 +++---- aten/src/ATen/native/cpu/SampledAddmmKernel.h | 4 ++-- aten/src/ATen/native/cpu/SerialStackImpl.h | 4 ++-- aten/src/ATen/native/cpu/StackKernel.h | 4 ++-- aten/src/ATen/native/cpu/WeightNormKernel.h | 4 ++-- aten/src/ATen/native/cpu/mixed_data_type.h | 4 ++-- aten/src/ATen/native/cpu/moments_utils.h | 12 +++++++---- aten/src/ATen/native/cpu/utils.h | 7 ++++--- aten/src/ATen/native/cpu/zmath.h | 4 ++-- 19 files changed, 67 insertions(+), 59 deletions(-) diff --git a/aten/src/ATen/native/cpu/CatKernel.h b/aten/src/ATen/native/cpu/CatKernel.h index 5afa1add4da3f8..aedb4aec4f5747 100644 --- a/aten/src/ATen/native/cpu/CatKernel.h +++ b/aten/src/ATen/native/cpu/CatKernel.h @@ -4,9 +4,9 @@ #include #include -namespace at::native { +namespace at { namespace native { using cat_serial_fn = void(*)(const Tensor &, const MaterializedITensorListRef&, int64_t); DECLARE_DISPATCH(cat_serial_fn, cat_serial_stub); -} // namespace at::native +}} // namespace at::native diff --git a/aten/src/ATen/native/cpu/ChannelShuffleKernel.h b/aten/src/ATen/native/cpu/ChannelShuffleKernel.h index 387c301c25f030..10e592cf59eb75 100644 --- a/aten/src/ATen/native/cpu/ChannelShuffleKernel.h +++ b/aten/src/ATen/native/cpu/ChannelShuffleKernel.h @@ -6,9 +6,9 @@ namespace at { class TensorBase; } -namespace at::native { +namespace at { namespace native { using channel_shuffle_fn = void(*)(TensorBase&, const TensorBase&, int64_t); DECLARE_DISPATCH(channel_shuffle_fn, channel_shuffle_kernel); -} // at::native +}} // at::native diff --git a/aten/src/ATen/native/cpu/CopyKernel.h b/aten/src/ATen/native/cpu/CopyKernel.h index 3378e16f93d23e..9d2affd6101ab9 100644 --- a/aten/src/ATen/native/cpu/CopyKernel.h +++ b/aten/src/ATen/native/cpu/CopyKernel.h @@ -1,7 +1,5 @@ #pragma once -#include - namespace at { struct TensorIteratorBase; diff --git a/aten/src/ATen/native/cpu/DistributionTemplates.h b/aten/src/ATen/native/cpu/DistributionTemplates.h index 8171ae8e79ad2a..961c0a3811ec15 100644 --- a/aten/src/ATen/native/cpu/DistributionTemplates.h +++ b/aten/src/ATen/native/cpu/DistributionTemplates.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #ifdef CPU_CAPABILITY_AVX2 @@ -14,10 +15,10 @@ #include #endif - - - -namespace at::native::templates::cpu { +namespace at { +namespace native { +namespace templates { +namespace cpu { namespace { // ==================================================== Random 
======================================================== @@ -39,10 +40,10 @@ void random_from_to_kernel(TensorIteratorBase& iter, uint64_t range, int64_t bas template void random_full_64_bits_range_kernel(TensorIteratorBase& iter, RNG generator) { AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::BFloat16, iter.dtype(), "random_full_64_bits_range_kernel_cpu", [&] { - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) { std::lock_guard lock(generator->mutex_); cpu_serial_kernel(iter, [generator]() -> scalar_t { uniform_int_full_range_distribution random; @@ -422,4 +423,4 @@ struct BernoulliKernel { } }; -}} +}}}}} diff --git a/aten/src/ATen/native/cpu/GridSamplerKernel.h b/aten/src/ATen/native/cpu/GridSamplerKernel.h index 3d332f88fc7cbd..b1830fcd3911ec 100644 --- a/aten/src/ATen/native/cpu/GridSamplerKernel.h +++ b/aten/src/ATen/native/cpu/GridSamplerKernel.h @@ -9,7 +9,7 @@ namespace at { class TensorBase; } -namespace at::native { +namespace at { namespace native { using forward_2d_fn = void (*) ( const TensorBase &output, @@ -31,4 +31,4 @@ using backward_2d_fn = void (*) ( DECLARE_DISPATCH(forward_2d_fn, grid_sampler_2d_cpu_kernel); DECLARE_DISPATCH(backward_2d_fn, grid_sampler_2d_backward_cpu_kernel); -} // namespace at::native +}} // namespace at::native diff --git a/aten/src/ATen/native/cpu/IndexKernelUtils.h b/aten/src/ATen/native/cpu/IndexKernelUtils.h index c513d128e23421..cc19ce995da4a7 100644 --- a/aten/src/ATen/native/cpu/IndexKernelUtils.h +++ b/aten/src/ATen/native/cpu/IndexKernelUtils.h @@ -2,9 +2,11 @@ #include #include -namespace at::native { +namespace at { +namespace native { -inline bool is_constant_index(int ntensor, const int64_t* strides) { +namespace { +static bool is_constant_index(int ntensor, const int64_t* strides) { AT_ASSERT(ntensor >= 3); for (const auto arg : c10::irange(2, ntensor)) { if (strides[arg] != 0) { @@ -48,6 +50,7 @@ struct Indexer { return offset; } }; +} // anonymous namespace template void cpu_index_kernel(TensorIteratorBase& iter, IntArrayRef index_size, IntArrayRef index_stride, @@ -82,4 +85,4 @@ void cpu_index_kernel(TensorIteratorBase& iter, IntArrayRef index_size, IntArray } } } // at -// native +} // native diff --git a/aten/src/ATen/native/cpu/IsContiguous.h b/aten/src/ATen/native/cpu/IsContiguous.h index ddbbb6fb8f5afc..192177cc9bcfb0 100644 --- a/aten/src/ATen/native/cpu/IsContiguous.h +++ b/aten/src/ATen/native/cpu/IsContiguous.h @@ -1,6 +1,6 @@ #pragma once -namespace at::native { inline namespace CPU_CAPABILITY { +namespace at { namespace native { inline namespace CPU_CAPABILITY { // n: number of function arguments (arity) // traits: function_traits (see FunctionTraits.h) @@ -59,4 +59,4 @@ static inline bool is_contiguous_scalar(const int64_t* strides) { return IsContiguous::eval(strides); } -}} +}}} diff --git a/aten/src/ATen/native/cpu/LogAddExp.h b/aten/src/ATen/native/cpu/LogAddExp.h index e2b80a648df6b1..c03cbebafaffbe 100644 --- a/aten/src/ATen/native/cpu/LogAddExp.h +++ b/aten/src/ATen/native/cpu/LogAddExp.h @@ -3,7 +3,7 @@ #include #include -namespace at::native { +namespace at { namespace native { inline namespace CPU_CAPABILITY { // custom min and max to be used in logcumsumexp for complex arguments @@ -58,4 +58,4 @@ c10::complex _log_add_exp_helper(const c10::complex& x, cons } } // end namespace -} //end at::native +}} //end at::native diff --git 
a/aten/src/ATen/native/cpu/PixelShuffleKernel.h b/aten/src/ATen/native/cpu/PixelShuffleKernel.h index d5eee58c1ab151..c015e674a24c59 100644 --- a/aten/src/ATen/native/cpu/PixelShuffleKernel.h +++ b/aten/src/ATen/native/cpu/PixelShuffleKernel.h @@ -5,10 +5,10 @@ namespace at { class TensorBase; } -namespace at::native { +namespace at { namespace native { using pixel_shuffle_fn = void(*)(TensorBase&, const TensorBase&, int64_t); DECLARE_DISPATCH(pixel_shuffle_fn, pixel_shuffle_kernel); DECLARE_DISPATCH(pixel_shuffle_fn, pixel_unshuffle_kernel); -} // at::native +}} // at::native diff --git a/aten/src/ATen/native/cpu/Reduce.h b/aten/src/ATen/native/cpu/Reduce.h index 62d1ee1c3ec45b..37bd32d1c4c13a 100644 --- a/aten/src/ATen/native/cpu/Reduce.h +++ b/aten/src/ATen/native/cpu/Reduce.h @@ -6,9 +6,10 @@ #include #include +#include #include -namespace at::native { inline namespace CPU_CAPABILITY { +namespace at { namespace native { inline namespace CPU_CAPABILITY { using namespace vec; @@ -33,9 +34,9 @@ inline bool is_outer_reduction(const int64_t* strides) { strides[3] == sizeof(typename traits::arg2_t); } -template +template inline void vectorized_reduction(char** data, int64_t n, int64_t stride, - func_t op [[maybe_unused]], vec_func_t vop) { + func_t op, vec_func_t vop, bool reduce) { VEC_LOOP_HEADER(func_t, data) const char* in1_ptr = data[1]; Vec acc[4]; @@ -49,7 +50,7 @@ inline void vectorized_reduction(char** data, int64_t n, int64_t stride, acc[2] = vop(acc[2], Vec::loadu(ptr + (2 * Vec::size() * sizeof(scalar_t)))); acc[3] = vop(acc[3], Vec::loadu(ptr + (3 * Vec::size() * sizeof(scalar_t)))); } - if constexpr (reduce) { + if (reduce) { scalar_t buffer[Vec::size()]; acc[0] = vop(vop(acc[0], acc[1]), vop(acc[2], acc[3])); acc[0].store(buffer); @@ -80,10 +81,10 @@ inline void UNARY_OUTER_LOOP(char* data[2], const int64_t strides[2], int64_t n, template inline void vectorized_inner_reduction(char** data, int64_t n, func_t op, vec_func_t vop) { VEC_LOOP_HEADER(func_t, data) - constexpr int64_t vector_stride = 4 * Vec::size() * sizeof(scalar_t); + int64_t vector_stride = 4 * Vec::size() * sizeof(scalar_t); int64_t count = n / (4 * Vec::size()); if (count > 0) { - vectorized_reduction(data, count, vector_stride, op, vop); + vectorized_reduction(data, count, vector_stride, op, vop, /*reduce=*/true); } char* ptrs[3] = { data[0], data[0], data[1] }; int64_t strides[] = { 0, 0, sizeof(scalar_t) }; @@ -102,7 +103,7 @@ inline void vectorized_outer_reduction(char** data, int64_t inner_stride, int64_ int64_t outer_stride[2] = { 128, 128 }; #endif UNARY_OUTER_LOOP(data, outer_stride, size1 / (4 * Vec::size()), [&] { - vectorized_reduction(data, size0, inner_stride, op, vop); + vectorized_reduction(data, size0, inner_stride, op, vop, /*reduce=*/false); }); // reduce down the remaining columns @@ -131,13 +132,13 @@ static void set_results(const res_t result, const TensorIteratorBase &iter, cons } template -inline std::enable_if_t +inline typename std::enable_if::type for_each_in_tuple(const std::tuple& /*t*/, const TensorIteratorBase& /*iter*/, const int /*num_outputs*/) { return i; } template -inline std::enable_if_t +inline typename std::enable_if::type for_each_in_tuple(const std::tuple& t, const TensorIteratorBase &iter, const int num_outputs) { if (i < (size_t)num_outputs) { set_result(i, std::get(t), iter, num_outputs); @@ -310,4 +311,4 @@ void binary_kernel_reduce_lastdim(TensorIteratorBase& iter, reduce_func_t reduce sub_iter.for_each(loop, grain_size); } -}} // namespace at::native:: +}}} // 
namespace at::native:: diff --git a/aten/src/ATen/native/cpu/ReduceUtils.h b/aten/src/ATen/native/cpu/ReduceUtils.h index fd7c4a2750a6c9..8c6424f8b0eac8 100644 --- a/aten/src/ATen/native/cpu/ReduceUtils.h +++ b/aten/src/ATen/native/cpu/ReduceUtils.h @@ -106,7 +106,7 @@ inline void _init(scalar_t* self_ptr, at::opmath_type* buffer_ptr, int } template -inline std::enable_if_t, scalar_t> +inline typename std::enable_if::value, scalar_t>::type _max(const scalar_t& x, const scalar_t& y) { return at::_isnan(y) ? y : std::max(x, y); } @@ -118,14 +118,14 @@ inline Vectorized _max(const Vectorized& x, const Vectorized } template -inline std::enable_if_t, Vec2> +inline typename std::enable_if::value, Vec2>::type _max(const vec_t& x, const vec_t& y) { // vec::maximum propagates NaN return maximum(x, y); } template -inline std::enable_if_t, scalar_t> +inline typename std::enable_if::value, scalar_t>::type _min(const scalar_t& x, const scalar_t& y) { return at::_isnan(y) ? y : std::min(x, y); } @@ -137,7 +137,7 @@ inline Vectorized _min(const Vectorized& x, const Vectorized } template -inline std::enable_if_t, Vec2> +inline typename std::enable_if::value, Vec2>::type _min(const vec_t& x, const vec_t& y) { // vec::minimum propagates NaN return minimum(x, y); diff --git a/aten/src/ATen/native/cpu/SampledAddmmKernel.h b/aten/src/ATen/native/cpu/SampledAddmmKernel.h index e1d75b17698c2e..04dba4b9b61ced 100644 --- a/aten/src/ATen/native/cpu/SampledAddmmKernel.h +++ b/aten/src/ATen/native/cpu/SampledAddmmKernel.h @@ -3,10 +3,10 @@ #include #include -namespace at::native { +namespace at { namespace native { using sampled_addmm_sparse_csr_fn = void(*)(const Tensor&, const Tensor&, const Scalar&, const Scalar&, const Tensor&); DECLARE_DISPATCH(sampled_addmm_sparse_csr_fn, sampled_addmm_sparse_csr_stub); -} // at::native +}} // at::native diff --git a/aten/src/ATen/native/cpu/SerialStackImpl.h b/aten/src/ATen/native/cpu/SerialStackImpl.h index 88ba1c91b6c8cb..57d0dd73daf4b8 100644 --- a/aten/src/ATen/native/cpu/SerialStackImpl.h +++ b/aten/src/ATen/native/cpu/SerialStackImpl.h @@ -10,7 +10,7 @@ #include #include -namespace at::native::detail { +namespace at { namespace native { namespace detail { struct InputMeta { void* data_ptr; @@ -143,4 +143,4 @@ struct CanUseNativeSerialStack { } }; -} // namespace at::native::detail +}}} // namespace at::native::detail diff --git a/aten/src/ATen/native/cpu/StackKernel.h b/aten/src/ATen/native/cpu/StackKernel.h index 6c96d83b9eaa03..4e9a45e4dd12ba 100644 --- a/aten/src/ATen/native/cpu/StackKernel.h +++ b/aten/src/ATen/native/cpu/StackKernel.h @@ -4,9 +4,9 @@ #include #include -namespace at::native { +namespace at { namespace native { using stack_serial_fn = void(*)(Tensor &, TensorList, int64_t); DECLARE_DISPATCH(stack_serial_fn, stack_serial_stub); -} // namespace at::native +}} // namespace at::native diff --git a/aten/src/ATen/native/cpu/WeightNormKernel.h b/aten/src/ATen/native/cpu/WeightNormKernel.h index 1fd8c75cc73b30..6e1f3ec3b02917 100644 --- a/aten/src/ATen/native/cpu/WeightNormKernel.h +++ b/aten/src/ATen/native/cpu/WeightNormKernel.h @@ -6,7 +6,7 @@ namespace at { class TensorBase; } -namespace at::native { +namespace at { namespace native { using weight_norm_fn = void(*)( TensorBase&, TensorBase&, const TensorBase&, const TensorBase&, int64_t); @@ -17,4 +17,4 @@ using weight_norm_backward_fn = void(*)( DECLARE_DISPATCH(weight_norm_fn, weight_norm_stub); DECLARE_DISPATCH(weight_norm_backward_fn, weight_norm_backward_stub); -} // namespace at::native +}} // 
namespace at::native diff --git a/aten/src/ATen/native/cpu/mixed_data_type.h b/aten/src/ATen/native/cpu/mixed_data_type.h index 13244af3b34a0f..ef598b281a905d 100644 --- a/aten/src/ATen/native/cpu/mixed_data_type.h +++ b/aten/src/ATen/native/cpu/mixed_data_type.h @@ -2,7 +2,7 @@ #include -namespace at::native { +namespace at { namespace native { inline ScalarType first_type() { return ScalarType::Undefined; @@ -38,4 +38,4 @@ inline ScalarType param_scalar_type(const Tensor& t, bool is_mixed_type) { return is_mixed_type ? ScalarType::Float : t.scalar_type(); } -} // namespace at::native +}} // namespace at::native diff --git a/aten/src/ATen/native/cpu/moments_utils.h b/aten/src/ATen/native/cpu/moments_utils.h index 6f403d60ea7c09..f5337f5ff4ebe4 100644 --- a/aten/src/ATen/native/cpu/moments_utils.h +++ b/aten/src/ATen/native/cpu/moments_utils.h @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include #include @@ -11,7 +13,8 @@ #include #include -namespace at::native { +namespace at { +namespace native { inline namespace CPU_CAPABILITY { template using opmath_t = at::opmath_type; @@ -53,7 +56,7 @@ C10_ALWAYS_INLINE void AddMomentsVec( } template -inline std::enable_if_t>, void> +inline typename std::enable_if>::value, void>::type UpdateMomentsVec( int64_t m0, const T* X_ptr, @@ -76,7 +79,7 @@ UpdateMomentsVec( // each bfloat16/half vector will be converted to two float vectors, // and accumulated successively on m1_stk0/m2_stk0. template -inline std::enable_if_t>, void> +inline typename std::enable_if>::value, void>::type UpdateMomentsVec( int64_t m0, const T* X_ptr, @@ -199,4 +202,5 @@ std::pair, opmath_t> RowwiseMoments(const T* X, int64_t N, int64_ } } // namespace CPU_CAPABILITY -} // namespace at::native +} // namespace native +} // namespace at diff --git a/aten/src/ATen/native/cpu/utils.h b/aten/src/ATen/native/cpu/utils.h index 9fa62a3a5aaeaa..641ac0cd061254 100644 --- a/aten/src/ATen/native/cpu/utils.h +++ b/aten/src/ATen/native/cpu/utils.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -9,7 +8,8 @@ #include #endif -namespace at::native { +namespace at { +namespace native { template inline void _store(T* dst, at::vec::Vectorized src) { @@ -194,4 +194,5 @@ inline void parallel_sparse_csr( } // namespace utils -} // namespace at::native +} // namespace native +} // namespace at diff --git a/aten/src/ATen/native/cpu/zmath.h b/aten/src/ATen/native/cpu/zmath.h index 2b4f44db085c99..9b52039e84f918 100644 --- a/aten/src/ATen/native/cpu/zmath.h +++ b/aten/src/ATen/native/cpu/zmath.h @@ -5,7 +5,7 @@ #include #include -namespace at::native { +namespace at { namespace native { inline namespace CPU_CAPABILITY { template @@ -247,4 +247,4 @@ inline TYPE min_impl (TYPE a, TYPE b) { } } // end namespace -} //end at::native +}} //end at::native
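
Note for readers skimming the hunks above: most of this revert swaps C++17 spellings (nested namespace definitions, `std::is_same_v`, `std::enable_if_t`) back to their pre-C++17 equivalents (nested namespace braces, `std::is_same<...>::value`, `typename std::enable_if<...>::type`). The standalone sketch below is not code from this patch — the `demo::twice` helpers are hypothetical — it only shows the two spellings side by side; both compile to the same thing, only the source form differs.

// Hypothetical example, not part of the patch: the same trivial helper written
// in the style introduced by the reverted PR and in the style this revert restores.
#include <iostream>
#include <type_traits>

// --- Style introduced by the reverted PR (C++17) ---------------------------
namespace demo::modern {                        // nested namespace definition

template <typename T>
std::enable_if_t<std::is_integral_v<T>, T>      // enable_if_t / is_integral_v
twice(T x) {
  return x + x;
}

} // namespace demo::modern

// --- Style restored by this revert (pre-C++17 spellings) -------------------
namespace demo { namespace legacy {             // nested braces

template <typename T>
typename std::enable_if<std::is_integral<T>::value, T>::type
twice(T x) {
  return x + x;
}

}} // namespace demo::legacy

int main() {
  // Both forms behave identically; prints "42 42".
  std::cout << demo::modern::twice(21) << ' ' << demo::legacy::twice(21) << '\n';
  return 0;
}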