forked from NVIDIA/cccl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8244738
commit 71c6652
Showing
5 changed files
with
337 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,69 @@ | ||
# Re-find cudax (already located) to bring its version variables into scope,
# then locate the Thrust package pinned to the exact same CCCL version.
find_package(cudax) # already found, bring in version info.
# REQUIRED: fail at configure time with a clear message if Thrust is missing,
# instead of erroring later inside thrust_create_target().
find_package(Thrust ${cudax_VERSION} EXACT CONFIG REQUIRED
  NO_DEFAULT_PATH # Only check the explicit path in HINTS:
  HINTS "${CCCL_SOURCE_DIR}/lib/cmake/thrust/"
)
# Interface target that all cudax examples link against for Thrust:
thrust_create_target(cudax.examples.thrust)
||
# cudax_add_example: create an executable target for one example source file.
#
# Arguments:
#   target_name_var - out-variable: receives the full name of the created target.
#   example_src     - the example's source file (a .cu path).
#   cudax_target    - the cudax configuration target to build against.
#
# The example links against the given configuration and cudax.examples.thrust,
# is attached to the per-configuration and per-example meta targets, and is
# registered with CTest.
function(cudax_add_example target_name_var example_src cudax_target)
  cudax_get_target_property(config_prefix ${cudax_target} PREFIX)
  cudax_get_target_property(config_dialect ${cudax_target} DIALECT)

  get_filename_component(example_name "${example_src}" NAME_WE)

  # The actual name of the test's target:
  set(example_target ${config_prefix}.example.${example_name})
  set(${target_name_var} ${example_target} PARENT_SCOPE)

  # Related target names:
  set(config_meta_target ${config_prefix}.examples)
  set(example_meta_target cudax.all.example.${example_name})

  add_executable(${example_target} "${example_src}")
  cccl_configure_target(${example_target} DIALECT ${config_dialect})
  target_link_libraries(${example_target} PRIVATE
    ${cudax_target}
    cudax.examples.thrust
  )
  cudax_clone_target_properties(${example_target} ${cudax_target})
  # NOTE(review): this points at CUB's examples directory — looks copy-pasted
  # from CUB's build; confirm whether "${cudax_SOURCE_DIR}/examples" was meant.
  target_include_directories(${example_target} PRIVATE "${CUB_SOURCE_DIR}/examples")

  # Add to the active configuration's meta target
  add_dependencies(${config_meta_target} ${example_target})

  # Meta target that builds examples with this name for all configurations:
  if (NOT TARGET ${example_meta_target})
    add_custom_target(${example_meta_target})
  endif()
  add_dependencies(${example_meta_target} ${example_target})

  # Passing the target name as COMMAND lets CMake substitute the built
  # executable's location (equivalent to $<TARGET_FILE:...>) automatically.
  add_test(NAME ${example_target} COMMAND ${example_target})
endfunction()
|
||
# Gather all example sources (*.cu) in this directory, as paths relative to
# the examples dir. CONFIGURE_DEPENDS re-checks the glob on each build so
# newly added examples trigger a reconfigure.
file(GLOB example_srcs
  CONFIGURE_DEPENDS
  RELATIVE "${cudax_SOURCE_DIR}/examples"
  *.cu
)
|
||
# Instantiate every example once for each cudax configuration.
foreach(cudax_target IN LISTS cudax_TARGETS)
  cudax_get_target_property(config_prefix ${cudax_target} PREFIX)

  # Meta target collecting all of this configuration's examples,
  # hooked into the configuration's ".all" target:
  set(config_meta_target ${config_prefix}.examples)
  add_custom_target(${config_meta_target})
  add_dependencies(${config_prefix}.all ${config_meta_target})

  foreach(example_src IN LISTS example_srcs)
    cudax_add_example(example_target "${example_src}" ${cudax_target})
  endforeach()
endforeach()
|
||
# FIXME: Enable MSVC
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" AND
    NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
  # STF examples are handled separately:
  add_subdirectory(stf)
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of CUDA Experimental in CUDA C++ Core Libraries, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef _CUDAX__CONTAINER_VECTOR | ||
#define _CUDAX__CONTAINER_VECTOR | ||
|
||
#include <cuda/__cccl_config> | ||
|
||
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
# pragma GCC system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
# pragma clang system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
# pragma system_header | ||
#endif // no system header | ||
|
||
#include <thrust/device_vector.h> | ||
#include <thrust/host_vector.h> | ||
|
||
#include <cuda/std/__type_traits/maybe_const.h> | ||
#include <cuda/std/span> | ||
#include <cuda/stream_ref> | ||
|
||
#include <cuda/experimental/__detail/utility.cuh> | ||
#include <cuda/experimental/__launch/param_kind.cuh> | ||
|
||
#if _CCCL_STD_VER >= 2017 | ||
namespace cuda::experimental | ||
{ | ||
using ::cuda::std::span; | ||
using ::thrust::device_vector; | ||
using ::thrust::host_vector; | ||
|
||
// A host/device mirrored vector for use with cudax::launch.
//
// Element data lives in a thrust::host_vector (__h_) and is mirrored into a
// thrust::device_vector (__d_) lazily: a dirty flag tracks host-side writes,
// and the hidden __cudax_launch_transform friends upload/download around each
// kernel launch based on the parameter direction (in / out / inout).
template <typename _Ty>
class vector
{
public:
  vector() = default;

  // Constructs a vector of __n elements on the host; the device mirror is
  // allocated/filled lazily on the first launch.
  explicit vector(size_t __n)
      : __h_(__n)
  {}

  // Mutable access marks the host copy dirty so the next launch re-uploads
  // the data to the device.
  _Ty& operator[](size_t __i) noexcept
  {
    __dirty_ = true;
    return __h_[__i];
  }

  // Const access does not touch the dirty flag.
  const _Ty& operator[](size_t __i) const noexcept
  {
    return __h_[__i];
  }

private:
  // Uploads host data to the device if the host copy was modified. For
  // write-only (_out) parameters only the device allocation is sized; no
  // copy is performed.
  void sync_host_to_device(::cuda::stream_ref __str, detail::__param_kind __p) const
  {
    (void) __str;
    if (__dirty_)
    {
      if (__p == detail::__param_kind::_out)
      {
        // There's no need to copy the data from host to device if the data is
        // only going to be written to. We can just allocate the device memory.
        __d_.resize(__h_.size());
      }
      else
      {
        // TODO: use a memcpy async here
        __d_ = __h_;
      }
      __dirty_ = false;
    }
  }

  // Downloads device results to the host unless the parameter was input-only.
  // Waits on the stream first so the kernel has finished producing results.
  void sync_device_to_host(::cuda::stream_ref __str, detail::__param_kind __p) const
  {
    if (__p != detail::__param_kind::_in)
    {
      // TODO: use a memcpy async here
      __str.wait(); // wait for the kernel to finish executing
      __h_ = __d_;
    }
  }

  // Scope guard produced by __cudax_launch_transform: syncs host->device on
  // construction and device->host on destruction, and converts to the span
  // that is actually passed to the kernel.
  template <detail::__param_kind _Kind>
  class __action //: private detail::__immovable
  {
    // Input-only actions bind the vector as const.
    using __cv_vector = ::cuda::std::__maybe_const<_Kind == detail::__param_kind::_in, vector>;

  public:
    explicit __action(::cuda::stream_ref __str, __cv_vector& __v) noexcept
        : __str_(__str)
        , __v_(__v)
    {
      __v_.sync_host_to_device(__str_, _Kind);
    }

    __action(__action&&) = delete;

    ~__action()
    {
      __v_.sync_device_to_host(__str_, _Kind);
    }

    using __as_kernel_arg = ::cuda::std::span<_Ty>;

    // The kernel receives a span over the device-side storage.
    operator ::cuda::std::span<_Ty>()
    {
      return {__v_.__d_.data().get(), __v_.__d_.size()};
    }

  private:
    ::cuda::stream_ref __str_;
    __cv_vector& __v_;
  };

  // Launch-transform hooks: a mutable vector is treated as in/out...
  _CCCL_NODISCARD_FRIEND __action<detail::__param_kind::_inout>
  __cudax_launch_transform(::cuda::stream_ref __str, vector& __v) noexcept
  {
    return __action<detail::__param_kind::_inout>{__str, __v};
  }

  // ...a const vector as input-only...
  _CCCL_NODISCARD_FRIEND __action<detail::__param_kind::_in>
  __cudax_launch_transform(::cuda::stream_ref __str, const vector& __v) noexcept
  {
    return __action<detail::__param_kind::_in>{__str, __v};
  }

  // ...and a boxed vector (presumably produced by cudax::in/cudax::out —
  // confirm against param_kind.cuh) uses the box's explicit kind.
  template <detail::__param_kind _Kind>
  _CCCL_NODISCARD_FRIEND __action<_Kind>
  __cudax_launch_transform(::cuda::stream_ref __str, detail::__box<vector, _Kind> __b) noexcept
  {
    return __action<_Kind>{__str, __b.__val};
  }

  // mutable: the const (input-only) launch path still must upload data and
  // clear the dirty flag from a const vector.
  mutable host_vector<_Ty> __h_;
  mutable device_vector<_Ty> __d_{};
  mutable bool __dirty_ = true;
};
|
||
} // namespace cuda::experimental | ||
|
||
#endif | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions | ||
* are met: | ||
* * Redistributions of source code must retain the above copyright | ||
* notice, this list of conditions and the following disclaimer. | ||
* * Redistributions in binary form must reproduce the above copyright | ||
* notice, this list of conditions and the following disclaimer in the | ||
* documentation and/or other materials provided with the distribution. | ||
* * Neither the name of NVIDIA CORPORATION nor the names of its | ||
* contributors may be used to endorse or promote products derived | ||
* from this software without specific prior written permission. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
*/ | ||
|
||
/** | ||
* Vector addition: C = A + B. | ||
* | ||
* This sample is a very basic sample that implements element by element | ||
* vector addition. It is the same as the sample illustrating Chapter 2 | ||
* of the programming guide with some additions like error checking. | ||
*/ | ||
|
||
#include <stdio.h> | ||
|
||
// For the CUDA runtime routines (prefixed with "cuda_") | ||
#include <cuda_runtime.h> | ||
|
||
#include <cuda/std/span> | ||
|
||
#include <cuda/experimental/launch.cuh> | ||
#include <cuda/experimental/stream.cuh> | ||
|
||
#include "vector.cuh" | ||
|
||
namespace cudax = cuda::experimental; | ||
using cudax::in; | ||
using cudax::out; | ||
|
||
/** | ||
* CUDA Kernel Device code | ||
* | ||
* Computes the vector addition of A and B into C. The 3 vectors have the same | ||
* number of elements numElements. | ||
*/ | ||
__global__ void vectorAdd(cudax::span<const float> A, cudax::span<const float> B, cudax::span<float> C) | ||
{ | ||
int i = blockDim.x * blockIdx.x + threadIdx.x; | ||
|
||
if (i < A.size()) | ||
{ | ||
C[i] = A[i] + B[i] + 0.0f; | ||
} | ||
} | ||
|
||
/** | ||
* Host main routine | ||
*/ | ||
int main(void) | ||
try | ||
{ | ||
// A CUDA stream on which to execute the vector addition kernel | ||
cudax::stream stream(cudax::devices[0]); | ||
|
||
// Print the vector length to be used, and compute its size | ||
int numElements = 50000; | ||
printf("[Vector addition of %d elements]\n", numElements); | ||
|
||
// Allocate the host vectors | ||
cudax::vector<float> A(numElements); // input | ||
cudax::vector<float> B(numElements); // input | ||
cudax::vector<float> C(numElements); // output | ||
|
||
// Initialize the host input vectors | ||
for (int i = 0; i < numElements; ++i) | ||
{ | ||
A[i] = rand() / (float) RAND_MAX; | ||
B[i] = rand() / (float) RAND_MAX; | ||
} | ||
|
||
// Define the kernel launch parameters | ||
constexpr int threadsPerBlock = 256; | ||
auto dims = cudax::distribute<threadsPerBlock>(numElements); | ||
|
||
// Launch the vectorAdd kernel | ||
printf("CUDA kernel launch with %d blocks of %d threads\n", dims.count(cudax::block, cudax::grid), threadsPerBlock); | ||
cudax::launch(stream, dims, vectorAdd, in(A), in(B), out(C)); | ||
|
||
printf("waiting for the stream to finish\n"); | ||
stream.wait(); | ||
|
||
printf("veryfying the results\n"); | ||
// Verify that the result vector is correct | ||
for (int i = 0; i < numElements; ++i) | ||
{ | ||
if (fabs(A[i] + B[i] - C[i]) > 1e-5) | ||
{ | ||
fprintf(stderr, "Result verification failed at element %d!\n", i); | ||
exit(EXIT_FAILURE); | ||
} | ||
} | ||
|
||
printf("Test PASSED\n"); | ||
|
||
printf("Done\n"); | ||
return 0; | ||
} | ||
catch (const std::exception& e) | ||
{ | ||
printf("caught an exception: \"%s\"\n", e.what()); | ||
} | ||
catch (...) | ||
{ | ||
printf("caught an unknown exception\n"); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters