From fe6c1ea89c0c82f7fc547bedb3c0086631fbeab3 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Fri, 27 Aug 2021 13:06:30 +0200 Subject: [PATCH 01/63] * It is no longer possible to add null pointer arguments --- Source/Tuner.inl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/Tuner.inl b/Source/Tuner.inl index a0123860..4e1ac143 100644 --- a/Source/Tuner.inl +++ b/Source/Tuner.inl @@ -64,7 +64,8 @@ ArgumentId Tuner::AddArgumentLocal(const size_t localMemorySize) template ArgumentDataType Tuner::DeriveArgumentDataType() const { - static_assert(std::is_trivially_copyable_v && !std::is_reference_v && !std::is_pointer_v, "Unsupported argument data type"); + static_assert(std::is_trivially_copyable_v && !std::is_reference_v && !std::is_pointer_v && !std::is_null_pointer_v, + "Unsupported argument data type"); static_assert(!std::is_same_v, bool>, "Bool argument data type is not supported"); if constexpr (std::is_same_v, half>) From 0be4580977b247ae8a9b671c3869489159585dd1 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Fri, 3 Sep 2021 16:29:37 +0200 Subject: [PATCH 02/63] * Added support for constant memory variables in CUDA --- Source/ComputeEngine/Cuda/CudaEngine.cpp | 8 +++-- Source/ComputeEngine/Cuda/CudaKernel.cpp | 7 +++-- Source/ComputeEngine/Cuda/CudaKernel.h | 4 ++- Source/ComputeEngine/Cuda/CudaProgram.cpp | 26 +++++++++++++++-- Source/ComputeEngine/Cuda/CudaProgram.h | 8 ++++- Source/ComputeEngine/KernelComputeData.cpp | 10 ++++++- Source/ComputeEngine/OpenCl/OpenClKernel.cpp | 1 + .../Vulkan/VulkanComputePipeline.cpp | 2 +- Source/ComputeEngine/Vulkan/VulkanEngine.cpp | 5 ++-- .../Vulkan/VulkanPushConstant.cpp | 3 +- .../Vulkan/VulkanSpecializationConstant.cpp | 3 +- Source/Kernel/KernelDefinition.cpp | 12 +------- Source/KernelArgument/ArgumentMemoryType.h | 7 ++++- Source/KernelArgument/KernelArgument.cpp | 29 ++++++++++++++++++- Source/KernelArgument/KernelArgument.h | 8 ++++- .../KernelArgument/KernelArgumentManager.cpp | 22 
+++++++++++--- Source/KernelArgument/KernelArgumentManager.h | 6 ++-- Source/Tuner.cpp | 4 +-- Source/Tuner.h | 12 +++++++- Source/Tuner.inl | 8 +++++ Source/TunerCore.cpp | 9 ++++-- Source/TunerCore.h | 3 +- 22 files changed, 156 insertions(+), 41 deletions(-) diff --git a/Source/ComputeEngine/Cuda/CudaEngine.cpp b/Source/ComputeEngine/Cuda/CudaEngine.cpp index 92b8b9b1..7542011b 100644 --- a/Source/ComputeEngine/Cuda/CudaEngine.cpp +++ b/Source/ComputeEngine/Cuda/CudaEngine.cpp @@ -669,8 +669,9 @@ std::shared_ptr CudaEngine::LoadKernel(const KernelComputeData& data return m_KernelCache.Get(id)->second; } + const auto symbolArguments = KernelArgument::GetArgumentsWithMemoryType(data.GetArguments(), ArgumentMemoryType::Symbol); auto kernel = std::make_shared(m_ComputeIdGenerator, m_Configuration, data.GetName(), data.GetSource(), - data.GetTemplatedName()); + data.GetTemplatedName(), symbolArguments); if (m_KernelCache.GetMaxSize() > 0) { @@ -686,7 +687,7 @@ std::vector CudaEngine::GetKernelArguments(const std::vectorGetMemoryType() == ArgumentMemoryType::Local) + if (argument->GetMemoryType() == ArgumentMemoryType::Local || argument->GetMemoryType() == ArgumentMemoryType::Symbol) { continue; } @@ -716,7 +717,8 @@ CUdeviceptr* CudaEngine::GetKernelArgument(KernelArgument& argument) return m_Buffers[id]->GetBuffer(); } case ArgumentMemoryType::Local: - KttError("Local memory arguments do not have CUdeviceptr representation"); + case ArgumentMemoryType::Symbol: + KttError("Local memory and symbol arguments cannot be retrieved as kernel arguments"); return nullptr; default: KttError("Unhandled argument memory type value"); diff --git a/Source/ComputeEngine/Cuda/CudaKernel.cpp b/Source/ComputeEngine/Cuda/CudaKernel.cpp index 881fddd4..89e4e05c 100644 --- a/Source/ComputeEngine/Cuda/CudaKernel.cpp +++ b/Source/ComputeEngine/Cuda/CudaKernel.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -13,7 +14,7 @@ namespace ktt { 
CudaKernel::CudaKernel(IdGenerator& generator, const EngineConfiguration& configuration, const std::string& name, - const std::string& source, const std::string& templatedName) : + const std::string& source, const std::string& templatedName, const std::vector& symbolArguments) : m_Name(name), m_Generator(generator), m_Configuration(configuration) @@ -21,7 +22,7 @@ CudaKernel::CudaKernel(IdGenerator& generator, const EngineConf Logger::LogDebug("Initializing CUDA kernel with name " + name); const auto& programName = templatedName.empty() ? name : templatedName; - m_Program = std::make_unique(programName, source); + m_Program = std::make_unique(programName, source, symbolArguments); m_Program->Build(m_Configuration.GetCompilerOptions()); const std::string ptx = m_Program->GetPtxSource(); @@ -29,6 +30,8 @@ CudaKernel::CudaKernel(IdGenerator& generator, const EngineConf const std::string loweredName = m_Program->GetLoweredName(); CheckError(cuModuleGetFunction(&m_Kernel, m_Module, loweredName.c_str()), "cuModuleGetFunction"); + + m_Program->InitializeSymbolData(*this); } CudaKernel::~CudaKernel() diff --git a/Source/ComputeEngine/Cuda/CudaKernel.h b/Source/ComputeEngine/Cuda/CudaKernel.h index 7ae14595..c718c4de 100644 --- a/Source/ComputeEngine/Cuda/CudaKernel.h +++ b/Source/ComputeEngine/Cuda/CudaKernel.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -18,13 +19,14 @@ namespace ktt class CudaComputeAction; class CudaStream; class EngineConfiguration; +class KernelArgument; struct KernelCompilationData; class CudaKernel : public std::enable_shared_from_this { public: explicit CudaKernel(IdGenerator& generator, const EngineConfiguration& configuration, const std::string& name, - const std::string& source, const std::string& templatedName = ""); + const std::string& source, const std::string& templatedName = "", const std::vector& symbolArguments = {}); ~CudaKernel(); std::unique_ptr Launch(const CudaStream& stream, const DimensionVector& globalSize, 
diff --git a/Source/ComputeEngine/Cuda/CudaProgram.cpp b/Source/ComputeEngine/Cuda/CudaProgram.cpp index 4f601f51..674185f9 100644 --- a/Source/ComputeEngine/Cuda/CudaProgram.cpp +++ b/Source/ComputeEngine/Cuda/CudaProgram.cpp @@ -6,16 +6,19 @@ #include #include +#include #include #include +#include #include namespace ktt { -CudaProgram::CudaProgram(const std::string& name, const std::string& source) : +CudaProgram::CudaProgram(const std::string& name, const std::string& source, const std::vector& symbolArguments) : m_Name(name), - m_Source(source) + m_Source(source), + m_SymbolArguments(symbolArguments) { CheckError(nvrtcCreateProgram(&m_Program, source.data(), nullptr, 0, nullptr, nullptr), "nvrtcCreateProgram"); } @@ -28,6 +31,12 @@ CudaProgram::~CudaProgram() void CudaProgram::Build(const std::string& compilerOptions) const { CheckError(nvrtcAddNameExpression(m_Program, m_Name.c_str()), "nvrtcAddNameExpression"); + + for (const auto* argument : m_SymbolArguments) + { + CheckError(nvrtcAddNameExpression(m_Program, argument->GetSymbolName().c_str()), "nvrtcAddNameExpression"); + } + std::vector individualOptions; std::vector individualOptionsChar; @@ -51,6 +60,19 @@ void CudaProgram::Build(const std::string& compilerOptions) const CheckError(result, "nvrtcCompileProgram", buildInfo, ExceptionReason::CompilerError); } +void CudaProgram::InitializeSymbolData(const CudaKernel& kernel) const +{ + for (const auto* argument : m_SymbolArguments) + { + const char* symbolName; + CheckError(nvrtcGetLoweredName(m_Program, argument->GetSymbolName().c_str(), &symbolName), "nvrtcGetLoweredName"); + + CUdeviceptr symbolAddress; + CheckError(cuModuleGetGlobal(&symbolAddress, nullptr, kernel.GetModule(), symbolName), "cuModuleGetGlobal"); + CheckError(cuMemcpyHtoD(symbolAddress, argument->GetData(), argument->GetDataSize()), "cuMemcpyHtoD"); + } +} + const std::string& CudaProgram::GetSource() const { return m_Source; diff --git a/Source/ComputeEngine/Cuda/CudaProgram.h 
b/Source/ComputeEngine/Cuda/CudaProgram.h index 1c719406..b4ec8610 100644 --- a/Source/ComputeEngine/Cuda/CudaProgram.h +++ b/Source/ComputeEngine/Cuda/CudaProgram.h @@ -3,18 +3,23 @@ #ifdef KTT_API_CUDA #include +#include #include namespace ktt { +class CudaKernel; +class KernelArgument; + class CudaProgram { public: - explicit CudaProgram(const std::string& name, const std::string& source); + explicit CudaProgram(const std::string& name, const std::string& source, const std::vector& symbolArguments = {}); ~CudaProgram(); void Build(const std::string& compilerOptions) const; + void InitializeSymbolData(const CudaKernel& kernel) const; const std::string& GetSource() const; std::string GetLoweredName() const; @@ -24,6 +29,7 @@ class CudaProgram private: std::string m_Name; std::string m_Source; + std::vector m_SymbolArguments; nvrtcProgram m_Program; std::string GetBuildInfo() const; diff --git a/Source/ComputeEngine/KernelComputeData.cpp b/Source/ComputeEngine/KernelComputeData.cpp index 55288dd4..3ca56f04 100644 --- a/Source/ComputeEngine/KernelComputeData.cpp +++ b/Source/ComputeEngine/KernelComputeData.cpp @@ -98,7 +98,15 @@ const std::string& KernelComputeData::GetTemplatedName() const KernelComputeId KernelComputeData::GetUniqueIdentifier() const { - return m_Name + m_TemplatedName + m_ConfigurationPrefix; + KernelComputeId id = m_Name + m_TemplatedName + m_ConfigurationPrefix; + const auto symbolArguments = KernelArgument::GetArgumentsWithMemoryType(m_Arguments, ArgumentMemoryType::Symbol); + + for (const auto* argument : symbolArguments) + { + id += argument->GetSymbolName(); + } + + return id; } const DimensionVector& KernelComputeData::GetGlobalSize() const diff --git a/Source/ComputeEngine/OpenCl/OpenClKernel.cpp b/Source/ComputeEngine/OpenCl/OpenClKernel.cpp index 6264ef39..3f7c35ae 100644 --- a/Source/ComputeEngine/OpenCl/OpenClKernel.cpp +++ b/Source/ComputeEngine/OpenCl/OpenClKernel.cpp @@ -62,6 +62,7 @@ void OpenClKernel::SetArgument(const 
KernelArgument& argument) switch (argument.GetMemoryType()) { case ArgumentMemoryType::Scalar: + case ArgumentMemoryType::Symbol: SetKernelArgumentScalar(argument.GetData(), argument.GetElementSize()); break; case ArgumentMemoryType::Vector: diff --git a/Source/ComputeEngine/Vulkan/VulkanComputePipeline.cpp b/Source/ComputeEngine/Vulkan/VulkanComputePipeline.cpp index 0a8acbc1..2c7f9ab8 100644 --- a/Source/ComputeEngine/Vulkan/VulkanComputePipeline.cpp +++ b/Source/ComputeEngine/Vulkan/VulkanComputePipeline.cpp @@ -29,7 +29,7 @@ VulkanComputePipeline::VulkanComputePipeline(const VulkanDevice& device, IdGener for (auto* argument : arguments) { - if (argument->GetMemoryType() == ArgumentMemoryType::Scalar) + if (argument->GetMemoryType() == ArgumentMemoryType::Scalar || argument->GetMemoryType() == ArgumentMemoryType::Symbol) { scalarArguments.push_back(argument); } diff --git a/Source/ComputeEngine/Vulkan/VulkanEngine.cpp b/Source/ComputeEngine/Vulkan/VulkanEngine.cpp index d471aefc..add431e2 100644 --- a/Source/ComputeEngine/Vulkan/VulkanEngine.cpp +++ b/Source/ComputeEngine/Vulkan/VulkanEngine.cpp @@ -440,7 +440,8 @@ VulkanBuffer* VulkanEngine::GetPipelineArgument(KernelArgument& argument) { case ArgumentMemoryType::Scalar: case ArgumentMemoryType::Local: - KttError("Scalar and local memory arguments do not have Vulkan buffer representation"); + case ArgumentMemoryType::Symbol: + KttError("Scalar, symbol and local memory arguments do not have Vulkan buffer representation"); return nullptr; case ArgumentMemoryType::Vector: { @@ -518,7 +519,7 @@ std::vector VulkanEngine::GetScalarArguments(const std::vector< for (auto* argument : arguments) { - if (argument->GetMemoryType() == ArgumentMemoryType::Scalar) + if (argument->GetMemoryType() == ArgumentMemoryType::Scalar || argument->GetMemoryType() == ArgumentMemoryType::Symbol) { result.push_back(argument); } diff --git a/Source/ComputeEngine/Vulkan/VulkanPushConstant.cpp 
b/Source/ComputeEngine/Vulkan/VulkanPushConstant.cpp index e92c7add..925fb6e2 100644 --- a/Source/ComputeEngine/Vulkan/VulkanPushConstant.cpp +++ b/Source/ComputeEngine/Vulkan/VulkanPushConstant.cpp @@ -14,7 +14,8 @@ VulkanPushConstant::VulkanPushConstant(const std::vector& argum for (const auto* argument : arguments) { - KttAssert(argument->GetMemoryType() == ArgumentMemoryType::Scalar, "Only scalar arguments can be used as push constants"); + KttAssert(argument->GetMemoryType() == ArgumentMemoryType::Scalar || argument->GetMemoryType() == ArgumentMemoryType::Symbol, + "Only scalar and symbol arguments can be used as push constants"); const size_t dataSize = argument->GetDataSize(); for (size_t i = 0; i < dataSize; ++i) diff --git a/Source/ComputeEngine/Vulkan/VulkanSpecializationConstant.cpp b/Source/ComputeEngine/Vulkan/VulkanSpecializationConstant.cpp index bd88c15f..3be41c64 100644 --- a/Source/ComputeEngine/Vulkan/VulkanSpecializationConstant.cpp +++ b/Source/ComputeEngine/Vulkan/VulkanSpecializationConstant.cpp @@ -11,7 +11,8 @@ VulkanSpecializationConstant::VulkanSpecializationConstant(const std::vector& KernelDefinition::GetArguments() const std::vector KernelDefinition::GetVectorArguments() const { - std::vector result; - - for (auto* argument : m_Arguments) - { - if (argument->GetMemoryType() == ArgumentMemoryType::Vector) - { - result.push_back(argument); - } - } - - return result; + return KernelArgument::GetArgumentsWithMemoryType(m_Arguments, ArgumentMemoryType::Vector); } bool KernelDefinition::HasArgument(const ArgumentId id) const diff --git a/Source/KernelArgument/ArgumentMemoryType.h b/Source/KernelArgument/ArgumentMemoryType.h index 95f96d2d..1c775580 100644 --- a/Source/KernelArgument/ArgumentMemoryType.h +++ b/Source/KernelArgument/ArgumentMemoryType.h @@ -26,7 +26,12 @@ enum class ArgumentMemoryType * enough local memory to hold number of elements specified by the argument. The memory then needs to be filled with * data on kernel side. 
*/ - Local + Local, + + /** Argument corresponds to the CUDA symbol which resides in global or constant device memory and matches the argument's name. + * In Vulkan and OpenCL, symbol arguments are treated in the same way as scalars. + */ + Symbol }; } // namespace ktt diff --git a/Source/KernelArgument/KernelArgument.cpp b/Source/KernelArgument/KernelArgument.cpp index 056075da..e750ad42 100644 --- a/Source/KernelArgument/KernelArgument.cpp +++ b/Source/KernelArgument/KernelArgument.cpp @@ -9,7 +9,7 @@ namespace ktt KernelArgument::KernelArgument(const ArgumentId id, const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, - const ArgumentManagementType managementType) : + const ArgumentManagementType managementType, const std::string& symbolName) : m_Id(id), m_ElementSize(elementSize), m_DataSize(0), @@ -19,12 +19,18 @@ KernelArgument::KernelArgument(const ArgumentId id, const size_t elementSize, co m_MemoryType(memoryType), m_ManagementType(managementType), m_Ownership(ArgumentOwnership::Copy), + m_SymbolName(symbolName), m_ReferencedData(nullptr) { KttAssert(m_MemoryType == ArgumentMemoryType::Vector || m_MemoryLocation == ArgumentMemoryLocation::Undefined, "Non-vector arguments must have undefined memory location"); KttAssert(m_MemoryType != ArgumentMemoryType::Vector || m_MemoryLocation != ArgumentMemoryLocation::Undefined, "Vector arguments must have defined memory location"); + + if (!m_SymbolName.empty()) + { + m_SymbolName = "&" + m_SymbolName; + } } void KernelArgument::SetReferencedData(void* data, const size_t dataSize) @@ -116,6 +122,11 @@ ArgumentManagementType KernelArgument::GetManagementType() const return m_ManagementType; } +const std::string& KernelArgument::GetSymbolName() const +{ + return m_SymbolName; +} + uint64_t KernelArgument::GetNumberOfElements() const { return static_cast(GetDataSize() / GetElementSize()); @@ 
-158,4 +169,20 @@ bool KernelArgument::HasUserBuffer() const return m_Ownership == ArgumentOwnership::User; } +std::vector KernelArgument::GetArgumentsWithMemoryType(const std::vector& arguments, + const ArgumentMemoryType type) +{ + std::vector result; + + for (auto* argument : arguments) + { + if (argument->GetMemoryType() == type) + { + result.push_back(argument); + } + } + + return result; +} + } // namespace ktt diff --git a/Source/KernelArgument/KernelArgument.h b/Source/KernelArgument/KernelArgument.h index a534eab9..24995745 100644 --- a/Source/KernelArgument/KernelArgument.h +++ b/Source/KernelArgument/KernelArgument.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -19,7 +20,7 @@ class KernelArgument public: explicit KernelArgument(const ArgumentId id, const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, - const ArgumentManagementType managementType); + const ArgumentManagementType managementType, const std::string& symbolName = ""); void SetReferencedData(void* data, const size_t dataSize); void SetOwnedData(const void* data, const size_t dataSize); @@ -32,6 +33,7 @@ class KernelArgument ArgumentAccessType GetAccessType() const; ArgumentMemoryType GetMemoryType() const; ArgumentManagementType GetManagementType() const; + const std::string& GetSymbolName() const; uint64_t GetNumberOfElements() const; size_t GetDataSize() const; @@ -46,6 +48,9 @@ class KernelArgument template uint64_t GetNumberOfElementsWithType() const; + static std::vector GetArgumentsWithMemoryType(const std::vector& arguments, + const ArgumentMemoryType type); + private: ArgumentId m_Id; size_t m_ElementSize; @@ -56,6 +61,7 @@ class KernelArgument ArgumentMemoryType m_MemoryType; ArgumentManagementType m_ManagementType; ArgumentOwnership m_Ownership; + std::string m_SymbolName; std::vector m_Data; void* m_ReferencedData; }; diff --git 
a/Source/KernelArgument/KernelArgumentManager.cpp b/Source/KernelArgument/KernelArgumentManager.cpp index 1012c582..78b09f88 100644 --- a/Source/KernelArgument/KernelArgumentManager.cpp +++ b/Source/KernelArgument/KernelArgumentManager.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -23,9 +24,9 @@ ArgumentId KernelArgumentManager::AddArgumentWithReferencedData(const size_t ele ArgumentId KernelArgumentManager::AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, - const ArgumentManagementType managementType, const void* data, const size_t dataSize) + const ArgumentManagementType managementType, const void* data, const size_t dataSize, const std::string& symbolName) { - const auto id = AddArgument(elementSize, dataType, memoryLocation, accessType, memoryType, managementType); + const auto id = AddArgument(elementSize, dataType, memoryLocation, accessType, memoryType, managementType, symbolName); auto& argument = GetArgument(id); argument.SetOwnedData(data, dataSize); return id; @@ -81,8 +82,21 @@ std::vector KernelArgumentManager::GetArguments(const std::vect ArgumentId KernelArgumentManager::AddArgument(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, - const ArgumentManagementType managementType) + const ArgumentManagementType managementType, const std::string& symbolName) { + if (memoryType == ArgumentMemoryType::Symbol && !symbolName.empty()) + { + const bool symbolNameExists = std::any_of(m_Arguments.cbegin(), m_Arguments.cend(), [&symbolName](const auto& pair) + { + return pair.second->GetSymbolName() == symbolName; + }); + + if (symbolNameExists) + { + throw KttException("Kernel argument with symbol name " + symbolName + " already exists"); + } + } + if (memoryType == 
ArgumentMemoryType::Vector && memoryLocation == ArgumentMemoryLocation::Undefined) { throw KttException("Vector kernel arguments must have properly defined memory location"); @@ -91,7 +105,7 @@ ArgumentId KernelArgumentManager::AddArgument(const size_t elementSize, const Ar const auto id = m_IdGenerator.GenerateId(); auto argument = std::make_unique(id, elementSize, dataType, memoryLocation, accessType, memoryType, - managementType); + managementType, symbolName); m_Arguments[id] = std::move(argument); return id; diff --git a/Source/KernelArgument/KernelArgumentManager.h b/Source/KernelArgument/KernelArgumentManager.h index 5eec790a..3bd2d546 100644 --- a/Source/KernelArgument/KernelArgumentManager.h +++ b/Source/KernelArgument/KernelArgumentManager.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -21,7 +22,7 @@ class KernelArgumentManager const ArgumentManagementType managementType, void* data, const size_t dataSize); ArgumentId AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, - const ArgumentManagementType managementType, const void* data, const size_t dataSize); + const ArgumentManagementType managementType, const void* data, const size_t dataSize, const std::string& symbolName = ""); ArgumentId AddUserArgument(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const size_t dataSize); void RemoveArgument(const ArgumentId id); @@ -35,7 +36,8 @@ class KernelArgumentManager std::map> m_Arguments; ArgumentId AddArgument(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, - const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, const ArgumentManagementType managementType); + const ArgumentAccessType accessType, const 
ArgumentMemoryType memoryType, const ArgumentManagementType managementType, + const std::string& symbolName = ""); }; } // namespace ktt diff --git a/Source/Tuner.cpp b/Source/Tuner.cpp index 071713a0..95d9e9b7 100644 --- a/Source/Tuner.cpp +++ b/Source/Tuner.cpp @@ -655,12 +655,12 @@ ArgumentId Tuner::AddArgumentWithReferencedData(const size_t elementSize, const ArgumentId Tuner::AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, - const ArgumentManagementType managementType, const void* data, const size_t dataSize) + const ArgumentManagementType managementType, const void* data, const size_t dataSize, const std::string& symbolName) { try { return m_Tuner->AddArgumentWithOwnedData(elementSize, dataType, memoryLocation, accessType, memoryType, managementType, - data, dataSize); + data, dataSize, symbolName); } catch (const KttException& exception) { diff --git a/Source/Tuner.h b/Source/Tuner.h index dc3361b7..d38b36af 100644 --- a/Source/Tuner.h +++ b/Source/Tuner.h @@ -324,6 +324,16 @@ class KTT_API Tuner template ArgumentId AddArgumentLocal(const size_t localMemorySize); + /** @fn template ArgumentId AddArgumentSymbol(const T& data, const std::string& symbolName = "") + * Adds new symbol argument to the tuner. + * @param data Kernel argument data. The data type must be trivially copyable. Bool, reference or pointer types are not supported. + * @param symbolName Name of the corresponding symbol in kernel source code. Only utilized when tuner is using CUDA API. The symbol + * name must be unique. + * @return Id assigned to kernel argument by tuner. The id can be used in other API methods. + */ + template + ArgumentId AddArgumentSymbol(const T& data, const std::string& symbolName = ""); + /** @fn void RemoveArgument(const ArgumentId id) * Removes argument with the specified id from the tuner. 
Note that argument can only be removed if it is not associated with * any kernel definition. @@ -651,7 +661,7 @@ class KTT_API Tuner const ArgumentManagementType managementType, void* data, const size_t dataSize); ArgumentId AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, - const ArgumentManagementType managementType, const void* data, const size_t dataSize); + const ArgumentManagementType managementType, const void* data, const size_t dataSize, const std::string& symbolName = ""); ArgumentId AddUserArgument(ComputeBuffer buffer, const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const size_t dataSize); diff --git a/Source/Tuner.inl b/Source/Tuner.inl index 4e1ac143..41d4f47c 100644 --- a/Source/Tuner.inl +++ b/Source/Tuner.inl @@ -61,6 +61,14 @@ ArgumentId Tuner::AddArgumentLocal(const size_t localMemorySize) ArgumentMemoryType::Local, ArgumentManagementType::Framework, nullptr, localMemorySize); } +template +ArgumentId Tuner::AddArgumentSymbol(const T& data, const std::string& symbolName) +{ + const ArgumentDataType dataType = DeriveArgumentDataType(); + return AddArgumentWithOwnedData(sizeof(T), dataType, ArgumentMemoryLocation::Undefined, ArgumentAccessType::ReadOnly, + ArgumentMemoryType::Symbol, ArgumentManagementType::Framework, &data, sizeof(T), symbolName); +} + template ArgumentDataType Tuner::DeriveArgumentDataType() const { diff --git a/Source/TunerCore.cpp b/Source/TunerCore.cpp index bc181cd6..d137ffb2 100644 --- a/Source/TunerCore.cpp +++ b/Source/TunerCore.cpp @@ -123,10 +123,15 @@ ArgumentId TunerCore::AddArgumentWithReferencedData(const size_t elementSize, co ArgumentId TunerCore::AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, 
const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, - const ArgumentManagementType managementType, const void* data, const size_t dataSize) + const ArgumentManagementType managementType, const void* data, const size_t dataSize, const std::string& symbolName) { + if (memoryType == ArgumentMemoryType::Symbol && symbolName.empty() && m_ComputeEngine->GetComputeApi() == ComputeApi::CUDA) + { + throw KttException("Symbol arguments in CUDA must have defined symbol name"); + } + return m_ArgumentManager->AddArgumentWithOwnedData(elementSize, dataType, memoryLocation, accessType, memoryType, - managementType, data, dataSize); + managementType, data, dataSize, symbolName); } ArgumentId TunerCore::AddUserArgument(ComputeBuffer buffer, const size_t elementSize, const ArgumentDataType dataType, diff --git a/Source/TunerCore.h b/Source/TunerCore.h index b40b13df..4caf1a83 100644 --- a/Source/TunerCore.h +++ b/Source/TunerCore.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -49,7 +50,7 @@ class TunerCore const ArgumentManagementType managementType, void* data, const size_t dataSize); ArgumentId AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, - const ArgumentManagementType managementType, const void* data, const size_t dataSize); + const ArgumentManagementType managementType, const void* data, const size_t dataSize, const std::string& symbolName = ""); ArgumentId AddUserArgument(ComputeBuffer buffer, const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const size_t dataSize); void RemoveArgument(const ArgumentId id); From d552d6c7d476b79bc598a5d9ac28631c969cb98b Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Mon, 6 Sep 2021 14:57:23 +0200 Subject: [PATCH 03/63] * Fixed compiler warning when OpenCL 
backend is disabled --- Source/TunerCore.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/TunerCore.cpp b/Source/TunerCore.cpp index d137ffb2..6bc13065 100644 --- a/Source/TunerCore.cpp +++ b/Source/TunerCore.cpp @@ -377,7 +377,8 @@ void TunerCore::Log(const LoggingLevel level, const std::string& message) Logger::GetLogger().Log(level, message); } -void TunerCore::InitializeComputeEngine(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api, const uint32_t queueCount) +void TunerCore::InitializeComputeEngine([[maybe_unused]] const PlatformIndex platform, const DeviceIndex device, const ComputeApi api, + const uint32_t queueCount) { if (queueCount == 0) { From d34350098c3083856b7b551f8754c57689dbb809 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 21 Sep 2021 14:18:49 +0200 Subject: [PATCH 04/63] * Corrected documentation for KTT exception --- Source/Api/KttException.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Api/KttException.h b/Source/Api/KttException.h index 9423408d..97396724 100644 --- a/Source/Api/KttException.h +++ b/Source/Api/KttException.h @@ -18,7 +18,7 @@ namespace ktt class KttException : public std::exception { public: - /** @fn KttException(const std::string& message) + /** @fn KttException(const std::string& message, const ExceptionReason reason = ExceptionReason::General) * Creates new exception with the specified error message. * @param message Holds message describing why the exception was thrown. * @param reason Reason why the exception was thrown. 
From 9dbbc722c4ba3e8f3f5c3d4a1eb315cb0d9ae88c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 21 Sep 2021 15:34:58 +0200 Subject: [PATCH 05/63] * Added support for adding and removing compute queues inside tuner after it was created * It is now possible to retrieve ids assigned to individual user-provided queues --- Source/ComputeEngine/ComputeEngine.h | 2 + Source/ComputeEngine/Cuda/CudaContext.cpp | 5 ++ Source/ComputeEngine/Cuda/CudaContext.h | 1 + Source/ComputeEngine/Cuda/CudaEngine.cpp | 80 +++++++++++++----- Source/ComputeEngine/Cuda/CudaEngine.h | 7 +- Source/ComputeEngine/OpenCl/OpenClContext.cpp | 5 ++ Source/ComputeEngine/OpenCl/OpenClContext.h | 1 + Source/ComputeEngine/OpenCl/OpenClEngine.cpp | 82 ++++++++++++++----- Source/ComputeEngine/OpenCl/OpenClEngine.h | 7 +- Source/ComputeEngine/Vulkan/VulkanEngine.cpp | 10 +++ Source/ComputeEngine/Vulkan/VulkanEngine.h | 2 + Source/KttTypes.h | 4 + Source/Tuner.cpp | 35 +++++++- Source/Tuner.h | 24 ++++++ Source/TunerCore.cpp | 21 +++-- Source/TunerCore.h | 6 +- 16 files changed, 237 insertions(+), 55 deletions(-) diff --git a/Source/ComputeEngine/ComputeEngine.h b/Source/ComputeEngine/ComputeEngine.h index 8d39c8ac..9bb8957a 100644 --- a/Source/ComputeEngine/ComputeEngine.h +++ b/Source/ComputeEngine/ComputeEngine.h @@ -52,6 +52,8 @@ class ComputeEngine virtual bool HasBuffer(const ArgumentId id) = 0; // Queue methods + virtual QueueId AddComputeQueue(ComputeQueue queue) = 0; + virtual void RemoveComputeQueue(const QueueId id) = 0; virtual QueueId GetDefaultQueue() const = 0; virtual std::vector GetAllQueues() const = 0; virtual void SynchronizeQueue(const QueueId queueId) = 0; diff --git a/Source/ComputeEngine/Cuda/CudaContext.cpp b/Source/ComputeEngine/Cuda/CudaContext.cpp index 6753dd65..c723a96b 100644 --- a/Source/ComputeEngine/Cuda/CudaContext.cpp +++ b/Source/ComputeEngine/Cuda/CudaContext.cpp @@ -64,6 +64,11 @@ CUdevice CudaContext::GetDevice() const return m_Device; } 
+bool CudaContext::IsUserOwned() const +{ + return !m_OwningContext; +} + } // namespace ktt #endif // KTT_API_CUDA diff --git a/Source/ComputeEngine/Cuda/CudaContext.h b/Source/ComputeEngine/Cuda/CudaContext.h index 86fa21c1..861612aa 100644 --- a/Source/ComputeEngine/Cuda/CudaContext.h +++ b/Source/ComputeEngine/Cuda/CudaContext.h @@ -22,6 +22,7 @@ class CudaContext CUcontext GetContext() const; CUdevice GetDevice() const; + bool IsUserOwned() const; private: CUcontext m_Context; diff --git a/Source/ComputeEngine/Cuda/CudaEngine.cpp b/Source/ComputeEngine/Cuda/CudaEngine.cpp index 7542011b..4f7b9aa0 100644 --- a/Source/ComputeEngine/Cuda/CudaEngine.cpp +++ b/Source/ComputeEngine/Cuda/CudaEngine.cpp @@ -42,8 +42,9 @@ CudaEngine::CudaEngine(const DeviceIndex deviceIndex, const uint32_t queueCount) for (uint32_t i = 0; i < queueCount; ++i) { - auto stream = std::make_unique(i); - m_Streams.push_back(std::move(stream)); + const QueueId id = m_QueueIdGenerator.GenerateId(); + auto stream = std::make_unique(id); + m_Streams[id] = std::move(stream); } InitializeCompilerOptions(); @@ -54,7 +55,7 @@ CudaEngine::CudaEngine(const DeviceIndex deviceIndex, const uint32_t queueCount) #endif // KTT_PROFILING_CUPTI } -CudaEngine::CudaEngine(const ComputeApiInitializer& initializer) : +CudaEngine::CudaEngine(const ComputeApiInitializer& initializer, std::vector& assignedQueueIds) : m_Configuration(GlobalSizeType::CUDA), m_DeviceInfo(0, ""), m_KernelCache(10) @@ -74,10 +75,12 @@ CudaEngine::CudaEngine(const ComputeApiInitializer& initializer) : const auto& streams = initializer.GetQueues(); - for (size_t i = 0; i < streams.size(); ++i) + for (auto& stream : streams) { - auto stream = std::make_unique(static_cast(i), streams[i]); - m_Streams.push_back(std::move(stream)); + const QueueId id = m_QueueIdGenerator.GenerateId(); + auto cudaStream = std::make_unique(id, stream); + m_Streams[id] = std::move(cudaStream); + assignedQueueIds.push_back(id); } InitializeCompilerOptions(); @@ 
-90,7 +93,7 @@ CudaEngine::CudaEngine(const ComputeApiInitializer& initializer) : ComputeActionId CudaEngine::RunKernelAsync(const KernelComputeData& data, const QueueId queueId) { - if (queueId >= static_cast(m_Streams.size())) + if (!ContainsKey(m_Streams, queueId)) { throw KttException("Invalid stream index: " + std::to_string(queueId)); } @@ -110,7 +113,7 @@ ComputeActionId CudaEngine::RunKernelAsync(const KernelComputeData& data, const std::vector arguments = GetKernelArguments(data.GetArguments()); const size_t sharedMemorySize = GetSharedMemorySize(data.GetArguments()); - const auto& stream = *m_Streams[static_cast(queueId)]; + const auto& stream = *m_Streams[queueId]; timer.Stop(); auto action = kernel->Launch(stream, data.GetGlobalSize(), data.GetLocalSize(), arguments, sharedMemorySize); @@ -334,7 +337,7 @@ TransferActionId CudaEngine::UploadArgument(KernelArgument& kernelArgument, cons const auto id = kernelArgument.GetId(); Logger::LogDebug("Uploading buffer for argument with id " + std::to_string(id)); - if (queueId >= static_cast(m_Streams.size())) + if (!ContainsKey(m_Streams, queueId)) { throw KttException("Invalid stream index: " + std::to_string(queueId)); } @@ -352,8 +355,7 @@ TransferActionId CudaEngine::UploadArgument(KernelArgument& kernelArgument, cons auto buffer = CreateBuffer(kernelArgument); timer.Stop(); - auto action = buffer->UploadData(*m_Streams[static_cast(queueId)], kernelArgument.GetData(), - kernelArgument.GetDataSize()); + auto action = buffer->UploadData(*m_Streams[queueId], kernelArgument.GetData(), kernelArgument.GetDataSize()); action->IncreaseOverhead(timer.GetElapsedTime()); const auto actionId = action->GetId(); @@ -371,7 +373,7 @@ TransferActionId CudaEngine::UpdateArgument(const ArgumentId id, const QueueId q Logger::LogDebug("Updating buffer for argument with id " + std::to_string(id)); - if (queueId >= static_cast(m_Streams.size())) + if (!ContainsKey(m_Streams, queueId)) { throw KttException("Invalid stream index: " 
+ std::to_string(queueId)); } @@ -391,7 +393,7 @@ TransferActionId CudaEngine::UpdateArgument(const ArgumentId id, const QueueId q timer.Stop(); - auto action = buffer.UploadData(*m_Streams[static_cast(queueId)], data, actualDataSize); + auto action = buffer.UploadData(*m_Streams[queueId], data, actualDataSize); action->IncreaseOverhead(timer.GetElapsedTime()); const auto actionId = action->GetId(); m_TransferActions[actionId] = std::move(action); @@ -406,7 +408,7 @@ TransferActionId CudaEngine::DownloadArgument(const ArgumentId id, const QueueId Logger::LogDebug("Downloading buffer for argument with id " + std::to_string(id)); - if (queueId >= static_cast(m_Streams.size())) + if (!ContainsKey(m_Streams, queueId)) { throw KttException("Invalid stream index: " + std::to_string(queueId)); } @@ -426,7 +428,7 @@ TransferActionId CudaEngine::DownloadArgument(const ArgumentId id, const QueueId timer.Stop(); - auto action = buffer.DownloadData(*m_Streams[static_cast(queueId)], destination, actualDataSize); + auto action = buffer.DownloadData(*m_Streams[queueId], destination, actualDataSize); action->IncreaseOverhead(timer.GetElapsedTime()); const auto actionId = action->GetId(); m_TransferActions[actionId] = std::move(action); @@ -442,7 +444,7 @@ TransferActionId CudaEngine::CopyArgument(const ArgumentId destination, const Qu Logger::LogDebug("Copying buffer for argument with id " + std::to_string(source) + " into buffer for argument with id " + std::to_string(destination)); - if (queueId >= static_cast(m_Streams.size())) + if (!ContainsKey(m_Streams, queueId)) { throw KttException("Invalid stream index: " + std::to_string(queueId)); } @@ -469,7 +471,7 @@ TransferActionId CudaEngine::CopyArgument(const ArgumentId destination, const Qu timer.Stop(); - auto action = destinationBuffer.CopyData(*m_Streams[static_cast(queueId)], sourceBuffer, actualDataSize); + auto action = destinationBuffer.CopyData(*m_Streams[queueId], sourceBuffer, actualDataSize); 
action->IncreaseOverhead(timer.GetElapsedTime()); const auto actionId = action->GetId(); m_TransferActions[actionId] = std::move(action); @@ -549,6 +551,42 @@ bool CudaEngine::HasBuffer(const ArgumentId id) return ContainsKey(m_Buffers, id); } +QueueId CudaEngine::AddComputeQueue(ComputeQueue queue) +{ + if (!m_Context->IsUserOwned()) + { + throw KttException("New CUDA streams cannot be added to tuner which was not created with compute API initializer"); + } + + for (const auto& stream : m_Streams) + { + if (stream.second->GetStream() == static_cast(queue)) + { + throw KttException("The provided CUDA stream already exists inside the tuner under id: " + std::to_string(stream.first)); + } + } + + const QueueId id = m_QueueIdGenerator.GenerateId(); + auto stream = std::make_unique(id, queue); + m_Streams[id] = std::move(stream); + return id; +} + +void CudaEngine::RemoveComputeQueue(const QueueId id) +{ + if (!m_Context->IsUserOwned()) + { + throw KttException("CUDA streams cannot be removed from tuner which was not created with compute API initializer"); + } + + if (!ContainsKey(m_Streams, id)) + { + throw KttException("Invalid CUDA stream index: " + std::to_string(id)); + } + + m_Streams.erase(id); +} + QueueId CudaEngine::GetDefaultQueue() const { return static_cast(0); @@ -560,7 +598,7 @@ std::vector CudaEngine::GetAllQueues() const for (const auto& stream : m_Streams) { - result.push_back(stream->GetId()); + result.push_back(stream.first); } return result; @@ -568,19 +606,19 @@ std::vector CudaEngine::GetAllQueues() const void CudaEngine::SynchronizeQueue(const QueueId queueId) { - if (static_cast(queueId) >= m_Streams.size()) + if (!ContainsKey(m_Streams, queueId)) { throw KttException("Invalid CUDA stream index: " + std::to_string(queueId)); } - m_Streams[static_cast(queueId)]->Synchronize(); + m_Streams[queueId]->Synchronize(); } void CudaEngine::SynchronizeDevice() { for (auto& stream : m_Streams) { - stream->Synchronize(); + stream.second->Synchronize(); } } 
diff --git a/Source/ComputeEngine/Cuda/CudaEngine.h b/Source/ComputeEngine/Cuda/CudaEngine.h index c50cb8b1..08583299 100644 --- a/Source/ComputeEngine/Cuda/CudaEngine.h +++ b/Source/ComputeEngine/Cuda/CudaEngine.h @@ -33,7 +33,7 @@ class CudaEngine : public ComputeEngine { public: explicit CudaEngine(const DeviceIndex deviceIndex, const uint32_t queueCount); - explicit CudaEngine(const ComputeApiInitializer& initializer); + explicit CudaEngine(const ComputeApiInitializer& initializer, std::vector& assignedQueueIds); // Kernel methods ComputeActionId RunKernelAsync(const KernelComputeData& data, const QueueId queueId) override; @@ -66,6 +66,8 @@ class CudaEngine : public ComputeEngine bool HasBuffer(const ArgumentId id) override; // Queue methods + QueueId AddComputeQueue(ComputeQueue queue) override; + void RemoveComputeQueue(const QueueId id) override; QueueId GetDefaultQueue() const override; std::vector GetAllQueues() const override; void SynchronizeQueue(const QueueId queueId) override; @@ -91,10 +93,11 @@ class CudaEngine : public ComputeEngine EngineConfiguration m_Configuration; DeviceIndex m_DeviceIndex; DeviceInfo m_DeviceInfo; + IdGenerator m_QueueIdGenerator; IdGenerator m_ComputeIdGenerator; IdGenerator m_TransferIdGenerator; std::unique_ptr m_Context; - std::vector> m_Streams; + std::map> m_Streams; std::map> m_Buffers; LruCache> m_KernelCache; std::map> m_ComputeActions; diff --git a/Source/ComputeEngine/OpenCl/OpenClContext.cpp b/Source/ComputeEngine/OpenCl/OpenClContext.cpp index b3bd1dae..aa7d5e40 100644 --- a/Source/ComputeEngine/OpenCl/OpenClContext.cpp +++ b/Source/ComputeEngine/OpenCl/OpenClContext.cpp @@ -76,6 +76,11 @@ cl_device_id OpenClContext::GetDevice() const return m_Device; } +bool OpenClContext::IsUserOwned() const +{ + return !m_OwningContext; +} + } // namespace ktt #endif // KTT_PLATFORM_OPENCL diff --git a/Source/ComputeEngine/OpenCl/OpenClContext.h b/Source/ComputeEngine/OpenCl/OpenClContext.h index b4e5622b..443413a7 100644 --- 
a/Source/ComputeEngine/OpenCl/OpenClContext.h +++ b/Source/ComputeEngine/OpenCl/OpenClContext.h @@ -22,6 +22,7 @@ class OpenClContext cl_context GetContext() const; cl_platform_id GetPlatform() const; cl_device_id GetDevice() const; + bool IsUserOwned() const; private: cl_context m_Context; diff --git a/Source/ComputeEngine/OpenCl/OpenClEngine.cpp b/Source/ComputeEngine/OpenCl/OpenClEngine.cpp index 8e24a306..b625056a 100644 --- a/Source/ComputeEngine/OpenCl/OpenClEngine.cpp +++ b/Source/ComputeEngine/OpenCl/OpenClEngine.cpp @@ -46,8 +46,9 @@ OpenClEngine::OpenClEngine(const PlatformIndex platformIndex, const DeviceIndex for (uint32_t i = 0; i < queueCount; ++i) { - auto commandQueue = std::make_unique(i, *m_Context); - m_Queues.push_back(std::move(commandQueue)); + const QueueId id = m_QueueIdGenerator.GenerateId(); + auto commandQueue = std::make_unique(id, *m_Context); + m_Queues[id] = std::move(commandQueue); } m_DeviceInfo = GetDeviceInfo(m_PlatformIndex)[m_DeviceIndex]; @@ -57,7 +58,7 @@ OpenClEngine::OpenClEngine(const PlatformIndex platformIndex, const DeviceIndex #endif // KTT_PROFILING_GPA || KTT_PROFILING_GPA_LEGACY } -OpenClEngine::OpenClEngine(const ComputeApiInitializer& initializer) : +OpenClEngine::OpenClEngine(const ComputeApiInitializer& initializer, std::vector& assignedQueueIds) : m_Configuration(GlobalSizeType::OpenCL), m_DeviceInfo(0, ""), m_KernelCache(10) @@ -89,10 +90,12 @@ OpenClEngine::OpenClEngine(const ComputeApiInitializer& initializer) : const auto& queues = initializer.GetQueues(); - for (size_t i = 0; i < queues.size(); ++i) + for (auto& queue : queues) { - auto commandQueue = std::make_unique(static_cast(i), *m_Context, queues[i]); - m_Queues.push_back(std::move(commandQueue)); + const QueueId id = m_QueueIdGenerator.GenerateId(); + auto commandQueue = std::make_unique(id, *m_Context, queue); + m_Queues[id] = std::move(commandQueue); + assignedQueueIds.push_back(id); } m_DeviceInfo = GetDeviceInfo(m_PlatformIndex)[m_DeviceIndex]; 
@@ -104,7 +107,7 @@ OpenClEngine::OpenClEngine(const ComputeApiInitializer& initializer) : ComputeActionId OpenClEngine::RunKernelAsync(const KernelComputeData& data, const QueueId queueId) { - if (queueId >= static_cast(m_Queues.size())) + if (!ContainsKey(m_Queues, queueId)) { throw KttException("Invalid queue index: " + std::to_string(queueId)); } @@ -123,7 +126,7 @@ ComputeActionId OpenClEngine::RunKernelAsync(const KernelComputeData& data, cons auto kernel = LoadKernel(data); SetKernelArguments(*kernel, data.GetArguments()); - const auto& queue = *m_Queues[static_cast(queueId)]; + const auto& queue = *m_Queues[queueId]; timer.Stop(); auto action = kernel->Launch(queue, data.GetGlobalSize(), data.GetLocalSize()); @@ -272,7 +275,7 @@ TransferActionId OpenClEngine::UploadArgument(KernelArgument& kernelArgument, co const auto id = kernelArgument.GetId(); Logger::LogDebug("Uploading buffer for argument with id " + std::to_string(id)); - if (queueId >= static_cast(m_Queues.size())) + if (!ContainsKey(m_Queues, queueId)) { throw KttException("Invalid queue index: " + std::to_string(queueId)); } @@ -290,7 +293,7 @@ TransferActionId OpenClEngine::UploadArgument(KernelArgument& kernelArgument, co auto buffer = CreateBuffer(kernelArgument); timer.Stop(); - auto action = buffer->UploadData(*m_Queues[static_cast(queueId)], kernelArgument.GetData(), + auto action = buffer->UploadData(*m_Queues[queueId], kernelArgument.GetData(), kernelArgument.GetDataSize()); action->IncreaseOverhead(timer.GetElapsedTime()); const auto actionId = action->GetId(); @@ -309,7 +312,7 @@ TransferActionId OpenClEngine::UpdateArgument(const ArgumentId id, const QueueId Logger::LogDebug("Updating buffer for argument with id " + std::to_string(id)); - if (queueId >= static_cast(m_Queues.size())) + if (!ContainsKey(m_Queues, queueId)) { throw KttException("Invalid queue index: " + std::to_string(queueId)); } @@ -329,7 +332,7 @@ TransferActionId OpenClEngine::UpdateArgument(const ArgumentId id, const 
QueueId timer.Stop(); - auto action = buffer.UploadData(*m_Queues[static_cast(queueId)], data, actualDataSize); + auto action = buffer.UploadData(*m_Queues[queueId], data, actualDataSize); action->IncreaseOverhead(timer.GetElapsedTime()); const auto actionId = action->GetId(); m_TransferActions[actionId] = std::move(action); @@ -344,7 +347,7 @@ TransferActionId OpenClEngine::DownloadArgument(const ArgumentId id, const Queue Logger::LogDebug("Downloading buffer for argument with id " + std::to_string(id)); - if (queueId >= static_cast(m_Queues.size())) + if (!ContainsKey(m_Queues, queueId)) { throw KttException("Invalid queue index: " + std::to_string(queueId)); } @@ -364,7 +367,7 @@ TransferActionId OpenClEngine::DownloadArgument(const ArgumentId id, const Queue timer.Stop(); - auto action = buffer.DownloadData(*m_Queues[static_cast(queueId)], destination, actualDataSize); + auto action = buffer.DownloadData(*m_Queues[queueId], destination, actualDataSize); action->IncreaseOverhead(timer.GetElapsedTime()); const auto actionId = action->GetId(); m_TransferActions[actionId] = std::move(action); @@ -380,7 +383,7 @@ TransferActionId OpenClEngine::CopyArgument(const ArgumentId destination, const Logger::LogDebug("Copying buffer for argument with id " + std::to_string(source) + " into buffer for argument with id " + std::to_string(destination)); - if (queueId >= static_cast(m_Queues.size())) + if (!ContainsKey(m_Queues, queueId)) { throw KttException("Invalid queue index: " + std::to_string(queueId)); } @@ -407,7 +410,7 @@ TransferActionId OpenClEngine::CopyArgument(const ArgumentId destination, const timer.Stop(); - auto action = destinationBuffer.CopyData(*m_Queues[static_cast(queueId)], sourceBuffer, actualDataSize); + auto action = destinationBuffer.CopyData(*m_Queues[queueId], sourceBuffer, actualDataSize); action->IncreaseOverhead(timer.GetElapsedTime()); const auto actionId = action->GetId(); m_TransferActions[actionId] = std::move(action); @@ -439,7 +442,7 @@ 
void OpenClEngine::ResizeArgument(const ArgumentId id, const size_t newSize, con } auto& buffer = *m_Buffers[id]; - buffer.Resize(*m_Queues[static_cast(GetDefaultQueue())], newSize, preserveData); + buffer.Resize(*m_Queues[GetDefaultQueue()], newSize, preserveData); } void OpenClEngine::GetUnifiedMemoryBufferHandle(const ArgumentId id, UnifiedBufferMemory& handle) @@ -487,6 +490,43 @@ bool OpenClEngine::HasBuffer(const ArgumentId id) return ContainsKey(m_Buffers, id); } +QueueId OpenClEngine::AddComputeQueue(ComputeQueue queue) +{ + if (!m_Context->IsUserOwned()) + { + throw KttException("New OpenCL queues cannot be added to tuner which was not created with compute API initializer"); + } + + for (const auto& commandQueue : m_Queues) + { + if (commandQueue.second->GetQueue() == static_cast(queue)) + { + throw KttException("The provided OpenCL queue already exists inside the tuner under id: " + + std::to_string(commandQueue.first)); + } + } + + const QueueId id = m_QueueIdGenerator.GenerateId(); + auto commandQueue = std::make_unique(id, *m_Context, queue); + m_Queues[id] = std::move(commandQueue); + return id; +} + +void OpenClEngine::RemoveComputeQueue(const QueueId id) +{ + if (!m_Context->IsUserOwned()) + { + throw KttException("OpenCL command queues cannot be removed from tuner which was not created with compute API initializer"); + } + + if (!ContainsKey(m_Queues, id)) + { + throw KttException("Invalid queue index: " + std::to_string(id)); + } + + m_Queues.erase(id); +} + QueueId OpenClEngine::GetDefaultQueue() const { return static_cast(0); @@ -498,7 +538,7 @@ std::vector OpenClEngine::GetAllQueues() const for (const auto& queue : m_Queues) { - result.push_back(queue->GetId()); + result.push_back(queue.first); } return result; @@ -506,19 +546,19 @@ std::vector OpenClEngine::GetAllQueues() const void OpenClEngine::SynchronizeQueue(const QueueId queueId) { - if (static_cast(queueId) >= m_Queues.size()) + if (!ContainsKey(m_Queues, queueId)) { throw 
KttException("Invalid OpenCL command queue index: " + std::to_string(queueId)); } - m_Queues[static_cast(queueId)]->Synchronize(); + m_Queues[queueId]->Synchronize(); } void OpenClEngine::SynchronizeDevice() { for (auto& queue : m_Queues) { - queue->Synchronize(); + queue.second->Synchronize(); } } diff --git a/Source/ComputeEngine/OpenCl/OpenClEngine.h b/Source/ComputeEngine/OpenCl/OpenClEngine.h index be98ab95..33ae00ef 100644 --- a/Source/ComputeEngine/OpenCl/OpenClEngine.h +++ b/Source/ComputeEngine/OpenCl/OpenClEngine.h @@ -31,7 +31,7 @@ class OpenClEngine : public ComputeEngine { public: explicit OpenClEngine(const PlatformIndex platformIndex, const DeviceIndex deviceIndex, const uint32_t queueCount); - explicit OpenClEngine(const ComputeApiInitializer& initializer); + explicit OpenClEngine(const ComputeApiInitializer& initializer, std::vector& assignedQueueIds); // Kernel methods ComputeActionId RunKernelAsync(const KernelComputeData& data, const QueueId queueId) override; @@ -64,6 +64,8 @@ class OpenClEngine : public ComputeEngine bool HasBuffer(const ArgumentId id) override; // Queue methods + QueueId AddComputeQueue(ComputeQueue queue) override; + void RemoveComputeQueue(const QueueId id) override; QueueId GetDefaultQueue() const override; std::vector GetAllQueues() const override; void SynchronizeQueue(const QueueId queueId) override; @@ -90,10 +92,11 @@ class OpenClEngine : public ComputeEngine PlatformIndex m_PlatformIndex; DeviceIndex m_DeviceIndex; DeviceInfo m_DeviceInfo; + IdGenerator m_QueueIdGenerator; IdGenerator m_ComputeIdGenerator; IdGenerator m_TransferIdGenerator; std::unique_ptr m_Context; - std::vector> m_Queues; + std::map> m_Queues; std::map> m_Buffers; LruCache> m_KernelCache; std::map> m_ComputeActions; diff --git a/Source/ComputeEngine/Vulkan/VulkanEngine.cpp b/Source/ComputeEngine/Vulkan/VulkanEngine.cpp index add431e2..a61651ee 100644 --- a/Source/ComputeEngine/Vulkan/VulkanEngine.cpp +++ 
b/Source/ComputeEngine/Vulkan/VulkanEngine.cpp @@ -294,6 +294,16 @@ bool VulkanEngine::HasBuffer(const ArgumentId id) return ContainsKey(m_Buffers, id); } +QueueId VulkanEngine::AddComputeQueue([[maybe_unused]] ComputeQueue queue) +{ + throw KttException("Support for compute queue addition is not yet available for Vulkan backend"); +} + +void VulkanEngine::RemoveComputeQueue([[maybe_unused]] const QueueId id) +{ + throw KttException("Support for compute queue removal is not yet available for Vulkan backend"); +} + QueueId VulkanEngine::GetDefaultQueue() const { return static_cast(0); diff --git a/Source/ComputeEngine/Vulkan/VulkanEngine.h b/Source/ComputeEngine/Vulkan/VulkanEngine.h index 2f8e4fc6..ae5282cb 100644 --- a/Source/ComputeEngine/Vulkan/VulkanEngine.h +++ b/Source/ComputeEngine/Vulkan/VulkanEngine.h @@ -65,6 +65,8 @@ class VulkanEngine : public ComputeEngine bool HasBuffer(const ArgumentId id) override; // Queue methods + QueueId AddComputeQueue(ComputeQueue queue) override; + void RemoveComputeQueue(const QueueId id) override; QueueId GetDefaultQueue() const override; std::vector GetAllQueues() const override; void SynchronizeQueue(const QueueId queueId) override; diff --git a/Source/KttTypes.h b/Source/KttTypes.h index 1b801320..3ffe83a2 100644 --- a/Source/KttTypes.h +++ b/Source/KttTypes.h @@ -122,6 +122,10 @@ using ComputeQueue = void*; */ using ComputeBuffer = void*; +/** Queue id returned by compute queue addition methods in case of an error. + */ +inline const QueueId InvalidQueueId = std::numeric_limits::max(); + /** Kernel definition id returned by kernel definition addition methods in case of an error. 
*/ inline const KernelDefinitionId InvalidKernelDefinitionId = std::numeric_limits::max(); diff --git a/Source/Tuner.cpp b/Source/Tuner.cpp index 95d9e9b7..0622a0d5 100644 --- a/Source/Tuner.cpp +++ b/Source/Tuner.cpp @@ -14,8 +14,14 @@ Tuner::Tuner(const PlatformIndex platform, const DeviceIndex device, const Compu m_Tuner(std::make_unique(platform, device, api, computeQueueCount)) {} -Tuner::Tuner(const ComputeApi api, const ComputeApiInitializer& initializer) : - m_Tuner(std::make_unique(api, initializer)) +Tuner::Tuner(const ComputeApi api, const ComputeApiInitializer& initializer) +{ + std::vector ids; + m_Tuner = std::make_unique(api, initializer, ids); +} + +Tuner::Tuner(const ComputeApi api, const ComputeApiInitializer& initializer, std::vector& assignedQueueIds) : + m_Tuner(std::make_unique(api, initializer, assignedQueueIds)) {} Tuner::~Tuner() = default; @@ -511,6 +517,31 @@ std::vector Tuner::LoadResults(const std::string& filePath, const } } +QueueId Tuner::AddComputeQueue(ComputeQueue queue) +{ + try + { + return m_Tuner->AddComputeQueue(queue); + } + catch (const KttException& exception) + { + TunerCore::Log(LoggingLevel::Error, exception.what()); + return InvalidQueueId; + } +} + +void Tuner::RemoveComputeQueue(const QueueId id) +{ + try + { + m_Tuner->RemoveComputeQueue(id); + } + catch (const KttException& exception) + { + TunerCore::Log(LoggingLevel::Error, exception.what()); + } +} + void Tuner::Synchronize() { try diff --git a/Source/Tuner.h b/Source/Tuner.h index d38b36af..4789a3d3 100644 --- a/Source/Tuner.h +++ b/Source/Tuner.h @@ -90,6 +90,16 @@ class KTT_API Tuner */ explicit Tuner(const ComputeApi api, const ComputeApiInitializer& initializer); + /** @fn explicit Tuner(const ComputeApi api, const ComputeApiInitializer& initializer, std::vector& assignedQueueIds) + * Creates tuner for the specified compute API using custom initializer. The initializer contains user-provided compute device + * context and queues. 
The ids assigned to queues will be added to the provided vector. + * @param api Compute API used by the tuner. + * @param initializer Custom compute API initializer. See ComputeApiInitializer for more information. + * @param assignedQueueIds Ids assigned to compute queues inside initializer by the tuner. The order of assigned ids matches + * the order of queues inside initializer. + */ + explicit Tuner(const ComputeApi api, const ComputeApiInitializer& initializer, std::vector& assignedQueueIds); + /** @fn ~Tuner() * Tuner destructor. */ @@ -566,6 +576,20 @@ class KTT_API Tuner */ std::vector LoadResults(const std::string& filePath, const OutputFormat format, UserData& data) const; + /** @fn QueueId AddComputeQueue(ComputeQueue queue) + * Adds the specified compute queue to the tuner. New queues can only be added if tuner was initialized with compute API + * initializer. + * @param queue Queue which will be added. The queue should be tied to the context specified inside compute API initializer. + * @return Id assigned to queue by the tuner. + */ + QueueId AddComputeQueue(ComputeQueue queue); + + /** @fn void RemoveComputeQueue(const QueueId id) + * Removes the specified compute queue from the tuner. Only queues added by user can be removed. + * @param id Id of compute queue which will be removed. + */ + void RemoveComputeQueue(const QueueId id); + /** @fn void Synchronize() * Blocks until all commands submitted to all KTT device queues are completed. 
*/ diff --git a/Source/TunerCore.cpp b/Source/TunerCore.cpp index 6bc13065..001398c8 100644 --- a/Source/TunerCore.cpp +++ b/Source/TunerCore.cpp @@ -26,11 +26,11 @@ TunerCore::TunerCore(const PlatformIndex platform, const DeviceIndex device, con InitializeRunners(); } -TunerCore::TunerCore(const ComputeApi api, const ComputeApiInitializer& initializer) : +TunerCore::TunerCore(const ComputeApi api, const ComputeApiInitializer& initializer, std::vector& assignedQueueIds) : m_ArgumentManager(std::make_unique()), m_KernelManager(std::make_unique(*m_ArgumentManager)) { - InitializeComputeEngine(api, initializer); + InitializeComputeEngine(api, initializer, assignedQueueIds); InitializeRunners(); } @@ -312,6 +312,16 @@ std::vector TunerCore::LoadResults(const std::string& filePath, co return pair.second; } +QueueId TunerCore::AddComputeQueue(ComputeQueue queue) +{ + return m_ComputeEngine->AddComputeQueue(queue); +} + +void TunerCore::RemoveComputeQueue(const QueueId id) +{ + m_ComputeEngine->RemoveComputeQueue(id); +} + void TunerCore::SynchronizeDevice() { m_ComputeEngine->SynchronizeDevice(); @@ -413,20 +423,21 @@ void TunerCore::InitializeComputeEngine([[maybe_unused]] const PlatformIndex pla } } -void TunerCore::InitializeComputeEngine(const ComputeApi api, const ComputeApiInitializer& initializer) +void TunerCore::InitializeComputeEngine(const ComputeApi api, [[maybe_unused]] const ComputeApiInitializer& initializer, + [[maybe_unused]] std::vector& assignedQueueIds) { switch (api) { case ComputeApi::OpenCL: #ifdef KTT_API_OPENCL - m_ComputeEngine = std::make_unique(initializer); + m_ComputeEngine = std::make_unique(initializer, assignedQueueIds); #else throw KttException("Support for OpenCL API is not included in this version of KTT framework"); #endif // KTT_API_OPENCL break; case ComputeApi::CUDA: #ifdef KTT_API_CUDA - m_ComputeEngine = std::make_unique(initializer); + m_ComputeEngine = std::make_unique(initializer, assignedQueueIds); #else throw 
KttException("Support for CUDA API is not included in this version of KTT framework"); #endif // KTT_API_CUDA diff --git a/Source/TunerCore.h b/Source/TunerCore.h index 4caf1a83..4ed284a4 100644 --- a/Source/TunerCore.h +++ b/Source/TunerCore.h @@ -24,7 +24,7 @@ class TunerCore { public: explicit TunerCore(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api, const uint32_t queueCount); - explicit TunerCore(const ComputeApi api, const ComputeApiInitializer& initializer); + explicit TunerCore(const ComputeApi api, const ComputeApiInitializer& initializer, std::vector& assignedQueueIds); // Kernel management KernelDefinitionId AddKernelDefinition(const std::string& name, const std::string& source, const DimensionVector& globalSize, @@ -84,6 +84,8 @@ class TunerCore std::vector LoadResults(const std::string& filePath, const OutputFormat format, UserData& data) const; // Compute engine + QueueId AddComputeQueue(ComputeQueue queue); + void RemoveComputeQueue(const QueueId id); void SynchronizeDevice(); void SetProfilingCounters(const std::vector& counters); void SetCompilerOptions(const std::string& options); @@ -108,7 +110,7 @@ class TunerCore std::unique_ptr m_TuningRunner; void InitializeComputeEngine(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api, const uint32_t queueCount); - void InitializeComputeEngine(const ComputeApi api, const ComputeApiInitializer& initializer); + void InitializeComputeEngine(const ComputeApi api, const ComputeApiInitializer& initializer, std::vector& assignedQueueIds); void InitializeRunners(); static std::unique_ptr CreateSerializer(const OutputFormat format); From 78d899ff6563062d67621a055c34ea6a84965a31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 22 Sep 2021 10:58:21 +0200 Subject: [PATCH 06/63] * Fixed linking issue on Windows caused by unexported methods --- Source/KttPlatform.h | 3 +++ Source/Tuner.h | 6 +++--- 2 files changed, 6 insertions(+), 3 
deletions(-) diff --git a/Source/KttPlatform.h b/Source/KttPlatform.h index b263d653..3b3fd0df 100644 --- a/Source/KttPlatform.h +++ b/Source/KttPlatform.h @@ -15,8 +15,11 @@ #else #define KTT_API __declspec(dllimport) #endif // KTT_LIBRARY + + #define KTT_VIRTUAL_API virtual #else #define KTT_API + #define KTT_VIRTUAL_API #endif // _MSC_VER #endif // KTT_API diff --git a/Source/Tuner.h b/Source/Tuner.h index 4789a3d3..e2f9213e 100644 --- a/Source/Tuner.h +++ b/Source/Tuner.h @@ -680,13 +680,13 @@ class KTT_API Tuner private: std::unique_ptr m_Tuner; - ArgumentId AddArgumentWithReferencedData(const size_t elementSize, const ArgumentDataType dataType, + KTT_VIRTUAL_API ArgumentId AddArgumentWithReferencedData(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, const ArgumentManagementType managementType, void* data, const size_t dataSize); - ArgumentId AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType, + KTT_VIRTUAL_API ArgumentId AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType, const ArgumentManagementType managementType, const void* data, const size_t dataSize, const std::string& symbolName = ""); - ArgumentId AddUserArgument(ComputeBuffer buffer, const size_t elementSize, const ArgumentDataType dataType, + KTT_VIRTUAL_API ArgumentId AddUserArgument(ComputeBuffer buffer, const size_t elementSize, const ArgumentDataType dataType, const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const size_t dataSize); template From d0d308cddccd7b21a5f6259e3103762bec4b0095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Thu, 30 Sep 2021 13:50:27 +0200 Subject: [PATCH 07/63] * Added non-templated version of user buffer 
addition method to API --- Source/Tuner.cpp | 14 ++++++++++++++ Source/Tuner.h | 18 ++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/Source/Tuner.cpp b/Source/Tuner.cpp index 0622a0d5..8a38cdb2 100644 --- a/Source/Tuner.cpp +++ b/Source/Tuner.cpp @@ -234,6 +234,20 @@ void Tuner::SetProfiledDefinitions(const KernelId id, const std::vector ArgumentId AddArgumentScalar(const T& data); * Adds new scalar argument to the tuner. All scalar arguments are read-only. * @param data Kernel argument data. The data type must be trivially copyable. Bool, reference or pointer types are not supported. From a14a533401cbe2f4fccf271b509b847546030c58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 4 Oct 2021 14:45:19 +0200 Subject: [PATCH 08/63] * Added non-templated version of scalar argument addition method to API --- Source/Tuner.cpp | 14 ++++++++++++++ Source/Tuner.h | 9 +++++++++ 2 files changed, 23 insertions(+) diff --git a/Source/Tuner.cpp b/Source/Tuner.cpp index 8a38cdb2..3ca4a473 100644 --- a/Source/Tuner.cpp +++ b/Source/Tuner.cpp @@ -248,6 +248,20 @@ ArgumentId Tuner::AddArgumentVector(ComputeBuffer buffer, const size_t bufferSiz } } +ArgumentId Tuner::AddArgumentScalar(const void* data, const size_t dataSize) +{ + try + { + return AddArgumentWithOwnedData(dataSize, ArgumentDataType::Custom, ArgumentMemoryLocation::Undefined, + ArgumentAccessType::ReadOnly, ArgumentMemoryType::Scalar, ArgumentManagementType::Framework, data, dataSize); + } + catch (const KttException& exception) + { + TunerCore::Log(LoggingLevel::Error, exception.what()); + return InvalidArgumentId; + } +} + void Tuner::RemoveArgument(const ArgumentId id) { try diff --git a/Source/Tuner.h b/Source/Tuner.h index 800b5ca4..5c22e376 100644 --- a/Source/Tuner.h +++ b/Source/Tuner.h @@ -340,6 +340,15 @@ class KTT_API Tuner template ArgumentId AddArgumentScalar(const T& data); + /** @fn ArgumentId AddArgumentScalar(const void* data, const size_t elementSize) + * Adds 
new scalar argument to the tuner. All scalar arguments are read-only. This method can be utilized when templated version + * of scalar argument addition cannot be used. + * @param data Pointer to memory with kernel argument data. + * @param dataSize Size of data in bytes (e.g., 4 for 32-bit float). + * @return Id assigned to kernel argument by tuner. The id can be used in other API methods. + */ + ArgumentId AddArgumentScalar(const void* data, const size_t dataSize); + /** @fn template ArgumentId AddArgumentLocal(const size_t localMemorySize) * Adds new local memory (shared memory in CUDA) argument to the tuner. All local memory arguments are read-only and cannot be * initialized from host memory. In case of CUDA API usage, local memory arguments cannot be directly set as kernel function From c71b5e3f3de6ade64ca0c04d2cc09508f7eca214 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 6 Oct 2021 09:58:42 +0200 Subject: [PATCH 09/63] * Compute API initializer examples no longer incorrectly link Vulkan libraries --- premake5.lua | 57 ++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/premake5.lua b/premake5.lua index 767f9edf..43bd5796 100644 --- a/premake5.lua +++ b/premake5.lua @@ -3,8 +3,8 @@ cudaProjects = false openClProjects = false vulkanProjects = false --- Helper functions to find compute API headers and libraries -function findLibrariesAmd() +-- Helper functions to find and link compute API headers and libraries +function linkLibrariesAmd() local path = os.getenv("OCL_ROOT") if not path then @@ -54,7 +54,7 @@ function findLibrariesAmd() return true end -function findLibrariesIntel() +function linkLibrariesIntel() local path = os.getenv("INTELOCLSDKROOT") if not path then @@ -81,7 +81,7 @@ function findLibrariesIntel() return true end -function findLibrariesNvidia() +function linkLibrariesNvidia() local path = os.getenv("CUDA_PATH") if not path then @@ -129,23 +129,35 @@ 
function findLibrariesNvidia() return true end -function findLibraries() - if findLibrariesAmd() then +function linkComputeLibraries() + if _OPTIONS["platform"] then + if _OPTIONS["platform"] == "amd" then + return linkLibrariesAmd() + elseif _OPTIONS["platform"] == "intel" then + return linkLibrariesIntel() + elseif _OPTIONS["platform"] == "nvidia" then + return linkLibrariesNvidia() + else + error("The specified platform is unknown.") + end + end + + if linkLibrariesAmd() then return true end - if findLibrariesIntel() then + if linkLibrariesIntel() then return true end - if findLibrariesNvidia() then + if linkLibrariesNvidia() then return true end return false end -function findVulkan() +function linkVulkan() local path = os.getenv("VULKAN_SDK") if not path then @@ -176,29 +188,16 @@ function findVulkan() return true end -function linkLibraries() - local librariesFound = false - - if _OPTIONS["platform"] then - if _OPTIONS["platform"] == "amd" then - librariesFound = findLibrariesAmd() - elseif _OPTIONS["platform"] == "intel" then - librariesFound = findLibrariesIntel() - elseif _OPTIONS["platform"] == "nvidia" then - librariesFound = findLibrariesNvidia() - else - error("The specified platform is unknown.") - end - else - librariesFound = findLibraries() - end +function linkAllLibraries() + local librariesFound = linkComputeLibraries() + -- Allow usage of KTT with only Vulkan if no other compute API was explicitly specified by user if not librariesFound and (not _OPTIONS["vulkan"] or _OPTIONS["platform"]) then error("Compute API libraries were not found. Please ensure that path to the SDK is correctly set in the environment variables:\nOCL_ROOT for AMD\nINTELOCLSDKROOT for Intel\nCUDA_PATH for Nvidia") end if _OPTIONS["vulkan"] then - vulkanFound = findVulkan() + local vulkanFound = linkVulkan() if not vulkanFound then error("Vulkan SDK was not found. 
Please ensure that path to the SDK is correctly set in the environment variables under VULKAN_SDK.") @@ -313,7 +312,7 @@ project "Ktt" includedirs {"Source", "Libraries/CTPL-Ahajha", "Libraries/date-3", "Libraries/Json-3.9.1", "Libraries/pugixml-1.11.4"} defines {"KTT_LIBRARY"} targetname("ktt") - linkLibraries() + linkAllLibraries() -- Tutorials configuration if not _OPTIONS["no-tutorials"] then @@ -349,7 +348,7 @@ project "05ComputeApiInitializerOpenCl" files {"Tutorials/05ComputeApiInitializer/ComputeApiInitializerOpenCl.cpp", "Tutorials/05ComputeApiInitializer/OpenClKernel.cl"} includedirs {"Source"} links {"ktt"} - linkLibraries() + linkComputeLibraries() project "06VectorArgumentCustomizationOpenCl" kind "ConsoleApp" @@ -390,7 +389,7 @@ project "05ComputeApiInitializerCuda" files {"Tutorials/05ComputeApiInitializer/ComputeApiInitializerCuda.cpp", "Tutorials/05ComputeApiInitializer/CudaKernel.cu"} includedirs {"Source"} links {"ktt"} - linkLibraries() + linkComputeLibraries() project "06VectorArgumentCustomizationCuda" kind "ConsoleApp" From 4fb66861b26e217153e28ef8017adf308f95db89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 6 Oct 2021 11:35:41 +0200 Subject: [PATCH 10/63] * CUDA compiler options now preserve target device architecture option after the options are changed by user --- Source/ComputeEngine/Cuda/CudaEngine.cpp | 26 ++++++++++++++++-------- Source/ComputeEngine/Cuda/CudaEngine.h | 2 +- Source/Tuner.h | 6 +++--- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/Source/ComputeEngine/Cuda/CudaEngine.cpp b/Source/ComputeEngine/Cuda/CudaEngine.cpp index 4f7b9aa0..d2f38c82 100644 --- a/Source/ComputeEngine/Cuda/CudaEngine.cpp +++ b/Source/ComputeEngine/Cuda/CudaEngine.cpp @@ -47,7 +47,8 @@ CudaEngine::CudaEngine(const DeviceIndex deviceIndex, const uint32_t queueCount) m_Streams[id] = std::move(stream); } - InitializeCompilerOptions(); + Logger::LogDebug("Initializing default compiler options"); + 
SetCompilerOptions(""); m_DeviceInfo = GetDeviceInfo(0)[m_DeviceIndex]; #if defined(KTT_PROFILING_CUPTI) @@ -83,7 +84,8 @@ CudaEngine::CudaEngine(const ComputeApiInitializer& initializer, std::vector CudaEngine::CreateUserBuffer(KernelArgument& argumen return userBuffer; } -void CudaEngine::InitializeCompilerOptions() +std::string CudaEngine::GetDefaultCompilerOptions() const { - Logger::LogDebug("Initializing default compiler options"); - int computeCapabilityMajor = 0; int computeCapabilityMinor = 0; CheckError(cuDeviceGetAttribute(&computeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, m_Context->GetDevice()), "cuDeviceGetAttribute"); CheckError(cuDeviceGetAttribute(&computeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, m_Context->GetDevice()), "cuDeviceGetAttribute"); - - m_Configuration.SetCompilerOptions("--gpu-architecture=compute_" + std::to_string(computeCapabilityMajor) - + std::to_string(computeCapabilityMinor)); + + std::string result = "--gpu-architecture=compute_" + std::to_string(computeCapabilityMajor) + + std::to_string(computeCapabilityMinor); + return result; } #if defined(KTT_PROFILING_CUPTI) diff --git a/Source/ComputeEngine/Cuda/CudaEngine.h b/Source/ComputeEngine/Cuda/CudaEngine.h index 08583299..6c845de4 100644 --- a/Source/ComputeEngine/Cuda/CudaEngine.h +++ b/Source/ComputeEngine/Cuda/CudaEngine.h @@ -117,7 +117,7 @@ class CudaEngine : public ComputeEngine size_t GetSharedMemorySize(const std::vector& arguments) const; std::unique_ptr CreateBuffer(KernelArgument& argument); std::unique_ptr CreateUserBuffer(KernelArgument& argument, ComputeBuffer buffer); - void InitializeCompilerOptions(); + std::string GetDefaultCompilerOptions() const; #if defined(KTT_PROFILING_CUPTI) void InitializeCupti(); diff --git a/Source/Tuner.h b/Source/Tuner.h index 5c22e376..b19e4c60 100644 --- a/Source/Tuner.h +++ b/Source/Tuner.h @@ -687,9 +687,9 @@ class KTT_API Tuner DeviceInfo GetCurrentDeviceInfo() const; /** @fn static void 
SetLoggingLevel(const LoggingLevel level) - * Sets logging level for tuner. Default logging level is info. - * @param level Logging level which will be used by tuner. See ::LoggingLevel for more information. - */ + * Sets logging level for tuner. Default logging level is info. + * @param level Logging level which will be used by tuner. See ::LoggingLevel for more information. + */ static void SetLoggingLevel(const LoggingLevel level); /** @fn static void SetLoggingTarget(std::ostream& outputTarget) From 91d737aaf83212e67588d524054d09ded1e557f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 13 Oct 2021 14:39:10 +0200 Subject: [PATCH 11/63] * Added method for retrieving kernel definition id from name to tuner API --- Source/Kernel/KernelManager.cpp | 17 +++++++++++++++++ Source/Kernel/KernelManager.h | 1 + Source/Tuner.cpp | 13 +++++++++++++ Source/Tuner.h | 13 +++++++++++-- Source/TunerCore.cpp | 5 +++++ Source/TunerCore.h | 1 + 6 files changed, 48 insertions(+), 2 deletions(-) diff --git a/Source/Kernel/KernelManager.cpp b/Source/Kernel/KernelManager.cpp index 625fbb5b..dc150cad 100644 --- a/Source/Kernel/KernelManager.cpp +++ b/Source/Kernel/KernelManager.cpp @@ -162,6 +162,23 @@ KernelDefinition& KernelManager::GetDefinition(const KernelDefinitionId id) return const_cast(static_cast(this)->GetDefinition(id)); } +KernelDefinitionId KernelManager::GetDefinitionId(const std::string& name, const std::vector& typeNames) const +{ + const auto templatedName = KernelDefinition::CreateTemplatedName(name, typeNames); + + const auto iterator = std::find_if(m_Definitions.cbegin(), m_Definitions.cend(), [&name, &templatedName](const auto& pair) + { + return pair.second->GetName() == name && pair.second->GetTemplatedName() == templatedName; + }); + + if (iterator == m_Definitions.cend()) + { + return InvalidKernelDefinitionId; + } + + return iterator->first; +} + bool KernelManager::IsArgumentUsed(const ArgumentId id) const { for (const auto& 
definition : m_Definitions) diff --git a/Source/Kernel/KernelManager.h b/Source/Kernel/KernelManager.h index f59b9c8d..9854c047 100644 --- a/Source/Kernel/KernelManager.h +++ b/Source/Kernel/KernelManager.h @@ -40,6 +40,7 @@ class KernelManager Kernel& GetKernel(const KernelId id); const KernelDefinition& GetDefinition(const KernelDefinitionId id) const; KernelDefinition& GetDefinition(const KernelDefinitionId id); + KernelDefinitionId GetDefinitionId(const std::string& name, const std::vector& typeNames = {}) const; bool IsArgumentUsed(const ArgumentId id) const; private: diff --git a/Source/Tuner.cpp b/Source/Tuner.cpp index 3ca4a473..58640353 100644 --- a/Source/Tuner.cpp +++ b/Source/Tuner.cpp @@ -54,6 +54,19 @@ KernelDefinitionId Tuner::AddKernelDefinitionFromFile(const std::string& name, c } } +KernelDefinitionId Tuner::GetKernelDefinitionId(const std::string& name, const std::vector& typeNames) const +{ + try + { + return m_Tuner->GetKernelDefinitionId(name, typeNames); + } + catch (const KttException& exception) + { + TunerCore::Log(LoggingLevel::Error, exception.what()); + return InvalidKernelDefinitionId; + } +} + void Tuner::RemoveKernelDefinition(const KernelDefinitionId id) { try diff --git a/Source/Tuner.h b/Source/Tuner.h index b19e4c60..d9bec045 100644 --- a/Source/Tuner.h +++ b/Source/Tuner.h @@ -133,6 +133,14 @@ class KTT_API Tuner KernelDefinitionId AddKernelDefinitionFromFile(const std::string& name, const std::string& filePath, const DimensionVector& globalSize, const DimensionVector& localSize, const std::vector& typeNames = {}); + /** @fn KernelDefinitionId GetKernelDefinitionId(const std::string& name, const std::vector& typeNames = {}) const + * Retrieves kernel definition id from the tuner based on provided name and template arguments. + * @param name Name of a kernel definition. + * @param typeNames Names of types which were used to instantiate kernel template. Only supported in CUDA kernels. 
+ * @return Id of the corresponding kernel definition. If no such definition exists, InvalidKernelDefinitionId will be returned. + */ + KernelDefinitionId GetKernelDefinitionId(const std::string& name, const std::vector& typeNames = {}) const; + /** @fn void RemoveKernelDefinition(const KernelDefinitionId id) * Removes kernel definition with the specified id from the tuner. Note that definition can only be removed if it is not * associated with any kernel. @@ -148,7 +156,7 @@ class KTT_API Tuner */ void SetArguments(const KernelDefinitionId id, const std::vector& argumentIds); - /** @fn KernelId CreateSimpleKernel(const KernelDefinitionId definitionId) + /** @fn KernelId CreateSimpleKernel(const std::string& name, const KernelDefinitionId definitionId) * Creates simple kernel from the specified definition. * @param name Kernel name used during logging and output operations. The name must be unique. * @param definitionId Id of kernel definition which will be utilized by the kernel. @@ -156,7 +164,8 @@ class KTT_API Tuner */ KernelId CreateSimpleKernel(const std::string& name, const KernelDefinitionId definitionId); - /** @fn KernelId CreateCompositeKernel(const std::vector& definitionIds, KernelLauncher launcher = nullptr) + /** @fn KernelId CreateCompositeKernel(const std::string& name, const std::vector& definitionIds, + * KernelLauncher launcher = nullptr) * Creates composite kernel from the specified definitions. Note that kernel launcher is required in order to launch kernels * with multiple definitions. * @param name Kernel name used during logging and output operations. The name must be unique. 
diff --git a/Source/TunerCore.cpp b/Source/TunerCore.cpp index 001398c8..9766a9bd 100644 --- a/Source/TunerCore.cpp +++ b/Source/TunerCore.cpp @@ -46,6 +46,11 @@ KernelDefinitionId TunerCore::AddKernelDefinitionFromFile(const std::string& nam return m_KernelManager->AddKernelDefinitionFromFile(name, filePath, globalSize, localSize, typeNames); } +KernelDefinitionId TunerCore::GetKernelDefinitionId(const std::string& name, const std::vector& typeNames) const +{ + return m_KernelManager->GetDefinitionId(name, typeNames); +} + void TunerCore::RemoveKernelDefinition(const KernelDefinitionId id) { const auto& definition = m_KernelManager->GetDefinition(id); diff --git a/Source/TunerCore.h b/Source/TunerCore.h index 4ed284a4..91616077 100644 --- a/Source/TunerCore.h +++ b/Source/TunerCore.h @@ -31,6 +31,7 @@ class TunerCore const DimensionVector& localSize, const std::vector& typeNames = {}); KernelDefinitionId AddKernelDefinitionFromFile(const std::string& name, const std::string& filePath, const DimensionVector& globalSize, const DimensionVector& localSize, const std::vector& typeNames = {}); + KernelDefinitionId GetKernelDefinitionId(const std::string& name, const std::vector& typeNames = {}) const; void RemoveKernelDefinition(const KernelDefinitionId id); void SetArguments(const KernelDefinitionId id, const std::vector& argumentIds); KernelId CreateKernel(const std::string& name, const KernelDefinitionId definitionId); From 3bf51014803b1bfb2643e96db642409a6ce4af03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 18 Oct 2021 16:10:49 +0200 Subject: [PATCH 12/63] * Added Python bindings library * Initial work on KTT Python bindings --- Libraries/pybind11-2.8.0/pybind11/attr.h | 605 ++++ .../pybind11-2.8.0/pybind11/buffer_info.h | 144 + Libraries/pybind11-2.8.0/pybind11/cast.h | 1442 ++++++++++ Libraries/pybind11-2.8.0/pybind11/chrono.h | 213 ++ Libraries/pybind11-2.8.0/pybind11/common.h | 2 + Libraries/pybind11-2.8.0/pybind11/complex.h | 65 + 
.../pybind11-2.8.0/pybind11/detail/class.h | 709 +++++ .../pybind11-2.8.0/pybind11/detail/common.h | 1021 +++++++ .../pybind11-2.8.0/pybind11/detail/descr.h | 104 + .../pybind11-2.8.0/pybind11/detail/init.h | 346 +++ .../pybind11/detail/internals.h | 467 ++++ .../pybind11/detail/type_caster_base.h | 978 +++++++ .../pybind11-2.8.0/pybind11/detail/typeid.h | 55 + Libraries/pybind11-2.8.0/pybind11/eigen.h | 590 ++++ Libraries/pybind11-2.8.0/pybind11/embed.h | 284 ++ Libraries/pybind11-2.8.0/pybind11/eval.h | 163 ++ .../pybind11-2.8.0/pybind11/functional.h | 121 + Libraries/pybind11-2.8.0/pybind11/gil.h | 193 ++ Libraries/pybind11-2.8.0/pybind11/iostream.h | 275 ++ Libraries/pybind11-2.8.0/pybind11/numpy.h | 1741 ++++++++++++ Libraries/pybind11-2.8.0/pybind11/operators.h | 163 ++ Libraries/pybind11-2.8.0/pybind11/options.h | 65 + Libraries/pybind11-2.8.0/pybind11/pybind11.h | 2463 +++++++++++++++++ Libraries/pybind11-2.8.0/pybind11/pytypes.h | 1879 +++++++++++++ Libraries/pybind11-2.8.0/pybind11/stl.h | 375 +++ .../pybind11-2.8.0/pybind11/stl/filesystem.h | 103 + Libraries/pybind11-2.8.0/pybind11/stl_bind.h | 747 +++++ Source/Python/PythonModule.cpp | 71 + premake5.lua | 61 +- 29 files changed, 15442 insertions(+), 3 deletions(-) create mode 100644 Libraries/pybind11-2.8.0/pybind11/attr.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/buffer_info.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/cast.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/chrono.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/common.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/complex.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/detail/class.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/detail/common.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/detail/descr.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/detail/init.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/detail/internals.h create mode 100644 
Libraries/pybind11-2.8.0/pybind11/detail/type_caster_base.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/detail/typeid.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/eigen.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/embed.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/eval.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/functional.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/gil.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/iostream.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/numpy.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/operators.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/options.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/pybind11.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/pytypes.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/stl.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/stl/filesystem.h create mode 100644 Libraries/pybind11-2.8.0/pybind11/stl_bind.h create mode 100644 Source/Python/PythonModule.cpp diff --git a/Libraries/pybind11-2.8.0/pybind11/attr.h b/Libraries/pybind11-2.8.0/pybind11/attr.h new file mode 100644 index 00000000..0dedbc08 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/attr.h @@ -0,0 +1,605 @@ +/* + pybind11/attr.h: Infrastructure for processing custom + type and function attributes + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "cast.h" + +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +/// \addtogroup annotations +/// @{ + +/// Annotation for methods +struct is_method { handle class_; + explicit is_method(const handle &c) : class_(c) {} +}; + +/// Annotation for operators +struct is_operator { }; + +/// Annotation for classes that cannot be subclassed +struct is_final { }; + +/// Annotation for parent scope +struct scope { handle value; + explicit scope(const handle &s) : value(s) {} +}; + +/// Annotation for documentation +struct doc { const char *value; + explicit doc(const char *value) : value(value) {} +}; + +/// Annotation for function names +struct name { const char *value; + explicit name(const char *value) : value(value) {} +}; + +/// Annotation indicating that a function is an overload associated with a given "sibling" +struct sibling { handle value; + explicit sibling(const handle &value) : value(value.ptr()) {} +}; + +/// Annotation indicating that a class derives from another given type +template struct base { + + PYBIND11_DEPRECATED("base() was deprecated in favor of specifying 'T' as a template argument to class_") + base() { } // NOLINT(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute +}; + +/// Keep patient alive while nurse lives +template struct keep_alive { }; + +/// Annotation indicating that a class is involved in a multiple inheritance relationship +struct multiple_inheritance { }; + +/// Annotation which enables dynamic attributes, i.e. adds `__dict__` to a class +struct dynamic_attr { }; + +/// Annotation which enables the buffer protocol for a type +struct buffer_protocol { }; + +/// Annotation which requests that a special metaclass is created for a type +struct metaclass { + handle value; + + PYBIND11_DEPRECATED("py::metaclass() is no longer required. 
It's turned on by default now.") + // NOLINTNEXTLINE(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute + metaclass() {} + + /// Override pybind11's default metaclass + explicit metaclass(handle value) : value(value) { } +}; + +/// Specifies a custom callback with signature `void (PyHeapTypeObject*)` that +/// may be used to customize the Python type. +/// +/// The callback is invoked immediately before `PyType_Ready`. +/// +/// Note: This is an advanced interface, and uses of it may require changes to +/// work with later versions of pybind11. You may wish to consult the +/// implementation of `make_new_python_type` in `detail/classes.h` to understand +/// the context in which the callback will be run. +struct custom_type_setup { + using callback = std::function; + + explicit custom_type_setup(callback value) : value(std::move(value)) {} + + callback value; +}; + +/// Annotation that marks a class as local to the module: +struct module_local { const bool value; + constexpr explicit module_local(bool v = true) : value(v) {} +}; + +/// Annotation to mark enums as an arithmetic type +struct arithmetic { }; + +/// Mark a function for addition at the beginning of the existing overload chain instead of the end +struct prepend { }; + +/** \rst + A call policy which places one or more guard variables (``Ts...``) around the function call. + + For example, this definition: + + .. code-block:: cpp + + m.def("foo", foo, py::call_guard()); + + is equivalent to the following pseudocode: + + .. code-block:: cpp + + m.def("foo", [](args...) 
{ + T scope_guard; + return foo(args...); // forwarded arguments + }); + \endrst */ +template struct call_guard; + +template <> struct call_guard<> { using type = detail::void_type; }; + +template +struct call_guard { + static_assert(std::is_default_constructible::value, + "The guard type must be default constructible"); + + using type = T; +}; + +template +struct call_guard { + struct type { + T guard{}; // Compose multiple guard types with left-to-right default-constructor order + typename call_guard::type next{}; + }; +}; + +/// @} annotations + +PYBIND11_NAMESPACE_BEGIN(detail) +/* Forward declarations */ +enum op_id : int; +enum op_type : int; +struct undefined_t; +template struct op_; +void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret); + +/// Internal data structure which holds metadata about a keyword argument +struct argument_record { + const char *name; ///< Argument name + const char *descr; ///< Human-readable version of the argument value + handle value; ///< Associated Python object + bool convert : 1; ///< True if the argument is allowed to convert when loading + bool none : 1; ///< True if None is allowed when loading + + argument_record(const char *name, const char *descr, handle value, bool convert, bool none) + : name(name), descr(descr), value(value), convert(convert), none(none) { } +}; + +/// Internal data structure which holds metadata about a bound function (signature, overloads, etc.) +struct function_record { + function_record() + : is_constructor(false), is_new_style_constructor(false), is_stateless(false), + is_operator(false), is_method(false), has_args(false), + has_kwargs(false), has_kw_only_args(false), prepend(false) { } + + /// Function name + char *name = nullptr; /* why no C++ strings? They generate heavier code.. 
*/ + + // User-specified documentation string + char *doc = nullptr; + + /// Human-readable version of the function signature + char *signature = nullptr; + + /// List of registered keyword arguments + std::vector args; + + /// Pointer to lambda function which converts arguments and performs the actual call + handle (*impl) (function_call &) = nullptr; + + /// Storage for the wrapped function pointer and captured data, if any + void *data[3] = { }; + + /// Pointer to custom destructor for 'data' (if needed) + void (*free_data) (function_record *ptr) = nullptr; + + /// Return value policy associated with this function + return_value_policy policy = return_value_policy::automatic; + + /// True if name == '__init__' + bool is_constructor : 1; + + /// True if this is a new-style `__init__` defined in `detail/init.h` + bool is_new_style_constructor : 1; + + /// True if this is a stateless function pointer + bool is_stateless : 1; + + /// True if this is an operator (__add__), etc. + bool is_operator : 1; + + /// True if this is a method + bool is_method : 1; + + /// True if the function has a '*args' argument + bool has_args : 1; + + /// True if the function has a '**kwargs' argument + bool has_kwargs : 1; + + /// True once a 'py::kw_only' is encountered (any following args are keyword-only) + bool has_kw_only_args : 1; + + /// True if this function is to be inserted at the beginning of the overload resolution chain + bool prepend : 1; + + /// Number of arguments (including py::args and/or py::kwargs, if present) + std::uint16_t nargs; + + /// Number of trailing arguments (counted in `nargs`) that are keyword-only + std::uint16_t nargs_kw_only = 0; + + /// Number of leading arguments (counted in `nargs`) that are positional-only + std::uint16_t nargs_pos_only = 0; + + /// Python method object + PyMethodDef *def = nullptr; + + /// Python handle to the parent scope (a class or a module) + handle scope; + + /// Python handle to the sibling function representing an overload 
chain + handle sibling; + + /// Pointer to next overload + function_record *next = nullptr; +}; + +/// Special data structure which (temporarily) holds metadata about a bound class +struct type_record { + PYBIND11_NOINLINE type_record() + : multiple_inheritance(false), dynamic_attr(false), buffer_protocol(false), + default_holder(true), module_local(false), is_final(false) { } + + /// Handle to the parent scope + handle scope; + + /// Name of the class + const char *name = nullptr; + + // Pointer to RTTI type_info data structure + const std::type_info *type = nullptr; + + /// How large is the underlying C++ type? + size_t type_size = 0; + + /// What is the alignment of the underlying C++ type? + size_t type_align = 0; + + /// How large is the type's holder? + size_t holder_size = 0; + + /// The global operator new can be overridden with a class-specific variant + void *(*operator_new)(size_t) = nullptr; + + /// Function pointer to class_<..>::init_instance + void (*init_instance)(instance *, const void *) = nullptr; + + /// Function pointer to class_<..>::dealloc + void (*dealloc)(detail::value_and_holder &) = nullptr; + + /// List of base classes of the newly created type + list bases; + + /// Optional docstring + const char *doc = nullptr; + + /// Custom metaclass (optional) + handle metaclass; + + /// Custom type setup. + custom_type_setup::callback custom_type_setup_callback; + + /// Multiple inheritance marker + bool multiple_inheritance : 1; + + /// Does the class manage a __dict__? + bool dynamic_attr : 1; + + /// Does the class implement the buffer protocol? + bool buffer_protocol : 1; + + /// Is the default (unique_ptr) holder type used? + bool default_holder : 1; + + /// Is the class definition local to the module shared object? + bool module_local : 1; + + /// Is the class inheritable from python classes? 
+ bool is_final : 1; + + PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *)) { + auto base_info = detail::get_type_info(base, false); + if (!base_info) { + std::string tname(base.name()); + detail::clean_type_id(tname); + pybind11_fail("generic_type: type \"" + std::string(name) + + "\" referenced unknown base type \"" + tname + "\""); + } + + if (default_holder != base_info->default_holder) { + std::string tname(base.name()); + detail::clean_type_id(tname); + pybind11_fail("generic_type: type \"" + std::string(name) + "\" " + + (default_holder ? "does not have" : "has") + + " a non-default holder type while its base \"" + tname + "\" " + + (base_info->default_holder ? "does not" : "does")); + } + + bases.append((PyObject *) base_info->type); + + if (base_info->type->tp_dictoffset != 0) + dynamic_attr = true; + + if (caster) + base_info->implicit_casts.emplace_back(type, caster); + } +}; + +inline function_call::function_call(const function_record &f, handle p) : + func(f), parent(p) { + args.reserve(f.nargs); + args_convert.reserve(f.nargs); +} + +/// Tag for a new-style `__init__` defined in `detail/init.h` +struct is_new_style_constructor { }; + +/** + * Partial template specializations to process custom attributes provided to + * cpp_function_ and class_. These are either used to initialize the respective + * fields in the type_record and function_record data structures or executed at + * runtime to deal with custom call policies (e.g. keep_alive). 
+ */ +template struct process_attribute; + +template struct process_attribute_default { + /// Default implementation: do nothing + static void init(const T &, function_record *) { } + static void init(const T &, type_record *) { } + static void precall(function_call &) { } + static void postcall(function_call &, handle) { } +}; + +/// Process an attribute specifying the function's name +template <> struct process_attribute : process_attribute_default { + static void init(const name &n, function_record *r) { r->name = const_cast(n.value); } +}; + +/// Process an attribute specifying the function's docstring +template <> struct process_attribute : process_attribute_default { + static void init(const doc &n, function_record *r) { r->doc = const_cast(n.value); } +}; + +/// Process an attribute specifying the function's docstring (provided as a C-style string) +template <> struct process_attribute : process_attribute_default { + static void init(const char *d, function_record *r) { r->doc = const_cast(d); } + static void init(const char *d, type_record *r) { r->doc = const_cast(d); } +}; +template <> struct process_attribute : process_attribute { }; + +/// Process an attribute indicating the function's return value policy +template <> struct process_attribute : process_attribute_default { + static void init(const return_value_policy &p, function_record *r) { r->policy = p; } +}; + +/// Process an attribute which indicates that this is an overloaded function associated with a given sibling +template <> struct process_attribute : process_attribute_default { + static void init(const sibling &s, function_record *r) { r->sibling = s.value; } +}; + +/// Process an attribute which indicates that this function is a method +template <> struct process_attribute : process_attribute_default { + static void init(const is_method &s, function_record *r) { r->is_method = true; r->scope = s.class_; } +}; + +/// Process an attribute which indicates the parent scope of a method +template 
<> struct process_attribute : process_attribute_default { + static void init(const scope &s, function_record *r) { r->scope = s.value; } +}; + +/// Process an attribute which indicates that this function is an operator +template <> struct process_attribute : process_attribute_default { + static void init(const is_operator &, function_record *r) { r->is_operator = true; } +}; + +template <> struct process_attribute : process_attribute_default { + static void init(const is_new_style_constructor &, function_record *r) { r->is_new_style_constructor = true; } +}; + +inline void process_kw_only_arg(const arg &a, function_record *r) { + if (!a.name || a.name[0] == '\0') + pybind11_fail("arg(): cannot specify an unnamed argument after an kw_only() annotation"); + ++r->nargs_kw_only; +} + +/// Process a keyword argument attribute (*without* a default value) +template <> struct process_attribute : process_attribute_default { + static void init(const arg &a, function_record *r) { + if (r->is_method && r->args.empty()) + r->args.emplace_back("self", nullptr, handle(), true /*convert*/, false /*none not allowed*/); + r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, a.flag_none); + + if (r->has_kw_only_args) process_kw_only_arg(a, r); + } +}; + +/// Process a keyword argument attribute (*with* a default value) +template <> struct process_attribute : process_attribute_default { + static void init(const arg_v &a, function_record *r) { + if (r->is_method && r->args.empty()) + r->args.emplace_back("self", nullptr /*descr*/, handle() /*parent*/, true /*convert*/, false /*none not allowed*/); + + if (!a.value) { +#if !defined(NDEBUG) + std::string descr("'"); + if (a.name) descr += std::string(a.name) + ": "; + descr += a.type + "'"; + if (r->is_method) { + if (r->name) + descr += " in method '" + (std::string) str(r->scope) + "." 
+ (std::string) r->name + "'"; + else + descr += " in method of '" + (std::string) str(r->scope) + "'"; + } else if (r->name) { + descr += " in function '" + (std::string) r->name + "'"; + } + pybind11_fail("arg(): could not convert default argument " + + descr + " into a Python object (type not registered yet?)"); +#else + pybind11_fail("arg(): could not convert default argument " + "into a Python object (type not registered yet?). " + "Compile in debug mode for more information."); +#endif + } + r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none); + + if (r->has_kw_only_args) process_kw_only_arg(a, r); + } +}; + +/// Process a keyword-only-arguments-follow pseudo argument +template <> struct process_attribute : process_attribute_default { + static void init(const kw_only &, function_record *r) { + r->has_kw_only_args = true; + } +}; + +/// Process a positional-only-argument maker +template <> struct process_attribute : process_attribute_default { + static void init(const pos_only &, function_record *r) { + r->nargs_pos_only = static_cast(r->args.size()); + } +}; + +/// Process a parent class attribute. 
Single inheritance only (class_ itself already guarantees that) +template +struct process_attribute::value>> : process_attribute_default { + static void init(const handle &h, type_record *r) { r->bases.append(h); } +}; + +/// Process a parent class attribute (deprecated, does not support multiple inheritance) +template +struct process_attribute> : process_attribute_default> { + static void init(const base &, type_record *r) { r->add_base(typeid(T), nullptr); } +}; + +/// Process a multiple inheritance attribute +template <> +struct process_attribute : process_attribute_default { + static void init(const multiple_inheritance &, type_record *r) { r->multiple_inheritance = true; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const dynamic_attr &, type_record *r) { r->dynamic_attr = true; } +}; + +template <> +struct process_attribute { + static void init(const custom_type_setup &value, type_record *r) { + r->custom_type_setup_callback = value.value; + } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const is_final &, type_record *r) { r->is_final = true; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const buffer_protocol &, type_record *r) { r->buffer_protocol = true; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const metaclass &m, type_record *r) { r->metaclass = m.value; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const module_local &l, type_record *r) { r->module_local = l.value; } +}; + +/// Process a 'prepend' attribute, putting this at the beginning of the overload chain +template <> +struct process_attribute : process_attribute_default { + static void init(const prepend &, function_record *r) { r->prepend = true; } +}; + +/// Process an 'arithmetic' attribute for enums (does nothing here) +template <> +struct 
process_attribute : process_attribute_default {}; + +template +struct process_attribute> : process_attribute_default> { }; + +/** + * Process a keep_alive call policy -- invokes keep_alive_impl during the + * pre-call handler if both Nurse, Patient != 0 and use the post-call handler + * otherwise + */ +template struct process_attribute> : public process_attribute_default> { + template = 0> + static void precall(function_call &call) { keep_alive_impl(Nurse, Patient, call, handle()); } + template = 0> + static void postcall(function_call &, handle) { } + template = 0> + static void precall(function_call &) { } + template = 0> + static void postcall(function_call &call, handle ret) { keep_alive_impl(Nurse, Patient, call, ret); } +}; + +/// Recursively iterate over variadic template arguments +template struct process_attributes { + static void init(const Args&... args, function_record *r) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r); + PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r); + using expander = int[]; + (void) expander{ + 0, ((void) process_attribute::type>::init(args, r), 0)...}; + } + static void init(const Args&... 
args, type_record *r) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r); + PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r); + using expander = int[]; + (void) expander{0, + (process_attribute::type>::init(args, r), 0)...}; + } + static void precall(function_call &call) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(call); + using expander = int[]; + (void) expander{0, + (process_attribute::type>::precall(call), 0)...}; + } + static void postcall(function_call &call, handle fn_ret) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(call, fn_ret); + PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(fn_ret); + using expander = int[]; + (void) expander{ + 0, (process_attribute::type>::postcall(call, fn_ret), 0)...}; + } +}; + +template +using is_call_guard = is_instantiation; + +/// Extract the ``type`` from the first `call_guard` in `Extras...` (or `void_type` if none found) +template +using extract_guard_t = typename exactly_one_t, Extra...>::type; + +/// Check the number of named arguments at compile time +template ::value...), + size_t self = constexpr_sum(std::is_same::value...)> +constexpr bool expected_num_args(size_t nargs, bool has_args, bool has_kwargs) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(nargs, has_args, has_kwargs); + return named == 0 || (self + named + size_t(has_args) + size_t(has_kwargs)) == nargs; +} + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/buffer_info.h b/Libraries/pybind11-2.8.0/pybind11/buffer_info.h new file mode 100644 index 00000000..eba68d1a --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/buffer_info.h @@ -0,0 +1,144 @@ +/* + pybind11/buffer_info.h: Python buffer object interface + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "detail/common.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +PYBIND11_NAMESPACE_BEGIN(detail) + +// Default, C-style strides +inline std::vector c_strides(const std::vector &shape, ssize_t itemsize) { + auto ndim = shape.size(); + std::vector strides(ndim, itemsize); + if (ndim > 0) + for (size_t i = ndim - 1; i > 0; --i) + strides[i - 1] = strides[i] * shape[i]; + return strides; +} + +// F-style strides; default when constructing an array_t with `ExtraFlags & f_style` +inline std::vector f_strides(const std::vector &shape, ssize_t itemsize) { + auto ndim = shape.size(); + std::vector strides(ndim, itemsize); + for (size_t i = 1; i < ndim; ++i) + strides[i] = strides[i - 1] * shape[i - 1]; + return strides; +} + +PYBIND11_NAMESPACE_END(detail) + +/// Information record describing a Python buffer object +struct buffer_info { + void *ptr = nullptr; // Pointer to the underlying storage + ssize_t itemsize = 0; // Size of individual items in bytes + ssize_t size = 0; // Total number of entries + std::string format; // For homogeneous buffers, this should be set to format_descriptor::format() + ssize_t ndim = 0; // Number of dimensions + std::vector shape; // Shape of the tensor (1 entry per dimension) + std::vector strides; // Number of bytes between adjacent entries (for each per dimension) + bool readonly = false; // flag to indicate if the underlying storage may be written to + + buffer_info() = default; + + buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, + detail::any_container shape_in, detail::any_container strides_in, bool readonly=false) + : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim), + shape(std::move(shape_in)), strides(std::move(strides_in)), readonly(readonly) { + if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size()) + pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length"); + for (size_t i = 0; i < (size_t) ndim; ++i) + 
size *= shape[i]; + } + + template + buffer_info(T *ptr, detail::any_container shape_in, detail::any_container strides_in, bool readonly=false) + : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor::format(), static_cast(shape_in->size()), std::move(shape_in), std::move(strides_in), readonly) { } + + buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size, bool readonly=false) + : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly) { } + + template + buffer_info(T *ptr, ssize_t size, bool readonly=false) + : buffer_info(ptr, sizeof(T), format_descriptor::format(), size, readonly) { } + + template + buffer_info(const T *ptr, ssize_t size, bool readonly=true) + : buffer_info(const_cast(ptr), sizeof(T), format_descriptor::format(), size, readonly) { } + + explicit buffer_info(Py_buffer *view, bool ownview = true) + : buffer_info(view->buf, view->itemsize, view->format, view->ndim, + {view->shape, view->shape + view->ndim}, + /* Though buffer::request() requests PyBUF_STRIDES, ctypes objects + * ignore this flag and return a view with NULL strides. + * When strides are NULL, build them manually. */ + view->strides + ? 
std::vector(view->strides, view->strides + view->ndim) + : detail::c_strides({view->shape, view->shape + view->ndim}, view->itemsize), + (view->readonly != 0)) { + this->m_view = view; + this->ownview = ownview; + } + + buffer_info(const buffer_info &) = delete; + buffer_info& operator=(const buffer_info &) = delete; + + buffer_info(buffer_info &&other) noexcept { (*this) = std::move(other); } + + buffer_info &operator=(buffer_info &&rhs) noexcept { + ptr = rhs.ptr; + itemsize = rhs.itemsize; + size = rhs.size; + format = std::move(rhs.format); + ndim = rhs.ndim; + shape = std::move(rhs.shape); + strides = std::move(rhs.strides); + std::swap(m_view, rhs.m_view); + std::swap(ownview, rhs.ownview); + readonly = rhs.readonly; + return *this; + } + + ~buffer_info() { + if (m_view && ownview) { PyBuffer_Release(m_view); delete m_view; } + } + + Py_buffer *view() const { return m_view; } + Py_buffer *&view() { return m_view; } +private: + struct private_ctr_tag { }; + + buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, + detail::any_container &&shape_in, detail::any_container &&strides_in, bool readonly) + : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) { } + + Py_buffer *m_view = nullptr; + bool ownview = false; +}; + +PYBIND11_NAMESPACE_BEGIN(detail) + +template struct compare_buffer_info { + static bool compare(const buffer_info& b) { + return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T); + } +}; + +template struct compare_buffer_info::value>> { + static bool compare(const buffer_info& b) { + return (size_t) b.itemsize == sizeof(T) && (b.format == format_descriptor::value || + ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned::value ? "L" : "l")) || + ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned::value ? 
"N" : "n"))); + } +}; + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/cast.h b/Libraries/pybind11-2.8.0/pybind11/cast.h new file mode 100644 index 00000000..1ec2080f --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/cast.h @@ -0,0 +1,1442 @@ +/* + pybind11/cast.h: Partial template specializations to cast between + C++ and Python types + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pytypes.h" +#include "detail/common.h" +#include "detail/descr.h" +#include "detail/type_caster_base.h" +#include "detail/typeid.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(PYBIND11_CPP17) +# if defined(__has_include) +# if __has_include() +# define PYBIND11_HAS_STRING_VIEW +# endif +# elif defined(_MSC_VER) +# define PYBIND11_HAS_STRING_VIEW +# endif +#endif +#ifdef PYBIND11_HAS_STRING_VIEW +#include +#endif + +#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L +# define PYBIND11_HAS_U8STRING +#endif + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +template class type_caster : public type_caster_base { }; +template using make_caster = type_caster>; + +// Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T +template typename make_caster::template cast_op_type cast_op(make_caster &caster) { + return caster.operator typename make_caster::template cast_op_type(); +} +template typename make_caster::template cast_op_type::type> +cast_op(make_caster &&caster) { + return std::move(caster).operator + typename make_caster::template cast_op_type::type>(); +} + +template class type_caster> { +private: + using caster_t = make_caster; + caster_t subcaster; + using reference_t = type&; + using 
subcaster_cast_op_type = + typename caster_t::template cast_op_type; + + static_assert(std::is_same::type &, subcaster_cast_op_type>::value || + std::is_same::value, + "std::reference_wrapper caster requires T to have a caster with an " + "`operator T &()` or `operator const T &()`"); +public: + bool load(handle src, bool convert) { return subcaster.load(src, convert); } + static constexpr auto name = caster_t::name; + static handle cast(const std::reference_wrapper &src, return_value_policy policy, handle parent) { + // It is definitely wrong to take ownership of this pointer, so mask that rvp + if (policy == return_value_policy::take_ownership || policy == return_value_policy::automatic) + policy = return_value_policy::automatic_reference; + return caster_t::cast(&src.get(), policy, parent); + } + template using cast_op_type = std::reference_wrapper; + explicit operator std::reference_wrapper() { return cast_op(subcaster); } +}; + +#define PYBIND11_TYPE_CASTER(type, py_name) \ +protected: \ + type value; \ + \ +public: \ + static constexpr auto name = py_name; \ + template >::value, int> = 0> \ + static handle cast(T_ *src, return_value_policy policy, handle parent) { \ + if (!src) \ + return none().release(); \ + if (policy == return_value_policy::take_ownership) { \ + auto h = cast(std::move(*src), policy, parent); \ + delete src; \ + return h; \ + } \ + return cast(*src, policy, parent); \ + } \ + operator type *() { return &value; } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &() { return value; } /* NOLINT(bugprone-macro-parentheses) */ \ + operator type &&() && { return std::move(value); } /* NOLINT(bugprone-macro-parentheses) */ \ + template \ + using cast_op_type = pybind11::detail::movable_cast_op_type + +template using is_std_char_type = any_of< + std::is_same, /* std::string */ +#if defined(PYBIND11_HAS_U8STRING) + std::is_same, /* std::u8string */ +#endif + std::is_same, /* std::u16string */ + std::is_same, /* std::u32string */ + 
std::is_same /* std::wstring */ +>; + + +template +struct type_caster::value && !is_std_char_type::value>> { + using _py_type_0 = conditional_t; + using _py_type_1 = conditional_t::value, _py_type_0, typename std::make_unsigned<_py_type_0>::type>; + using py_type = conditional_t::value, double, _py_type_1>; +public: + + bool load(handle src, bool convert) { + py_type py_value; + + if (!src) + return false; + +#if !defined(PYPY_VERSION) + auto index_check = [](PyObject *o) { return PyIndex_Check(o); }; +#else + // In PyPy 7.3.3, `PyIndex_Check` is implemented by calling `__index__`, + // while CPython only considers the existence of `nb_index`/`__index__`. + auto index_check = [](PyObject *o) { return hasattr(o, "__index__"); }; +#endif + + if (std::is_floating_point::value) { + if (convert || PyFloat_Check(src.ptr())) + py_value = (py_type) PyFloat_AsDouble(src.ptr()); + else + return false; + } else if (PyFloat_Check(src.ptr()) + || (!convert && !PYBIND11_LONG_CHECK(src.ptr()) && !index_check(src.ptr()))) { + return false; + } else { + handle src_or_index = src; +#if PY_VERSION_HEX < 0x03080000 + object index; + if (!PYBIND11_LONG_CHECK(src.ptr())) { // So: index_check(src.ptr()) + index = reinterpret_steal(PyNumber_Index(src.ptr())); + if (!index) { + PyErr_Clear(); + if (!convert) + return false; + } + else { + src_or_index = index; + } + } +#endif + if (std::is_unsigned::value) { + py_value = as_unsigned(src_or_index.ptr()); + } else { // signed integer: + py_value = sizeof(T) <= sizeof(long) + ? 
(py_type) PyLong_AsLong(src_or_index.ptr()) + : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr()); + } + } + + // Python API reported an error + bool py_err = py_value == (py_type) -1 && PyErr_Occurred(); + + // Check to see if the conversion is valid (integers should match exactly) + // Signed/unsigned checks happen elsewhere + if (py_err || (std::is_integral::value && sizeof(py_type) != sizeof(T) && py_value != (py_type) (T) py_value)) { + PyErr_Clear(); + if (py_err && convert && (PyNumber_Check(src.ptr()) != 0)) { + auto tmp = reinterpret_steal(std::is_floating_point::value + ? PyNumber_Float(src.ptr()) + : PyNumber_Long(src.ptr())); + PyErr_Clear(); + return load(tmp, false); + } + return false; + } + + value = (T) py_value; + return true; + } + + template + static typename std::enable_if::value, handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PyFloat_FromDouble((double) src); + } + + template + static typename std::enable_if::value && std::is_signed::value && (sizeof(U) <= sizeof(long)), handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PYBIND11_LONG_FROM_SIGNED((long) src); + } + + template + static typename std::enable_if::value && std::is_unsigned::value && (sizeof(U) <= sizeof(unsigned long)), handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PYBIND11_LONG_FROM_UNSIGNED((unsigned long) src); + } + + template + static typename std::enable_if::value && std::is_signed::value && (sizeof(U) > sizeof(long)), handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PyLong_FromLongLong((long long) src); + } + + template + static typename std::enable_if::value && std::is_unsigned::value && (sizeof(U) > sizeof(unsigned long)), handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PyLong_FromUnsignedLongLong((unsigned long long) src); + } + + 
PYBIND11_TYPE_CASTER(T, _::value>("int", "float")); +}; + +template struct void_caster { +public: + bool load(handle src, bool) { + if (src && src.is_none()) + return true; + return false; + } + static handle cast(T, return_value_policy /* policy */, handle /* parent */) { + return none().inc_ref(); + } + PYBIND11_TYPE_CASTER(T, _("None")); +}; + +template <> class type_caster : public void_caster {}; + +template <> class type_caster : public type_caster { +public: + using type_caster::cast; + + bool load(handle h, bool) { + if (!h) { + return false; + } + if (h.is_none()) { + value = nullptr; + return true; + } + + /* Check if this is a capsule */ + if (isinstance(h)) { + value = reinterpret_borrow(h); + return true; + } + + /* Check if this is a C++ type */ + auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr()); + if (bases.size() == 1) { // Only allowing loading from a single-value type + value = values_and_holders(reinterpret_cast(h.ptr())).begin()->value_ptr(); + return true; + } + + /* Fail */ + return false; + } + + static handle cast(const void *ptr, return_value_policy /* policy */, handle /* parent */) { + if (ptr) + return capsule(ptr).release(); + return none().inc_ref(); + } + + template using cast_op_type = void*&; + explicit operator void *&() { return value; } + static constexpr auto name = _("capsule"); +private: + void *value = nullptr; +}; + +template <> class type_caster : public void_caster { }; + +template <> class type_caster { +public: + bool load(handle src, bool convert) { + if (!src) return false; + if (src.ptr() == Py_True) { + value = true; + return true; + } + if (src.ptr() == Py_False) { + value = false; + return true; + } + if (convert || (std::strcmp("numpy.bool_", Py_TYPE(src.ptr())->tp_name) == 0)) { + // (allow non-implicit conversion for numpy booleans) + + Py_ssize_t res = -1; + if (src.is_none()) { + res = 0; // None is implicitly converted to False + } + #if defined(PYPY_VERSION) + // On PyPy, check that 
"__bool__" (or "__nonzero__" on Python 2.7) attr exists + else if (hasattr(src, PYBIND11_BOOL_ATTR)) { + res = PyObject_IsTrue(src.ptr()); + } + #else + // Alternate approach for CPython: this does the same as the above, but optimized + // using the CPython API so as to avoid an unneeded attribute lookup. + else if (auto tp_as_number = src.ptr()->ob_type->tp_as_number) { + if (PYBIND11_NB_BOOL(tp_as_number)) { + res = (*PYBIND11_NB_BOOL(tp_as_number))(src.ptr()); + } + } + #endif + if (res == 0 || res == 1) { + value = (res != 0); + return true; + } + PyErr_Clear(); + } + return false; + } + static handle cast(bool src, return_value_policy /* policy */, handle /* parent */) { + return handle(src ? Py_True : Py_False).inc_ref(); + } + PYBIND11_TYPE_CASTER(bool, _("bool")); +}; + +// Helper class for UTF-{8,16,32} C++ stl strings: +template struct string_caster { + using CharT = typename StringType::value_type; + + // Simplify life by being able to assume standard char sizes (the standard only guarantees + // minimums, but Python requires exact sizes) + static_assert(!std::is_same::value || sizeof(CharT) == 1, "Unsupported char size != 1"); +#if defined(PYBIND11_HAS_U8STRING) + static_assert(!std::is_same::value || sizeof(CharT) == 1, "Unsupported char8_t size != 1"); +#endif + static_assert(!std::is_same::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2"); + static_assert(!std::is_same::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4"); + // wchar_t can be either 16 bits (Windows) or 32 (everywhere else) + static_assert(!std::is_same::value || sizeof(CharT) == 2 || sizeof(CharT) == 4, + "Unsupported wchar_t size != 2/4"); + static constexpr size_t UTF_N = 8 * sizeof(CharT); + + bool load(handle src, bool) { +#if PY_MAJOR_VERSION < 3 + object temp; +#endif + handle load_src = src; + if (!src) { + return false; + } + if (!PyUnicode_Check(load_src.ptr())) { +#if PY_MAJOR_VERSION >= 3 + return load_bytes(load_src); +#else + if 
(std::is_same::value) { + return load_bytes(load_src); + } + + // The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false + if (!PYBIND11_BYTES_CHECK(load_src.ptr())) + return false; + + temp = reinterpret_steal(PyUnicode_FromObject(load_src.ptr())); + if (!temp) { PyErr_Clear(); return false; } + load_src = temp; +#endif + } + +#if PY_VERSION_HEX >= 0x03030000 + // On Python >= 3.3, for UTF-8 we avoid the need for a temporary `bytes` + // object by using `PyUnicode_AsUTF8AndSize`. + if (PYBIND11_SILENCE_MSVC_C4127(UTF_N == 8)) { + Py_ssize_t size = -1; + const auto *buffer + = reinterpret_cast(PyUnicode_AsUTF8AndSize(load_src.ptr(), &size)); + if (!buffer) { + PyErr_Clear(); + return false; + } + value = StringType(buffer, static_cast(size)); + return true; + } +#endif + + auto utfNbytes = reinterpret_steal(PyUnicode_AsEncodedString( + load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr)); + if (!utfNbytes) { PyErr_Clear(); return false; } + + const auto *buffer = reinterpret_cast(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr())); + size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT); + // Skip BOM for UTF-16/32 + if (PYBIND11_SILENCE_MSVC_C4127(UTF_N > 8)) { + buffer++; + length--; + } + value = StringType(buffer, length); + + // If we're loading a string_view we need to keep the encoded Python object alive: + if (IsView) + loader_life_support::add_patient(utfNbytes); + + return true; + } + + static handle cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) { + const char *buffer = reinterpret_cast(src.data()); + auto nbytes = ssize_t(src.size() * sizeof(CharT)); + handle s = decode_utfN(buffer, nbytes); + if (!s) throw error_already_set(); + return s; + } + + PYBIND11_TYPE_CASTER(StringType, _(PYBIND11_STRING_NAME)); + +private: + static handle decode_utfN(const char *buffer, ssize_t nbytes) { +#if !defined(PYPY_VERSION) + return + UTF_N == 8 ? 
PyUnicode_DecodeUTF8(buffer, nbytes, nullptr) : + UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) : + PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr); +#else + // PyPy segfaults when on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as well), + // so bypass the whole thing by just passing the encoding as a string value, which works properly: + return PyUnicode_Decode(buffer, nbytes, UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr); +#endif + } + + // When loading into a std::string or char*, accept a bytes object as-is (i.e. + // without any encoding/decoding attempt). For other C++ char sizes this is a no-op. + // which supports loading a unicode from a str, doesn't take this path. + template + bool load_bytes(enable_if_t::value, handle> src) { + if (PYBIND11_BYTES_CHECK(src.ptr())) { + // We were passed a Python 3 raw bytes; accept it into a std::string or char* + // without any encoding attempt. + const char *bytes = PYBIND11_BYTES_AS_STRING(src.ptr()); + if (bytes) { + value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr())); + return true; + } + } + + return false; + } + + template + bool load_bytes(enable_if_t::value, handle>) { return false; } +}; + +template +struct type_caster, enable_if_t::value>> + : string_caster> {}; + +#ifdef PYBIND11_HAS_STRING_VIEW +template +struct type_caster, enable_if_t::value>> + : string_caster, true> {}; +#endif + +// Type caster for C-style strings. We basically use a std::string type caster, but also add the +// ability to use None as a nullptr char* (which the string caster doesn't allow). 
+template struct type_caster::value>> { + using StringType = std::basic_string; + using StringCaster = type_caster; + StringCaster str_caster; + bool none = false; + CharT one_char = 0; +public: + bool load(handle src, bool convert) { + if (!src) return false; + if (src.is_none()) { + // Defer accepting None to other overloads (if we aren't in convert mode): + if (!convert) return false; + none = true; + return true; + } + return str_caster.load(src, convert); + } + + static handle cast(const CharT *src, return_value_policy policy, handle parent) { + if (src == nullptr) return pybind11::none().inc_ref(); + return StringCaster::cast(StringType(src), policy, parent); + } + + static handle cast(CharT src, return_value_policy policy, handle parent) { + if (std::is_same::value) { + handle s = PyUnicode_DecodeLatin1((const char *) &src, 1, nullptr); + if (!s) throw error_already_set(); + return s; + } + return StringCaster::cast(StringType(1, src), policy, parent); + } + + explicit operator CharT *() { + return none ? nullptr : const_cast(static_cast(str_caster).c_str()); + } + explicit operator CharT &() { + if (none) + throw value_error("Cannot convert None to a character"); + + auto &value = static_cast(str_caster); + size_t str_len = value.size(); + if (str_len == 0) + throw value_error("Cannot convert empty string to a character"); + + // If we're in UTF-8 mode, we have two possible failures: one for a unicode character that + // is too high, and one for multiple unicode characters (caught later), so we need to figure + // out how long the first encoded character is in bytes to distinguish between these two + // errors. We also allow want to allow unicode characters U+0080 through U+00FF, as those + // can fit into a single char value. 
+ if (PYBIND11_SILENCE_MSVC_C4127(StringCaster::UTF_N == 8) && str_len > 1 && str_len <= 4) { + auto v0 = static_cast(value[0]); + // low bits only: 0-127 + // 0b110xxxxx - start of 2-byte sequence + // 0b1110xxxx - start of 3-byte sequence + // 0b11110xxx - start of 4-byte sequence + size_t char0_bytes = (v0 & 0x80) == 0 ? 1 + : (v0 & 0xE0) == 0xC0 ? 2 + : (v0 & 0xF0) == 0xE0 ? 3 + : 4; + + if (char0_bytes == str_len) { + // If we have a 128-255 value, we can decode it into a single char: + if (char0_bytes == 2 && (v0 & 0xFC) == 0xC0) { // 0x110000xx 0x10xxxxxx + one_char = static_cast(((v0 & 3) << 6) + (static_cast(value[1]) & 0x3F)); + return one_char; + } + // Otherwise we have a single character, but it's > U+00FF + throw value_error("Character code point not in range(0x100)"); + } + } + + // UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a + // surrogate pair with total length 2 instantly indicates a range error (but not a "your + // string was too long" error). + else if (PYBIND11_SILENCE_MSVC_C4127(StringCaster::UTF_N == 16) && str_len == 2) { + one_char = static_cast(value[0]); + if (one_char >= 0xD800 && one_char < 0xE000) + throw value_error("Character code point not in range(0x10000)"); + } + + if (str_len != 1) + throw value_error("Expected a character, but multi-character string found"); + + one_char = value[0]; + return one_char; + } + + static constexpr auto name = _(PYBIND11_STRING_NAME); + template using cast_op_type = pybind11::detail::cast_op_type<_T>; +}; + +// Base implementation for std::tuple and std::pair +template class Tuple, typename... 
Ts> class tuple_caster { + using type = Tuple; + static constexpr auto size = sizeof...(Ts); + using indices = make_index_sequence; +public: + + bool load(handle src, bool convert) { + if (!isinstance(src)) + return false; + const auto seq = reinterpret_borrow(src); + if (seq.size() != size) + return false; + return load_impl(seq, convert, indices{}); + } + + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + return cast_impl(std::forward(src), policy, parent, indices{}); + } + + // copied from the PYBIND11_TYPE_CASTER macro + template + static handle cast(T *src, return_value_policy policy, handle parent) { + if (!src) return none().release(); + if (policy == return_value_policy::take_ownership) { + auto h = cast(std::move(*src), policy, parent); + delete src; + return h; + } + return cast(*src, policy, parent); + } + + static constexpr auto name = _("Tuple[") + concat(make_caster::name...) + _("]"); + + template using cast_op_type = type; + + explicit operator type() & { return implicit_cast(indices{}); } + explicit operator type() && { return std::move(*this).implicit_cast(indices{}); } + +protected: + template + type implicit_cast(index_sequence) & { return type(cast_op(std::get(subcasters))...); } + template + type implicit_cast(index_sequence) && { return type(cast_op(std::move(std::get(subcasters)))...); } + + static constexpr bool load_impl(const sequence &, bool, index_sequence<>) { return true; } + + template + bool load_impl(const sequence &seq, bool convert, index_sequence) { +#ifdef __cpp_fold_expressions + if ((... 
|| !std::get(subcasters).load(seq[Is], convert))) + return false; +#else + for (bool r : {std::get(subcasters).load(seq[Is], convert)...}) + if (!r) + return false; +#endif + return true; + } + + /* Implementation: Convert a C++ tuple into a Python tuple */ + template + static handle cast_impl(T &&src, return_value_policy policy, handle parent, index_sequence) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(src, policy, parent); + PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(policy, parent); + std::array entries{{ + reinterpret_steal(make_caster::cast(std::get(std::forward(src)), policy, parent))... + }}; + for (const auto &entry: entries) + if (!entry) + return handle(); + tuple result(size); + int counter = 0; + for (auto & entry: entries) + PyTuple_SET_ITEM(result.ptr(), counter++, entry.release().ptr()); + return result.release(); + } + + Tuple...> subcasters; +}; + +template class type_caster> + : public tuple_caster {}; + +template class type_caster> + : public tuple_caster {}; + +/// Helper class which abstracts away certain actions. Users can provide specializations for +/// custom holders, but it's only necessary if the type has a non-standard interface. +template +struct holder_helper { + static auto get(const T &p) -> decltype(p.get()) { return p.get(); } +}; + +/// Type caster for holder types like std::shared_ptr, etc. +/// The SFINAE hook is provided to help work around the current lack of support +/// for smart-pointer interoperability. Please consider it an implementation +/// detail that may change in the future, as formal support for smart-pointer +/// interoperability is added into pybind11. 
+template +struct copyable_holder_caster : public type_caster_base { +public: + using base = type_caster_base; + static_assert(std::is_base_of>::value, + "Holder classes are only supported for custom types"); + using base::base; + using base::cast; + using base::typeinfo; + using base::value; + + bool load(handle src, bool convert) { + return base::template load_impl>(src, convert); + } + + explicit operator type*() { return this->value; } + // static_cast works around compiler error with MSVC 17 and CUDA 10.2 + // see issue #2180 + explicit operator type&() { return *(static_cast(this->value)); } + explicit operator holder_type*() { return std::addressof(holder); } + explicit operator holder_type&() { return holder; } + + static handle cast(const holder_type &src, return_value_policy, handle) { + const auto *ptr = holder_helper::get(src); + return type_caster_base::cast_holder(ptr, &src); + } + +protected: + friend class type_caster_generic; + void check_holder_compat() { + if (typeinfo->default_holder) + throw cast_error("Unable to load a custom holder type from a default-holder instance"); + } + + bool load_value(value_and_holder &&v_h) { + if (v_h.holder_constructed()) { + value = v_h.value_ptr(); + holder = v_h.template holder(); + return true; + } + throw cast_error("Unable to cast from non-held to held instance (T& to Holder) " +#if defined(NDEBUG) + "(compile in debug mode for type information)"); +#else + "of type '" + + type_id() + "''"); +#endif + } + + template ::value, int> = 0> + bool try_implicit_casts(handle, bool) { return false; } + + template ::value, int> = 0> + bool try_implicit_casts(handle src, bool convert) { + for (auto &cast : typeinfo->implicit_casts) { + copyable_holder_caster sub_caster(*cast.first); + if (sub_caster.load(src, convert)) { + value = cast.second(sub_caster.value); + holder = holder_type(sub_caster.holder, (type *) value); + return true; + } + } + return false; + } + + static bool try_direct_conversions(handle) { return 
false; } + + + holder_type holder; +}; + +/// Specialize for the common std::shared_ptr, so users don't need to +template +class type_caster> : public copyable_holder_caster> { }; + +/// Type caster for holder types like std::unique_ptr. +/// Please consider the SFINAE hook an implementation detail, as explained +/// in the comment for the copyable_holder_caster. +template +struct move_only_holder_caster { + static_assert(std::is_base_of, type_caster>::value, + "Holder classes are only supported for custom types"); + + static handle cast(holder_type &&src, return_value_policy, handle) { + auto *ptr = holder_helper::get(src); + return type_caster_base::cast_holder(ptr, std::addressof(src)); + } + static constexpr auto name = type_caster_base::name; +}; + +template +class type_caster> + : public move_only_holder_caster> { }; + +template +using type_caster_holder = conditional_t::value, + copyable_holder_caster, + move_only_holder_caster>; + +template struct always_construct_holder { static constexpr bool value = Value; }; + +/// Create a specialization for custom holder types (silently ignores std::shared_ptr) +#define PYBIND11_DECLARE_HOLDER_TYPE(type, holder_type, ...) 
\ + namespace pybind11 { namespace detail { \ + template \ + struct always_construct_holder : always_construct_holder { }; \ + template \ + class type_caster::value>> \ + : public type_caster_holder { }; \ + }} + +// PYBIND11_DECLARE_HOLDER_TYPE holder types: +template struct is_holder_type : + std::is_base_of, detail::type_caster> {}; +// Specialization for always-supported unique_ptr holders: +template struct is_holder_type> : + std::true_type {}; + +template struct handle_type_name { static constexpr auto name = _(); }; +template <> struct handle_type_name { static constexpr auto name = _(PYBIND11_BYTES_NAME); }; +template <> struct handle_type_name { static constexpr auto name = _("int"); }; +template <> struct handle_type_name { static constexpr auto name = _("Iterable"); }; +template <> struct handle_type_name { static constexpr auto name = _("Iterator"); }; +template <> struct handle_type_name { static constexpr auto name = _("None"); }; +template <> struct handle_type_name { static constexpr auto name = _("*args"); }; +template <> struct handle_type_name { static constexpr auto name = _("**kwargs"); }; + +template +struct pyobject_caster { + template ::value, int> = 0> + bool load(handle src, bool /* convert */) { value = src; return static_cast(value); } + + template ::value, int> = 0> + bool load(handle src, bool /* convert */) { +#if PY_MAJOR_VERSION < 3 && !defined(PYBIND11_STR_LEGACY_PERMISSIVE) + // For Python 2, without this implicit conversion, Python code would + // need to be cluttered with six.ensure_text() or similar, only to be + // un-cluttered later after Python 2 support is dropped. 
+ if (PYBIND11_SILENCE_MSVC_C4127(std::is_same::value) && isinstance(src)) { + PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr); + if (!str_from_bytes) throw error_already_set(); + value = reinterpret_steal(str_from_bytes); + return true; + } +#endif + if (!isinstance(src)) + return false; + value = reinterpret_borrow(src); + return true; + } + + static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) { + return src.inc_ref(); + } + PYBIND11_TYPE_CASTER(type, handle_type_name::name); +}; + +template +class type_caster::value>> : public pyobject_caster { }; + +// Our conditions for enabling moving are quite restrictive: +// At compile time: +// - T needs to be a non-const, non-pointer, non-reference type +// - type_caster::operator T&() must exist +// - the type must be move constructible (obviously) +// At run-time: +// - if the type is non-copy-constructible, the object must be the sole owner of the type (i.e. it +// must have ref_count() == 1)h +// If any of the above are not satisfied, we fall back to copying. +template using move_is_plain_type = satisfies_none_of; +template struct move_always : std::false_type {}; +template struct move_always, + negation>, + std::is_move_constructible, + std::is_same>().operator T&()), T&> +>::value>> : std::true_type {}; +template struct move_if_unreferenced : std::false_type {}; +template struct move_if_unreferenced, + negation>, + std::is_move_constructible, + std::is_same>().operator T&()), T&> +>::value>> : std::true_type {}; +template using move_never = none_of, move_if_unreferenced>; + +// Detect whether returning a `type` from a cast on type's type_caster is going to result in a +// reference or pointer to a local variable of the type_caster. Basically, only +// non-reference/pointer `type`s and reference/pointers from a type_caster_generic are safe; +// everything else returns a reference/pointer to a local variable. 
+template using cast_is_temporary_value_reference = bool_constant< + (std::is_reference::value || std::is_pointer::value) && + !std::is_base_of>::value && + !std::is_same, void>::value +>; + +// When a value returned from a C++ function is being cast back to Python, we almost always want to +// force `policy = move`, regardless of the return value policy the function/method was declared +// with. +template struct return_value_policy_override { + static return_value_policy policy(return_value_policy p) { return p; } +}; + +template struct return_value_policy_override>::value, void>> { + static return_value_policy policy(return_value_policy p) { + return !std::is_lvalue_reference::value && + !std::is_pointer::value + ? return_value_policy::move : p; + } +}; + +// Basic python -> C++ casting; throws if casting fails +template type_caster &load_type(type_caster &conv, const handle &handle) { + if (!conv.load(handle, true)) { +#if defined(NDEBUG) + throw cast_error("Unable to cast Python instance to C++ type (compile in debug mode for details)"); +#else + throw cast_error("Unable to cast Python instance of type " + + (std::string) str(type::handle_of(handle)) + " to C++ type '" + type_id() + "'"); +#endif + } + return conv; +} +// Wrapper around the above that also constructs and returns a type_caster +template make_caster load_type(const handle &handle) { + make_caster conv; + load_type(conv, handle); + return conv; +} + +PYBIND11_NAMESPACE_END(detail) + +// pytype -> C++ type +template ::value, int> = 0> +T cast(const handle &handle) { + using namespace detail; + static_assert(!cast_is_temporary_value_reference::value, + "Unable to cast type to reference: value is local to type caster"); + return cast_op(load_type(handle)); +} + +// pytype -> pytype (calls converting constructor) +template ::value, int> = 0> +T cast(const handle &handle) { return T(reinterpret_borrow(handle)); } + +// C++ type -> py::object +template ::value, int> = 0> +object cast(T &&value, 
return_value_policy policy = return_value_policy::automatic_reference, + handle parent = handle()) { + using no_ref_T = typename std::remove_reference::type; + if (policy == return_value_policy::automatic) + policy = std::is_pointer::value ? return_value_policy::take_ownership : + std::is_lvalue_reference::value ? return_value_policy::copy : return_value_policy::move; + else if (policy == return_value_policy::automatic_reference) + policy = std::is_pointer::value ? return_value_policy::reference : + std::is_lvalue_reference::value ? return_value_policy::copy : return_value_policy::move; + return reinterpret_steal(detail::make_caster::cast(std::forward(value), policy, parent)); +} + +template T handle::cast() const { return pybind11::cast(*this); } +template <> inline void handle::cast() const { return; } + +template +detail::enable_if_t::value, T> move(object &&obj) { + if (obj.ref_count() > 1) +#if defined(NDEBUG) + throw cast_error("Unable to cast Python instance to C++ rvalue: instance has multiple references" + " (compile in debug mode for details)"); +#else + throw cast_error("Unable to move from Python " + (std::string) str(type::handle_of(obj)) + + " instance to C++ " + type_id() + " instance: instance has multiple references"); +#endif + + // Move into a temporary and return that, because the reference may be a local value of `conv` + T ret = std::move(detail::load_type(obj).operator T&()); + return ret; +} + +// Calling cast() on an rvalue calls pybind11::cast with the object rvalue, which does: +// - If we have to move (because T has no copy constructor), do it. This will fail if the moved +// object has multiple references, but trying to copy will fail to compile. +// - If both movable and copyable, check ref count: if 1, move; otherwise copy +// - Otherwise (not movable), copy. 
+template detail::enable_if_t::value, T> cast(object &&object) { + return move(std::move(object)); +} +template detail::enable_if_t::value, T> cast(object &&object) { + if (object.ref_count() > 1) + return cast(object); + return move(std::move(object)); +} +template detail::enable_if_t::value, T> cast(object &&object) { + return cast(object); +} + +template T object::cast() const & { return pybind11::cast(*this); } +template T object::cast() && { return pybind11::cast(std::move(*this)); } +template <> inline void object::cast() const & { return; } +template <> inline void object::cast() && { return; } + +PYBIND11_NAMESPACE_BEGIN(detail) + +// Declared in pytypes.h: +template ::value, int>> +object object_or_cast(T &&o) { return pybind11::cast(std::forward(o)); } + +struct override_unused {}; // Placeholder type for the unneeded (and dead code) static variable in the PYBIND11_OVERRIDE_OVERRIDE macro +template using override_caster_t = conditional_t< + cast_is_temporary_value_reference::value, make_caster, override_unused>; + +// Trampoline use: for reference/pointer types to value-converted values, we do a value cast, then +// store the result in the given variable. For other types, this is a no-op. +template enable_if_t::value, T> cast_ref(object &&o, make_caster &caster) { + return cast_op(load_type(caster, o)); +} +template enable_if_t::value, T> cast_ref(object &&, override_unused &) { + pybind11_fail("Internal error: cast_ref fallback invoked"); } + +// Trampoline use: Having a pybind11::cast with an invalid reference type is going to static_assert, even +// though if it's in dead code, so we provide a "trampoline" to pybind11::cast that only does anything in +// cases where pybind11::cast is valid. 
+template enable_if_t::value, T> cast_safe(object &&o) { + return pybind11::cast(std::move(o)); } +template enable_if_t::value, T> cast_safe(object &&) { + pybind11_fail("Internal error: cast_safe fallback invoked"); } +template <> inline void cast_safe(object &&) {} + +PYBIND11_NAMESPACE_END(detail) + +// The overloads could coexist, i.e. the #if is not strictly speaking needed, +// but it is an easy minor optimization. +#if defined(NDEBUG) +inline cast_error cast_error_unable_to_convert_call_arg() { + return cast_error( + "Unable to convert call argument to Python object (compile in debug mode for details)"); +} +#else +inline cast_error cast_error_unable_to_convert_call_arg(const std::string &name, + const std::string &type) { + return cast_error("Unable to convert call argument '" + name + "' of type '" + type + + "' to Python object"); +} +#endif + +template +tuple make_tuple() { return tuple(0); } + +template tuple make_tuple(Args&&... args_) { + constexpr size_t size = sizeof...(Args); + std::array args { + { reinterpret_steal(detail::make_caster::cast( + std::forward(args_), policy, nullptr))... } + }; + for (size_t i = 0; i < args.size(); i++) { + if (!args[i]) { +#if defined(NDEBUG) + throw cast_error_unable_to_convert_call_arg(); +#else + std::array argtypes { {type_id()...} }; + throw cast_error_unable_to_convert_call_arg(std::to_string(i), argtypes[i]); +#endif + } + } + tuple result(size); + int counter = 0; + for (auto &arg_value : args) + PyTuple_SET_ITEM(result.ptr(), counter++, arg_value.release().ptr()); + return result; +} + +#if PY_VERSION_HEX >= 0x03030000 +template ()>> +object make_simple_namespace(Args&&... 
args_) { + PyObject *ns = _PyNamespace_New(dict(std::forward(args_)...).ptr()); + if (!ns) throw error_already_set(); + return reinterpret_steal(ns); +} +#endif + +/// \ingroup annotations +/// Annotation for arguments +struct arg { + /// Constructs an argument with the name of the argument; if null or omitted, this is a positional argument. + constexpr explicit arg(const char *name = nullptr) : name(name), flag_noconvert(false), flag_none(true) { } + /// Assign a value to this argument + template arg_v operator=(T &&value) const; + /// Indicate that the type should not be converted in the type caster + arg &noconvert(bool flag = true) { flag_noconvert = flag; return *this; } + /// Indicates that the argument should/shouldn't allow None (e.g. for nullable pointer args) + arg &none(bool flag = true) { flag_none = flag; return *this; } + + const char *name; ///< If non-null, this is a named kwargs argument + bool flag_noconvert : 1; ///< If set, do not allow conversion (requires a supporting type caster!) + bool flag_none : 1; ///< If set (the default), allow None to be passed to this argument +}; + +/// \ingroup annotations +/// Annotation for arguments with values +struct arg_v : arg { +private: + template + arg_v(arg &&base, T &&x, const char *descr = nullptr) + : arg(base), + value(reinterpret_steal( + detail::make_caster::cast(x, return_value_policy::automatic, {}) + )), + descr(descr) +#if !defined(NDEBUG) + , type(type_id()) +#endif + { + // Workaround! 
See: + // https://github.com/pybind/pybind11/issues/2336 + // https://github.com/pybind/pybind11/pull/2685#issuecomment-731286700 + if (PyErr_Occurred()) { + PyErr_Clear(); + } + } + +public: + /// Direct construction with name, default, and description + template + arg_v(const char *name, T &&x, const char *descr = nullptr) + : arg_v(arg(name), std::forward(x), descr) { } + + /// Called internally when invoking `py::arg("a") = value` + template + arg_v(const arg &base, T &&x, const char *descr = nullptr) + : arg_v(arg(base), std::forward(x), descr) { } + + /// Same as `arg::noconvert()`, but returns *this as arg_v&, not arg& + arg_v &noconvert(bool flag = true) { arg::noconvert(flag); return *this; } + + /// Same as `arg::nonone()`, but returns *this as arg_v&, not arg& + arg_v &none(bool flag = true) { arg::none(flag); return *this; } + + /// The default value + object value; + /// The (optional) description of the default value + const char *descr; +#if !defined(NDEBUG) + /// The C++ type name of the default value (only available when compiled in debug mode) + std::string type; +#endif +}; + +/// \ingroup annotations +/// Annotation indicating that all following arguments are keyword-only; the is the equivalent of an +/// unnamed '*' argument (in Python 3) +struct kw_only {}; + +/// \ingroup annotations +/// Annotation indicating that all previous arguments are positional-only; the is the equivalent of an +/// unnamed '/' argument (in Python 3.8) +struct pos_only {}; + +template +arg_v arg::operator=(T &&value) const { + return {*this, std::forward(value)}; +} + +/// Alias for backward compatibility -- to be removed in version 2.0 +template using arg_t = arg_v; + +inline namespace literals { +/** \rst + String literal version of `arg` + \endrst */ +constexpr arg operator"" _a(const char *name, size_t) { return arg(name); } +} // namespace literals + +PYBIND11_NAMESPACE_BEGIN(detail) + +// forward declaration (definition in attr.h) +struct function_record; + +/// 
Internal data associated with a single function call +struct function_call { + function_call(const function_record &f, handle p); // Implementation in attr.h + + /// The function data: + const function_record &func; + + /// Arguments passed to the function: + std::vector args; + + /// The `convert` value the arguments should be loaded with + std::vector args_convert; + + /// Extra references for the optional `py::args` and/or `py::kwargs` arguments (which, if + /// present, are also in `args` but without a reference). + object args_ref, kwargs_ref; + + /// The parent, if any + handle parent; + + /// If this is a call to an initializer, this argument contains `self` + handle init_self; +}; + + +/// Helper class which loads arguments for C++ functions called from Python +template +class argument_loader { + using indices = make_index_sequence; + + template using argument_is_args = std::is_same, args>; + template using argument_is_kwargs = std::is_same, kwargs>; + // Get args/kwargs argument positions relative to the end of the argument list: + static constexpr auto args_pos = constexpr_first() - (int) sizeof...(Args), + kwargs_pos = constexpr_first() - (int) sizeof...(Args); + + static constexpr bool args_kwargs_are_last = kwargs_pos >= - 1 && args_pos >= kwargs_pos - 1; + + static_assert(args_kwargs_are_last, "py::args/py::kwargs are only permitted as the last argument(s) of a function"); + +public: + static constexpr bool has_kwargs = kwargs_pos < 0; + static constexpr bool has_args = args_pos < 0; + + static constexpr auto arg_names = concat(type_descr(make_caster::name)...); + + bool load_args(function_call &call) { + return load_impl_sequence(call, indices{}); + } + + template + // NOLINTNEXTLINE(readability-const-return-type) + enable_if_t::value, Return> call(Func &&f) && { + return std::move(*this).template call_impl>(std::forward(f), indices{}, Guard{}); + } + + template + enable_if_t::value, void_type> call(Func &&f) && { + std::move(*this).template 
call_impl>(std::forward(f), indices{}, Guard{}); + return void_type(); + } + +private: + + static bool load_impl_sequence(function_call &, index_sequence<>) { return true; } + + template + bool load_impl_sequence(function_call &call, index_sequence) { +#ifdef __cpp_fold_expressions + if ((... || !std::get(argcasters).load(call.args[Is], call.args_convert[Is]))) + return false; +#else + for (bool r : {std::get(argcasters).load(call.args[Is], call.args_convert[Is])...}) + if (!r) + return false; +#endif + return true; + } + + template + Return call_impl(Func &&f, index_sequence, Guard &&) && { + return std::forward(f)(cast_op(std::move(std::get(argcasters)))...); + } + + std::tuple...> argcasters; +}; + +/// Helper class which collects only positional arguments for a Python function call. +/// A fancier version below can collect any argument, but this one is optimal for simple calls. +template +class simple_collector { +public: + template + explicit simple_collector(Ts &&...values) + : m_args(pybind11::make_tuple(std::forward(values)...)) { } + + const tuple &args() const & { return m_args; } + dict kwargs() const { return {}; } + + tuple args() && { return std::move(m_args); } + + /// Call a Python function and pass the collected arguments + object call(PyObject *ptr) const { + PyObject *result = PyObject_CallObject(ptr, m_args.ptr()); + if (!result) + throw error_already_set(); + return reinterpret_steal(result); + } + +private: + tuple m_args; +}; + +/// Helper class which collects positional, keyword, * and ** arguments for a Python function call +template +class unpacking_collector { +public: + template + explicit unpacking_collector(Ts &&...values) { + // Tuples aren't (easily) resizable so a list is needed for collection, + // but the actual function call strictly requires a tuple. 
+ auto args_list = list(); + using expander = int[]; + (void) expander{0, (process(args_list, std::forward(values)), 0)...}; + + m_args = std::move(args_list); + } + + const tuple &args() const & { return m_args; } + const dict &kwargs() const & { return m_kwargs; } + + tuple args() && { return std::move(m_args); } + dict kwargs() && { return std::move(m_kwargs); } + + /// Call a Python function and pass the collected arguments + object call(PyObject *ptr) const { + PyObject *result = PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr()); + if (!result) + throw error_already_set(); + return reinterpret_steal(result); + } + +private: + template + void process(list &args_list, T &&x) { + auto o = reinterpret_steal(detail::make_caster::cast(std::forward(x), policy, {})); + if (!o) { +#if defined(NDEBUG) + throw cast_error_unable_to_convert_call_arg(); +#else + throw cast_error_unable_to_convert_call_arg( + std::to_string(args_list.size()), type_id()); +#endif + } + args_list.append(o); + } + + void process(list &args_list, detail::args_proxy ap) { + for (auto a : ap) + args_list.append(a); + } + + void process(list &/*args_list*/, arg_v a) { + if (!a.name) +#if defined(NDEBUG) + nameless_argument_error(); +#else + nameless_argument_error(a.type); +#endif + + if (m_kwargs.contains(a.name)) { +#if defined(NDEBUG) + multiple_values_error(); +#else + multiple_values_error(a.name); +#endif + } + if (!a.value) { +#if defined(NDEBUG) + throw cast_error_unable_to_convert_call_arg(); +#else + throw cast_error_unable_to_convert_call_arg(a.name, a.type); +#endif + } + m_kwargs[a.name] = a.value; + } + + void process(list &/*args_list*/, detail::kwargs_proxy kp) { + if (!kp) + return; + for (auto k : reinterpret_borrow(kp)) { + if (m_kwargs.contains(k.first)) { +#if defined(NDEBUG) + multiple_values_error(); +#else + multiple_values_error(str(k.first)); +#endif + } + m_kwargs[k.first] = k.second; + } + } + + [[noreturn]] static void nameless_argument_error() { + throw type_error("Got 
kwargs without a name; only named arguments " + "may be passed via py::arg() to a python function call. " + "(compile in debug mode for details)"); + } + [[noreturn]] static void nameless_argument_error(const std::string &type) { + throw type_error("Got kwargs without a name of type '" + type + "'; only named " + "arguments may be passed via py::arg() to a python function call. "); + } + [[noreturn]] static void multiple_values_error() { + throw type_error("Got multiple values for keyword argument " + "(compile in debug mode for details)"); + } + + [[noreturn]] static void multiple_values_error(const std::string &name) { + throw type_error("Got multiple values for keyword argument '" + name + "'"); + } + +private: + tuple m_args; + dict m_kwargs; +}; + +// [workaround(intel)] Separate function required here +// We need to put this into a separate function because the Intel compiler +// fails to compile enable_if_t...>::value> +// (tested with ICC 2021.1 Beta 20200827). +template +constexpr bool args_are_all_positional() +{ + return all_of...>::value; +} + +/// Collect only positional arguments for a Python function call +template ()>> +simple_collector collect_arguments(Args &&...args) { + return simple_collector(std::forward(args)...); +} + +/// Collect all arguments, including keywords and unpacking (only instantiated when needed) +template ()>> +unpacking_collector collect_arguments(Args &&...args) { + // Following argument order rules for generalized unpacking according to PEP 448 + static_assert( + constexpr_last() < constexpr_first() + && constexpr_last() < constexpr_first(), + "Invalid function call: positional args must precede keywords and ** unpacking; " + "* unpacking must precede ** unpacking" + ); + return unpacking_collector(std::forward(args)...); +} + +template +template +object object_api::operator()(Args &&...args) const { +#if !defined(NDEBUG) && PY_VERSION_HEX >= 0x03060000 + if (!PyGILState_Check()) { + 
pybind11_fail("pybind11::object_api<>::operator() PyGILState_Check() failure."); + } +#endif + return detail::collect_arguments(std::forward(args)...).call(derived().ptr()); +} + +template +template +object object_api::call(Args &&...args) const { + return operator()(std::forward(args)...); +} + +PYBIND11_NAMESPACE_END(detail) + + +template +handle type::handle_of() { + static_assert( + std::is_base_of>::value, + "py::type::of only supports the case where T is a registered C++ types." + ); + + return detail::get_type_handle(typeid(T), true); +} + + +#define PYBIND11_MAKE_OPAQUE(...) \ + namespace pybind11 { namespace detail { \ + template<> class type_caster<__VA_ARGS__> : public type_caster_base<__VA_ARGS__> { }; \ + }} + +/// Lets you pass a type containing a `,` through a macro parameter without needing a separate +/// typedef, e.g.: `PYBIND11_OVERRIDE(PYBIND11_TYPE(ReturnType), PYBIND11_TYPE(Parent), f, arg)` +#define PYBIND11_TYPE(...) __VA_ARGS__ + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/chrono.h b/Libraries/pybind11-2.8.0/pybind11/chrono.h new file mode 100644 index 00000000..61bbcbc5 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/chrono.h @@ -0,0 +1,213 @@ +/* + pybind11/chrono.h: Transparent conversion between std::chrono and python's datetime + + Copyright (c) 2016 Trent Houliston and + Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "pybind11.h" + +#include +#include +#include +#include + +#include + +#include + +// Backport the PyDateTime_DELTA functions from Python3.3 if required +#ifndef PyDateTime_DELTA_GET_DAYS +#define PyDateTime_DELTA_GET_DAYS(o) (((PyDateTime_Delta*)o)->days) +#endif +#ifndef PyDateTime_DELTA_GET_SECONDS +#define PyDateTime_DELTA_GET_SECONDS(o) (((PyDateTime_Delta*)o)->seconds) +#endif +#ifndef PyDateTime_DELTA_GET_MICROSECONDS +#define PyDateTime_DELTA_GET_MICROSECONDS(o) (((PyDateTime_Delta*)o)->microseconds) +#endif + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +template class duration_caster { +public: + using rep = typename type::rep; + using period = typename type::period; + + using days = std::chrono::duration>; // signed 25 bits required by the standard. + + bool load(handle src, bool) { + using namespace std::chrono; + + // Lazy initialise the PyDateTime import + if (!PyDateTimeAPI) { PyDateTime_IMPORT; } + + if (!src) return false; + // If invoked with datetime.delta object + if (PyDelta_Check(src.ptr())) { + value = type(duration_cast>( + days(PyDateTime_DELTA_GET_DAYS(src.ptr())) + + seconds(PyDateTime_DELTA_GET_SECONDS(src.ptr())) + + microseconds(PyDateTime_DELTA_GET_MICROSECONDS(src.ptr())))); + return true; + } + // If invoked with a float we assume it is seconds and convert + if (PyFloat_Check(src.ptr())) { + value = type(duration_cast>(duration(PyFloat_AsDouble(src.ptr())))); + return true; + } + return false; + } + + // If this is a duration just return it back + static const std::chrono::duration& get_duration(const std::chrono::duration &src) { + return src; + } + + // If this is a time_point get the time_since_epoch + template static std::chrono::duration get_duration(const std::chrono::time_point> &src) { + return src.time_since_epoch(); + } + + static handle cast(const type &src, return_value_policy /* policy */, handle /* parent */) { + using namespace std::chrono; + + // Use 
overloaded function to get our duration from our source + // Works out if it is a duration or time_point and get the duration + auto d = get_duration(src); + + // Lazy initialise the PyDateTime import + if (!PyDateTimeAPI) { PyDateTime_IMPORT; } + + // Declare these special duration types so the conversions happen with the correct primitive types (int) + using dd_t = duration>; + using ss_t = duration>; + using us_t = duration; + + auto dd = duration_cast(d); + auto subd = d - dd; + auto ss = duration_cast(subd); + auto us = duration_cast(subd - ss); + return PyDelta_FromDSU(dd.count(), ss.count(), us.count()); + } + + PYBIND11_TYPE_CASTER(type, _("datetime.timedelta")); +}; + +inline std::tm *localtime_thread_safe(const std::time_t *time, std::tm *buf) { +#if (defined(__STDC_LIB_EXT1__) && defined(__STDC_WANT_LIB_EXT1__)) || defined(_MSC_VER) + if (localtime_s(buf, time)) + return nullptr; + return buf; +#else + static std::mutex mtx; + std::lock_guard lock(mtx); + std::tm *tm_ptr = localtime(time); + if (tm_ptr != nullptr) { + *buf = *tm_ptr; + } + return tm_ptr; +#endif +} + +// This is for casting times on the system clock into datetime.datetime instances +template class type_caster> { +public: + using type = std::chrono::time_point; + bool load(handle src, bool) { + using namespace std::chrono; + + // Lazy initialise the PyDateTime import + if (!PyDateTimeAPI) { PyDateTime_IMPORT; } + + if (!src) return false; + + std::tm cal; + microseconds msecs; + + if (PyDateTime_Check(src.ptr())) { + cal.tm_sec = PyDateTime_DATE_GET_SECOND(src.ptr()); + cal.tm_min = PyDateTime_DATE_GET_MINUTE(src.ptr()); + cal.tm_hour = PyDateTime_DATE_GET_HOUR(src.ptr()); + cal.tm_mday = PyDateTime_GET_DAY(src.ptr()); + cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1; + cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900; + cal.tm_isdst = -1; + msecs = microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr())); + } else if (PyDate_Check(src.ptr())) { + cal.tm_sec = 0; + cal.tm_min = 0; + 
cal.tm_hour = 0; + cal.tm_mday = PyDateTime_GET_DAY(src.ptr()); + cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1; + cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900; + cal.tm_isdst = -1; + msecs = microseconds(0); + } else if (PyTime_Check(src.ptr())) { + cal.tm_sec = PyDateTime_TIME_GET_SECOND(src.ptr()); + cal.tm_min = PyDateTime_TIME_GET_MINUTE(src.ptr()); + cal.tm_hour = PyDateTime_TIME_GET_HOUR(src.ptr()); + cal.tm_mday = 1; // This date (day, month, year) = (1, 0, 70) + cal.tm_mon = 0; // represents 1-Jan-1970, which is the first + cal.tm_year = 70; // earliest available date for Python's datetime + cal.tm_isdst = -1; + msecs = microseconds(PyDateTime_TIME_GET_MICROSECOND(src.ptr())); + } + else return false; + + value = time_point_cast(system_clock::from_time_t(std::mktime(&cal)) + msecs); + return true; + } + + static handle cast(const std::chrono::time_point &src, return_value_policy /* policy */, handle /* parent */) { + using namespace std::chrono; + + // Lazy initialise the PyDateTime import + if (!PyDateTimeAPI) { PyDateTime_IMPORT; } + + // Get out microseconds, and make sure they are positive, to avoid bug in eastern hemisphere time zones + // (cfr. https://github.com/pybind/pybind11/issues/2417) + using us_t = duration; + auto us = duration_cast(src.time_since_epoch() % seconds(1)); + if (us.count() < 0) + us += seconds(1); + + // Subtract microseconds BEFORE `system_clock::to_time_t`, because: + // > If std::time_t has lower precision, it is implementation-defined whether the value is rounded or truncated. 
+ // (https://en.cppreference.com/w/cpp/chrono/system_clock/to_time_t) + std::time_t tt = system_clock::to_time_t(time_point_cast(src - us)); + + std::tm localtime; + std::tm *localtime_ptr = localtime_thread_safe(&tt, &localtime); + if (!localtime_ptr) + throw cast_error("Unable to represent system_clock in local time"); + return PyDateTime_FromDateAndTime(localtime.tm_year + 1900, + localtime.tm_mon + 1, + localtime.tm_mday, + localtime.tm_hour, + localtime.tm_min, + localtime.tm_sec, + us.count()); + } + PYBIND11_TYPE_CASTER(type, _("datetime.datetime")); +}; + +// Other clocks that are not the system clock are not measured as datetime.datetime objects +// since they are not measured on calendar time. So instead we just make them timedeltas +// Or if they have passed us a time as a float we convert that +template class type_caster> +: public duration_caster> { +}; + +template class type_caster> +: public duration_caster> { +}; + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/common.h b/Libraries/pybind11-2.8.0/pybind11/common.h new file mode 100644 index 00000000..6c8a4f1e --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/common.h @@ -0,0 +1,2 @@ +#include "detail/common.h" +#warning "Including 'common.h' is deprecated. It will be removed in v3.0. Use 'pybind11.h'." diff --git a/Libraries/pybind11-2.8.0/pybind11/complex.h b/Libraries/pybind11-2.8.0/pybind11/complex.h new file mode 100644 index 00000000..f8327eb3 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/complex.h @@ -0,0 +1,65 @@ +/* + pybind11/complex.h: Complex number support + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "pybind11.h" +#include + +/// glibc defines I as a macro which breaks things, e.g., boost template names +#ifdef I +# undef I +#endif + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +template struct format_descriptor, detail::enable_if_t::value>> { + static constexpr const char c = format_descriptor::c; + static constexpr const char value[3] = { 'Z', c, '\0' }; + static std::string format() { return std::string(value); } +}; + +#ifndef PYBIND11_CPP17 + +template constexpr const char format_descriptor< + std::complex, detail::enable_if_t::value>>::value[3]; + +#endif + +PYBIND11_NAMESPACE_BEGIN(detail) + +template struct is_fmt_numeric, detail::enable_if_t::value>> { + static constexpr bool value = true; + static constexpr int index = is_fmt_numeric::index + 3; +}; + +template class type_caster> { +public: + bool load(handle src, bool convert) { + if (!src) + return false; + if (!convert && !PyComplex_Check(src.ptr())) + return false; + Py_complex result = PyComplex_AsCComplex(src.ptr()); + if (result.real == -1.0 && PyErr_Occurred()) { + PyErr_Clear(); + return false; + } + value = std::complex((T) result.real, (T) result.imag); + return true; + } + + static handle cast(const std::complex &src, return_value_policy /* policy */, handle /* parent */) { + return PyComplex_FromDoubles((double) src.real(), (double) src.imag()); + } + + PYBIND11_TYPE_CASTER(std::complex, _("complex")); +}; +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/class.h b/Libraries/pybind11-2.8.0/pybind11/detail/class.h new file mode 100644 index 00000000..b9376b4c --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/detail/class.h @@ -0,0 +1,709 @@ +/* + pybind11/detail/class.h: Python C API implementation details for py::class_ + + Copyright (c) 2017 Wenzel Jakob + + All rights reserved. 
Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "../attr.h" +#include "../options.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +#if PY_VERSION_HEX >= 0x03030000 && !defined(PYPY_VERSION) +# define PYBIND11_BUILTIN_QUALNAME +# define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj) +#else +// In pre-3.3 Python, we still set __qualname__ so that we can produce reliable function type +// signatures; in 3.3+ this macro expands to nothing: +# define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj) setattr((PyObject *) obj, "__qualname__", nameobj) +#endif + +inline std::string get_fully_qualified_tp_name(PyTypeObject *type) { +#if !defined(PYPY_VERSION) + return type->tp_name; +#else + auto module_name = handle((PyObject *) type).attr("__module__").cast(); + if (module_name == PYBIND11_BUILTINS_MODULE) + return type->tp_name; + else + return std::move(module_name) + "." + type->tp_name; +#endif +} + +inline PyTypeObject *type_incref(PyTypeObject *type) { + Py_INCREF(type); + return type; +} + +#if !defined(PYPY_VERSION) + +/// `pybind11_static_property.__get__()`: Always pass the class instead of the instance. +extern "C" inline PyObject *pybind11_static_get(PyObject *self, PyObject * /*ob*/, PyObject *cls) { + return PyProperty_Type.tp_descr_get(self, cls, cls); +} + +/// `pybind11_static_property.__set__()`: Just like the above `__get__()`. +extern "C" inline int pybind11_static_set(PyObject *self, PyObject *obj, PyObject *value) { + PyObject *cls = PyType_Check(obj) ? obj : (PyObject *) Py_TYPE(obj); + return PyProperty_Type.tp_descr_set(self, cls, value); +} + +/** A `static_property` is the same as a `property` but the `__get__()` and `__set__()` + methods are modified to always use the object type instead of a concrete instance. + Return value: New reference. 
*/ +inline PyTypeObject *make_static_property_type() { + constexpr auto *name = "pybind11_static_property"; + auto name_obj = reinterpret_steal(PYBIND11_FROM_STRING(name)); + + /* Danger zone: from now (and until PyType_Ready), make sure to + issue no Python C API calls which could potentially invoke the + garbage collector (the GC will call type_traverse(), which will in + turn find the newly constructed type in an invalid state) */ + auto heap_type = (PyHeapTypeObject *) PyType_Type.tp_alloc(&PyType_Type, 0); + if (!heap_type) + pybind11_fail("make_static_property_type(): error allocating type!"); + + heap_type->ht_name = name_obj.inc_ref().ptr(); +#ifdef PYBIND11_BUILTIN_QUALNAME + heap_type->ht_qualname = name_obj.inc_ref().ptr(); +#endif + + auto type = &heap_type->ht_type; + type->tp_name = name; + type->tp_base = type_incref(&PyProperty_Type); + type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; + type->tp_descr_get = pybind11_static_get; + type->tp_descr_set = pybind11_static_set; + + if (PyType_Ready(type) < 0) + pybind11_fail("make_static_property_type(): failure in PyType_Ready()!"); + + setattr((PyObject *) type, "__module__", str("pybind11_builtins")); + PYBIND11_SET_OLDPY_QUALNAME(type, name_obj); + + return type; +} + +#else // PYPY + +/** PyPy has some issues with the above C API, so we evaluate Python code instead. + This function will only be called once so performance isn't really a concern. + Return value: New reference. 
*/ +inline PyTypeObject *make_static_property_type() { + auto d = dict(); + PyObject *result = PyRun_String(R"(\ + class pybind11_static_property(property): + def __get__(self, obj, cls): + return property.__get__(self, cls, cls) + + def __set__(self, obj, value): + cls = obj if isinstance(obj, type) else type(obj) + property.__set__(self, cls, value) + )", Py_file_input, d.ptr(), d.ptr() + ); + if (result == nullptr) + throw error_already_set(); + Py_DECREF(result); + return (PyTypeObject *) d["pybind11_static_property"].cast().release().ptr(); +} + +#endif // PYPY + +/** Types with static properties need to handle `Type.static_prop = x` in a specific way. + By default, Python replaces the `static_property` itself, but for wrapped C++ types + we need to call `static_property.__set__()` in order to propagate the new value to + the underlying C++ data structure. */ +extern "C" inline int pybind11_meta_setattro(PyObject* obj, PyObject* name, PyObject* value) { + // Use `_PyType_Lookup()` instead of `PyObject_GetAttr()` in order to get the raw + // descriptor (`property`) instead of calling `tp_descr_get` (`property.__get__()`). + PyObject *descr = _PyType_Lookup((PyTypeObject *) obj, name); + + // The following assignment combinations are possible: + // 1. `Type.static_prop = value` --> descr_set: `Type.static_prop.__set__(value)` + // 2. `Type.static_prop = other_static_prop` --> setattro: replace existing `static_prop` + // 3. `Type.regular_attribute = value` --> setattro: regular attribute assignment + const auto static_prop = (PyObject *) get_internals().static_property_type; + const auto call_descr_set = (descr != nullptr) && (value != nullptr) + && (PyObject_IsInstance(descr, static_prop) != 0) + && (PyObject_IsInstance(value, static_prop) == 0); + if (call_descr_set) { + // Call `static_property.__set__()` instead of replacing the `static_property`. 
+#if !defined(PYPY_VERSION) + return Py_TYPE(descr)->tp_descr_set(descr, obj, value); +#else + if (PyObject *result = PyObject_CallMethod(descr, "__set__", "OO", obj, value)) { + Py_DECREF(result); + return 0; + } else { + return -1; + } +#endif + } else { + // Replace existing attribute. + return PyType_Type.tp_setattro(obj, name, value); + } +} + +#if PY_MAJOR_VERSION >= 3 +/** + * Python 3's PyInstanceMethod_Type hides itself via its tp_descr_get, which prevents aliasing + * methods via cls.attr("m2") = cls.attr("m1"): instead the tp_descr_get returns a plain function, + * when called on a class, or a PyMethod, when called on an instance. Override that behaviour here + * to do a special case bypass for PyInstanceMethod_Types. + */ +extern "C" inline PyObject *pybind11_meta_getattro(PyObject *obj, PyObject *name) { + PyObject *descr = _PyType_Lookup((PyTypeObject *) obj, name); + if (descr && PyInstanceMethod_Check(descr)) { + Py_INCREF(descr); + return descr; + } + return PyType_Type.tp_getattro(obj, name); +} +#endif + +/// metaclass `__call__` function that is used to create all pybind11 objects. +extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, PyObject *kwargs) { + + // use the default metaclass call to create/initialize the object + PyObject *self = PyType_Type.tp_call(type, args, kwargs); + if (self == nullptr) { + return nullptr; + } + + // This must be a pybind11 instance + auto instance = reinterpret_cast(self); + + // Ensure that the base __init__ function(s) were called + for (const auto &vh : values_and_holders(instance)) { + if (!vh.holder_constructed()) { + PyErr_Format(PyExc_TypeError, "%.200s.__init__() must be called when overriding __init__", + get_fully_qualified_tp_name(vh.type->type).c_str()); + Py_DECREF(self); + return nullptr; + } + } + + return self; +} + +/// Cleanup the type-info for a pybind11-registered type. 
+extern "C" inline void pybind11_meta_dealloc(PyObject *obj) { + auto *type = (PyTypeObject *) obj; + auto &internals = get_internals(); + + // A pybind11-registered type will: + // 1) be found in internals.registered_types_py + // 2) have exactly one associated `detail::type_info` + auto found_type = internals.registered_types_py.find(type); + if (found_type != internals.registered_types_py.end() && + found_type->second.size() == 1 && + found_type->second[0]->type == type) { + + auto *tinfo = found_type->second[0]; + auto tindex = std::type_index(*tinfo->cpptype); + internals.direct_conversions.erase(tindex); + + if (tinfo->module_local) + get_local_internals().registered_types_cpp.erase(tindex); + else + internals.registered_types_cpp.erase(tindex); + internals.registered_types_py.erase(tinfo->type); + + // Actually just `std::erase_if`, but that's only available in C++20 + auto &cache = internals.inactive_override_cache; + for (auto it = cache.begin(), last = cache.end(); it != last; ) { + if (it->first == (PyObject *) tinfo->type) + it = cache.erase(it); + else + ++it; + } + + delete tinfo; + } + + PyType_Type.tp_dealloc(obj); +} + +/** This metaclass is assigned by default to all pybind11 types and is required in order + for static properties to function correctly. Users may override this using `py::metaclass`. + Return value: New reference. 
*/ +inline PyTypeObject* make_default_metaclass() { + constexpr auto *name = "pybind11_type"; + auto name_obj = reinterpret_steal(PYBIND11_FROM_STRING(name)); + + /* Danger zone: from now (and until PyType_Ready), make sure to + issue no Python C API calls which could potentially invoke the + garbage collector (the GC will call type_traverse(), which will in + turn find the newly constructed type in an invalid state) */ + auto heap_type = (PyHeapTypeObject *) PyType_Type.tp_alloc(&PyType_Type, 0); + if (!heap_type) + pybind11_fail("make_default_metaclass(): error allocating metaclass!"); + + heap_type->ht_name = name_obj.inc_ref().ptr(); +#ifdef PYBIND11_BUILTIN_QUALNAME + heap_type->ht_qualname = name_obj.inc_ref().ptr(); +#endif + + auto type = &heap_type->ht_type; + type->tp_name = name; + type->tp_base = type_incref(&PyType_Type); + type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; + + type->tp_call = pybind11_meta_call; + + type->tp_setattro = pybind11_meta_setattro; +#if PY_MAJOR_VERSION >= 3 + type->tp_getattro = pybind11_meta_getattro; +#endif + + type->tp_dealloc = pybind11_meta_dealloc; + + if (PyType_Ready(type) < 0) + pybind11_fail("make_default_metaclass(): failure in PyType_Ready()!"); + + setattr((PyObject *) type, "__module__", str("pybind11_builtins")); + PYBIND11_SET_OLDPY_QUALNAME(type, name_obj); + + return type; +} + +/// For multiple inheritance types we need to recursively register/deregister base pointers for any +/// base classes with pointers that are difference from the instance value pointer so that we can +/// correctly recognize an offset base class pointer. This calls a function with any offset base ptrs. 
+inline void traverse_offset_bases(void *valueptr, const detail::type_info *tinfo, instance *self, + bool (*f)(void * /*parentptr*/, instance * /*self*/)) { + for (handle h : reinterpret_borrow(tinfo->type->tp_bases)) { + if (auto parent_tinfo = get_type_info((PyTypeObject *) h.ptr())) { + for (auto &c : parent_tinfo->implicit_casts) { + if (c.first == tinfo->cpptype) { + auto *parentptr = c.second(valueptr); + if (parentptr != valueptr) + f(parentptr, self); + traverse_offset_bases(parentptr, parent_tinfo, self, f); + break; + } + } + } + } +} + +inline bool register_instance_impl(void *ptr, instance *self) { + get_internals().registered_instances.emplace(ptr, self); + return true; // unused, but gives the same signature as the deregister func +} +inline bool deregister_instance_impl(void *ptr, instance *self) { + auto ®istered_instances = get_internals().registered_instances; + auto range = registered_instances.equal_range(ptr); + for (auto it = range.first; it != range.second; ++it) { + if (self == it->second) { + registered_instances.erase(it); + return true; + } + } + return false; +} + +inline void register_instance(instance *self, void *valptr, const type_info *tinfo) { + register_instance_impl(valptr, self); + if (!tinfo->simple_ancestors) + traverse_offset_bases(valptr, tinfo, self, register_instance_impl); +} + +inline bool deregister_instance(instance *self, void *valptr, const type_info *tinfo) { + bool ret = deregister_instance_impl(valptr, self); + if (!tinfo->simple_ancestors) + traverse_offset_bases(valptr, tinfo, self, deregister_instance_impl); + return ret; +} + +/// Instance creation function for all pybind11 types. It allocates the internal instance layout for +/// holding C++ objects and holders. Allocation is done lazily (the first time the instance is cast +/// to a reference or pointer), and initialization is done by an `__init__` function. 
+inline PyObject *make_new_instance(PyTypeObject *type) { +#if defined(PYPY_VERSION) + // PyPy gets tp_basicsize wrong (issue 2482) under multiple inheritance when the first inherited + // object is a plain Python type (i.e. not derived from an extension type). Fix it. + ssize_t instance_size = static_cast(sizeof(instance)); + if (type->tp_basicsize < instance_size) { + type->tp_basicsize = instance_size; + } +#endif + PyObject *self = type->tp_alloc(type, 0); + auto inst = reinterpret_cast(self); + // Allocate the value/holder internals: + inst->allocate_layout(); + + return self; +} + +/// Instance creation function for all pybind11 types. It only allocates space for the +/// C++ object, but doesn't call the constructor -- an `__init__` function must do that. +extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, PyObject *) { + return make_new_instance(type); +} + +/// An `__init__` function constructs the C++ object. Users should provide at least one +/// of these using `py::init` or directly with `.def(__init__, ...)`. Otherwise, the +/// following default function will be used which simply throws an exception. 
+extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject *) { + PyTypeObject *type = Py_TYPE(self); + std::string msg = get_fully_qualified_tp_name(type) + ": No constructor defined!"; + PyErr_SetString(PyExc_TypeError, msg.c_str()); + return -1; +} + +inline void add_patient(PyObject *nurse, PyObject *patient) { + auto &internals = get_internals(); + auto instance = reinterpret_cast(nurse); + instance->has_patients = true; + Py_INCREF(patient); + internals.patients[nurse].push_back(patient); +} + +inline void clear_patients(PyObject *self) { + auto instance = reinterpret_cast(self); + auto &internals = get_internals(); + auto pos = internals.patients.find(self); + assert(pos != internals.patients.end()); + // Clearing the patients can cause more Python code to run, which + // can invalidate the iterator. Extract the vector of patients + // from the unordered_map first. + auto patients = std::move(pos->second); + internals.patients.erase(pos); + instance->has_patients = false; + for (PyObject *&patient : patients) + Py_CLEAR(patient); +} + +/// Clears all internal data from the instance and removes it from registered instances in +/// preparation for deallocation. +inline void clear_instance(PyObject *self) { + auto instance = reinterpret_cast(self); + + // Deallocate any values/holders, if present: + for (auto &v_h : values_and_holders(instance)) { + if (v_h) { + + // We have to deregister before we call dealloc because, for virtual MI types, we still + // need to be able to get the parent pointers. 
+ if (v_h.instance_registered() && !deregister_instance(instance, v_h.value_ptr(), v_h.type)) + pybind11_fail("pybind11_object_dealloc(): Tried to deallocate unregistered instance!"); + + if (instance->owned || v_h.holder_constructed()) + v_h.type->dealloc(v_h); + } + } + // Deallocate the value/holder layout internals: + instance->deallocate_layout(); + + if (instance->weakrefs) + PyObject_ClearWeakRefs(self); + + PyObject **dict_ptr = _PyObject_GetDictPtr(self); + if (dict_ptr) + Py_CLEAR(*dict_ptr); + + if (instance->has_patients) + clear_patients(self); +} + +/// Instance destructor function for all pybind11 types. It calls `type_info.dealloc` +/// to destroy the C++ object itself, while the rest is Python bookkeeping. +extern "C" inline void pybind11_object_dealloc(PyObject *self) { + clear_instance(self); + + auto type = Py_TYPE(self); + type->tp_free(self); + +#if PY_VERSION_HEX < 0x03080000 + // `type->tp_dealloc != pybind11_object_dealloc` means that we're being called + // as part of a derived type's dealloc, in which case we're not allowed to decref + // the type here. For cross-module compatibility, we shouldn't compare directly + // with `pybind11_object_dealloc`, but with the common one stashed in internals. + auto pybind11_object_type = (PyTypeObject *) get_internals().instance_base; + if (type->tp_dealloc == pybind11_object_type->tp_dealloc) + Py_DECREF(type); +#else + // This was not needed before Python 3.8 (Python issue 35810) + // https://github.com/pybind/pybind11/issues/1946 + Py_DECREF(type); +#endif +} + +/** Create the type which can be used as a common base for all classes. This is + needed in order to satisfy Python's requirements for multiple inheritance. + Return value: New reference. 
*/ +inline PyObject *make_object_base_type(PyTypeObject *metaclass) { + constexpr auto *name = "pybind11_object"; + auto name_obj = reinterpret_steal(PYBIND11_FROM_STRING(name)); + + /* Danger zone: from now (and until PyType_Ready), make sure to + issue no Python C API calls which could potentially invoke the + garbage collector (the GC will call type_traverse(), which will in + turn find the newly constructed type in an invalid state) */ + auto heap_type = (PyHeapTypeObject *) metaclass->tp_alloc(metaclass, 0); + if (!heap_type) + pybind11_fail("make_object_base_type(): error allocating type!"); + + heap_type->ht_name = name_obj.inc_ref().ptr(); +#ifdef PYBIND11_BUILTIN_QUALNAME + heap_type->ht_qualname = name_obj.inc_ref().ptr(); +#endif + + auto type = &heap_type->ht_type; + type->tp_name = name; + type->tp_base = type_incref(&PyBaseObject_Type); + type->tp_basicsize = static_cast(sizeof(instance)); + type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; + + type->tp_new = pybind11_object_new; + type->tp_init = pybind11_object_init; + type->tp_dealloc = pybind11_object_dealloc; + + /* Support weak references (needed for the keep_alive feature) */ + type->tp_weaklistoffset = offsetof(instance, weakrefs); + + if (PyType_Ready(type) < 0) + pybind11_fail("PyType_Ready failed in make_object_base_type():" + error_string()); + + setattr((PyObject *) type, "__module__", str("pybind11_builtins")); + PYBIND11_SET_OLDPY_QUALNAME(type, name_obj); + + assert(!PyType_HasFeature(type, Py_TPFLAGS_HAVE_GC)); + return (PyObject *) heap_type; +} + +/// dynamic_attr: Support for `d = instance.__dict__`. +extern "C" inline PyObject *pybind11_get_dict(PyObject *self, void *) { + PyObject *&dict = *_PyObject_GetDictPtr(self); + if (!dict) + dict = PyDict_New(); + Py_XINCREF(dict); + return dict; +} + +/// dynamic_attr: Support for `instance.__dict__ = dict()`. 
+extern "C" inline int pybind11_set_dict(PyObject *self, PyObject *new_dict, void *) { + if (!PyDict_Check(new_dict)) { + PyErr_Format(PyExc_TypeError, "__dict__ must be set to a dictionary, not a '%.200s'", + get_fully_qualified_tp_name(Py_TYPE(new_dict)).c_str()); + return -1; + } + PyObject *&dict = *_PyObject_GetDictPtr(self); + Py_INCREF(new_dict); + Py_CLEAR(dict); + dict = new_dict; + return 0; +} + +/// dynamic_attr: Allow the garbage collector to traverse the internal instance `__dict__`. +extern "C" inline int pybind11_traverse(PyObject *self, visitproc visit, void *arg) { + PyObject *&dict = *_PyObject_GetDictPtr(self); + Py_VISIT(dict); + return 0; +} + +/// dynamic_attr: Allow the GC to clear the dictionary. +extern "C" inline int pybind11_clear(PyObject *self) { + PyObject *&dict = *_PyObject_GetDictPtr(self); + Py_CLEAR(dict); + return 0; +} + +/// Give instances of this type a `__dict__` and opt into garbage collection. +inline void enable_dynamic_attributes(PyHeapTypeObject *heap_type) { + auto type = &heap_type->ht_type; + type->tp_flags |= Py_TPFLAGS_HAVE_GC; + type->tp_dictoffset = type->tp_basicsize; // place dict at the end + type->tp_basicsize += (ssize_t)sizeof(PyObject *); // and allocate enough space for it + type->tp_traverse = pybind11_traverse; + type->tp_clear = pybind11_clear; + + static PyGetSetDef getset[] = { + {const_cast("__dict__"), pybind11_get_dict, pybind11_set_dict, nullptr, nullptr}, + {nullptr, nullptr, nullptr, nullptr, nullptr} + }; + type->tp_getset = getset; +} + +/// buffer_protocol: Fill in the view as specified by flags. +extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int flags) { + // Look for a `get_buffer` implementation in this type's info or any bases (following MRO). 
+ type_info *tinfo = nullptr; + for (auto type : reinterpret_borrow(Py_TYPE(obj)->tp_mro)) { + tinfo = get_type_info((PyTypeObject *) type.ptr()); + if (tinfo && tinfo->get_buffer) + break; + } + if (view == nullptr || !tinfo || !tinfo->get_buffer) { + if (view) + view->obj = nullptr; + PyErr_SetString(PyExc_BufferError, "pybind11_getbuffer(): Internal error"); + return -1; + } + std::memset(view, 0, sizeof(Py_buffer)); + buffer_info *info = tinfo->get_buffer(obj, tinfo->get_buffer_data); + if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE && info->readonly) { + delete info; + // view->obj = nullptr; // Was just memset to 0, so not necessary + PyErr_SetString(PyExc_BufferError, "Writable buffer requested for readonly storage"); + return -1; + } + view->obj = obj; + view->ndim = 1; + view->internal = info; + view->buf = info->ptr; + view->itemsize = info->itemsize; + view->len = view->itemsize; + for (auto s : info->shape) + view->len *= s; + view->readonly = static_cast(info->readonly); + if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) + view->format = const_cast(info->format.c_str()); + if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) { + view->ndim = (int) info->ndim; + view->strides = &info->strides[0]; + view->shape = &info->shape[0]; + } + Py_INCREF(view->obj); + return 0; +} + +/// buffer_protocol: Release the resources of the buffer. +extern "C" inline void pybind11_releasebuffer(PyObject *, Py_buffer *view) { + delete (buffer_info *) view->internal; +} + +/// Give this type a buffer interface. +inline void enable_buffer_protocol(PyHeapTypeObject *heap_type) { + heap_type->ht_type.tp_as_buffer = &heap_type->as_buffer; +#if PY_MAJOR_VERSION < 3 + heap_type->ht_type.tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER; +#endif + + heap_type->as_buffer.bf_getbuffer = pybind11_getbuffer; + heap_type->as_buffer.bf_releasebuffer = pybind11_releasebuffer; +} + +/** Create a brand new Python type according to the `type_record` specification. + Return value: New reference. 
*/ +inline PyObject* make_new_python_type(const type_record &rec) { + auto name = reinterpret_steal(PYBIND11_FROM_STRING(rec.name)); + + auto qualname = name; + if (rec.scope && !PyModule_Check(rec.scope.ptr()) && hasattr(rec.scope, "__qualname__")) { +#if PY_MAJOR_VERSION >= 3 + qualname = reinterpret_steal( + PyUnicode_FromFormat("%U.%U", rec.scope.attr("__qualname__").ptr(), name.ptr())); +#else + qualname = str(rec.scope.attr("__qualname__").cast() + "." + rec.name); +#endif + } + + object module_; + if (rec.scope) { + if (hasattr(rec.scope, "__module__")) + module_ = rec.scope.attr("__module__"); + else if (hasattr(rec.scope, "__name__")) + module_ = rec.scope.attr("__name__"); + } + + auto full_name = c_str( +#if !defined(PYPY_VERSION) + module_ ? str(module_).cast() + "." + rec.name : +#endif + rec.name); + + char *tp_doc = nullptr; + if (rec.doc && options::show_user_defined_docstrings()) { + /* Allocate memory for docstring (using PyObject_MALLOC, since + Python will free this later on) */ + size_t size = strlen(rec.doc) + 1; + tp_doc = (char *) PyObject_MALLOC(size); + memcpy((void *) tp_doc, rec.doc, size); + } + + auto &internals = get_internals(); + auto bases = tuple(rec.bases); + auto base = (bases.empty()) ? internals.instance_base + : bases[0].ptr(); + + /* Danger zone: from now (and until PyType_Ready), make sure to + issue no Python C API calls which could potentially invoke the + garbage collector (the GC will call type_traverse(), which will in + turn find the newly constructed type in an invalid state) */ + auto metaclass = rec.metaclass.ptr() ? 
(PyTypeObject *) rec.metaclass.ptr() + : internals.default_metaclass; + + auto heap_type = (PyHeapTypeObject *) metaclass->tp_alloc(metaclass, 0); + if (!heap_type) + pybind11_fail(std::string(rec.name) + ": Unable to create type object!"); + + heap_type->ht_name = name.release().ptr(); +#ifdef PYBIND11_BUILTIN_QUALNAME + heap_type->ht_qualname = qualname.inc_ref().ptr(); +#endif + + auto type = &heap_type->ht_type; + type->tp_name = full_name; + type->tp_doc = tp_doc; + type->tp_base = type_incref((PyTypeObject *)base); + type->tp_basicsize = static_cast(sizeof(instance)); + if (!bases.empty()) + type->tp_bases = bases.release().ptr(); + + /* Don't inherit base __init__ */ + type->tp_init = pybind11_object_init; + + /* Supported protocols */ + type->tp_as_number = &heap_type->as_number; + type->tp_as_sequence = &heap_type->as_sequence; + type->tp_as_mapping = &heap_type->as_mapping; +#if PY_VERSION_HEX >= 0x03050000 + type->tp_as_async = &heap_type->as_async; +#endif + + /* Flags */ + type->tp_flags |= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE; +#if PY_MAJOR_VERSION < 3 + type->tp_flags |= Py_TPFLAGS_CHECKTYPES; +#endif + if (!rec.is_final) + type->tp_flags |= Py_TPFLAGS_BASETYPE; + + if (rec.dynamic_attr) + enable_dynamic_attributes(heap_type); + + if (rec.buffer_protocol) + enable_buffer_protocol(heap_type); + + if (rec.custom_type_setup_callback) + rec.custom_type_setup_callback(heap_type); + + if (PyType_Ready(type) < 0) + pybind11_fail(std::string(rec.name) + ": PyType_Ready failed (" + error_string() + ")!"); + + assert(!rec.dynamic_attr || PyType_HasFeature(type, Py_TPFLAGS_HAVE_GC)); + + /* Register type with the parent scope */ + if (rec.scope) + setattr(rec.scope, rec.name, (PyObject *) type); + else + Py_INCREF(type); // Keep it alive forever (reference leak) + + if (module_) // Needed by pydoc + setattr((PyObject *) type, "__module__", module_); + + PYBIND11_SET_OLDPY_QUALNAME(type, qualname); + + return (PyObject *) type; +} + 
+PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/common.h b/Libraries/pybind11-2.8.0/pybind11/detail/common.h new file mode 100644 index 00000000..31716e5b --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/detail/common.h @@ -0,0 +1,1021 @@ +/* + pybind11/detail/common.h -- Basic macros + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#define PYBIND11_VERSION_MAJOR 2 +#define PYBIND11_VERSION_MINOR 8 +#define PYBIND11_VERSION_PATCH 0 + +// Similar to Python's convention: https://docs.python.org/3/c-api/apiabiversion.html +// Additional convention: 0xD = dev +#define PYBIND11_VERSION_HEX 0x02080000 + +#define PYBIND11_NAMESPACE_BEGIN(name) namespace name { +#define PYBIND11_NAMESPACE_END(name) } + +// Robust support for some features and loading modules compiled against different pybind versions +// requires forcing hidden visibility on pybind code, so we enforce this by setting the attribute on +// the main `pybind11` namespace. 
+#if !defined(PYBIND11_NAMESPACE) +# ifdef __GNUG__ +# define PYBIND11_NAMESPACE pybind11 __attribute__((visibility("hidden"))) +# else +# define PYBIND11_NAMESPACE pybind11 +# endif +#endif + +#if !(defined(_MSC_VER) && __cplusplus == 199711L) +# if __cplusplus >= 201402L +# define PYBIND11_CPP14 +# if __cplusplus >= 201703L +# define PYBIND11_CPP17 +# endif +# endif +#elif defined(_MSC_VER) && __cplusplus == 199711L +// MSVC sets _MSVC_LANG rather than __cplusplus (supposedly until the standard is fully implemented) +// Unless you use the /Zc:__cplusplus flag on Visual Studio 2017 15.7 Preview 3 or newer +# if _MSVC_LANG >= 201402L +# define PYBIND11_CPP14 +# if _MSVC_LANG > 201402L && _MSC_VER >= 1910 +# define PYBIND11_CPP17 +# endif +# endif +#endif + +// Compiler version assertions +#if defined(__INTEL_COMPILER) +# if __INTEL_COMPILER < 1800 +# error pybind11 requires Intel C++ compiler v18 or newer +# elif __INTEL_COMPILER < 1900 && defined(PYBIND11_CPP14) +# error pybind11 supports only C++11 with Intel C++ compiler v18. Use v19 or newer for C++14. 
+# endif +/* The following pragma cannot be pop'ed: + https://community.intel.com/t5/Intel-C-Compiler/Inline-and-no-inline-warning/td-p/1216764 */ +# pragma warning disable 2196 // warning #2196: routine is both "inline" and "noinline" +#elif defined(__clang__) && !defined(__apple_build_version__) +# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 3) +# error pybind11 requires clang 3.3 or newer +# endif +#elif defined(__clang__) +// Apple changes clang version macros to its Xcode version; the first Xcode release based on +// (upstream) clang 3.3 was Xcode 5: +# if __clang_major__ < 5 +# error pybind11 requires Xcode/clang 5.0 or newer +# endif +#elif defined(__GNUG__) +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) +# error pybind11 requires gcc 4.8 or newer +# endif +#elif defined(_MSC_VER) +// Pybind hits various compiler bugs in 2015u2 and earlier, and also makes use of some stl features +// (e.g. std::negation) added in 2015u3: +# if _MSC_FULL_VER < 190024210 +# error pybind11 requires MSVC 2015 update 3 or newer +# endif +#endif + +#if !defined(PYBIND11_EXPORT) +# if defined(WIN32) || defined(_WIN32) +# define PYBIND11_EXPORT __declspec(dllexport) +# else +# define PYBIND11_EXPORT __attribute__ ((visibility("default"))) +# endif +#endif + +#if !defined(PYBIND11_EXPORT_EXCEPTION) +# ifdef __MINGW32__ +// workaround for: +// error: 'dllexport' implies default visibility, but xxx has already been declared with a different visibility +# define PYBIND11_EXPORT_EXCEPTION +# else +# define PYBIND11_EXPORT_EXCEPTION PYBIND11_EXPORT +# endif +#endif + +// For CUDA, GCC7, GCC8: +// PYBIND11_NOINLINE_FORCED is incompatible with `-Wattributes -Werror`. +// When defining PYBIND11_NOINLINE_FORCED, it is best to also use `-Wno-attributes`. 
+// However, the measured shared-library size saving when using noinline are only +// 1.7% for CUDA, -0.2% for GCC7, and 0.0% for GCC8 (using -DCMAKE_BUILD_TYPE=MinSizeRel, +// the default under pybind11/tests). +#if !defined(PYBIND11_NOINLINE_FORCED) && \ + (defined(__CUDACC__) || (defined(__GNUC__) && (__GNUC__ == 7 || __GNUC__ == 8))) +# define PYBIND11_NOINLINE_DISABLED +#endif + +// The PYBIND11_NOINLINE macro is for function DEFINITIONS. +// In contrast, FORWARD DECLARATIONS should never use this macro: +// https://stackoverflow.com/questions/9317473/forward-declaration-of-inline-functions +#if defined(PYBIND11_NOINLINE_DISABLED) // Option for maximum portability and experimentation. +# define PYBIND11_NOINLINE inline +#elif defined(_MSC_VER) +# define PYBIND11_NOINLINE __declspec(noinline) inline +#else +# define PYBIND11_NOINLINE __attribute__ ((noinline)) inline +#endif + +#if defined(__MINGW32__) +// For unknown reasons all PYBIND11_DEPRECATED member trigger a warning when declared +// whether it is used or not +# define PYBIND11_DEPRECATED(reason) +#elif defined(PYBIND11_CPP14) +# define PYBIND11_DEPRECATED(reason) [[deprecated(reason)]] +#else +# define PYBIND11_DEPRECATED(reason) __attribute__((deprecated(reason))) +#endif + +#if defined(PYBIND11_CPP17) +# define PYBIND11_MAYBE_UNUSED [[maybe_unused]] +#elif defined(_MSC_VER) && !defined(__clang__) +# define PYBIND11_MAYBE_UNUSED +#else +# define PYBIND11_MAYBE_UNUSED __attribute__ ((__unused__)) +#endif + +/* Don't let Python.h #define (v)snprintf as macro because they are implemented + properly in Visual Studio since 2015. 
*/ +#if defined(_MSC_VER) && _MSC_VER >= 1900 +# define HAVE_SNPRINTF 1 +#endif + +/// Include Python header, disable linking to pythonX_d.lib on Windows in debug mode +#if defined(_MSC_VER) +# if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 4) +# define HAVE_ROUND 1 +# endif +# pragma warning(push) +// C4505: 'PySlice_GetIndicesEx': unreferenced local function has been removed (PyPy only) +# pragma warning(disable: 4505) +# if defined(_DEBUG) && !defined(Py_DEBUG) +# define PYBIND11_DEBUG_MARKER +# undef _DEBUG +# endif +#endif + +// https://en.cppreference.com/w/c/chrono/localtime +#if defined(__STDC_LIB_EXT1__) && !defined(__STDC_WANT_LIB_EXT1__) +# define __STDC_WANT_LIB_EXT1__ +#endif + +#ifdef __has_include +// std::optional (but including it in c++14 mode isn't allowed) +# if defined(PYBIND11_CPP17) && __has_include() +# define PYBIND11_HAS_OPTIONAL 1 +# endif +// std::experimental::optional (but not allowed in c++11 mode) +# if defined(PYBIND11_CPP14) && (__has_include() && \ + !__has_include()) +# define PYBIND11_HAS_EXP_OPTIONAL 1 +# endif +// std::variant +# if defined(PYBIND11_CPP17) && __has_include() +# define PYBIND11_HAS_VARIANT 1 +# endif +#elif defined(_MSC_VER) && defined(PYBIND11_CPP17) +# define PYBIND11_HAS_OPTIONAL 1 +# define PYBIND11_HAS_VARIANT 1 +#endif + +#include +#include +#include + +/* Python #defines overrides on all sorts of core functions, which + tends to wreak havoc in C++ codebases that expect these to work + like regular functions (potentially with several overloads) */ +#if defined(isalnum) +# undef isalnum +# undef isalpha +# undef islower +# undef isspace +# undef isupper +# undef tolower +# undef toupper +#endif + +#if defined(copysign) +# undef copysign +#endif + +#if defined(_MSC_VER) +# if defined(PYBIND11_DEBUG_MARKER) +# define _DEBUG +# undef PYBIND11_DEBUG_MARKER +# endif +# pragma warning(pop) +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include
+#include +#if defined(__has_include) +# if __has_include() +# include +# endif +#endif + +// #define PYBIND11_STR_LEGACY_PERMISSIVE +// If DEFINED, pybind11::str can hold PyUnicodeObject or PyBytesObject +// (probably surprising and never documented, but this was the +// legacy behavior until and including v2.6.x). As a side-effect, +// pybind11::isinstance() is true for both pybind11::str and +// pybind11::bytes. +// If UNDEFINED, pybind11::str can only hold PyUnicodeObject, and +// pybind11::isinstance() is true only for pybind11::str. +// However, for Python 2 only (!), the pybind11::str caster +// implicitly decodes bytes to PyUnicodeObject. This is to ease +// the transition from the legacy behavior to the non-permissive +// behavior. + +#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions +#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr) +#define PYBIND11_INSTANCE_METHOD_CHECK PyInstanceMethod_Check +#define PYBIND11_INSTANCE_METHOD_GET_FUNCTION PyInstanceMethod_GET_FUNCTION +#define PYBIND11_BYTES_CHECK PyBytes_Check +#define PYBIND11_BYTES_FROM_STRING PyBytes_FromString +#define PYBIND11_BYTES_FROM_STRING_AND_SIZE PyBytes_FromStringAndSize +#define PYBIND11_BYTES_AS_STRING_AND_SIZE PyBytes_AsStringAndSize +#define PYBIND11_BYTES_AS_STRING PyBytes_AsString +#define PYBIND11_BYTES_SIZE PyBytes_Size +#define PYBIND11_LONG_CHECK(o) PyLong_Check(o) +#define PYBIND11_LONG_AS_LONGLONG(o) PyLong_AsLongLong(o) +#define PYBIND11_LONG_FROM_SIGNED(o) PyLong_FromSsize_t((ssize_t) (o)) +#define PYBIND11_LONG_FROM_UNSIGNED(o) PyLong_FromSize_t((size_t) (o)) +#define PYBIND11_BYTES_NAME "bytes" +#define PYBIND11_STRING_NAME "str" +#define PYBIND11_SLICE_OBJECT PyObject +#define PYBIND11_FROM_STRING PyUnicode_FromString +#define PYBIND11_STR_TYPE ::pybind11::str +#define PYBIND11_BOOL_ATTR "__bool__" +#define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_bool) +#define PYBIND11_BUILTINS_MODULE "builtins" +// Providing a separate 
declaration to make Clang's -Wmissing-prototypes happy. +// See comment for PYBIND11_MODULE below for why this is marked "maybe unused". +#define PYBIND11_PLUGIN_IMPL(name) \ + extern "C" PYBIND11_MAYBE_UNUSED PYBIND11_EXPORT PyObject *PyInit_##name(); \ + extern "C" PYBIND11_EXPORT PyObject *PyInit_##name() + +#else +#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyMethod_New(ptr, nullptr, class_) +#define PYBIND11_INSTANCE_METHOD_CHECK PyMethod_Check +#define PYBIND11_INSTANCE_METHOD_GET_FUNCTION PyMethod_GET_FUNCTION +#define PYBIND11_BYTES_CHECK PyString_Check +#define PYBIND11_BYTES_FROM_STRING PyString_FromString +#define PYBIND11_BYTES_FROM_STRING_AND_SIZE PyString_FromStringAndSize +#define PYBIND11_BYTES_AS_STRING_AND_SIZE PyString_AsStringAndSize +#define PYBIND11_BYTES_AS_STRING PyString_AsString +#define PYBIND11_BYTES_SIZE PyString_Size +#define PYBIND11_LONG_CHECK(o) (PyInt_Check(o) || PyLong_Check(o)) +#define PYBIND11_LONG_AS_LONGLONG(o) (PyInt_Check(o) ? (long long) PyLong_AsLong(o) : PyLong_AsLongLong(o)) +#define PYBIND11_LONG_FROM_SIGNED(o) PyInt_FromSsize_t((ssize_t) o) // Returns long if needed. +#define PYBIND11_LONG_FROM_UNSIGNED(o) PyInt_FromSize_t((size_t) o) // Returns long if needed. +#define PYBIND11_BYTES_NAME "str" +#define PYBIND11_STRING_NAME "unicode" +#define PYBIND11_SLICE_OBJECT PySliceObject +#define PYBIND11_FROM_STRING PyString_FromString +#define PYBIND11_STR_TYPE ::pybind11::bytes +#define PYBIND11_BOOL_ATTR "__nonzero__" +#define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_nonzero) +#define PYBIND11_BUILTINS_MODULE "__builtin__" +// Providing a separate PyInit decl to make Clang's -Wmissing-prototypes happy. +// See comment for PYBIND11_MODULE below for why this is marked "maybe unused". 
+#define PYBIND11_PLUGIN_IMPL(name) \ + static PyObject *pybind11_init_wrapper(); \ + extern "C" PYBIND11_MAYBE_UNUSED PYBIND11_EXPORT void init##name(); \ + extern "C" PYBIND11_EXPORT void init##name() { \ + (void)pybind11_init_wrapper(); \ + } \ + PyObject *pybind11_init_wrapper() +#endif + +#if PY_VERSION_HEX >= 0x03050000 && PY_VERSION_HEX < 0x03050200 +extern "C" { + struct _Py_atomic_address { void *value; }; + PyAPI_DATA(_Py_atomic_address) _PyThreadState_Current; +} +#endif + +#define PYBIND11_TRY_NEXT_OVERLOAD ((PyObject *) 1) // special failure return code +#define PYBIND11_STRINGIFY(x) #x +#define PYBIND11_TOSTRING(x) PYBIND11_STRINGIFY(x) +#define PYBIND11_CONCAT(first, second) first##second +#define PYBIND11_ENSURE_INTERNALS_READY \ + pybind11::detail::get_internals(); + +#define PYBIND11_CHECK_PYTHON_VERSION \ + { \ + const char *compiled_ver = PYBIND11_TOSTRING(PY_MAJOR_VERSION) \ + "." PYBIND11_TOSTRING(PY_MINOR_VERSION); \ + const char *runtime_ver = Py_GetVersion(); \ + size_t len = std::strlen(compiled_ver); \ + if (std::strncmp(runtime_ver, compiled_ver, len) != 0 \ + || (runtime_ver[len] >= '0' && runtime_ver[len] <= '9')) { \ + PyErr_Format(PyExc_ImportError, \ + "Python version mismatch: module was compiled for Python %s, " \ + "but the interpreter version is incompatible: %s.", \ + compiled_ver, runtime_ver); \ + return nullptr; \ + } \ + } + +#if PY_VERSION_HEX >= 0x03030000 + +#define PYBIND11_CATCH_INIT_EXCEPTIONS \ + catch (pybind11::error_already_set &e) { \ + pybind11::raise_from(e, PyExc_ImportError, "initialization failed"); \ + return nullptr; \ + } catch (const std::exception &e) { \ + PyErr_SetString(PyExc_ImportError, e.what()); \ + return nullptr; \ + } \ + +#else + +#define PYBIND11_CATCH_INIT_EXCEPTIONS \ + catch (pybind11::error_already_set &e) { \ + PyErr_SetString(PyExc_ImportError, e.what()); \ + return nullptr; \ + } catch (const std::exception &e) { \ + PyErr_SetString(PyExc_ImportError, e.what()); \ + return nullptr; \ 
+ } \ + +#endif + +/** \rst + ***Deprecated in favor of PYBIND11_MODULE*** + + This macro creates the entry point that will be invoked when the Python interpreter + imports a plugin library. Please create a `module_` in the function body and return + the pointer to its underlying Python object at the end. + + .. code-block:: cpp + + PYBIND11_PLUGIN(example) { + pybind11::module_ m("example", "pybind11 example plugin"); + /// Set up bindings here + return m.ptr(); + } +\endrst */ +#define PYBIND11_PLUGIN(name) \ + PYBIND11_DEPRECATED("PYBIND11_PLUGIN is deprecated, use PYBIND11_MODULE") \ + static PyObject *pybind11_init(); \ + PYBIND11_PLUGIN_IMPL(name) { \ + PYBIND11_CHECK_PYTHON_VERSION \ + PYBIND11_ENSURE_INTERNALS_READY \ + try { \ + return pybind11_init(); \ + } PYBIND11_CATCH_INIT_EXCEPTIONS \ + } \ + PyObject *pybind11_init() + +/** \rst + This macro creates the entry point that will be invoked when the Python interpreter + imports an extension module. The module name is given as the first argument and it + should not be in quotes. The second macro argument defines a variable of type + `py::module_` which can be used to initialize the module. + + The entry point is marked as "maybe unused" to aid dead-code detection analysis: + since the entry point is typically only looked up at runtime and not referenced + during translation, it would otherwise appear as unused ("dead") code. + + ..
code-block:: cpp + + PYBIND11_MODULE(example, m) { + m.doc() = "pybind11 example module"; + + // Add bindings here + m.def("foo", []() { + return "Hello, World!"; + }); + } +\endrst */ +#define PYBIND11_MODULE(name, variable) \ + static ::pybind11::module_::module_def PYBIND11_CONCAT(pybind11_module_def_, name) \ + PYBIND11_MAYBE_UNUSED; \ + PYBIND11_MAYBE_UNUSED \ + static void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &); \ + PYBIND11_PLUGIN_IMPL(name) { \ + PYBIND11_CHECK_PYTHON_VERSION \ + PYBIND11_ENSURE_INTERNALS_READY \ + auto m = ::pybind11::module_::create_extension_module( \ + PYBIND11_TOSTRING(name), nullptr, &PYBIND11_CONCAT(pybind11_module_def_, name)); \ + try { \ + PYBIND11_CONCAT(pybind11_init_, name)(m); \ + return m.ptr(); \ + } \ + PYBIND11_CATCH_INIT_EXCEPTIONS \ + } \ + void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ & (variable)) + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +using ssize_t = Py_ssize_t; +using size_t = std::size_t; + +template +inline ssize_t ssize_t_cast(const IntType &val) { + static_assert(sizeof(IntType) <= sizeof(ssize_t), "Implicit narrowing is not permitted."); + return static_cast(val); +} + +/// Approach used to cast a previously unknown C++ instance into a Python object +enum class return_value_policy : uint8_t { + /** This is the default return value policy, which falls back to the policy + return_value_policy::take_ownership when the return value is a pointer. + Otherwise, it uses return_value::move or return_value::copy for rvalue + and lvalue references, respectively. See below for a description of what + all of these different policies do. */ + automatic = 0, + + /** As above, but use policy return_value_policy::reference when the return + value is a pointer. This is the default conversion policy for function + arguments when calling Python functions manually from C++ code (i.e. via + handle::operator()). You probably won't need to use this. 
*/ + automatic_reference, + + /** Reference an existing object (i.e. do not create a new copy) and take + ownership. Python will call the destructor and delete operator when the + object’s reference count reaches zero. Undefined behavior ensues when + the C++ side does the same. */ + take_ownership, + + /** Create a new copy of the returned object, which will be owned by + Python. This policy is comparably safe because the lifetimes of the two + instances are decoupled. */ + copy, + + /** Use std::move to move the return value contents into a new instance + that will be owned by Python. This policy is comparably safe because the + lifetimes of the two instances (move source and destination) are + decoupled. */ + move, + + /** Reference an existing object, but do not take ownership. The C++ side + is responsible for managing the object’s lifetime and deallocating it + when it is no longer used. Warning: undefined behavior will ensue when + the C++ side deletes an object that is still referenced and used by + Python. */ + reference, + + /** This policy only applies to methods and properties. It references the + object without taking ownership similar to the above + return_value_policy::reference policy. In contrast to that policy, the + function or property’s implicit this argument (called the parent) is + considered to be the owner of the return value (the child). + pybind11 then couples the lifetime of the parent to the child via a + reference relationship that ensures that the parent cannot be garbage + collected while Python is still using the child. More advanced + variations of this scheme are also possible using combinations of + return_value_policy::reference and the keep_alive call policy */ + reference_internal +}; + +PYBIND11_NAMESPACE_BEGIN(detail) + +inline static constexpr int log2(size_t n, int k = 0) { return (n <= 1) ? k : log2(n >> 1, k + 1); } + +// Returns the size as a multiple of sizeof(void *), rounded up.
+inline static constexpr size_t size_in_ptrs(size_t s) { return 1 + ((s - 1) >> log2(sizeof(void *))); } + +/** + * The space to allocate for simple layout instance holders (see below) in multiple of the size of + * a pointer (e.g. 2 means 16 bytes on 64-bit architectures). The default is the minimum required + * to hold either a std::unique_ptr or std::shared_ptr (which is almost always + * sizeof(std::shared_ptr)). + */ +constexpr size_t instance_simple_holder_in_ptrs() { + static_assert(sizeof(std::shared_ptr) >= sizeof(std::unique_ptr), + "pybind assumes std::shared_ptrs are at least as big as std::unique_ptrs"); + return size_in_ptrs(sizeof(std::shared_ptr)); +} + +// Forward declarations +struct type_info; +struct value_and_holder; + +struct nonsimple_values_and_holders { + void **values_and_holders; + uint8_t *status; +}; + +/// The 'instance' type which needs to be standard layout (need to be able to use 'offsetof') +struct instance { + PyObject_HEAD + /// Storage for pointers and holder; see simple_layout, below, for a description + union { + void *simple_value_holder[1 + instance_simple_holder_in_ptrs()]; + nonsimple_values_and_holders nonsimple; + }; + /// Weak references + PyObject *weakrefs; + /// If true, the pointer is owned which means we're free to manage it with a holder. + bool owned : 1; + /** + * An instance has two possible value/holder layouts. + * + * Simple layout (when this flag is true), means the `simple_value_holder` is set with a pointer + * and the holder object governing that pointer, i.e. [val1*][holder]. This layout is applied + * whenever there is no python-side multiple inheritance of bound C++ types *and* the type's + * holder will fit in the default space (which is large enough to hold either a std::unique_ptr + * or std::shared_ptr).
+ * + * Non-simple layout applies when using custom holders that require more space than `shared_ptr` + * (which is typically the size of two pointers), or when multiple inheritance is used on the + * python side. Non-simple layout allocates the required amount of memory to have multiple + * bound C++ classes as parents. Under this layout, `nonsimple.values_and_holders` is set to a + * pointer to allocated space of the required space to hold a sequence of value pointers and + * holders followed by `status`, a set of bit flags (1 byte each), i.e. + * [val1*][holder1][val2*][holder2]...[bb...] where each [block] is rounded up to a multiple of + * `sizeof(void *)`. `nonsimple.status` is, for convenience, a pointer to the + * beginning of the [bb...] block (but not independently allocated). + * + * Status bits indicate whether the associated holder is constructed (& + * status_holder_constructed) and whether the value pointer is registered (& + * status_instance_registered) in `registered_instances`. + */ + bool simple_layout : 1; + /// For simple layout, tracks whether the holder has been constructed + bool simple_holder_constructed : 1; + /// For simple layout, tracks whether the instance is registered in `registered_instances` + bool simple_instance_registered : 1; + /// If true, get_internals().patients has an entry for this object + bool has_patients : 1; + + /// Initializes all of the above type/values/holders data (but not the instance values themselves) + void allocate_layout(); + + /// Destroys/deallocates all of the above + void deallocate_layout(); + + /// Returns the value_and_holder wrapper for the given type (or the first, if `find_type` + /// omitted). Returns a default-constructed (with `.inst = nullptr`) object on failure if + /// `throw_if_missing` is false.
+ value_and_holder get_value_and_holder(const type_info *find_type = nullptr, bool throw_if_missing = true); + + /// Bit values for the non-simple status flags + static constexpr uint8_t status_holder_constructed = 1; + static constexpr uint8_t status_instance_registered = 2; +}; + +static_assert(std::is_standard_layout::value, "Internal error: `pybind11::detail::instance` is not standard layout!"); + +/// from __cpp_future__ import (convenient aliases from C++14/17) +#if defined(PYBIND11_CPP14) && (!defined(_MSC_VER) || _MSC_VER >= 1910) +using std::enable_if_t; +using std::conditional_t; +using std::remove_cv_t; +using std::remove_reference_t; +#else +template using enable_if_t = typename std::enable_if::type; +template using conditional_t = typename std::conditional::type; +template using remove_cv_t = typename std::remove_cv::type; +template using remove_reference_t = typename std::remove_reference::type; +#endif + +/// Index sequences +#if defined(PYBIND11_CPP14) +using std::index_sequence; +using std::make_index_sequence; +#else +template struct index_sequence { }; +template struct make_index_sequence_impl : make_index_sequence_impl { }; +template struct make_index_sequence_impl <0, S...> { using type = index_sequence; }; +template using make_index_sequence = typename make_index_sequence_impl::type; +#endif + +/// Make an index sequence of the indices of true arguments +template struct select_indices_impl { using type = ISeq; }; +template struct select_indices_impl, I, B, Bs...> + : select_indices_impl, index_sequence>, I + 1, Bs...> {}; +template using select_indices = typename select_indices_impl, 0, Bs...>::type; + +/// Backports of std::bool_constant and std::negation to accommodate older compilers +template using bool_constant = std::integral_constant; +template struct negation : bool_constant { }; + +// PGI/Intel cannot detect operator delete with the "compatible" void_t impl, so +// using the new one (C++14 defect, so generally works on newer 
compilers, even +// if not in C++17 mode) +#if defined(__PGIC__) || defined(__INTEL_COMPILER) +template using void_t = void; +#else +template struct void_t_impl { using type = void; }; +template using void_t = typename void_t_impl::type; +#endif + + +/// Compile-time all/any/none of that check the boolean value of all template types +#if defined(__cpp_fold_expressions) && !(defined(_MSC_VER) && (_MSC_VER < 1916)) +template using all_of = bool_constant<(Ts::value && ...)>; +template using any_of = bool_constant<(Ts::value || ...)>; +#elif !defined(_MSC_VER) +template struct bools {}; +template using all_of = std::is_same< + bools, + bools>; +template using any_of = negation...>>; +#else +// MSVC has trouble with the above, but supports std::conjunction, which we can use instead (albeit +// at a slight loss of compilation efficiency). +template using all_of = std::conjunction; +template using any_of = std::disjunction; +#endif +template using none_of = negation>; + +template class... Predicates> using satisfies_all_of = all_of...>; +template class... Predicates> using satisfies_any_of = any_of...>; +template class... 
Predicates> using satisfies_none_of = none_of...>; + +/// Strip the class from a method type +template struct remove_class { }; +template struct remove_class { using type = R (A...); }; +template struct remove_class { using type = R (A...); }; + +/// Helper template to strip away type modifiers +template struct intrinsic_type { using type = T; }; +template struct intrinsic_type { using type = typename intrinsic_type::type; }; +template struct intrinsic_type { using type = typename intrinsic_type::type; }; +template struct intrinsic_type { using type = typename intrinsic_type::type; }; +template struct intrinsic_type { using type = typename intrinsic_type::type; }; +template struct intrinsic_type { using type = typename intrinsic_type::type; }; +template struct intrinsic_type { using type = typename intrinsic_type::type; }; +template using intrinsic_t = typename intrinsic_type::type; + +/// Helper type to replace 'void' in some expressions +struct void_type { }; + +/// Helper template which holds a list of types +template struct type_list { }; + +/// Compile-time integer sum +#ifdef __cpp_fold_expressions +template constexpr size_t constexpr_sum(Ts... ns) { return (0 + ... + size_t{ns}); } +#else +constexpr size_t constexpr_sum() { return 0; } +template +constexpr size_t constexpr_sum(T n, Ts... ns) { return size_t{n} + constexpr_sum(ns...); } +#endif + +PYBIND11_NAMESPACE_BEGIN(constexpr_impl) +/// Implementation details for constexpr functions +constexpr int first(int i) { return i; } +template +constexpr int first(int i, T v, Ts... vs) { return v ? i : first(i + 1, vs...); } + +constexpr int last(int /*i*/, int result) { return result; } +template +constexpr int last(int i, int result, T v, Ts... vs) { return last(i + 1, v ? i : result, vs...); } +PYBIND11_NAMESPACE_END(constexpr_impl) + +/// Return the index of the first type in Ts which satisfies Predicate. Returns sizeof...(Ts) if +/// none match. +template class Predicate, typename... 
Ts> +constexpr int constexpr_first() { return constexpr_impl::first(0, Predicate::value...); } + +/// Return the index of the last type in Ts which satisfies Predicate, or -1 if none match. +template class Predicate, typename... Ts> +constexpr int constexpr_last() { return constexpr_impl::last(0, -1, Predicate::value...); } + +/// Return the Nth element from the parameter pack +template +struct pack_element { using type = typename pack_element::type; }; +template +struct pack_element<0, T, Ts...> { using type = T; }; + +/// Return the one and only type which matches the predicate, or Default if none match. +/// If more than one type matches the predicate, fail at compile-time. +template class Predicate, typename Default, typename... Ts> +struct exactly_one { + static constexpr auto found = constexpr_sum(Predicate::value...); + static_assert(found <= 1, "Found more than one type matching the predicate"); + + static constexpr auto index = found ? constexpr_first() : 0; + using type = conditional_t::type, Default>; +}; +template class P, typename Default> +struct exactly_one { using type = Default; }; + +template class Predicate, typename Default, typename... Ts> +using exactly_one_t = typename exactly_one::type; + +/// Defer the evaluation of type T until types Us are instantiated +template struct deferred_type { using type = T; }; +template using deferred_t = typename deferred_type::type; + +/// Like is_base_of, but requires a strict base (i.e. `is_strict_base_of::value == false`, +/// unlike `std::is_base_of`) +template using is_strict_base_of = bool_constant< + std::is_base_of::value && !std::is_same::value>; + +/// Like is_base_of, but also requires that the base type is accessible (i.e. that a Derived pointer +/// can be converted to a Base pointer) +/// For unions, `is_base_of::value` is False, so we need to check `is_same` as well. 
+template using is_accessible_base_of = bool_constant< + (std::is_same::value || std::is_base_of::value) && std::is_convertible::value>; + +template class Base> +struct is_template_base_of_impl { + template static std::true_type check(Base *); + static std::false_type check(...); +}; + +/// Check if a template is the base of a type. For example: +/// `is_template_base_of` is true if `struct T : Base {}` where U can be anything +template class Base, typename T> +#if !defined(_MSC_VER) +using is_template_base_of = decltype(is_template_base_of_impl::check((intrinsic_t*)nullptr)); +#else // MSVC2015 has trouble with decltype in template aliases +struct is_template_base_of : decltype(is_template_base_of_impl::check((intrinsic_t*)nullptr)) { }; +#endif + +/// Check if T is an instantiation of the template `Class`. For example: +/// `is_instantiation` is true if `T == shared_ptr` where U can be anything. +template class Class, typename T> +struct is_instantiation : std::false_type { }; +template class Class, typename... Us> +struct is_instantiation> : std::true_type { }; + +/// Check if T is std::shared_ptr where U can be anything +template using is_shared_ptr = is_instantiation; + +/// Check if T looks like an input iterator +template struct is_input_iterator : std::false_type {}; +template +struct is_input_iterator()), decltype(++std::declval())>> + : std::true_type {}; + +template using is_function_pointer = bool_constant< + std::is_pointer::value && std::is_function::type>::value>; + +template struct strip_function_object { + // If you are encountering an + // 'error: name followed by "::" must be a class or namespace name' + // with the Intel compiler and a noexcept function here, + // try to use noexcept(true) instead of plain noexcept. + using type = typename remove_class::type; +}; + +// Extracts the function signature from a function, function pointer or lambda. 
+template > +using function_signature_t = conditional_t< + std::is_function::value, + F, + typename conditional_t< + std::is_pointer::value || std::is_member_pointer::value, + std::remove_pointer, + strip_function_object + >::type +>; + +/// Returns true if the type looks like a lambda: that is, isn't a function, pointer or member +/// pointer. Note that this can catch all sorts of other things, too; this is intended to be used +/// in a place where passing a lambda makes sense. +template using is_lambda = satisfies_none_of, + std::is_function, std::is_pointer, std::is_member_pointer>; + +// [workaround(intel)] Internal error on fold expression +/// Apply a function over each element of a parameter pack +#if defined(__cpp_fold_expressions) && !defined(__INTEL_COMPILER) +// Intel compiler produces an internal error on this fold expression (tested with ICC 19.0.2) +#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (((PATTERN), void()), ...) +#else +using expand_side_effects = bool[]; +#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (void)pybind11::detail::expand_side_effects{ ((PATTERN), void(), false)..., false } +#endif + +PYBIND11_NAMESPACE_END(detail) + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4275) // warning C4275: An exported class was derived from a class that wasn't exported. Can be ignored when derived from a STL class. 
+#endif +/// C++ bindings of builtin Python exceptions +class PYBIND11_EXPORT_EXCEPTION builtin_exception : public std::runtime_error { +public: + using std::runtime_error::runtime_error; + /// Set the error using the Python C API + virtual void set_error() const = 0; +}; +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#define PYBIND11_RUNTIME_EXCEPTION(name, type) \ + class PYBIND11_EXPORT_EXCEPTION name : public builtin_exception { public: \ + using builtin_exception::builtin_exception; \ + name() : name("") { } \ + void set_error() const override { PyErr_SetString(type, what()); } \ + }; + +PYBIND11_RUNTIME_EXCEPTION(stop_iteration, PyExc_StopIteration) +PYBIND11_RUNTIME_EXCEPTION(index_error, PyExc_IndexError) +PYBIND11_RUNTIME_EXCEPTION(key_error, PyExc_KeyError) +PYBIND11_RUNTIME_EXCEPTION(value_error, PyExc_ValueError) +PYBIND11_RUNTIME_EXCEPTION(type_error, PyExc_TypeError) +PYBIND11_RUNTIME_EXCEPTION(buffer_error, PyExc_BufferError) +PYBIND11_RUNTIME_EXCEPTION(import_error, PyExc_ImportError) +PYBIND11_RUNTIME_EXCEPTION(cast_error, PyExc_RuntimeError) /// Thrown when pybind11::cast or handle::call fail due to a type casting error +PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used internally + +[[noreturn]] PYBIND11_NOINLINE void pybind11_fail(const char *reason) { throw std::runtime_error(reason); } +[[noreturn]] PYBIND11_NOINLINE void pybind11_fail(const std::string &reason) { throw std::runtime_error(reason); } + +template struct format_descriptor { }; + +PYBIND11_NAMESPACE_BEGIN(detail) +// Returns the index of the given type in the type char array below, and in the list in numpy.h +// The order here is: bool; 8 ints ((signed,unsigned)x(8,16,32,64)bits); float,double,long double; +// complex float,double,long double. Note that the long double types only participate when long +// double is actually longer than double (it isn't under MSVC). 
+// NB: not only the string below but also complex.h and numpy.h rely on this order. +template struct is_fmt_numeric { static constexpr bool value = false; }; +template struct is_fmt_numeric::value>> { + static constexpr bool value = true; + static constexpr int index = std::is_same::value ? 0 : 1 + ( + std::is_integral::value ? detail::log2(sizeof(T))*2 + std::is_unsigned::value : 8 + ( + std::is_same::value ? 1 : std::is_same::value ? 2 : 0)); +}; +PYBIND11_NAMESPACE_END(detail) + +template struct format_descriptor::value>> { + static constexpr const char c = "?bBhHiIqQfdg"[detail::is_fmt_numeric::index]; + static constexpr const char value[2] = { c, '\0' }; + static std::string format() { return std::string(1, c); } +}; + +#if !defined(PYBIND11_CPP17) + +template constexpr const char format_descriptor< + T, detail::enable_if_t::value>>::value[2]; + +#endif + +/// RAII wrapper that temporarily clears any Python error state +struct error_scope { + PyObject *type, *value, *trace; + error_scope() { PyErr_Fetch(&type, &value, &trace); } + ~error_scope() { PyErr_Restore(type, value, trace); } +}; + +/// Dummy destructor wrapper that can be used to expose classes with a private destructor +struct nodelete { template void operator()(T*) { } }; + +PYBIND11_NAMESPACE_BEGIN(detail) +template +struct overload_cast_impl { + // NOLINTNEXTLINE(modernize-use-equals-default): MSVC 2015 needs this + constexpr overload_cast_impl() {} + + template + constexpr auto operator()(Return (*pf)(Args...)) const noexcept + -> decltype(pf) { return pf; } + + template + constexpr auto operator()(Return (Class::*pmf)(Args...), std::false_type = {}) const noexcept + -> decltype(pmf) { return pmf; } + + template + constexpr auto operator()(Return (Class::*pmf)(Args...) 
const, std::true_type) const noexcept + -> decltype(pmf) { return pmf; } +}; +PYBIND11_NAMESPACE_END(detail) + +// overload_cast requires variable templates: C++14 +#if defined(PYBIND11_CPP14) +#define PYBIND11_OVERLOAD_CAST 1 +/// Syntax sugar for resolving overloaded function pointers: +/// - regular: static_cast(&Class::func) +/// - sweet: overload_cast(&Class::func) +template +static constexpr detail::overload_cast_impl overload_cast = {}; +// MSVC 2015 only accepts this particular initialization syntax for this variable template. +#endif + +/// Const member function selector for overload_cast +/// - regular: static_cast(&Class::func) +/// - sweet: overload_cast(&Class::func, const_) +static constexpr auto const_ = std::true_type{}; + +#if !defined(PYBIND11_CPP14) // no overload_cast: providing something that static_assert-fails: +template struct overload_cast { + static_assert(detail::deferred_t::value, + "pybind11::overload_cast<...> requires compiling in C++14 mode"); +}; +#endif // overload_cast + +PYBIND11_NAMESPACE_BEGIN(detail) + +// Adaptor for converting arbitrary container arguments into a vector; implicitly convertible from +// any standard container (or C-style array) supporting std::begin/std::end, any singleton +// arithmetic type (if T is arithmetic), or explicitly constructible from an iterator pair. 
+template +class any_container { + std::vector v; +public: + any_container() = default; + + // Can construct from a pair of iterators + template ::value>> + any_container(It first, It last) : v(first, last) { } + + // Implicit conversion constructor from any arbitrary container type with values convertible to T + template ())), T>::value>> + // NOLINTNEXTLINE(google-explicit-constructor) + any_container(const Container &c) : any_container(std::begin(c), std::end(c)) { } + + // initializer_list's aren't deducible, so don't get matched by the above template; we need this + // to explicitly allow implicit conversion from one: + template ::value>> + any_container(const std::initializer_list &c) : any_container(c.begin(), c.end()) { } + + // Avoid copying if given an rvalue vector of the correct type. + // NOLINTNEXTLINE(google-explicit-constructor) + any_container(std::vector &&v) : v(std::move(v)) { } + + // Moves the vector out of an rvalue any_container + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::vector &&() && { return std::move(v); } + + // Dereferencing obtains a reference to the underlying vector + std::vector &operator*() { return v; } + const std::vector &operator*() const { return v; } + + // -> lets you call methods on the underlying vector + std::vector *operator->() { return &v; } + const std::vector *operator->() const { return &v; } +}; + +// Forward-declaration; see detail/class.h +std::string get_fully_qualified_tp_name(PyTypeObject*); + +template +inline static std::shared_ptr try_get_shared_from_this(std::enable_shared_from_this *holder_value_ptr) { +// Pre C++17, this code path exploits undefined behavior, but is known to work on many platforms. +// Use at your own risk! +// See also https://en.cppreference.com/w/cpp/memory/enable_shared_from_this, and in particular +// the `std::shared_ptr gp1 = not_so_good.getptr();` and `try`-`catch` parts of the example. 
+#if defined(__cpp_lib_enable_shared_from_this) && (!defined(_MSC_VER) || _MSC_VER >= 1912) + return holder_value_ptr->weak_from_this().lock(); +#else + try { + return holder_value_ptr->shared_from_this(); + } + catch (const std::bad_weak_ptr &) { + return nullptr; + } +#endif +} + +// For silencing "unused" compiler warnings in special situations. +template +#if defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER < 1920 // MSVC 2017 +constexpr +#endif +inline void silence_unused_warnings(Args &&...) {} + +// MSVC warning C4100: Unreferenced formal parameter +#if defined(_MSC_VER) && _MSC_VER <= 1916 +# define PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(...) \ + detail::silence_unused_warnings(__VA_ARGS__) +#else +# define PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(...) +#endif + +// GCC -Wunused-but-set-parameter All GCC versions (as of July 2021). +#if defined(__GNUG__) && !defined(__clang__) && !defined(__INTEL_COMPILER) +# define PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(...) \ + detail::silence_unused_warnings(__VA_ARGS__) +#else +# define PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(...) +#endif + +#if defined(_MSC_VER) // All versions (as of July 2021). + +// warning C4127: Conditional expression is constant +constexpr inline bool silence_msvc_c4127(bool cond) { return cond; } + +# define PYBIND11_SILENCE_MSVC_C4127(...) ::pybind11::detail::silence_msvc_c4127(__VA_ARGS__) + +#else +# define PYBIND11_SILENCE_MSVC_C4127(...) __VA_ARGS__ +#endif + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/descr.h b/Libraries/pybind11-2.8.0/pybind11/detail/descr.h new file mode 100644 index 00000000..c62e541b --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/detail/descr.h @@ -0,0 +1,104 @@ +/* + pybind11/detail/descr.h: Helper type for concatenating type signatures at compile time + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. 
Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "common.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +#if !defined(_MSC_VER) +# define PYBIND11_DESCR_CONSTEXPR static constexpr +#else +# define PYBIND11_DESCR_CONSTEXPR const +#endif + +/* Concatenate type signatures at compile time */ +template +struct descr { + char text[N + 1]{'\0'}; + + constexpr descr() = default; + // NOLINTNEXTLINE(google-explicit-constructor) + constexpr descr(char const (&s)[N+1]) : descr(s, make_index_sequence()) { } + + template + constexpr descr(char const (&s)[N+1], index_sequence) : text{s[Is]..., '\0'} { } + + template + // NOLINTNEXTLINE(google-explicit-constructor) + constexpr descr(char c, Chars... cs) : text{c, static_cast(cs)..., '\0'} { } + + static constexpr std::array types() { + return {{&typeid(Ts)..., nullptr}}; + } +}; + +template +constexpr descr plus_impl(const descr &a, const descr &b, + index_sequence, index_sequence) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(b); + return {a.text[Is1]..., b.text[Is2]...}; +} + +template +constexpr descr operator+(const descr &a, const descr &b) { + return plus_impl(a, b, make_index_sequence(), make_index_sequence()); +} + +template +constexpr descr _(char const(&text)[N]) { return descr(text); } +constexpr descr<0> _(char const(&)[1]) { return {}; } + +template struct int_to_str : int_to_str { }; +template struct int_to_str<0, Digits...> { + static constexpr auto digits = descr(('0' + Digits)...); +}; + +// Ternary description (like std::conditional) +template +constexpr enable_if_t> _(char const(&text1)[N1], char const(&)[N2]) { + return _(text1); +} +template +constexpr enable_if_t> _(char const(&)[N1], char const(&text2)[N2]) { + return _(text2); +} + +template +constexpr enable_if_t _(const T1 &d, const T2 &) { return d; } +template +constexpr enable_if_t _(const T1 &, const T2 &d) { return d; } + 
+template +auto constexpr _() -> remove_cv_t::digits)> { + return int_to_str::digits; +} + +template constexpr descr<1, Type> _() { return {'%'}; } + +constexpr descr<0> concat() { return {}; } + +template +constexpr descr concat(const descr &descr) { return descr; } + +template +constexpr auto concat(const descr &d, const Args &...args) + -> decltype(std::declval>() + concat(args...)) { + return d + _(", ") + concat(args...); +} + +template +constexpr descr type_descr(const descr &descr) { + return _("{") + descr + _("}"); +} + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/init.h b/Libraries/pybind11-2.8.0/pybind11/detail/init.h new file mode 100644 index 00000000..cace3529 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/detail/init.h @@ -0,0 +1,346 @@ +/* + pybind11/detail/init.h: init factory function implementation and support code. + + Copyright (c) 2017 Jason Rhinelander + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "class.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +template <> +class type_caster { +public: + bool load(handle h, bool) { + value = reinterpret_cast(h.ptr()); + return true; + } + + template using cast_op_type = value_and_holder &; + explicit operator value_and_holder &() { return *value; } + static constexpr auto name = _(); + +private: + value_and_holder *value = nullptr; +}; + +PYBIND11_NAMESPACE_BEGIN(initimpl) + +inline void no_nullptr(void *ptr) { + if (!ptr) throw type_error("pybind11::init(): factory function returned nullptr"); +} + +// Implementing functions for all forms of py::init<...> and py::init(...) 
+template using Cpp = typename Class::type; +template using Alias = typename Class::type_alias; +template using Holder = typename Class::holder_type; + +template using is_alias_constructible = std::is_constructible, Cpp &&>; + +// Takes a Cpp pointer and returns true if it actually is a polymorphic Alias instance. +template = 0> +bool is_alias(Cpp *ptr) { + return dynamic_cast *>(ptr) != nullptr; +} +// Failing fallback version of the above for a no-alias class (always returns false) +template +constexpr bool is_alias(void *) { return false; } + +// Constructs and returns a new object; if the given arguments don't map to a constructor, we fall +// back to brace aggregate initiailization so that for aggregate initialization can be used with +// py::init, e.g. `py::init` to initialize a `struct T { int a; int b; }`. For +// non-aggregate types, we need to use an ordinary T(...) constructor (invoking as `T{...}` usually +// works, but will not do the expected thing when `T` has an `initializer_list` constructor). +template ::value, int> = 0> +inline Class *construct_or_initialize(Args &&...args) { return new Class(std::forward(args)...); } +template ::value, int> = 0> +inline Class *construct_or_initialize(Args &&...args) { return new Class{std::forward(args)...}; } + +// Attempts to constructs an alias using a `Alias(Cpp &&)` constructor. This allows types with +// an alias to provide only a single Cpp factory function as long as the Alias can be +// constructed from an rvalue reference of the base Cpp type. This means that Alias classes +// can, when appropriate, simply define a `Alias(Cpp &&)` constructor rather than needing to +// inherit all the base class constructors. 
+template +void construct_alias_from_cpp(std::true_type /*is_alias_constructible*/, + value_and_holder &v_h, Cpp &&base) { + v_h.value_ptr() = new Alias(std::move(base)); +} +template +[[noreturn]] void construct_alias_from_cpp(std::false_type /*!is_alias_constructible*/, + value_and_holder &, Cpp &&) { + throw type_error("pybind11::init(): unable to convert returned instance to required " + "alias class: no `Alias(Class &&)` constructor available"); +} + +// Error-generating fallback for factories that don't match one of the below construction +// mechanisms. +template +void construct(...) { + static_assert(!std::is_same::value /* always false */, + "pybind11::init(): init function must return a compatible pointer, " + "holder, or value"); +} + +// Pointer return v1: the factory function returns a class pointer for a registered class. +// If we don't need an alias (because this class doesn't have one, or because the final type is +// inherited on the Python side) we can simply take over ownership. Otherwise we need to try to +// construct an Alias from the returned base instance. +template +void construct(value_and_holder &v_h, Cpp *ptr, bool need_alias) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(need_alias); + no_nullptr(ptr); + if (PYBIND11_SILENCE_MSVC_C4127(Class::has_alias) && need_alias && !is_alias(ptr)) { + // We're going to try to construct an alias by moving the cpp type. Whether or not + // that succeeds, we still need to destroy the original cpp pointer (either the + // moved away leftover, if the alias construction works, or the value itself if we + // throw an error), but we can't just call `delete ptr`: it might have a special + // deleter, or might be shared_from_this. So we construct a holder around it as if + // it was a normal instance, then steal the holder away into a local variable; thus + // the holder and destruction happens when we leave the C++ scope, and the holder + // class gets to handle the destruction however it likes. 
+ v_h.value_ptr() = ptr; + v_h.set_instance_registered(true); // To prevent init_instance from registering it + v_h.type->init_instance(v_h.inst, nullptr); // Set up the holder + Holder temp_holder(std::move(v_h.holder>())); // Steal the holder + v_h.type->dealloc(v_h); // Destroys the moved-out holder remains, resets value ptr to null + v_h.set_instance_registered(false); + + construct_alias_from_cpp(is_alias_constructible{}, v_h, std::move(*ptr)); + } else { + // Otherwise the type isn't inherited, so we don't need an Alias + v_h.value_ptr() = ptr; + } +} + +// Pointer return v2: a factory that always returns an alias instance ptr. We simply take over +// ownership of the pointer. +template = 0> +void construct(value_and_holder &v_h, Alias *alias_ptr, bool) { + no_nullptr(alias_ptr); + v_h.value_ptr() = static_cast *>(alias_ptr); +} + +// Holder return: copy its pointer, and move or copy the returned holder into the new instance's +// holder. This also handles types like std::shared_ptr and std::unique_ptr where T is a +// derived type (through those holder's implicit conversion from derived class holder constructors). +template +void construct(value_and_holder &v_h, Holder holder, bool need_alias) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(need_alias); + auto *ptr = holder_helper>::get(holder); + no_nullptr(ptr); + // If we need an alias, check that the held pointer is actually an alias instance + if (PYBIND11_SILENCE_MSVC_C4127(Class::has_alias) && need_alias && !is_alias(ptr)) + throw type_error("pybind11::init(): construction failed: returned holder-wrapped instance " + "is not an alias instance"); + + v_h.value_ptr() = ptr; + v_h.type->init_instance(v_h.inst, &holder); +} + +// return-by-value version 1: returning a cpp class by value. If the class has an alias and an +// alias is required the alias must have an `Alias(Cpp &&)` constructor so that we can construct +// the alias from the base when needed (i.e. because of Python-side inheritance). 
When we don't +// need it, we simply move-construct the cpp value into a new instance. +template +void construct(value_and_holder &v_h, Cpp &&result, bool need_alias) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(need_alias); + static_assert(std::is_move_constructible>::value, + "pybind11::init() return-by-value factory function requires a movable class"); + if (PYBIND11_SILENCE_MSVC_C4127(Class::has_alias) && need_alias) + construct_alias_from_cpp(is_alias_constructible{}, v_h, std::move(result)); + else + v_h.value_ptr() = new Cpp(std::move(result)); +} + +// return-by-value version 2: returning a value of the alias type itself. We move-construct an +// Alias instance (even if no the python-side inheritance is involved). The is intended for +// cases where Alias initialization is always desired. +template +void construct(value_and_holder &v_h, Alias &&result, bool) { + static_assert(std::is_move_constructible>::value, + "pybind11::init() return-by-alias-value factory function requires a movable alias class"); + v_h.value_ptr() = new Alias(std::move(result)); +} + +// Implementing class for py::init<...>() +template +struct constructor { + template = 0> + static void execute(Class &cl, const Extra&... extra) { + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + }, is_new_style_constructor(), extra...); + } + + template , Args...>::value, int> = 0> + static void execute(Class &cl, const Extra&... extra) { + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + if (Py_TYPE(v_h.inst) == v_h.type->type) + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + else + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + }, is_new_style_constructor(), extra...); + } + + template , Args...>::value, int> = 0> + static void execute(Class &cl, const Extra&... extra) { + cl.def("__init__", [](value_and_holder &v_h, Args... 
args) { + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + }, is_new_style_constructor(), extra...); + } +}; + +// Implementing class for py::init_alias<...>() +template struct alias_constructor { + template , Args...>::value, int> = 0> + static void execute(Class &cl, const Extra&... extra) { + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + }, is_new_style_constructor(), extra...); + } +}; + +// Implementation class for py::init(Func) and py::init(Func, AliasFunc) +template , typename = function_signature_t> +struct factory; + +// Specialization for py::init(Func) +template +struct factory { + remove_reference_t class_factory; + + // NOLINTNEXTLINE(google-explicit-constructor) + factory(Func &&f) : class_factory(std::forward(f)) {} + + // The given class either has no alias or has no separate alias factory; + // this always constructs the class itself. If the class is registered with an alias + // type and an alias instance is needed (i.e. because the final type is a Python class + // inheriting from the C++ type) the returned value needs to either already be an alias + // instance, or the alias needs to be constructible from a `Class &&` argument. + template + void execute(Class &cl, const Extra &...extra) && { + #if defined(PYBIND11_CPP14) + cl.def("__init__", [func = std::move(class_factory)] + #else + auto &func = class_factory; + cl.def("__init__", [func] + #endif + (value_and_holder &v_h, Args... 
args) { + construct(v_h, func(std::forward(args)...), + Py_TYPE(v_h.inst) != v_h.type->type); + }, is_new_style_constructor(), extra...); + } +}; + +// Specialization for py::init(Func, AliasFunc) +template +struct factory { + static_assert(sizeof...(CArgs) == sizeof...(AArgs), + "pybind11::init(class_factory, alias_factory): class and alias factories " + "must have identical argument signatures"); + static_assert(all_of...>::value, + "pybind11::init(class_factory, alias_factory): class and alias factories " + "must have identical argument signatures"); + + remove_reference_t class_factory; + remove_reference_t alias_factory; + + factory(CFunc &&c, AFunc &&a) + : class_factory(std::forward(c)), alias_factory(std::forward(a)) { } + + // The class factory is called when the `self` type passed to `__init__` is the direct + // class (i.e. not inherited), the alias factory when `self` is a Python-side subtype. + template + void execute(Class &cl, const Extra&... extra) && { + static_assert(Class::has_alias, "The two-argument version of `py::init()` can " + "only be used if the class has an alias"); + #if defined(PYBIND11_CPP14) + cl.def("__init__", [class_func = std::move(class_factory), alias_func = std::move(alias_factory)] + #else + auto &class_func = class_factory; + auto &alias_func = alias_factory; + cl.def("__init__", [class_func, alias_func] + #endif + (value_and_holder &v_h, CArgs... args) { + if (Py_TYPE(v_h.inst) == v_h.type->type) + // If the instance type equals the registered type we don't have inheritance, so + // don't need the alias and can construct using the class function: + construct(v_h, class_func(std::forward(args)...), false); + else + construct(v_h, alias_func(std::forward(args)...), true); + }, is_new_style_constructor(), extra...); + } +}; + +/// Set just the C++ state. Same as `__init__`. 
+template +void setstate(value_and_holder &v_h, T &&result, bool need_alias) { + construct(v_h, std::forward(result), need_alias); +} + +/// Set both the C++ and Python states +template ::value, int> = 0> +void setstate(value_and_holder &v_h, std::pair &&result, bool need_alias) { + construct(v_h, std::move(result.first), need_alias); + auto d = handle(result.second); + if (PyDict_Check(d.ptr()) && PyDict_Size(d.ptr()) == 0) { + // Skipping setattr below, to not force use of py::dynamic_attr() for Class unnecessarily. + // See PR #2972 for details. + return; + } + setattr((PyObject *) v_h.inst, "__dict__", d); +} + +/// Implementation for py::pickle(GetState, SetState) +template , typename = function_signature_t> +struct pickle_factory; + +template +struct pickle_factory { + static_assert(std::is_same, intrinsic_t>::value, + "The type returned by `__getstate__` must be the same " + "as the argument accepted by `__setstate__`"); + + remove_reference_t get; + remove_reference_t set; + + pickle_factory(Get get, Set set) + : get(std::forward(get)), set(std::forward(set)) { } + + template + void execute(Class &cl, const Extra &...extra) && { + cl.def("__getstate__", std::move(get)); + +#if defined(PYBIND11_CPP14) + cl.def("__setstate__", [func = std::move(set)] +#else + auto &func = set; + cl.def("__setstate__", [func] +#endif + (value_and_holder &v_h, ArgState state) { + setstate(v_h, func(std::forward(state)), + Py_TYPE(v_h.inst) != v_h.type->type); + }, is_new_style_constructor(), extra...); + } +}; + +PYBIND11_NAMESPACE_END(initimpl) +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(pybind11) diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/internals.h b/Libraries/pybind11-2.8.0/pybind11/detail/internals.h new file mode 100644 index 00000000..98d21eb9 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/detail/internals.h @@ -0,0 +1,467 @@ +/* + pybind11/detail/internals.h: Internal data structure and related functions + + Copyright (c) 2017 Wenzel Jakob 
+ + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "../pytypes.h" + +/// Tracks the `internals` and `type_info` ABI version independent of the main library version. +/// +/// Some portions of the code use an ABI that is conditional depending on this +/// version number. That allows ABI-breaking changes to be "pre-implemented". +/// Once the default version number is incremented, the conditional logic that +/// no longer applies can be removed. Additionally, users that need not +/// maintain ABI compatibility can increase the version number in order to take +/// advantage of any functionality/efficiency improvements that depend on the +/// newer ABI. +/// +/// WARNING: If you choose to manually increase the ABI version, note that +/// pybind11 may not be tested as thoroughly with a non-default ABI version, and +/// further ABI-incompatible changes may be made before the ABI is officially +/// changed to the new version. +#ifndef PYBIND11_INTERNALS_VERSION +# define PYBIND11_INTERNALS_VERSION 4 +#endif + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +using ExceptionTranslator = void (*)(std::exception_ptr); + +PYBIND11_NAMESPACE_BEGIN(detail) + +// Forward declarations +inline PyTypeObject *make_static_property_type(); +inline PyTypeObject *make_default_metaclass(); +inline PyObject *make_object_base_type(PyTypeObject *metaclass); + +// The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new +// Thread Specific Storage (TSS) API. +#if PY_VERSION_HEX >= 0x03070000 +// Avoid unnecessary allocation of `Py_tss_t`, since we cannot use +// `Py_LIMITED_API` anyway. +# if PYBIND11_INTERNALS_VERSION > 4 +# define PYBIND11_TLS_KEY_REF Py_tss_t & +# ifdef __GNUC__ +// Clang on macOS warns due to `Py_tss_NEEDS_INIT` not specifying an initializer +// for every field. 
+# define PYBIND11_TLS_KEY_INIT(var) \ + _Pragma("GCC diagnostic push") /**/ \ + _Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") /**/ \ + Py_tss_t var \ + = Py_tss_NEEDS_INIT; \ + _Pragma("GCC diagnostic pop") +# else +# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t var = Py_tss_NEEDS_INIT; +# endif +# define PYBIND11_TLS_KEY_CREATE(var) (PyThread_tss_create(&(var)) == 0) +# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get(&(key)) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set(&(key), (value)) +# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set(&(key), nullptr) +# define PYBIND11_TLS_FREE(key) PyThread_tss_delete(&(key)) +# else +# define PYBIND11_TLS_KEY_REF Py_tss_t * +# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr; +# define PYBIND11_TLS_KEY_CREATE(var) \ + (((var) = PyThread_tss_alloc()) != nullptr && (PyThread_tss_create((var)) == 0)) +# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key)) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value)) +# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr) +# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key) +# endif +#else +// Usually an int but a long on Cygwin64 with Python 3.x +# define PYBIND11_TLS_KEY_REF decltype(PyThread_create_key()) +# define PYBIND11_TLS_KEY_INIT(var) PYBIND11_TLS_KEY_REF var = 0; +# define PYBIND11_TLS_KEY_CREATE(var) (((var) = PyThread_create_key()) != -1) +# define PYBIND11_TLS_GET_VALUE(key) PyThread_get_key_value((key)) +# if PY_MAJOR_VERSION < 3 || defined(PYPY_VERSION) +// On CPython < 3.4 and on PyPy, `PyThread_set_key_value` strangely does not set +// the value if it has already been set. Instead, it must first be deleted and +// then set again. 
+inline void tls_replace_value(PYBIND11_TLS_KEY_REF key, void *value) { + PyThread_delete_key_value(key); + PyThread_set_key_value(key, value); +} +# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_delete_key_value(key) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) \ + ::pybind11::detail::tls_replace_value((key), (value)) +# else +# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_set_key_value((key), nullptr) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_set_key_value((key), (value)) +# endif +# define PYBIND11_TLS_FREE(key) (void) key +#endif + +// Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly +// other STLs, this means `typeid(A)` from one module won't equal `typeid(A)` from another module +// even when `A` is the same, non-hidden-visibility type (e.g. from a common include). Under +// libstdc++, this doesn't happen: equality and the type_index hash are based on the type name, +// which works. If not under a known-good stl, provide our own name-based hash and equality +// functions that use the type name. 
+#if defined(__GLIBCXX__) +inline bool same_type(const std::type_info &lhs, const std::type_info &rhs) { return lhs == rhs; } +using type_hash = std::hash; +using type_equal_to = std::equal_to; +#else +inline bool same_type(const std::type_info &lhs, const std::type_info &rhs) { + return lhs.name() == rhs.name() || std::strcmp(lhs.name(), rhs.name()) == 0; +} + +struct type_hash { + size_t operator()(const std::type_index &t) const { + size_t hash = 5381; + const char *ptr = t.name(); + while (auto c = static_cast(*ptr++)) + hash = (hash * 33) ^ c; + return hash; + } +}; + +struct type_equal_to { + bool operator()(const std::type_index &lhs, const std::type_index &rhs) const { + return lhs.name() == rhs.name() || std::strcmp(lhs.name(), rhs.name()) == 0; + } +}; +#endif + +template +using type_map = std::unordered_map; + +struct override_hash { + inline size_t operator()(const std::pair& v) const { + size_t value = std::hash()(v.first); + value ^= std::hash()(v.second) + 0x9e3779b9 + (value<<6) + (value>>2); + return value; + } +}; + +/// Internal data structure used to track registered instances and types. +/// Whenever binary incompatible changes are made to this structure, +/// `PYBIND11_INTERNALS_VERSION` must be incremented. 
+struct internals { + type_map registered_types_cpp; // std::type_index -> pybind11's type information + std::unordered_map> registered_types_py; // PyTypeObject* -> base type_info(s) + std::unordered_multimap registered_instances; // void * -> instance* + std::unordered_set, override_hash> inactive_override_cache; + type_map> direct_conversions; + std::unordered_map> patients; + std::forward_list registered_exception_translators; + std::unordered_map shared_data; // Custom data to be shared across extensions +#if PYBIND11_INTERNALS_VERSION == 4 + std::vector unused_loader_patient_stack_remove_at_v5; +#endif + std::forward_list static_strings; // Stores the std::strings backing detail::c_str() + PyTypeObject *static_property_type; + PyTypeObject *default_metaclass; + PyObject *instance_base; +#if defined(WITH_THREAD) + PYBIND11_TLS_KEY_INIT(tstate) +# if PYBIND11_INTERNALS_VERSION > 4 + PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key) +# endif // PYBIND11_INTERNALS_VERSION > 4 + PyInterpreterState *istate = nullptr; + ~internals() { +# if PYBIND11_INTERNALS_VERSION > 4 + PYBIND11_TLS_FREE(loader_life_support_tls_key); +# endif // PYBIND11_INTERNALS_VERSION > 4 + + // This destructor is called *after* Py_Finalize() in finalize_interpreter(). + // That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is + // called. PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does + // nothing. PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree. + // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). + // Neither of those have anything to do with CPython internals. PyMem_RawFree *requires* + // that the `tstate` be allocated with the CPython allocator. + PYBIND11_TLS_FREE(tstate); + } +#endif +}; + +/// Additional type information which does not fit into the PyTypeObject. +/// Changes to this struct also require bumping `PYBIND11_INTERNALS_VERSION`. 
+struct type_info { + PyTypeObject *type; + const std::type_info *cpptype; + size_t type_size, type_align, holder_size_in_ptrs; + void *(*operator_new)(size_t); + void (*init_instance)(instance *, const void *); + void (*dealloc)(value_and_holder &v_h); + std::vector implicit_conversions; + std::vector> implicit_casts; + std::vector *direct_conversions; + buffer_info *(*get_buffer)(PyObject *, void *) = nullptr; + void *get_buffer_data = nullptr; + void *(*module_local_load)(PyObject *, const type_info *) = nullptr; + /* A simple type never occurs as a (direct or indirect) parent + * of a class that makes use of multiple inheritance */ + bool simple_type : 1; + /* True if there is no multiple inheritance in this type's inheritance tree */ + bool simple_ancestors : 1; + /* for base vs derived holder_type checks */ + bool default_holder : 1; + /* true if this is a type registered with py::module_local */ + bool module_local : 1; +}; + +/// On MSVC, debug and release builds are not ABI-compatible! +#if defined(_MSC_VER) && defined(_DEBUG) +# define PYBIND11_BUILD_TYPE "_debug" +#else +# define PYBIND11_BUILD_TYPE "" +#endif + +/// Let's assume that different compilers are ABI-incompatible. +/// A user can manually set this string if they know their +/// compiler is compatible. 
+#ifndef PYBIND11_COMPILER_TYPE +# if defined(_MSC_VER) +# define PYBIND11_COMPILER_TYPE "_msvc" +# elif defined(__INTEL_COMPILER) +# define PYBIND11_COMPILER_TYPE "_icc" +# elif defined(__clang__) +# define PYBIND11_COMPILER_TYPE "_clang" +# elif defined(__PGI) +# define PYBIND11_COMPILER_TYPE "_pgi" +# elif defined(__MINGW32__) +# define PYBIND11_COMPILER_TYPE "_mingw" +# elif defined(__CYGWIN__) +# define PYBIND11_COMPILER_TYPE "_gcc_cygwin" +# elif defined(__GNUC__) +# define PYBIND11_COMPILER_TYPE "_gcc" +# else +# define PYBIND11_COMPILER_TYPE "_unknown" +# endif +#endif + +/// Also standard libs +#ifndef PYBIND11_STDLIB +# if defined(_LIBCPP_VERSION) +# define PYBIND11_STDLIB "_libcpp" +# elif defined(__GLIBCXX__) || defined(__GLIBCPP__) +# define PYBIND11_STDLIB "_libstdcpp" +# else +# define PYBIND11_STDLIB "" +# endif +#endif + +/// On Linux/OSX, changes in __GXX_ABI_VERSION__ indicate ABI incompatibility. +#ifndef PYBIND11_BUILD_ABI +# if defined(__GXX_ABI_VERSION) +# define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION) +# else +# define PYBIND11_BUILD_ABI "" +# endif +#endif + +#ifndef PYBIND11_INTERNALS_KIND +# if defined(WITH_THREAD) +# define PYBIND11_INTERNALS_KIND "" +# else +# define PYBIND11_INTERNALS_KIND "_without_thread" +# endif +#endif + +#define PYBIND11_INTERNALS_ID "__pybind11_internals_v" \ + PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__" + +#define PYBIND11_MODULE_LOCAL_ID "__pybind11_module_local_v" \ + PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__" + +/// Each module locally stores a pointer to the `internals` data. The data +/// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. 
+inline internals **&get_internals_pp() { + static internals **internals_pp = nullptr; + return internals_pp; +} + +inline void translate_exception(std::exception_ptr p) { + try { + if (p) std::rethrow_exception(p); + } catch (error_already_set &e) { e.restore(); return; + } catch (const builtin_exception &e) { e.set_error(); return; + } catch (const std::bad_alloc &e) { PyErr_SetString(PyExc_MemoryError, e.what()); return; + } catch (const std::domain_error &e) { PyErr_SetString(PyExc_ValueError, e.what()); return; + } catch (const std::invalid_argument &e) { PyErr_SetString(PyExc_ValueError, e.what()); return; + } catch (const std::length_error &e) { PyErr_SetString(PyExc_ValueError, e.what()); return; + } catch (const std::out_of_range &e) { PyErr_SetString(PyExc_IndexError, e.what()); return; + } catch (const std::range_error &e) { PyErr_SetString(PyExc_ValueError, e.what()); return; + } catch (const std::overflow_error &e) { PyErr_SetString(PyExc_OverflowError, e.what()); return; + } catch (const std::exception &e) { PyErr_SetString(PyExc_RuntimeError, e.what()); return; + } catch (...) { + PyErr_SetString(PyExc_RuntimeError, "Caught an unknown exception!"); + return; + } +} + +#if !defined(__GLIBCXX__) +inline void translate_local_exception(std::exception_ptr p) { + try { + if (p) std::rethrow_exception(p); + } catch (error_already_set &e) { e.restore(); return; + } catch (const builtin_exception &e) { e.set_error(); return; + } +} +#endif + +/// Return a reference to the current `internals` data +PYBIND11_NOINLINE internals &get_internals() { + auto **&internals_pp = get_internals_pp(); + if (internals_pp && *internals_pp) + return **internals_pp; + + // Ensure that the GIL is held since we will need to make Python calls. + // Cannot use py::gil_scoped_acquire here since that constructor calls get_internals. 
+ struct gil_scoped_acquire_local { + gil_scoped_acquire_local() : state (PyGILState_Ensure()) {} + ~gil_scoped_acquire_local() { PyGILState_Release(state); } + const PyGILState_STATE state; + } gil; + + PYBIND11_STR_TYPE id(PYBIND11_INTERNALS_ID); + auto builtins = handle(PyEval_GetBuiltins()); + if (builtins.contains(id) && isinstance(builtins[id])) { + internals_pp = static_cast(capsule(builtins[id])); + + // We loaded builtins through python's builtins, which means that our `error_already_set` + // and `builtin_exception` may be different local classes than the ones set up in the + // initial exception translator, below, so add another for our local exception classes. + // + // libstdc++ doesn't require this (types there are identified only by name) + // libc++ with CPython doesn't require this (types are explicitly exported) + // libc++ with PyPy still need it, awaiting further investigation +#if !defined(__GLIBCXX__) + (*internals_pp)->registered_exception_translators.push_front(&translate_local_exception); +#endif + } else { + if (!internals_pp) internals_pp = new internals*(); + auto *&internals_ptr = *internals_pp; + internals_ptr = new internals(); +#if defined(WITH_THREAD) + +# if PY_VERSION_HEX < 0x03090000 + PyEval_InitThreads(); +# endif + PyThreadState *tstate = PyThreadState_Get(); + if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->tstate)) { + pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!"); + } + PYBIND11_TLS_REPLACE_VALUE(internals_ptr->tstate, tstate); + +# if PYBIND11_INTERNALS_VERSION > 4 + if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->loader_life_support_tls_key)) { + pybind11_fail("get_internals: could not successfully initialize the " + "loader_life_support TSS key!"); + } +# endif + internals_ptr->istate = tstate->interp; +#endif + builtins[id] = capsule(internals_pp); + internals_ptr->registered_exception_translators.push_front(&translate_exception); + internals_ptr->static_property_type = 
make_static_property_type(); + internals_ptr->default_metaclass = make_default_metaclass(); + internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass); + } + return **internals_pp; +} + +// the internals struct (above) is shared between all the modules. local_internals are only +// for a single module. Any changes made to internals may require an update to +// PYBIND11_INTERNALS_VERSION, breaking backwards compatibility. local_internals is, by design, +// restricted to a single module. Whether a module has local internals or not should not +// impact any other modules, because the only things accessing the local internals is the +// module that contains them. +struct local_internals { + type_map registered_types_cpp; + std::forward_list registered_exception_translators; +#if defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4 + + // For ABI compatibility, we can't store the loader_life_support TLS key in + // the `internals` struct directly. Instead, we store it in `shared_data` and + // cache a copy in `local_internals`. If we allocated a separate TLS key for + // each instance of `local_internals`, we could end up allocating hundreds of + // TLS keys if hundreds of different pybind11 modules are loaded (which is a + // plausible number). + PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key) + + // Holds the shared TLS key for the loader_life_support stack. + struct shared_loader_life_support_data { + PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key) + shared_loader_life_support_data() { + if (!PYBIND11_TLS_KEY_CREATE(loader_life_support_tls_key)) { + pybind11_fail("local_internals: could not successfully initialize the " + "loader_life_support TLS key!"); + } + } + // We can't help but leak the TLS key, because Python never unloads extension modules. + }; + + local_internals() { + auto &internals = get_internals(); + // Get or create the `loader_life_support_stack_key`. 
+ auto &ptr = internals.shared_data["_life_support"]; + if (!ptr) { + ptr = new shared_loader_life_support_data; + } + loader_life_support_tls_key + = static_cast(ptr)->loader_life_support_tls_key; + } +#endif // defined(WITH_THREAD) && PYBIND11_INTERNALS_VERSION == 4 +}; + +/// Works like `get_internals`, but for things which are locally registered. +inline local_internals &get_local_internals() { + static local_internals locals; + return locals; +} + + +/// Constructs a std::string with the given arguments, stores it in `internals`, and returns its +/// `c_str()`. Such strings objects have a long storage duration -- the internal strings are only +/// cleared when the program exits or after interpreter shutdown (when embedding), and so are +/// suitable for c-style strings needed by Python internals (such as PyTypeObject's tp_name). +template +const char *c_str(Args &&...args) { + auto &strings = get_internals().static_strings; + strings.emplace_front(std::forward(args)...); + return strings.front().c_str(); +} + +PYBIND11_NAMESPACE_END(detail) + +/// Returns a named pointer that is shared among all extension modules (using the same +/// pybind11 version) running in the current interpreter. Names starting with underscores +/// are reserved for internal usage. Returns `nullptr` if no matching entry was found. +PYBIND11_NOINLINE void *get_shared_data(const std::string &name) { + auto &internals = detail::get_internals(); + auto it = internals.shared_data.find(name); + return it != internals.shared_data.end() ? it->second : nullptr; +} + +/// Set the shared data that can be later recovered by `get_shared_data()`. +PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) { + detail::get_internals().shared_data[name] = data; + return data; +} + +/// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if +/// such entry exists. 
Otherwise, a new object of default-constructible type `T` is +/// added to the shared data under the given name and a reference to it is returned. +template +T &get_or_create_shared_data(const std::string &name) { + auto &internals = detail::get_internals(); + auto it = internals.shared_data.find(name); + T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr); + if (!ptr) { + ptr = new T(); + internals.shared_data[name] = ptr; + } + return *ptr; +} + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/type_caster_base.h b/Libraries/pybind11-2.8.0/pybind11/detail/type_caster_base.h new file mode 100644 index 00000000..f804d9d1 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/detail/type_caster_base.h @@ -0,0 +1,978 @@ +/* + pybind11/detail/type_caster_base.h (originally first part of pybind11/cast.h) + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "../pytypes.h" +#include "common.h" +#include "descr.h" +#include "internals.h" +#include "typeid.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +/// A life support system for temporary objects created by `type_caster::load()`. +/// Adding a patient will keep it alive up until the enclosing function returns. +class loader_life_support { +private: + loader_life_support* parent = nullptr; + std::unordered_set keep_alive; + +#if defined(WITH_THREAD) + // Store stack pointer in thread-local storage. 
+ static PYBIND11_TLS_KEY_REF get_stack_tls_key() { +# if PYBIND11_INTERNALS_VERSION == 4 + return get_local_internals().loader_life_support_tls_key; +# else + return get_internals().loader_life_support_tls_key; +# endif + } + static loader_life_support *get_stack_top() { + return static_cast(PYBIND11_TLS_GET_VALUE(get_stack_tls_key())); + } + static void set_stack_top(loader_life_support *value) { + PYBIND11_TLS_REPLACE_VALUE(get_stack_tls_key(), value); + } +#else + // Use single global variable for stack; get_stack_pp() returns the address of that variable. + static loader_life_support **get_stack_pp() { + static loader_life_support *global_stack = nullptr; + return &global_stack; + } + static loader_life_support *get_stack_top() { return *get_stack_pp(); } + static void set_stack_top(loader_life_support *value) { *get_stack_pp() = value; } +#endif + +public: + /// A new patient frame is created when a function is entered + loader_life_support() { + parent = get_stack_top(); + set_stack_top(this); + } + + /// ... and destroyed after it returns + ~loader_life_support() { + if (get_stack_top() != this) + pybind11_fail("loader_life_support: internal error"); + set_stack_top(parent); + for (auto* item : keep_alive) + Py_DECREF(item); + } + + /// This can only be used inside a pybind11-bound function, either by `argument_loader` + /// at argument preparation time or by `py::cast()` at execution time. + PYBIND11_NOINLINE static void add_patient(handle h) { + loader_life_support *frame = get_stack_top(); + if (!frame) { + // NOTE: It would be nice to include the stack frames here, as this indicates + // use of pybind11::cast<> outside the normal call framework, finding such + // a location is challenging.
Developers could consider printing out + // stack frame addresses here using something like __builtin_frame_address(0) + throw cast_error("When called outside a bound function, py::cast() cannot " + "do Python -> C++ conversions which require the creation " + "of temporary values"); + } + + if (frame->keep_alive.insert(h.ptr()).second) + Py_INCREF(h.ptr()); + } +}; + +// Gets the cache entry for the given type, creating it if necessary. The return value is the pair +// returned by emplace, i.e. an iterator for the entry and a bool set to `true` if the entry was +// just created. +inline std::pair all_type_info_get_cache(PyTypeObject *type); + +// Populates a just-created cache entry. +PYBIND11_NOINLINE void all_type_info_populate(PyTypeObject *t, std::vector &bases) { + std::vector check; + for (handle parent : reinterpret_borrow(t->tp_bases)) + check.push_back((PyTypeObject *) parent.ptr()); + + auto const &type_dict = get_internals().registered_types_py; + for (size_t i = 0; i < check.size(); i++) { + auto type = check[i]; + // Ignore Python2 old-style class super type: + if (!PyType_Check((PyObject *) type)) continue; + + // Check `type` in the current set of registered python types: + auto it = type_dict.find(type); + if (it != type_dict.end()) { + // We found a cache entry for it, so it's either pybind-registered or has pre-computed + // pybind bases, but we have to make sure we haven't already seen the type(s) before: we + // want to follow Python/virtual C++ rules that there should only be one instance of a + // common base. + for (auto *tinfo : it->second) { + // NB: Could use a second set here, rather than doing a linear search, but since + // having a large number of immediate pybind11-registered types seems fairly + // unlikely, that probably isn't worthwhile. 
+ bool found = false; + for (auto *known : bases) { + if (known == tinfo) { found = true; break; } + } + if (!found) bases.push_back(tinfo); + } + } + else if (type->tp_bases) { + // It's some python type, so keep follow its bases classes to look for one or more + // registered types + if (i + 1 == check.size()) { + // When we're at the end, we can pop off the current element to avoid growing + // `check` when adding just one base (which is typical--i.e. when there is no + // multiple inheritance) + check.pop_back(); + i--; + } + for (handle parent : reinterpret_borrow(type->tp_bases)) + check.push_back((PyTypeObject *) parent.ptr()); + } + } +} + +/** + * Extracts vector of type_info pointers of pybind-registered roots of the given Python type. Will + * be just 1 pybind type for the Python type of a pybind-registered class, or for any Python-side + * derived class that uses single inheritance. Will contain as many types as required for a Python + * class that uses multiple inheritance to inherit (directly or indirectly) from multiple + * pybind-registered classes. Will be empty if neither the type nor any base classes are + * pybind-registered. + * + * The value is cached for the lifetime of the Python type. + */ +inline const std::vector &all_type_info(PyTypeObject *type) { + auto ins = all_type_info_get_cache(type); + if (ins.second) + // New cache entry: populate it + all_type_info_populate(type, ins.first->second); + + return ins.first->second; +} + +/** + * Gets a single pybind11 type info for a python type. Returns nullptr if neither the type nor any + * ancestors are pybind11-registered. Throws an exception if there are multiple bases--use + * `all_type_info` instead if you want to support multiple bases. 
+ */ +PYBIND11_NOINLINE detail::type_info* get_type_info(PyTypeObject *type) { + auto &bases = all_type_info(type); + if (bases.empty()) + return nullptr; + if (bases.size() > 1) + pybind11_fail("pybind11::detail::get_type_info: type has multiple pybind11-registered bases"); + return bases.front(); +} + +inline detail::type_info *get_local_type_info(const std::type_index &tp) { + auto &locals = get_local_internals().registered_types_cpp; + auto it = locals.find(tp); + if (it != locals.end()) + return it->second; + return nullptr; +} + +inline detail::type_info *get_global_type_info(const std::type_index &tp) { + auto &types = get_internals().registered_types_cpp; + auto it = types.find(tp); + if (it != types.end()) + return it->second; + return nullptr; +} + +/// Return the type info for a given C++ type; on lookup failure can either throw or return nullptr. +PYBIND11_NOINLINE detail::type_info *get_type_info(const std::type_index &tp, + bool throw_if_missing = false) { + if (auto ltype = get_local_type_info(tp)) + return ltype; + if (auto gtype = get_global_type_info(tp)) + return gtype; + + if (throw_if_missing) { + std::string tname = tp.name(); + detail::clean_type_id(tname); + pybind11_fail("pybind11::detail::get_type_info: unable to find type info for \"" + tname + "\""); + } + return nullptr; +} + +PYBIND11_NOINLINE handle get_type_handle(const std::type_info &tp, bool throw_if_missing) { + detail::type_info *type_info = get_type_info(tp, throw_if_missing); + return handle(type_info ? ((PyObject *) type_info->type) : nullptr); +} + +// Searches the inheritance graph for a registered Python instance, using all_type_info(). 
+PYBIND11_NOINLINE handle find_registered_python_instance(void *src, + const detail::type_info *tinfo) { + auto it_instances = get_internals().registered_instances.equal_range(src); + for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) { + for (auto instance_type : detail::all_type_info(Py_TYPE(it_i->second))) { + if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype)) + return handle((PyObject *) it_i->second).inc_ref(); + } + } + return handle(); +} + +struct value_and_holder { + instance *inst = nullptr; + size_t index = 0u; + const detail::type_info *type = nullptr; + void **vh = nullptr; + + // Main constructor for a found value/holder: + value_and_holder(instance *i, const detail::type_info *type, size_t vpos, size_t index) : + inst{i}, index{index}, type{type}, + vh{inst->simple_layout ? inst->simple_value_holder : &inst->nonsimple.values_and_holders[vpos]} + {} + + // Default constructor (used to signal a value-and-holder not found by get_value_and_holder()) + value_and_holder() = default; + + // Used for past-the-end iterator + explicit value_and_holder(size_t index) : index{index} {} + + template V *&value_ptr() const { + return reinterpret_cast(vh[0]); + } + // True if this `value_and_holder` has a non-null value pointer + explicit operator bool() const { return value_ptr() != nullptr; } + + template H &holder() const { + return reinterpret_cast(vh[1]); + } + bool holder_constructed() const { + return inst->simple_layout + ? 
inst->simple_holder_constructed + : (inst->nonsimple.status[index] & instance::status_holder_constructed) != 0u; + } + // NOLINTNEXTLINE(readability-make-member-function-const) + void set_holder_constructed(bool v = true) { + if (inst->simple_layout) + inst->simple_holder_constructed = v; + else if (v) + inst->nonsimple.status[index] |= instance::status_holder_constructed; + else + inst->nonsimple.status[index] &= (std::uint8_t) ~instance::status_holder_constructed; + } + bool instance_registered() const { + return inst->simple_layout + ? inst->simple_instance_registered + : ((inst->nonsimple.status[index] & instance::status_instance_registered) != 0); + } + // NOLINTNEXTLINE(readability-make-member-function-const) + void set_instance_registered(bool v = true) { + if (inst->simple_layout) + inst->simple_instance_registered = v; + else if (v) + inst->nonsimple.status[index] |= instance::status_instance_registered; + else + inst->nonsimple.status[index] &= (std::uint8_t) ~instance::status_instance_registered; + } +}; + +// Container for accessing and iterating over an instance's values/holders +struct values_and_holders { +private: + instance *inst; + using type_vec = std::vector; + const type_vec &tinfo; + +public: + explicit values_and_holders(instance *inst) + : inst{inst}, tinfo(all_type_info(Py_TYPE(inst))) {} + + struct iterator { + private: + instance *inst = nullptr; + const type_vec *types = nullptr; + value_and_holder curr; + friend struct values_and_holders; + iterator(instance *inst, const type_vec *tinfo) + : inst{inst}, types{tinfo}, + curr(inst /* instance */, + types->empty() ? 
nullptr : (*types)[0] /* type info */, + 0, /* vpos: (non-simple types only): the first vptr comes first */ + 0 /* index */) + {} + // Past-the-end iterator: + explicit iterator(size_t end) : curr(end) {} + + public: + bool operator==(const iterator &other) const { return curr.index == other.curr.index; } + bool operator!=(const iterator &other) const { return curr.index != other.curr.index; } + iterator &operator++() { + if (!inst->simple_layout) + curr.vh += 1 + (*types)[curr.index]->holder_size_in_ptrs; + ++curr.index; + curr.type = curr.index < types->size() ? (*types)[curr.index] : nullptr; + return *this; + } + value_and_holder &operator*() { return curr; } + value_and_holder *operator->() { return &curr; } + }; + + iterator begin() { return iterator(inst, &tinfo); } + iterator end() { return iterator(tinfo.size()); } + + iterator find(const type_info *find_type) { + auto it = begin(), endit = end(); + while (it != endit && it->type != find_type) ++it; + return it; + } + + size_t size() { return tinfo.size(); } +}; + +/** + * Extracts C++ value and holder pointer references from an instance (which may contain multiple + * values/holders for python-side multiple inheritance) that match the given type. Throws an error + * if the given type (or ValueType, if omitted) is not a pybind11 base of the given instance. If + * `find_type` is omitted (or explicitly specified as nullptr) the first value/holder are returned, + * regardless of type (and the resulting .type will be nullptr). + * + * The returned object should be short-lived: in particular, it must not outlive the called-upon + * instance. 
+ */ +PYBIND11_NOINLINE value_and_holder instance::get_value_and_holder(const type_info *find_type /*= nullptr default in common.h*/, bool throw_if_missing /*= true in common.h*/) { + // Optimize common case: + if (!find_type || Py_TYPE(this) == find_type->type) + return value_and_holder(this, find_type, 0, 0); + + detail::values_and_holders vhs(this); + auto it = vhs.find(find_type); + if (it != vhs.end()) + return *it; + + if (!throw_if_missing) + return value_and_holder(); + +#if defined(NDEBUG) + pybind11_fail("pybind11::detail::instance::get_value_and_holder: " + "type is not a pybind11 base of the given instance " + "(compile in debug mode for type details)"); +#else + pybind11_fail("pybind11::detail::instance::get_value_and_holder: `" + + get_fully_qualified_tp_name(find_type->type) + "' is not a pybind11 base of the given `" + + get_fully_qualified_tp_name(Py_TYPE(this)) + "' instance"); +#endif +} + +PYBIND11_NOINLINE void instance::allocate_layout() { + auto &tinfo = all_type_info(Py_TYPE(this)); + + const size_t n_types = tinfo.size(); + + if (n_types == 0) + pybind11_fail("instance allocation failed: new instance has no pybind11-registered base types"); + + simple_layout = + n_types == 1 && tinfo.front()->holder_size_in_ptrs <= instance_simple_holder_in_ptrs(); + + // Simple path: no python-side multiple inheritance, and a small-enough holder + if (simple_layout) { + simple_value_holder[0] = nullptr; + simple_holder_constructed = false; + simple_instance_registered = false; + } + else { // multiple base types or a too-large holder + // Allocate space to hold: [v1*][h1][v2*][h2]...[bb...] where [vN*] is a value pointer, + // [hN] is the (uninitialized) holder instance for value N, and [bb...] is a set of bool + // values that tracks whether each associated holder has been initialized. Each [block] is + // padded, if necessary, to an integer multiple of sizeof(void *). 
+ size_t space = 0; + for (auto t : tinfo) { + space += 1; // value pointer + space += t->holder_size_in_ptrs; // holder instance + } + size_t flags_at = space; + space += size_in_ptrs(n_types); // status bytes (holder_constructed and instance_registered) + + // Allocate space for flags, values, and holders, and initialize it to 0 (flags and values, + // in particular, need to be 0). Use Python's memory allocation functions: in Python 3.6 + // they default to using pymalloc, which is designed to be efficient for small allocations + // like the one we're doing here; in earlier versions (and for larger allocations) they are + // just wrappers around malloc. +#if PY_VERSION_HEX >= 0x03050000 + nonsimple.values_and_holders = (void **) PyMem_Calloc(space, sizeof(void *)); + if (!nonsimple.values_and_holders) throw std::bad_alloc(); +#else + nonsimple.values_and_holders = (void **) PyMem_New(void *, space); + if (!nonsimple.values_and_holders) throw std::bad_alloc(); + std::memset(nonsimple.values_and_holders, 0, space * sizeof(void *)); +#endif + nonsimple.status = reinterpret_cast(&nonsimple.values_and_holders[flags_at]); + } + owned = true; +} + +// NOLINTNEXTLINE(readability-make-member-function-const) +PYBIND11_NOINLINE void instance::deallocate_layout() { + if (!simple_layout) + PyMem_Free(nonsimple.values_and_holders); +} + +PYBIND11_NOINLINE bool isinstance_generic(handle obj, const std::type_info &tp) { + handle type = detail::get_type_handle(tp, false); + if (!type) + return false; + return isinstance(obj, type); +} + +PYBIND11_NOINLINE std::string error_string() { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, "Unknown internal error occurred"); + return "Unknown internal error occurred"; + } + + error_scope scope; // Preserve error state + + std::string errorString; + if (scope.type) { + errorString += handle(scope.type).attr("__name__").cast(); + errorString += ": "; + } + if (scope.value) + errorString += (std::string) str(scope.value); + 
+ PyErr_NormalizeException(&scope.type, &scope.value, &scope.trace); + +#if PY_MAJOR_VERSION >= 3 + if (scope.trace != nullptr) + PyException_SetTraceback(scope.value, scope.trace); +#endif + +#if !defined(PYPY_VERSION) + if (scope.trace) { + auto *trace = (PyTracebackObject *) scope.trace; + + /* Get the deepest trace possible */ + while (trace->tb_next) + trace = trace->tb_next; + + PyFrameObject *frame = trace->tb_frame; + errorString += "\n\nAt:\n"; + while (frame) { + int lineno = PyFrame_GetLineNumber(frame); + errorString += + " " + handle(frame->f_code->co_filename).cast() + + "(" + std::to_string(lineno) + "): " + + handle(frame->f_code->co_name).cast() + "\n"; + frame = frame->f_back; + } + } +#endif + + return errorString; +} + +PYBIND11_NOINLINE handle get_object_handle(const void *ptr, const detail::type_info *type ) { + auto &instances = get_internals().registered_instances; + auto range = instances.equal_range(ptr); + for (auto it = range.first; it != range.second; ++it) { + for (const auto &vh : values_and_holders(it->second)) { + if (vh.type == type) + return handle((PyObject *) it->second); + } + } + return handle(); +} + +inline PyThreadState *get_thread_state_unchecked() { +#if defined(PYPY_VERSION) + return PyThreadState_GET(); +#elif PY_VERSION_HEX < 0x03000000 + return _PyThreadState_Current; +#elif PY_VERSION_HEX < 0x03050000 + return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); +#elif PY_VERSION_HEX < 0x03050200 + return (PyThreadState*) _PyThreadState_Current.value; +#else + return _PyThreadState_UncheckedGet(); +#endif +} + +// Forward declarations +void keep_alive_impl(handle nurse, handle patient); +inline PyObject *make_new_instance(PyTypeObject *type); + +class type_caster_generic { +public: + PYBIND11_NOINLINE explicit type_caster_generic(const std::type_info &type_info) + : typeinfo(get_type_info(type_info)), cpptype(&type_info) {} + + explicit type_caster_generic(const type_info *typeinfo) + : 
typeinfo(typeinfo), cpptype(typeinfo ? typeinfo->cpptype : nullptr) {} + + bool load(handle src, bool convert) { + return load_impl(src, convert); + } + + PYBIND11_NOINLINE static handle cast(const void *_src, return_value_policy policy, handle parent, + const detail::type_info *tinfo, + void *(*copy_constructor)(const void *), + void *(*move_constructor)(const void *), + const void *existing_holder = nullptr) { + if (!tinfo) // no type info: error will be set already + return handle(); + + void *src = const_cast(_src); + if (src == nullptr) + return none().release(); + + if (handle registered_inst = find_registered_python_instance(src, tinfo)) + return registered_inst; + + auto inst = reinterpret_steal(make_new_instance(tinfo->type)); + auto wrapper = reinterpret_cast(inst.ptr()); + wrapper->owned = false; + void *&valueptr = values_and_holders(wrapper).begin()->value_ptr(); + + switch (policy) { + case return_value_policy::automatic: + case return_value_policy::take_ownership: + valueptr = src; + wrapper->owned = true; + break; + + case return_value_policy::automatic_reference: + case return_value_policy::reference: + valueptr = src; + wrapper->owned = false; + break; + + case return_value_policy::copy: + if (copy_constructor) + valueptr = copy_constructor(src); + else { +#if defined(NDEBUG) + throw cast_error("return_value_policy = copy, but type is " + "non-copyable! (compile in debug mode for details)"); +#else + std::string type_name(tinfo->cpptype->name()); + detail::clean_type_id(type_name); + throw cast_error("return_value_policy = copy, but type " + + type_name + " is non-copyable!"); +#endif + } + wrapper->owned = true; + break; + + case return_value_policy::move: + if (move_constructor) + valueptr = move_constructor(src); + else if (copy_constructor) + valueptr = copy_constructor(src); + else { +#if defined(NDEBUG) + throw cast_error("return_value_policy = move, but type is neither " + "movable nor copyable! 
" + "(compile in debug mode for details)"); +#else + std::string type_name(tinfo->cpptype->name()); + detail::clean_type_id(type_name); + throw cast_error("return_value_policy = move, but type " + + type_name + " is neither movable nor copyable!"); +#endif + } + wrapper->owned = true; + break; + + case return_value_policy::reference_internal: + valueptr = src; + wrapper->owned = false; + keep_alive_impl(inst, parent); + break; + + default: + throw cast_error("unhandled return_value_policy: should not happen!"); + } + + tinfo->init_instance(wrapper, existing_holder); + + return inst.release(); + } + + // Base methods for generic caster; there are overridden in copyable_holder_caster + void load_value(value_and_holder &&v_h) { + auto *&vptr = v_h.value_ptr(); + // Lazy allocation for unallocated values: + if (vptr == nullptr) { + auto *type = v_h.type ? v_h.type : typeinfo; + if (type->operator_new) { + vptr = type->operator_new(type->type_size); + } else { + #if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912) + if (type->type_align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) + vptr = ::operator new(type->type_size, + std::align_val_t(type->type_align)); + else + #endif + vptr = ::operator new(type->type_size); + } + } + value = vptr; + } + bool try_implicit_casts(handle src, bool convert) { + for (auto &cast : typeinfo->implicit_casts) { + type_caster_generic sub_caster(*cast.first); + if (sub_caster.load(src, convert)) { + value = cast.second(sub_caster.value); + return true; + } + } + return false; + } + bool try_direct_conversions(handle src) { + for (auto &converter : *typeinfo->direct_conversions) { + if (converter(src.ptr(), value)) + return true; + } + return false; + } + void check_holder_compat() {} + + PYBIND11_NOINLINE static void *local_load(PyObject *src, const type_info *ti) { + auto caster = type_caster_generic(ti); + if (caster.load(src, false)) + return caster.value; + return nullptr; + } + + /// Try to load with foreign typeinfo, if 
available. Used when there is no + /// native typeinfo, or when the native one wasn't able to produce a value. + PYBIND11_NOINLINE bool try_load_foreign_module_local(handle src) { + constexpr auto *local_key = PYBIND11_MODULE_LOCAL_ID; + const auto pytype = type::handle_of(src); + if (!hasattr(pytype, local_key)) + return false; + + type_info *foreign_typeinfo = reinterpret_borrow(getattr(pytype, local_key)); + // Only consider this foreign loader if actually foreign and is a loader of the correct cpp type + if (foreign_typeinfo->module_local_load == &local_load + || (cpptype && !same_type(*cpptype, *foreign_typeinfo->cpptype))) + return false; + + if (auto result = foreign_typeinfo->module_local_load(src.ptr(), foreign_typeinfo)) { + value = result; + return true; + } + return false; + } + + // Implementation of `load`; this takes the type of `this` so that it can dispatch the relevant + // bits of code between here and copyable_holder_caster where the two classes need different + // logic (without having to resort to virtual inheritance). 
+ template + PYBIND11_NOINLINE bool load_impl(handle src, bool convert) { + if (!src) return false; + if (!typeinfo) return try_load_foreign_module_local(src); + + auto &this_ = static_cast(*this); + this_.check_holder_compat(); + + PyTypeObject *srctype = Py_TYPE(src.ptr()); + + // Case 1: If src is an exact type match for the target type then we can reinterpret_cast + // the instance's value pointer to the target type: + if (srctype == typeinfo->type) { + this_.load_value(reinterpret_cast(src.ptr())->get_value_and_holder()); + return true; + } + // Case 2: We have a derived class + if (PyType_IsSubtype(srctype, typeinfo->type)) { + auto &bases = all_type_info(srctype); + bool no_cpp_mi = typeinfo->simple_type; + + // Case 2a: the python type is a Python-inherited derived class that inherits from just + // one simple (no MI) pybind11 class, or is an exact match, so the C++ instance is of + // the right type and we can use reinterpret_cast. + // (This is essentially the same as case 2b, but because not using multiple inheritance + // is extremely common, we handle it specially to avoid the loop iterator and type + // pointer lookup overhead) + if (bases.size() == 1 && (no_cpp_mi || bases.front()->type == typeinfo->type)) { + this_.load_value(reinterpret_cast(src.ptr())->get_value_and_holder()); + return true; + } + // Case 2b: the python type inherits from multiple C++ bases. Check the bases to see if + // we can find an exact match (or, for a simple C++ type, an inherited match); if so, we + // can safely reinterpret_cast to the relevant pointer. + if (bases.size() > 1) { + for (auto base : bases) { + if (no_cpp_mi ? 
PyType_IsSubtype(base->type, typeinfo->type) : base->type == typeinfo->type) { + this_.load_value(reinterpret_cast(src.ptr())->get_value_and_holder(base)); + return true; + } + } + } + + // Case 2c: C++ multiple inheritance is involved and we couldn't find an exact type match + // in the registered bases, above, so try implicit casting (needed for proper C++ casting + // when MI is involved). + if (this_.try_implicit_casts(src, convert)) + return true; + } + + // Perform an implicit conversion + if (convert) { + for (auto &converter : typeinfo->implicit_conversions) { + auto temp = reinterpret_steal(converter(src.ptr(), typeinfo->type)); + if (load_impl(temp, false)) { + loader_life_support::add_patient(temp); + return true; + } + } + if (this_.try_direct_conversions(src)) + return true; + } + + // Failed to match local typeinfo. Try again with global. + if (typeinfo->module_local) { + if (auto gtype = get_global_type_info(*typeinfo->cpptype)) { + typeinfo = gtype; + return load(src, false); + } + } + + // Global typeinfo has precedence over foreign module_local + if (try_load_foreign_module_local(src)) { + return true; + } + + // Custom converters didn't take None, now we convert None to nullptr. + if (src.is_none()) { + // Defer accepting None to other overloads (if we aren't in convert mode): + if (!convert) return false; + value = nullptr; + return true; + } + + return false; + } + + + // Called to do type lookup and wrap the pointer and type in a pair when a dynamic_cast + // isn't needed or can't be used. If the type is unknown, sets the error and returns a pair + // with .second = nullptr. (p.first = nullptr is not an error: it becomes None). + PYBIND11_NOINLINE static std::pair src_and_type( + const void *src, const std::type_info &cast_type, const std::type_info *rtti_type = nullptr) { + if (auto *tpi = get_type_info(cast_type)) + return {src, const_cast(tpi)}; + + // Not found, set error: + std::string tname = rtti_type ? 
rtti_type->name() : cast_type.name(); + detail::clean_type_id(tname); + std::string msg = "Unregistered type : " + tname; + PyErr_SetString(PyExc_TypeError, msg.c_str()); + return {nullptr, nullptr}; + } + + const type_info *typeinfo = nullptr; + const std::type_info *cpptype = nullptr; + void *value = nullptr; +}; + +/** + * Determine suitable casting operator for pointer-or-lvalue-casting type casters. The type caster + * needs to provide `operator T*()` and `operator T&()` operators. + * + * If the type supports moving the value away via an `operator T&&() &&` method, it should use + * `movable_cast_op_type` instead. + */ +template +using cast_op_type = + conditional_t>::value, + typename std::add_pointer>::type, + typename std::add_lvalue_reference>::type>; + +/** + * Determine suitable casting operator for a type caster with a movable value. Such a type caster + * needs to provide `operator T*()`, `operator T&()`, and `operator T&&() &&`. The latter will be + * called in appropriate contexts where the value can be moved rather than copied. + * + * These operator are automatically provided when using the PYBIND11_TYPE_CASTER macro. + */ +template +using movable_cast_op_type = + conditional_t::type>::value, + typename std::add_pointer>::type, + conditional_t::value, + typename std::add_rvalue_reference>::type, + typename std::add_lvalue_reference>::type>>; + +// std::is_copy_constructible isn't quite enough: it lets std::vector (and similar) through when +// T is non-copyable, but code containing such a copy constructor fails to actually compile. +template struct is_copy_constructible : std::is_copy_constructible {}; + +// Specialization for types that appear to be copy constructible but also look like stl containers +// (we specifically check for: has `value_type` and `reference` with `reference = value_type&`): if +// so, copy constructability depends on whether the value_type is copy constructible. 
+template struct is_copy_constructible, + std::is_same, + // Avoid infinite recursion + negation> + >::value>> : is_copy_constructible {}; + +// Likewise for std::pair +// (after C++17 it is mandatory that the copy constructor not exist when the two types aren't themselves +// copy constructible, but this can not be relied upon when T1 or T2 are themselves containers). +template struct is_copy_constructible> + : all_of, is_copy_constructible> {}; + +// The same problems arise with std::is_copy_assignable, so we use the same workaround. +template struct is_copy_assignable : std::is_copy_assignable {}; +template struct is_copy_assignable, + std::is_same + >::value>> : is_copy_assignable {}; +template struct is_copy_assignable> + : all_of, is_copy_assignable> {}; + +PYBIND11_NAMESPACE_END(detail) + +// polymorphic_type_hook::get(src, tinfo) determines whether the object pointed +// to by `src` actually is an instance of some class derived from `itype`. +// If so, it sets `tinfo` to point to the std::type_info representing that derived +// type, and returns a pointer to the start of the most-derived object of that type +// (in which `src` is a subobject; this will be the same address as `src` in most +// single inheritance cases). If not, or if `src` is nullptr, it simply returns `src` +// and leaves `tinfo` at its default value of nullptr. +// +// The default polymorphic_type_hook just returns src. A specialization for polymorphic +// types determines the runtime type of the passed object and adjusts the this-pointer +// appropriately via dynamic_cast. This is what enables a C++ Animal* to appear +// to Python as a Dog (if Dog inherits from Animal, Animal is polymorphic, Dog is +// registered with pybind11, and this Animal is in fact a Dog). +// +// You may specialize polymorphic_type_hook yourself for types that want to appear +// polymorphic to Python but do not use C++ RTTI. 
(This is a not uncommon pattern +// in performance-sensitive applications, used most notably in LLVM.) +// +// polymorphic_type_hook_base allows users to specialize polymorphic_type_hook with +// std::enable_if. User provided specializations will always have higher priority than +// the default implementation and specialization provided in polymorphic_type_hook_base. +template +struct polymorphic_type_hook_base +{ + static const void *get(const itype *src, const std::type_info*&) { return src; } +}; +template +struct polymorphic_type_hook_base::value>> +{ + static const void *get(const itype *src, const std::type_info*& type) { + type = src ? &typeid(*src) : nullptr; + return dynamic_cast(src); + } +}; +template +struct polymorphic_type_hook : public polymorphic_type_hook_base {}; + +PYBIND11_NAMESPACE_BEGIN(detail) + +/// Generic type caster for objects stored on the heap +template class type_caster_base : public type_caster_generic { + using itype = intrinsic_t; + +public: + static constexpr auto name = _(); + + type_caster_base() : type_caster_base(typeid(type)) { } + explicit type_caster_base(const std::type_info &info) : type_caster_generic(info) { } + + static handle cast(const itype &src, return_value_policy policy, handle parent) { + if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference) + policy = return_value_policy::copy; + return cast(&src, policy, parent); + } + + static handle cast(itype &&src, return_value_policy, handle parent) { + return cast(&src, return_value_policy::move, parent); + } + + // Returns a (pointer, type_info) pair taking care of necessary type lookup for a + // polymorphic type (using RTTI by default, but can be overridden by specializing + // polymorphic_type_hook). If the instance isn't derived, returns the base version. 
+ static std::pair src_and_type(const itype *src) { + auto &cast_type = typeid(itype); + const std::type_info *instance_type = nullptr; + const void *vsrc = polymorphic_type_hook::get(src, instance_type); + if (instance_type && !same_type(cast_type, *instance_type)) { + // This is a base pointer to a derived type. If the derived type is registered + // with pybind11, we want to make the full derived object available. + // In the typical case where itype is polymorphic, we get the correct + // derived pointer (which may be != base pointer) by a dynamic_cast to + // most derived type. If itype is not polymorphic, we won't get here + // except via a user-provided specialization of polymorphic_type_hook, + // and the user has promised that no this-pointer adjustment is + // required in that case, so it's OK to use static_cast. + if (const auto *tpi = get_type_info(*instance_type)) + return {vsrc, tpi}; + } + // Otherwise we have either a nullptr, an `itype` pointer, or an unknown derived pointer, so + // don't do a cast + return type_caster_generic::src_and_type(src, cast_type, instance_type); + } + + static handle cast(const itype *src, return_value_policy policy, handle parent) { + auto st = src_and_type(src); + return type_caster_generic::cast( + st.first, policy, parent, st.second, + make_copy_constructor(src), make_move_constructor(src)); + } + + static handle cast_holder(const itype *src, const void *holder) { + auto st = src_and_type(src); + return type_caster_generic::cast( + st.first, return_value_policy::take_ownership, {}, st.second, + nullptr, nullptr, holder); + } + + template using cast_op_type = detail::cast_op_type; + + // NOLINTNEXTLINE(google-explicit-constructor) + operator itype*() { return (type *) value; } + // NOLINTNEXTLINE(google-explicit-constructor) + operator itype&() { if (!value) throw reference_cast_error(); return *((itype *) value); } + +protected: + using Constructor = void *(*)(const void *); + + /* Only enabled when the types are 
{copy,move}-constructible *and* when the type + does not have a private operator new implementation. A comma operator is used in the decltype + argument to apply SFINAE to the public copy/move constructors.*/ + template ::value>> + static auto make_copy_constructor(const T *) -> decltype(new T(std::declval()), Constructor{}) { + return [](const void *arg) -> void * { + return new T(*reinterpret_cast(arg)); + }; + } + + template ::value>> + static auto make_move_constructor(const T *) -> decltype(new T(std::declval()), Constructor{}) { + return [](const void *arg) -> void * { + return new T(std::move(*const_cast(reinterpret_cast(arg)))); + }; + } + + static Constructor make_copy_constructor(...) { return nullptr; } + static Constructor make_move_constructor(...) { return nullptr; } +}; + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/typeid.h b/Libraries/pybind11-2.8.0/pybind11/detail/typeid.h new file mode 100644 index 00000000..39ba8ce0 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/detail/typeid.h @@ -0,0 +1,55 @@ +/* + pybind11/detail/typeid.h: Compiler-independent access to type identifiers + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include +#include + +#if defined(__GNUG__) +#include +#endif + +#include "common.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) +/// Erase all occurrences of a substring +inline void erase_all(std::string &string, const std::string &search) { + for (size_t pos = 0;;) { + pos = string.find(search, pos); + if (pos == std::string::npos) break; + string.erase(pos, search.length()); + } +} + +PYBIND11_NOINLINE void clean_type_id(std::string &name) { +#if defined(__GNUG__) + int status = 0; + std::unique_ptr res { + abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status), std::free }; + if (status == 0) + name = res.get(); +#else + detail::erase_all(name, "class "); + detail::erase_all(name, "struct "); + detail::erase_all(name, "enum "); +#endif + detail::erase_all(name, "pybind11::"); +} +PYBIND11_NAMESPACE_END(detail) + +/// Return a string representation of a C++ type +template static std::string type_id() { + std::string name(typeid(T).name()); + detail::clean_type_id(name); + return name; +} + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/eigen.h b/Libraries/pybind11-2.8.0/pybind11/eigen.h new file mode 100644 index 00000000..c0363827 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/eigen.h @@ -0,0 +1,590 @@ +/* + pybind11/eigen.h: Transparent conversion for dense and sparse Eigen matrices + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +/* HINT: To suppress warnings originating from the Eigen headers, use -isystem. 
+ See also: + https://stackoverflow.com/questions/2579576/i-dir-vs-isystem-dir + https://stackoverflow.com/questions/1741816/isystem-for-ms-visual-studio-c-compiler +*/ + +#include "numpy.h" + +#include +#include + +// Eigen prior to 3.2.7 doesn't have proper move constructors--but worse, some classes get implicit +// move constructors that break things. We could detect this an explicitly copy, but an extra copy +// of matrices seems highly undesirable. +static_assert(EIGEN_VERSION_AT_LEAST(3,2,7), "Eigen support in pybind11 requires Eigen >= 3.2.7"); + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +// Provide a convenience alias for easier pass-by-ref usage with fully dynamic strides: +using EigenDStride = Eigen::Stride; +template using EigenDRef = Eigen::Ref; +template using EigenDMap = Eigen::Map; + +PYBIND11_NAMESPACE_BEGIN(detail) + +#if EIGEN_VERSION_AT_LEAST(3,3,0) +using EigenIndex = Eigen::Index; +#else +using EigenIndex = EIGEN_DEFAULT_DENSE_INDEX_TYPE; +#endif + +// Matches Eigen::Map, Eigen::Ref, blocks, etc: +template using is_eigen_dense_map = all_of, std::is_base_of, T>>; +template using is_eigen_mutable_map = std::is_base_of, T>; +template using is_eigen_dense_plain = all_of>, is_template_base_of>; +template using is_eigen_sparse = is_template_base_of; +// Test for objects inheriting from EigenBase that aren't captured by the above. This +// basically covers anything that can be assigned to a dense matrix but that don't have a typical +// matrix data layout that can be copied from their .data(). For example, DiagonalMatrix and +// SelfAdjointView fall into this category. +template using is_eigen_other = all_of< + is_template_base_of, + negation, is_eigen_dense_plain, is_eigen_sparse>> +>; + +// Captures numpy/eigen conformability status (returned by EigenProps::conformable()): +template struct EigenConformable { + bool conformable = false; + EigenIndex rows = 0, cols = 0; + EigenDStride stride{0, 0}; // Only valid if negativestrides is false! 
+ bool negativestrides = false; // If true, do not use stride! + + // NOLINTNEXTLINE(google-explicit-constructor) + EigenConformable(bool fits = false) : conformable{fits} {} + // Matrix type: + EigenConformable(EigenIndex r, EigenIndex c, + EigenIndex rstride, EigenIndex cstride) : + conformable{true}, rows{r}, cols{c} { + // TODO: when Eigen bug #747 is fixed, remove the tests for non-negativity. http://eigen.tuxfamily.org/bz/show_bug.cgi?id=747 + if (rstride < 0 || cstride < 0) { + negativestrides = true; + } else { + stride = {EigenRowMajor ? rstride : cstride /* outer stride */, + EigenRowMajor ? cstride : rstride /* inner stride */ }; + } + } + // Vector type: + EigenConformable(EigenIndex r, EigenIndex c, EigenIndex stride) + : EigenConformable(r, c, r == 1 ? c*stride : stride, c == 1 ? r : r*stride) {} + + template bool stride_compatible() const { + // To have compatible strides, we need (on both dimensions) one of fully dynamic strides, + // matching strides, or a dimension size of 1 (in which case the stride value is irrelevant) + return + !negativestrides && + (props::inner_stride == Eigen::Dynamic || props::inner_stride == stride.inner() || + (EigenRowMajor ? cols : rows) == 1) && + (props::outer_stride == Eigen::Dynamic || props::outer_stride == stride.outer() || + (EigenRowMajor ? 
rows : cols) == 1); + } + // NOLINTNEXTLINE(google-explicit-constructor) + operator bool() const { return conformable; } +}; + +template struct eigen_extract_stride { using type = Type; }; +template +struct eigen_extract_stride> { using type = StrideType; }; +template +struct eigen_extract_stride> { using type = StrideType; }; + +// Helper struct for extracting information from an Eigen type +template struct EigenProps { + using Type = Type_; + using Scalar = typename Type::Scalar; + using StrideType = typename eigen_extract_stride::type; + static constexpr EigenIndex + rows = Type::RowsAtCompileTime, + cols = Type::ColsAtCompileTime, + size = Type::SizeAtCompileTime; + static constexpr bool + row_major = Type::IsRowMajor, + vector = Type::IsVectorAtCompileTime, // At least one dimension has fixed size 1 + fixed_rows = rows != Eigen::Dynamic, + fixed_cols = cols != Eigen::Dynamic, + fixed = size != Eigen::Dynamic, // Fully-fixed size + dynamic = !fixed_rows && !fixed_cols; // Fully-dynamic size + + template using if_zero = std::integral_constant; + static constexpr EigenIndex inner_stride = if_zero::value, + outer_stride = if_zero::value; + static constexpr bool dynamic_stride = inner_stride == Eigen::Dynamic && outer_stride == Eigen::Dynamic; + static constexpr bool requires_row_major = !dynamic_stride && !vector && (row_major ? inner_stride : outer_stride) == 1; + static constexpr bool requires_col_major = !dynamic_stride && !vector && (row_major ? outer_stride : inner_stride) == 1; + + // Takes an input array and determines whether we can make it fit into the Eigen type. If + // the array is a vector, we attempt to fit it into either an Eigen 1xN or Nx1 vector + // (preferring the latter if it will fit in either, i.e. for a fully dynamic matrix type). 
+ static EigenConformable conformable(const array &a) { + const auto dims = a.ndim(); + if (dims < 1 || dims > 2) + return false; + + if (dims == 2) { // Matrix type: require exact match (or dynamic) + + EigenIndex + np_rows = a.shape(0), + np_cols = a.shape(1), + np_rstride = a.strides(0) / static_cast(sizeof(Scalar)), + np_cstride = a.strides(1) / static_cast(sizeof(Scalar)); + if ((PYBIND11_SILENCE_MSVC_C4127(fixed_rows) && np_rows != rows) || + (PYBIND11_SILENCE_MSVC_C4127(fixed_cols) && np_cols != cols)) + return false; + + return {np_rows, np_cols, np_rstride, np_cstride}; + } + + // Otherwise we're storing an n-vector. Only one of the strides will be used, but whichever + // is used, we want the (single) numpy stride value. + const EigenIndex n = a.shape(0), + stride = a.strides(0) / static_cast(sizeof(Scalar)); + + if (vector) { // Eigen type is a compile-time vector + if (PYBIND11_SILENCE_MSVC_C4127(fixed) && size != n) + return false; // Vector size mismatch + return {rows == 1 ? 1 : n, cols == 1 ? 1 : n, stride}; + } + if (fixed) { + // The type has a fixed size, but is not a vector: abort + return false; + } + if (fixed_cols) { + // Since this isn't a vector, cols must be != 1. We allow this only if it exactly + // equals the number of elements (rows is Dynamic, and so 1 row is allowed). 
+ if (cols != n) return false; + return {1, n, stride}; + } // Otherwise it's either fully dynamic, or column dynamic; both become a column vector + if (PYBIND11_SILENCE_MSVC_C4127(fixed_rows) && rows != n) return false; + return {n, 1, stride}; + } + + static constexpr bool show_writeable = is_eigen_dense_map::value && is_eigen_mutable_map::value; + static constexpr bool show_order = is_eigen_dense_map::value; + static constexpr bool show_c_contiguous = show_order && requires_row_major; + static constexpr bool show_f_contiguous = !show_c_contiguous && show_order && requires_col_major; + + static constexpr auto descriptor = + _("numpy.ndarray[") + npy_format_descriptor::name + + _("[") + _(_<(size_t) rows>(), _("m")) + + _(", ") + _(_<(size_t) cols>(), _("n")) + + _("]") + + // For a reference type (e.g. Ref) we have other constraints that might need to be + // satisfied: writeable=True (for a mutable reference), and, depending on the map's stride + // options, possibly f_contiguous or c_contiguous. We include them in the descriptor output + // to provide some hint as to why a TypeError is occurring (otherwise it can be confusing to + // see that a function accepts a 'numpy.ndarray[float64[3,2]]' and an error message that you + // *gave* a numpy.ndarray of the right type and dimensions. + _(", flags.writeable", "") + + _(", flags.c_contiguous", "") + + _(", flags.f_contiguous", "") + + _("]"); +}; + +// Casts an Eigen type to numpy array. If given a base, the numpy array references the src data, +// otherwise it'll make a copy. writeable lets you turn off the writeable flag for the array. 
+template handle eigen_array_cast(typename props::Type const &src, handle base = handle(), bool writeable = true) { + constexpr ssize_t elem_size = sizeof(typename props::Scalar); + array a; + if (props::vector) + a = array({ src.size() }, { elem_size * src.innerStride() }, src.data(), base); + else + a = array({ src.rows(), src.cols() }, { elem_size * src.rowStride(), elem_size * src.colStride() }, + src.data(), base); + + if (!writeable) + array_proxy(a.ptr())->flags &= ~detail::npy_api::NPY_ARRAY_WRITEABLE_; + + return a.release(); +} + +// Takes an lvalue ref to some Eigen type and a (python) base object, creating a numpy array that +// reference the Eigen object's data with `base` as the python-registered base class (if omitted, +// the base will be set to None, and lifetime management is up to the caller). The numpy array is +// non-writeable if the given type is const. +template +handle eigen_ref_array(Type &src, handle parent = none()) { + // none here is to get past array's should-we-copy detection, which currently always + // copies when there is no base. Setting the base to None should be harmless. + return eigen_array_cast(src, parent, !std::is_const::value); +} + +// Takes a pointer to some dense, plain Eigen type, builds a capsule around it, then returns a numpy +// array that references the encapsulated data with a python-side reference to the capsule to tie +// its destruction to that of any dependent python objects. Const-ness is determined by whether or +// not the Type of the pointer given is const. +template ::value>> +handle eigen_encapsulate(Type *src) { + capsule base(src, [](void *o) { delete static_cast(o); }); + return eigen_ref_array(*src, base); +} + +// Type caster for regular, dense matrix types (e.g. MatrixXd), but not maps/refs/etc. of dense +// types. 
+template +struct type_caster::value>> { + using Scalar = typename Type::Scalar; + using props = EigenProps; + + bool load(handle src, bool convert) { + // If we're in no-convert mode, only load if given an array of the correct type + if (!convert && !isinstance>(src)) + return false; + + // Coerce into an array, but don't do type conversion yet; the copy below handles it. + auto buf = array::ensure(src); + + if (!buf) + return false; + + auto dims = buf.ndim(); + if (dims < 1 || dims > 2) + return false; + + auto fits = props::conformable(buf); + if (!fits) + return false; + + // Allocate the new type, then build a numpy reference into it + value = Type(fits.rows, fits.cols); + auto ref = reinterpret_steal(eigen_ref_array(value)); + if (dims == 1) ref = ref.squeeze(); + else if (ref.ndim() == 1) buf = buf.squeeze(); + + int result = detail::npy_api::get().PyArray_CopyInto_(ref.ptr(), buf.ptr()); + + if (result < 0) { // Copy failed! + PyErr_Clear(); + return false; + } + + return true; + } + +private: + + // Cast implementation + template + static handle cast_impl(CType *src, return_value_policy policy, handle parent) { + switch (policy) { + case return_value_policy::take_ownership: + case return_value_policy::automatic: + return eigen_encapsulate(src); + case return_value_policy::move: + return eigen_encapsulate(new CType(std::move(*src))); + case return_value_policy::copy: + return eigen_array_cast(*src); + case return_value_policy::reference: + case return_value_policy::automatic_reference: + return eigen_ref_array(*src); + case return_value_policy::reference_internal: + return eigen_ref_array(*src, parent); + default: + throw cast_error("unhandled return_value_policy: should not happen!"); + }; + } + +public: + + // Normal returned non-reference, non-const value: + static handle cast(Type &&src, return_value_policy /* policy */, handle parent) { + return cast_impl(&src, return_value_policy::move, parent); + } + // If you return a non-reference const, we mark 
the numpy array readonly: + static handle cast(const Type &&src, return_value_policy /* policy */, handle parent) { + return cast_impl(&src, return_value_policy::move, parent); + } + // lvalue reference return; default (automatic) becomes copy + static handle cast(Type &src, return_value_policy policy, handle parent) { + if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference) + policy = return_value_policy::copy; + return cast_impl(&src, policy, parent); + } + // const lvalue reference return; default (automatic) becomes copy + static handle cast(const Type &src, return_value_policy policy, handle parent) { + if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference) + policy = return_value_policy::copy; + return cast(&src, policy, parent); + } + // non-const pointer return + static handle cast(Type *src, return_value_policy policy, handle parent) { + return cast_impl(src, policy, parent); + } + // const pointer return + static handle cast(const Type *src, return_value_policy policy, handle parent) { + return cast_impl(src, policy, parent); + } + + static constexpr auto name = props::descriptor; + + // NOLINTNEXTLINE(google-explicit-constructor) + operator Type*() { return &value; } + // NOLINTNEXTLINE(google-explicit-constructor) + operator Type&() { return value; } + // NOLINTNEXTLINE(google-explicit-constructor) + operator Type&&() && { return std::move(value); } + template using cast_op_type = movable_cast_op_type; + +private: + Type value; +}; + +// Base class for casting reference/map/block/etc. objects back to python. +template struct eigen_map_caster { +private: + using props = EigenProps; + +public: + + // Directly referencing a ref/map's data is a bit dangerous (whatever the map/ref points to has + // to stay around), but we'll allow it under the assumption that you know what you're doing (and + // have an appropriate keep_alive in place). 
We return a numpy array pointing directly at the + // ref's data (The numpy array ends up read-only if the ref was to a const matrix type.) Note + // that this means you need to ensure you don't destroy the object in some other way (e.g. with + // an appropriate keep_alive, or with a reference to a statically allocated matrix). + static handle cast(const MapType &src, return_value_policy policy, handle parent) { + switch (policy) { + case return_value_policy::copy: + return eigen_array_cast(src); + case return_value_policy::reference_internal: + return eigen_array_cast(src, parent, is_eigen_mutable_map::value); + case return_value_policy::reference: + case return_value_policy::automatic: + case return_value_policy::automatic_reference: + return eigen_array_cast(src, none(), is_eigen_mutable_map::value); + default: + // move, take_ownership don't make any sense for a ref/map: + pybind11_fail("Invalid return_value_policy for Eigen Map/Ref/Block type"); + } + } + + static constexpr auto name = props::descriptor; + + // Explicitly delete these: support python -> C++ conversion on these (i.e. these can be return + // types but not bound arguments). We still provide them (with an explicitly delete) so that + // you end up here if you try anyway. + bool load(handle, bool) = delete; + operator MapType() = delete; + template using cast_op_type = MapType; +}; + +// We can return any map-like object (but can only load Refs, specialized next): +template struct type_caster::value>> + : eigen_map_caster {}; + +// Loader for Ref<...> arguments. See the documentation for info on how to make this work without +// copying (it requires some extra effort in many cases). 
+template +struct type_caster< + Eigen::Ref, + enable_if_t>::value> +> : public eigen_map_caster> { +private: + using Type = Eigen::Ref; + using props = EigenProps; + using Scalar = typename props::Scalar; + using MapType = Eigen::Map; + using Array = array_t; + static constexpr bool need_writeable = is_eigen_mutable_map::value; + // Delay construction (these have no default constructor) + std::unique_ptr map; + std::unique_ptr ref; + // Our array. When possible, this is just a numpy array pointing to the source data, but + // sometimes we can't avoid copying (e.g. input is not a numpy array at all, has an incompatible + // layout, or is an array of a type that needs to be converted). Using a numpy temporary + // (rather than an Eigen temporary) saves an extra copy when we need both type conversion and + // storage order conversion. (Note that we refuse to use this temporary copy when loading an + // argument for a Ref with M non-const, i.e. a read-write reference). + Array copy_or_ref; +public: + bool load(handle src, bool convert) { + // First check whether what we have is already an array of the right type. If not, we can't + // avoid a copy (because the copy is also going to do type conversion). 
+ bool need_copy = !isinstance(src); + + EigenConformable fits; + if (!need_copy) { + // We don't need a converting copy, but we also need to check whether the strides are + // compatible with the Ref's stride requirements + auto aref = reinterpret_borrow(src); + + if (aref && (!need_writeable || aref.writeable())) { + fits = props::conformable(aref); + if (!fits) return false; // Incompatible dimensions + if (!fits.template stride_compatible()) + need_copy = true; + else + copy_or_ref = std::move(aref); + } + else { + need_copy = true; + } + } + + if (need_copy) { + // We need to copy: If we need a mutable reference, or we're not supposed to convert + // (either because we're in the no-convert overload pass, or because we're explicitly + // instructed not to copy (via `py::arg().noconvert()`) we have to fail loading. + if (!convert || need_writeable) return false; + + Array copy = Array::ensure(src); + if (!copy) return false; + fits = props::conformable(copy); + if (!fits || !fits.template stride_compatible()) + return false; + copy_or_ref = std::move(copy); + loader_life_support::add_patient(copy_or_ref); + } + + ref.reset(); + map.reset(new MapType(data(copy_or_ref), fits.rows, fits.cols, make_stride(fits.stride.outer(), fits.stride.inner()))); + ref.reset(new Type(*map)); + + return true; + } + + // NOLINTNEXTLINE(google-explicit-constructor) + operator Type*() { return ref.get(); } + // NOLINTNEXTLINE(google-explicit-constructor) + operator Type&() { return *ref; } + template using cast_op_type = pybind11::detail::cast_op_type<_T>; + +private: + template ::value, int> = 0> + Scalar *data(Array &a) { return a.mutable_data(); } + + template ::value, int> = 0> + const Scalar *data(Array &a) { return a.data(); } + + // Attempt to figure out a constructor of `Stride` that will work. 
+ // If both strides are fixed, use a default constructor: + template using stride_ctor_default = bool_constant< + S::InnerStrideAtCompileTime != Eigen::Dynamic && S::OuterStrideAtCompileTime != Eigen::Dynamic && + std::is_default_constructible::value>; + // Otherwise, if there is a two-index constructor, assume it is (outer,inner) like + // Eigen::Stride, and use it: + template using stride_ctor_dual = bool_constant< + !stride_ctor_default::value && std::is_constructible::value>; + // Otherwise, if there is a one-index constructor, and just one of the strides is dynamic, use + // it (passing whichever stride is dynamic). + template using stride_ctor_outer = bool_constant< + !any_of, stride_ctor_dual>::value && + S::OuterStrideAtCompileTime == Eigen::Dynamic && S::InnerStrideAtCompileTime != Eigen::Dynamic && + std::is_constructible::value>; + template using stride_ctor_inner = bool_constant< + !any_of, stride_ctor_dual>::value && + S::InnerStrideAtCompileTime == Eigen::Dynamic && S::OuterStrideAtCompileTime != Eigen::Dynamic && + std::is_constructible::value>; + + template ::value, int> = 0> + static S make_stride(EigenIndex, EigenIndex) { return S(); } + template ::value, int> = 0> + static S make_stride(EigenIndex outer, EigenIndex inner) { return S(outer, inner); } + template ::value, int> = 0> + static S make_stride(EigenIndex outer, EigenIndex) { return S(outer); } + template ::value, int> = 0> + static S make_stride(EigenIndex, EigenIndex inner) { return S(inner); } + +}; + +// type_caster for special matrix types (e.g. DiagonalMatrix), which are EigenBase, but not +// EigenDense (i.e. they don't have a data(), at least not with the usual matrix layout). +// load() is not supported, but we can cast them into the python domain by first copying to a +// regular Eigen::Matrix, then casting that. 
+template +struct type_caster::value>> { +protected: + using Matrix = Eigen::Matrix; + using props = EigenProps; +public: + static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) { + handle h = eigen_encapsulate(new Matrix(src)); + return h; + } + static handle cast(const Type *src, return_value_policy policy, handle parent) { return cast(*src, policy, parent); } + + static constexpr auto name = props::descriptor; + + // Explicitly delete these: support python -> C++ conversion on these (i.e. these can be return + // types but not bound arguments). We still provide them (with an explicitly delete) so that + // you end up here if you try anyway. + bool load(handle, bool) = delete; + operator Type() = delete; + template using cast_op_type = Type; +}; + +template +struct type_caster::value>> { + using Scalar = typename Type::Scalar; + using StorageIndex = remove_reference_t().outerIndexPtr())>; + using Index = typename Type::Index; + static constexpr bool rowMajor = Type::IsRowMajor; + + bool load(handle src, bool) { + if (!src) + return false; + + auto obj = reinterpret_borrow(src); + object sparse_module = module_::import("scipy.sparse"); + object matrix_type = sparse_module.attr( + rowMajor ? 
"csr_matrix" : "csc_matrix"); + + if (!type::handle_of(obj).is(matrix_type)) { + try { + obj = matrix_type(obj); + } catch (const error_already_set &) { + return false; + } + } + + auto values = array_t((object) obj.attr("data")); + auto innerIndices = array_t((object) obj.attr("indices")); + auto outerIndices = array_t((object) obj.attr("indptr")); + auto shape = pybind11::tuple((pybind11::object) obj.attr("shape")); + auto nnz = obj.attr("nnz").cast(); + + if (!values || !innerIndices || !outerIndices) + return false; + + value = Eigen::MappedSparseMatrix( + shape[0].cast(), shape[1].cast(), nnz, + outerIndices.mutable_data(), innerIndices.mutable_data(), values.mutable_data()); + + return true; + } + + static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) { + const_cast(src).makeCompressed(); + + object matrix_type = module_::import("scipy.sparse").attr( + rowMajor ? "csr_matrix" : "csc_matrix"); + + array data(src.nonZeros(), src.valuePtr()); + array outerIndices((rowMajor ? src.rows() : src.cols()) + 1, src.outerIndexPtr()); + array innerIndices(src.nonZeros(), src.innerIndexPtr()); + + return matrix_type( + std::make_tuple(data, innerIndices, outerIndices), + std::make_pair(src.rows(), src.cols()) + ).release(); + } + + PYBIND11_TYPE_CASTER(Type, _<(Type::IsRowMajor) != 0>("scipy.sparse.csr_matrix[", "scipy.sparse.csc_matrix[") + + npy_format_descriptor::name + _("]")); +}; + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/embed.h b/Libraries/pybind11-2.8.0/pybind11/embed.h new file mode 100644 index 00000000..9843f0f9 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/embed.h @@ -0,0 +1,284 @@ +/* + pybind11/embed.h: Support for embedding the interpreter + + Copyright (c) 2017 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "pybind11.h" +#include "eval.h" + +#include +#include + +#if defined(PYPY_VERSION) +# error Embedding the interpreter is not supported with PyPy +#endif + +#if PY_MAJOR_VERSION >= 3 +# define PYBIND11_EMBEDDED_MODULE_IMPL(name) \ + extern "C" PyObject *pybind11_init_impl_##name(); \ + extern "C" PyObject *pybind11_init_impl_##name() { \ + return pybind11_init_wrapper_##name(); \ + } +#else +# define PYBIND11_EMBEDDED_MODULE_IMPL(name) \ + extern "C" void pybind11_init_impl_##name(); \ + extern "C" void pybind11_init_impl_##name() { \ + pybind11_init_wrapper_##name(); \ + } +#endif + +/** \rst + Add a new module to the table of builtins for the interpreter. Must be + defined in global scope. The first macro parameter is the name of the + module (without quotes). The second parameter is the variable which will + be used as the interface to add functions and classes to the module. + + .. code-block:: cpp + + PYBIND11_EMBEDDED_MODULE(example, m) { + // ... initialize functions and classes here + m.def("foo", []() { + return "Hello, World!"; + }); + } + \endrst */ +#define PYBIND11_EMBEDDED_MODULE(name, variable) \ + static ::pybind11::module_::module_def PYBIND11_CONCAT(pybind11_module_def_, name); \ + static void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ &); \ + static PyObject PYBIND11_CONCAT(*pybind11_init_wrapper_, name)() { \ + auto m = ::pybind11::module_::create_extension_module( \ + PYBIND11_TOSTRING(name), nullptr, &PYBIND11_CONCAT(pybind11_module_def_, name)); \ + try { \ + PYBIND11_CONCAT(pybind11_init_, name)(m); \ + return m.ptr(); \ + } \ + PYBIND11_CATCH_INIT_EXCEPTIONS \ + } \ + PYBIND11_EMBEDDED_MODULE_IMPL(name) \ + ::pybind11::detail::embedded_module PYBIND11_CONCAT(pybind11_module_, name)( \ + PYBIND11_TOSTRING(name), PYBIND11_CONCAT(pybind11_init_impl_, name)); \ + void PYBIND11_CONCAT(pybind11_init_, name)(::pybind11::module_ \ + & variable) // NOLINT(bugprone-macro-parentheses) + 
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +/// Python 2.7/3.x compatible version of `PyImport_AppendInittab` and error checks. +struct embedded_module { +#if PY_MAJOR_VERSION >= 3 + using init_t = PyObject *(*)(); +#else + using init_t = void (*)(); +#endif + embedded_module(const char *name, init_t init) { + if (Py_IsInitialized() != 0) + pybind11_fail("Can't add new modules after the interpreter has been initialized"); + + auto result = PyImport_AppendInittab(name, init); + if (result == -1) + pybind11_fail("Insufficient memory to add a new module"); + } +}; + +struct wide_char_arg_deleter { + void operator()(wchar_t *ptr) const { +#if PY_VERSION_HEX >= 0x030500f0 + // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale + PyMem_RawFree(ptr); +#else + delete[] ptr; +#endif + } +}; + +inline wchar_t *widen_chars(const char *safe_arg) { +#if PY_VERSION_HEX >= 0x030500f0 + wchar_t *widened_arg = Py_DecodeLocale(safe_arg, nullptr); +#else + wchar_t *widened_arg = nullptr; +# if defined(HAVE_BROKEN_MBSTOWCS) && HAVE_BROKEN_MBSTOWCS + size_t count = strlen(safe_arg); +# else + size_t count = mbstowcs(nullptr, safe_arg, 0); +# endif + if (count != static_cast(-1)) { + widened_arg = new wchar_t[count + 1]; + mbstowcs(widened_arg, safe_arg, count + 1); + } +#endif + return widened_arg; +} + +/// Python 2.x/3.x-compatible version of `PySys_SetArgv` +inline void set_interpreter_argv(int argc, const char *const *argv, bool add_program_dir_to_path) { + // Before it was special-cased in python 3.8, passing an empty or null argv + // caused a segfault, so we have to reimplement the special case ourselves. + bool special_case = (argv == nullptr || argc <= 0); + + const char *const empty_argv[]{"\0"}; + const char *const *safe_argv = special_case ? 
empty_argv : argv; + if (special_case) + argc = 1; + + auto argv_size = static_cast(argc); +#if PY_MAJOR_VERSION >= 3 + // SetArgv* on python 3 takes wchar_t, so we have to convert. + std::unique_ptr widened_argv(new wchar_t *[argv_size]); + std::vector> widened_argv_entries; + widened_argv_entries.reserve(argv_size); + for (size_t ii = 0; ii < argv_size; ++ii) { + widened_argv_entries.emplace_back(widen_chars(safe_argv[ii])); + if (!widened_argv_entries.back()) { + // A null here indicates a character-encoding failure or the python + // interpreter out of memory. Give up. + return; + } + widened_argv[ii] = widened_argv_entries.back().get(); + } + + auto pysys_argv = widened_argv.get(); +#else + // python 2.x + std::vector strings{safe_argv, safe_argv + argv_size}; + std::vector char_strings{argv_size}; + for (std::size_t i = 0; i < argv_size; ++i) + char_strings[i] = &strings[i][0]; + char **pysys_argv = char_strings.data(); +#endif + + PySys_SetArgvEx(argc, pysys_argv, static_cast(add_program_dir_to_path)); +} + +PYBIND11_NAMESPACE_END(detail) + +/** \rst + Initialize the Python interpreter. No other pybind11 or CPython API functions can be + called before this is done; with the exception of `PYBIND11_EMBEDDED_MODULE`. The + optional `init_signal_handlers` parameter can be used to skip the registration of + signal handlers (see the `Python documentation`_ for details). Calling this function + again after the interpreter has already been initialized is a fatal error. + + If initializing the Python interpreter fails, then the program is terminated. (This + is controlled by the CPython runtime and is an exception to pybind11's normal behavior + of throwing exceptions on errors.) + + The remaining optional parameters, `argc`, `argv`, and `add_program_dir_to_path` are + used to populate ``sys.argv`` and ``sys.path``. + See the |PySys_SetArgvEx documentation|_ for details. + + .. _Python documentation: https://docs.python.org/3/c-api/init.html#c.Py_InitializeEx + .. 
|PySys_SetArgvEx documentation| replace:: ``PySys_SetArgvEx`` documentation + .. _PySys_SetArgvEx documentation: https://docs.python.org/3/c-api/init.html#c.PySys_SetArgvEx + \endrst */ +inline void initialize_interpreter(bool init_signal_handlers = true, + int argc = 0, + const char *const *argv = nullptr, + bool add_program_dir_to_path = true) { + if (Py_IsInitialized() != 0) + pybind11_fail("The interpreter is already running"); + + Py_InitializeEx(init_signal_handlers ? 1 : 0); + + detail::set_interpreter_argv(argc, argv, add_program_dir_to_path); +} + +/** \rst + Shut down the Python interpreter. No pybind11 or CPython API functions can be called + after this. In addition, pybind11 objects must not outlive the interpreter: + + .. code-block:: cpp + + { // BAD + py::initialize_interpreter(); + auto hello = py::str("Hello, World!"); + py::finalize_interpreter(); + } // <-- BOOM, hello's destructor is called after interpreter shutdown + + { // GOOD + py::initialize_interpreter(); + { // scoped + auto hello = py::str("Hello, World!"); + } // <-- OK, hello is cleaned up properly + py::finalize_interpreter(); + } + + { // BETTER + py::scoped_interpreter guard{}; + auto hello = py::str("Hello, World!"); + } + + .. warning:: + + The interpreter can be restarted by calling `initialize_interpreter` again. + Modules created using pybind11 can be safely re-initialized. However, Python + itself cannot completely unload binary extension modules and there are several + caveats with regard to interpreter restarting. All the details can be found + in the CPython documentation. In short, not all interpreter memory may be + freed, either due to reference cycles or user-created global data. + + \endrst */ +inline void finalize_interpreter() { + handle builtins(PyEval_GetBuiltins()); + const char *id = PYBIND11_INTERNALS_ID; + + // Get the internals pointer (without creating it if it doesn't exist). It's possible for the + // internals to be created during Py_Finalize() (e.g. 
if a py::capsule calls `get_internals()` + // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). + detail::internals **internals_ptr_ptr = detail::get_internals_pp(); + // It could also be stashed in builtins, so look there too: + if (builtins.contains(id) && isinstance(builtins[id])) + internals_ptr_ptr = capsule(builtins[id]); + + Py_Finalize(); + + if (internals_ptr_ptr) { + delete *internals_ptr_ptr; + *internals_ptr_ptr = nullptr; + } +} + +/** \rst + Scope guard version of `initialize_interpreter` and `finalize_interpreter`. + This a move-only guard and only a single instance can exist. + + See `initialize_interpreter` for a discussion of its constructor arguments. + + .. code-block:: cpp + + #include + + int main() { + py::scoped_interpreter guard{}; + py::print(Hello, World!); + } // <-- interpreter shutdown + \endrst */ +class scoped_interpreter { +public: + explicit scoped_interpreter(bool init_signal_handlers = true, + int argc = 0, + const char *const *argv = nullptr, + bool add_program_dir_to_path = true) { + initialize_interpreter(init_signal_handlers, argc, argv, add_program_dir_to_path); + } + + scoped_interpreter(const scoped_interpreter &) = delete; + scoped_interpreter(scoped_interpreter &&other) noexcept { other.is_valid = false; } + scoped_interpreter &operator=(const scoped_interpreter &) = delete; + scoped_interpreter &operator=(scoped_interpreter &&) = delete; + + ~scoped_interpreter() { + if (is_valid) + finalize_interpreter(); + } + +private: + bool is_valid = true; +}; + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/eval.h b/Libraries/pybind11-2.8.0/pybind11/eval.h new file mode 100644 index 00000000..e0f58bcf --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/eval.h @@ -0,0 +1,163 @@ +/* + pybind11/exec.h: Support for evaluating Python expressions and statements + from strings and files + + Copyright (c) 2016 Klemens Morgenstern and + Wenzel Jakob + + 
All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include + +#include "pybind11.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +inline void ensure_builtins_in_globals(object &global) { + #if PY_VERSION_HEX < 0x03080000 + // Running exec and eval on Python 2 and 3 adds `builtins` module under + // `__builtins__` key to globals if not yet present. + // Python 3.8 made PyRun_String behave similarly. Let's also do that for + // older versions, for consistency. + if (!global.contains("__builtins__")) + global["__builtins__"] = module_::import(PYBIND11_BUILTINS_MODULE); + #else + (void) global; + #endif +} + +PYBIND11_NAMESPACE_END(detail) + +enum eval_mode { + /// Evaluate a string containing an isolated expression + eval_expr, + + /// Evaluate a string containing a single statement. Returns \c none + eval_single_statement, + + /// Evaluate a string containing a sequence of statement. 
Returns \c none + eval_statements +}; + +template +object eval(const str &expr, object global = globals(), object local = object()) { + if (!local) + local = global; + + detail::ensure_builtins_in_globals(global); + + /* PyRun_String does not accept a PyObject / encoding specifier, + this seems to be the only alternative */ + std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr; + + int start = 0; + switch (mode) { + case eval_expr: start = Py_eval_input; break; + case eval_single_statement: start = Py_single_input; break; + case eval_statements: start = Py_file_input; break; + default: pybind11_fail("invalid evaluation mode"); + } + + PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr()); + if (!result) + throw error_already_set(); + return reinterpret_steal(result); +} + +template +object eval(const char (&s)[N], object global = globals(), object local = object()) { + /* Support raw string literals by removing common leading whitespace */ + auto expr = (s[0] == '\n') ? str(module_::import("textwrap").attr("dedent")(s)) + : str(s); + return eval(expr, global, local); +} + +inline void exec(const str &expr, object global = globals(), object local = object()) { + eval(expr, std::move(global), std::move(local)); +} + +template +void exec(const char (&s)[N], object global = globals(), object local = object()) { + eval(s, global, local); +} + +#if defined(PYPY_VERSION) && PY_VERSION_HEX >= 0x03000000 +template +object eval_file(str, object, object) { + pybind11_fail("eval_file not supported in PyPy3. Use eval"); +} +template +object eval_file(str, object) { + pybind11_fail("eval_file not supported in PyPy3. Use eval"); +} +template +object eval_file(str) { + pybind11_fail("eval_file not supported in PyPy3. 
Use eval"); +} +#else +template +object eval_file(str fname, object global = globals(), object local = object()) { + if (!local) + local = global; + + detail::ensure_builtins_in_globals(global); + + int start = 0; + switch (mode) { + case eval_expr: start = Py_eval_input; break; + case eval_single_statement: start = Py_single_input; break; + case eval_statements: start = Py_file_input; break; + default: pybind11_fail("invalid evaluation mode"); + } + + int closeFile = 1; + std::string fname_str = (std::string) fname; +#if PY_VERSION_HEX >= 0x03040000 + FILE *f = _Py_fopen_obj(fname.ptr(), "r"); +#elif PY_VERSION_HEX >= 0x03000000 + FILE *f = _Py_fopen(fname.ptr(), "r"); +#else + /* No unicode support in open() :( */ + auto fobj = reinterpret_steal(PyFile_FromString( + const_cast(fname_str.c_str()), + const_cast("r"))); + FILE *f = nullptr; + if (fobj) + f = PyFile_AsFile(fobj.ptr()); + closeFile = 0; +#endif + if (!f) { + PyErr_Clear(); + pybind11_fail("File \"" + fname_str + "\" could not be opened!"); + } + + // In Python2, this should be encoded by getfilesystemencoding. + // We don't boher setting it since Python2 is past EOL anyway. 
+ // See PR#3233 +#if PY_VERSION_HEX >= 0x03000000 + if (!global.contains("__file__")) { + global["__file__"] = std::move(fname); + } +#endif + +#if PY_VERSION_HEX < 0x03000000 && defined(PYPY_VERSION) + PyObject *result = PyRun_File(f, fname_str.c_str(), start, global.ptr(), + local.ptr()); + (void) closeFile; +#else + PyObject *result = PyRun_FileEx(f, fname_str.c_str(), start, global.ptr(), + local.ptr(), closeFile); +#endif + + if (!result) + throw error_already_set(); + return reinterpret_steal(result); +} +#endif + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/functional.h b/Libraries/pybind11-2.8.0/pybind11/functional.h new file mode 100644 index 00000000..24141ce3 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/functional.h @@ -0,0 +1,121 @@ +/* + pybind11/functional.h: std::function<> support + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pybind11.h" +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +template +struct type_caster> { + using type = std::function; + using retval_type = conditional_t::value, void_type, Return>; + using function_type = Return (*) (Args...); + +public: + bool load(handle src, bool convert) { + if (src.is_none()) { + // Defer accepting None to other overloads (if we aren't in convert mode): + if (!convert) return false; + return true; + } + + if (!isinstance(src)) + return false; + + auto func = reinterpret_borrow(src); + + /* + When passing a C++ function as an argument to another C++ + function via Python, every function call would normally involve + a full C++ -> Python -> C++ roundtrip, which can be prohibitive. + Here, we try to at least detect the case where the function is + stateless (i.e. 
function pointer or lambda function without + captured variables), in which case the roundtrip can be avoided. + */ + if (auto cfunc = func.cpp_function()) { + auto cfunc_self = PyCFunction_GET_SELF(cfunc.ptr()); + if (isinstance(cfunc_self)) { + auto c = reinterpret_borrow(cfunc_self); + auto rec = (function_record *) c; + + while (rec != nullptr) { + if (rec->is_stateless + && same_type(typeid(function_type), + *reinterpret_cast(rec->data[1]))) { + struct capture { + function_type f; + }; + value = ((capture *) &rec->data)->f; + return true; + } + rec = rec->next; + } + } + // PYPY segfaults here when passing builtin function like sum. + // Raising an fail exception here works to prevent the segfault, but only on gcc. + // See PR #1413 for full details + } + + // ensure GIL is held during functor destruction + struct func_handle { + function f; +#if !(defined(_MSC_VER) && _MSC_VER == 1916 && defined(PYBIND11_CPP17) && PY_MAJOR_VERSION < 3) + // This triggers a syntax error under very special conditions (very weird indeed). + explicit +#endif + func_handle(function &&f_) noexcept : f(std::move(f_)) {} + func_handle(const func_handle &f_) { operator=(f_); } + func_handle &operator=(const func_handle &f_) { + gil_scoped_acquire acq; + f = f_.f; + return *this; + } + ~func_handle() { + gil_scoped_acquire acq; + function kill_f(std::move(f)); + } + }; + + // to emulate 'move initialization capture' in C++11 + struct func_wrapper { + func_handle hfunc; + explicit func_wrapper(func_handle &&hf) noexcept : hfunc(std::move(hf)) {} + Return operator()(Args... 
args) const { + gil_scoped_acquire acq; + object retval(hfunc.f(std::forward(args)...)); + /* Visual studio 2015 parser issue: need parentheses around this expression */ + return (retval.template cast()); + } + }; + + value = func_wrapper(func_handle(std::move(func))); + return true; + } + + template + static handle cast(Func &&f_, return_value_policy policy, handle /* parent */) { + if (!f_) + return none().inc_ref(); + + auto result = f_.template target(); + if (result) + return cpp_function(*result, policy).release(); + return cpp_function(std::forward(f_), policy).release(); + } + + PYBIND11_TYPE_CASTER(type, _("Callable[[") + concat(make_caster::name...) + _("], ") + + make_caster::name + _("]")); +}; + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/gil.h b/Libraries/pybind11-2.8.0/pybind11/gil.h new file mode 100644 index 00000000..b73aaa3f --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/gil.h @@ -0,0 +1,193 @@ +/* + pybind11/gil.h: RAII helpers for managing the GIL + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" +#include "detail/internals.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + + +PYBIND11_NAMESPACE_BEGIN(detail) + +// forward declarations +PyThreadState *get_thread_state_unchecked(); + +PYBIND11_NAMESPACE_END(detail) + + +#if defined(WITH_THREAD) && !defined(PYPY_VERSION) + +/* The functions below essentially reproduce the PyGILState_* API using a RAII + * pattern, but there are a few important differences: + * + * 1. When acquiring the GIL from an non-main thread during the finalization + * phase, the GILState API blindly terminates the calling thread, which + * is often not what is wanted. This API does not do this. + * + * 2. 
The gil_scoped_release function can optionally cut the relationship + * of a PyThreadState and its associated thread, which allows moving it to + * another thread (this is a fairly rare/advanced use case). + * + * 3. The reference count of an acquired thread state can be controlled. This + * can be handy to prevent cases where callbacks issued from an external + * thread would otherwise constantly construct and destroy thread state data + * structures. + * + * See the Python bindings of NanoGUI (http://github.com/wjakob/nanogui) for an + * example which uses features 2 and 3 to migrate the Python thread of + * execution to another thread (to run the event loop on the original thread, + * in this case). + */ + +class gil_scoped_acquire { +public: + PYBIND11_NOINLINE gil_scoped_acquire() { + auto &internals = detail::get_internals(); + tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate); + + if (!tstate) { + /* Check if the GIL was acquired using the PyGILState_* API instead (e.g. if + calling from a Python thread). Since we use a different key, this ensures + we don't create a new thread state and deadlock in PyEval_AcquireThread + below. Note we don't save this state with internals.tstate, since we don't + create it we would fail to clear it (its reference count should be > 0). 
*/ + tstate = PyGILState_GetThisThreadState(); + } + + if (!tstate) { + tstate = PyThreadState_New(internals.istate); + #if !defined(NDEBUG) + if (!tstate) + pybind11_fail("scoped_acquire: could not create thread state!"); + #endif + tstate->gilstate_counter = 0; + PYBIND11_TLS_REPLACE_VALUE(internals.tstate, tstate); + } else { + release = detail::get_thread_state_unchecked() != tstate; + } + + if (release) { + PyEval_AcquireThread(tstate); + } + + inc_ref(); + } + + void inc_ref() { + ++tstate->gilstate_counter; + } + + PYBIND11_NOINLINE void dec_ref() { + --tstate->gilstate_counter; + #if !defined(NDEBUG) + if (detail::get_thread_state_unchecked() != tstate) + pybind11_fail("scoped_acquire::dec_ref(): thread state must be current!"); + if (tstate->gilstate_counter < 0) + pybind11_fail("scoped_acquire::dec_ref(): reference count underflow!"); + #endif + if (tstate->gilstate_counter == 0) { + #if !defined(NDEBUG) + if (!release) + pybind11_fail("scoped_acquire::dec_ref(): internal error!"); + #endif + PyThreadState_Clear(tstate); + if (active) + PyThreadState_DeleteCurrent(); + PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate); + release = false; + } + } + + /// This method will disable the PyThreadState_DeleteCurrent call and the + /// GIL won't be acquired. This method should be used if the interpreter + /// could be shutting down when this is called, as thread deletion is not + /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and + /// protect subsequent code. 
+ PYBIND11_NOINLINE void disarm() { + active = false; + } + + PYBIND11_NOINLINE ~gil_scoped_acquire() { + dec_ref(); + if (release) + PyEval_SaveThread(); + } +private: + PyThreadState *tstate = nullptr; + bool release = true; + bool active = true; +}; + +class gil_scoped_release { +public: + explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) { + // `get_internals()` must be called here unconditionally in order to initialize + // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an + // initialization race could occur as multiple threads try `gil_scoped_acquire`. + auto &internals = detail::get_internals(); + tstate = PyEval_SaveThread(); + if (disassoc) { + auto key = internals.tstate; + PYBIND11_TLS_DELETE_VALUE(key); + } + } + + /// This method will disable the PyThreadState_DeleteCurrent call and the + /// GIL won't be acquired. This method should be used if the interpreter + /// could be shutting down when this is called, as thread deletion is not + /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and + /// protect subsequent code. 
+ PYBIND11_NOINLINE void disarm() { + active = false; + } + + ~gil_scoped_release() { + if (!tstate) + return; + // `PyEval_RestoreThread()` should not be called if runtime is finalizing + if (active) + PyEval_RestoreThread(tstate); + if (disassoc) { + auto key = detail::get_internals().tstate; + PYBIND11_TLS_REPLACE_VALUE(key, tstate); + } + } +private: + PyThreadState *tstate; + bool disassoc; + bool active = true; +}; +#elif defined(PYPY_VERSION) +class gil_scoped_acquire { + PyGILState_STATE state; +public: + gil_scoped_acquire() { state = PyGILState_Ensure(); } + ~gil_scoped_acquire() { PyGILState_Release(state); } + void disarm() {} +}; + +class gil_scoped_release { + PyThreadState *state; +public: + gil_scoped_release() { state = PyEval_SaveThread(); } + ~gil_scoped_release() { PyEval_RestoreThread(state); } + void disarm() {} +}; +#else +class gil_scoped_acquire { + void disarm() {} +}; +class gil_scoped_release { + void disarm() {} +}; +#endif + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/iostream.h b/Libraries/pybind11-2.8.0/pybind11/iostream.h new file mode 100644 index 00000000..95449a07 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/iostream.h @@ -0,0 +1,275 @@ +/* + pybind11/iostream.h -- Tools to assist with redirecting cout and cerr to Python + + Copyright (c) 2017 Henry F. Schreiner + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. + + WARNING: The implementation in this file is NOT thread safe. Multiple + threads writing to a redirected ostream concurrently cause data races + and potentially buffer overflows. Therefore it is currently a requirement + that all (possibly) concurrent redirected ostream writes are protected by + a mutex. + #HelpAppreciated: Work on iostream.h thread safety. 
+ For more background see the discussions under + https://github.com/pybind/pybind11/pull/2982 and + https://github.com/pybind/pybind11/pull/2995. +*/ + +#pragma once + +#include "pybind11.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +// Buffer that writes to Python instead of C++ +class pythonbuf : public std::streambuf { +private: + using traits_type = std::streambuf::traits_type; + + const size_t buf_size; + std::unique_ptr d_buffer; + object pywrite; + object pyflush; + + int overflow(int c) override { + if (!traits_type::eq_int_type(c, traits_type::eof())) { + *pptr() = traits_type::to_char_type(c); + pbump(1); + } + return sync() == 0 ? traits_type::not_eof(c) : traits_type::eof(); + } + + // Computes how many bytes at the end of the buffer are part of an + // incomplete sequence of UTF-8 bytes. + // Precondition: pbase() < pptr() + size_t utf8_remainder() const { + const auto rbase = std::reverse_iterator(pbase()); + const auto rpptr = std::reverse_iterator(pptr()); + auto is_ascii = [](char c) { + return (static_cast(c) & 0x80) == 0x00; + }; + auto is_leading = [](char c) { + return (static_cast(c) & 0xC0) == 0xC0; + }; + auto is_leading_2b = [](char c) { + return static_cast(c) <= 0xDF; + }; + auto is_leading_3b = [](char c) { + return static_cast(c) <= 0xEF; + }; + // If the last character is ASCII, there are no incomplete code points + if (is_ascii(*rpptr)) + return 0; + // Otherwise, work back from the end of the buffer and find the first + // UTF-8 leading byte + const auto rpend = rbase - rpptr >= 3 ? rpptr + 3 : rbase; + const auto leading = std::find_if(rpptr, rpend, is_leading); + if (leading == rbase) + return 0; + const auto dist = static_cast(leading - rpptr); + size_t remainder = 0; + + if (dist == 0) + remainder = 1; // 1-byte code point is impossible + else if (dist == 1) + remainder = is_leading_2b(*leading) ? 
0 : dist + 1; + else if (dist == 2) + remainder = is_leading_3b(*leading) ? 0 : dist + 1; + // else if (dist >= 3), at least 4 bytes before encountering a UTF-8 + // leading byte, either no remainder or invalid UTF-8. + // Invalid UTF-8 will cause an exception later when converting + // to a Python string, so that's not handled here. + return remainder; + } + + // This function must be non-virtual to be called in a destructor. + int _sync() { + if (pbase() != pptr()) { // If buffer is not empty + gil_scoped_acquire tmp; + // This subtraction cannot be negative, so dropping the sign. + auto size = static_cast(pptr() - pbase()); + size_t remainder = utf8_remainder(); + + if (size > remainder) { + str line(pbase(), size - remainder); + pywrite(line); + pyflush(); + } + + // Copy the remainder at the end of the buffer to the beginning: + if (remainder > 0) + std::memmove(pbase(), pptr() - remainder, remainder); + setp(pbase(), epptr()); + pbump(static_cast(remainder)); + } + return 0; + } + + int sync() override { + return _sync(); + } + +public: + explicit pythonbuf(const object &pyostream, size_t buffer_size = 1024) + : buf_size(buffer_size), d_buffer(new char[buf_size]), pywrite(pyostream.attr("write")), + pyflush(pyostream.attr("flush")) { + setp(d_buffer.get(), d_buffer.get() + buf_size - 1); + } + + pythonbuf(pythonbuf&&) = default; + + /// Sync before destroy + ~pythonbuf() override { + _sync(); + } +}; + +PYBIND11_NAMESPACE_END(detail) + + +/** \rst + This is a move-only guard that redirects output. + + .. code-block:: cpp + + #include + + ... + + { + py::scoped_ostream_redirect output; + std::cout << "Hello, World!"; // Python stdout + } // <-- return std::cout to normal + + You can explicitly pass the c++ stream and the python object, + for example to guard stderr instead. + + .. 
code-block:: cpp + + { + py::scoped_ostream_redirect output{std::cerr, py::module::import("sys").attr("stderr")}; + std::cerr << "Hello, World!"; + } + \endrst */ +class scoped_ostream_redirect { +protected: + std::streambuf *old; + std::ostream &costream; + detail::pythonbuf buffer; + +public: + explicit scoped_ostream_redirect(std::ostream &costream = std::cout, + const object &pyostream + = module_::import("sys").attr("stdout")) + : costream(costream), buffer(pyostream) { + old = costream.rdbuf(&buffer); + } + + ~scoped_ostream_redirect() { + costream.rdbuf(old); + } + + scoped_ostream_redirect(const scoped_ostream_redirect &) = delete; + scoped_ostream_redirect(scoped_ostream_redirect &&other) = default; + scoped_ostream_redirect &operator=(const scoped_ostream_redirect &) = delete; + scoped_ostream_redirect &operator=(scoped_ostream_redirect &&) = delete; +}; + + +/** \rst + Like `scoped_ostream_redirect`, but redirects cerr by default. This class + is provided primarily to make ``py::call_guard`` easier to make. + + .. code-block:: cpp + + m.def("noisy_func", &noisy_func, + py::call_guard()); + +\endrst */ +class scoped_estream_redirect : public scoped_ostream_redirect { +public: + explicit scoped_estream_redirect(std::ostream &costream = std::cerr, + const object &pyostream + = module_::import("sys").attr("stderr")) + : scoped_ostream_redirect(costream, pyostream) {} +}; + + +PYBIND11_NAMESPACE_BEGIN(detail) + +// Class to redirect output as a context manager. C++ backend. 
+class OstreamRedirect { + bool do_stdout_; + bool do_stderr_; + std::unique_ptr redirect_stdout; + std::unique_ptr redirect_stderr; + +public: + explicit OstreamRedirect(bool do_stdout = true, bool do_stderr = true) + : do_stdout_(do_stdout), do_stderr_(do_stderr) {} + + void enter() { + if (do_stdout_) + redirect_stdout.reset(new scoped_ostream_redirect()); + if (do_stderr_) + redirect_stderr.reset(new scoped_estream_redirect()); + } + + void exit() { + redirect_stdout.reset(); + redirect_stderr.reset(); + } +}; + +PYBIND11_NAMESPACE_END(detail) + +/** \rst + This is a helper function to add a C++ redirect context manager to Python + instead of using a C++ guard. To use it, add the following to your binding code: + + .. code-block:: cpp + + #include + + ... + + py::add_ostream_redirect(m, "ostream_redirect"); + + You now have a Python context manager that redirects your output: + + .. code-block:: python + + with m.ostream_redirect(): + m.print_to_cout_function() + + This manager can optionally be told which streams to operate on: + + .. code-block:: python + + with m.ostream_redirect(stdout=True, stderr=True): + m.noisy_function_with_error_printing() + + \endrst */ +inline class_ +add_ostream_redirect(module_ m, const std::string &name = "ostream_redirect") { + return class_(std::move(m), name.c_str(), module_local()) + .def(init(), arg("stdout") = true, arg("stderr") = true) + .def("__enter__", &detail::OstreamRedirect::enter) + .def("__exit__", [](detail::OstreamRedirect &self_, const args &) { self_.exit(); }); +} + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/numpy.h b/Libraries/pybind11-2.8.0/pybind11/numpy.h new file mode 100644 index 00000000..b7747fae --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/numpy.h @@ -0,0 +1,1741 @@ +/* + pybind11/numpy.h: Basic NumPy support, vectorize() wrapper + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. 
Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pybind11.h" +#include "complex.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This will be true on all flat address space platforms and allows us to reduce the + whole npy_intp / ssize_t / Py_intptr_t business down to just ssize_t for all size + and dimension types (e.g. shape, strides, indexing), instead of inflicting this + upon the library user. */ +static_assert(sizeof(::pybind11::ssize_t) == sizeof(Py_intptr_t), "ssize_t != Py_intptr_t"); +static_assert(std::is_signed::value, "Py_intptr_t must be signed"); +// We now can reinterpret_cast between py::ssize_t and Py_intptr_t (MSVC + PyPy cares) + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +class array; // Forward declaration + +PYBIND11_NAMESPACE_BEGIN(detail) + +template <> struct handle_type_name { static constexpr auto name = _("numpy.ndarray"); }; + +template struct npy_format_descriptor; + +struct PyArrayDescr_Proxy { + PyObject_HEAD + PyObject *typeobj; + char kind; + char type; + char byteorder; + char flags; + int type_num; + int elsize; + int alignment; + char *subarray; + PyObject *fields; + PyObject *names; +}; + +struct PyArray_Proxy { + PyObject_HEAD + char *data; + int nd; + ssize_t *dimensions; + ssize_t *strides; + PyObject *base; + PyObject *descr; + int flags; +}; + +struct PyVoidScalarObject_Proxy { + PyObject_VAR_HEAD + char *obval; + PyArrayDescr_Proxy *descr; + int flags; + PyObject *base; +}; + +struct numpy_type_info { + PyObject* dtype_ptr; + std::string format_str; +}; + +struct numpy_internals { + std::unordered_map registered_dtypes; + + numpy_type_info *get_type_info(const std::type_info& tinfo, bool throw_if_missing = true) { + auto it = registered_dtypes.find(std::type_index(tinfo)); + if (it != registered_dtypes.end()) + return &(it->second); + if 
(throw_if_missing) + pybind11_fail(std::string("NumPy type info missing for ") + tinfo.name()); + return nullptr; + } + + template numpy_type_info *get_type_info(bool throw_if_missing = true) { + return get_type_info(typeid(typename std::remove_cv::type), throw_if_missing); + } +}; + +PYBIND11_NOINLINE void load_numpy_internals(numpy_internals* &ptr) { + ptr = &get_or_create_shared_data("_numpy_internals"); +} + +inline numpy_internals& get_numpy_internals() { + static numpy_internals* ptr = nullptr; + if (!ptr) + load_numpy_internals(ptr); + return *ptr; +} + +template struct same_size { + template using as = bool_constant; +}; + +template constexpr int platform_lookup() { return -1; } + +// Lookup a type according to its size, and return a value corresponding to the NumPy typenum. +template +constexpr int platform_lookup(int I, Ints... Is) { + return sizeof(Concrete) == sizeof(T) ? I : platform_lookup(Is...); +} + +struct npy_api { + enum constants { + NPY_ARRAY_C_CONTIGUOUS_ = 0x0001, + NPY_ARRAY_F_CONTIGUOUS_ = 0x0002, + NPY_ARRAY_OWNDATA_ = 0x0004, + NPY_ARRAY_FORCECAST_ = 0x0010, + NPY_ARRAY_ENSUREARRAY_ = 0x0040, + NPY_ARRAY_ALIGNED_ = 0x0100, + NPY_ARRAY_WRITEABLE_ = 0x0400, + NPY_BOOL_ = 0, + NPY_BYTE_, NPY_UBYTE_, + NPY_SHORT_, NPY_USHORT_, + NPY_INT_, NPY_UINT_, + NPY_LONG_, NPY_ULONG_, + NPY_LONGLONG_, NPY_ULONGLONG_, + NPY_FLOAT_, NPY_DOUBLE_, NPY_LONGDOUBLE_, + NPY_CFLOAT_, NPY_CDOUBLE_, NPY_CLONGDOUBLE_, + NPY_OBJECT_ = 17, + NPY_STRING_, NPY_UNICODE_, NPY_VOID_, + // Platform-dependent normalization + NPY_INT8_ = NPY_BYTE_, + NPY_UINT8_ = NPY_UBYTE_, + NPY_INT16_ = NPY_SHORT_, + NPY_UINT16_ = NPY_USHORT_, + // `npy_common.h` defines the integer aliases. In order, it checks: + // NPY_BITSOF_LONG, NPY_BITSOF_LONGLONG, NPY_BITSOF_INT, NPY_BITSOF_SHORT, NPY_BITSOF_CHAR + // and assigns the alias to the first matching size, so we should check in this order. 
+ NPY_INT32_ = platform_lookup( + NPY_LONG_, NPY_INT_, NPY_SHORT_), + NPY_UINT32_ = platform_lookup( + NPY_ULONG_, NPY_UINT_, NPY_USHORT_), + NPY_INT64_ = platform_lookup( + NPY_LONG_, NPY_LONGLONG_, NPY_INT_), + NPY_UINT64_ = platform_lookup( + NPY_ULONG_, NPY_ULONGLONG_, NPY_UINT_), + }; + + struct PyArray_Dims { + Py_intptr_t *ptr; + int len; + }; + + static npy_api& get() { + static npy_api api = lookup(); + return api; + } + + bool PyArray_Check_(PyObject *obj) const { + return (bool) PyObject_TypeCheck(obj, PyArray_Type_); + } + bool PyArrayDescr_Check_(PyObject *obj) const { + return (bool) PyObject_TypeCheck(obj, PyArrayDescr_Type_); + } + + unsigned int (*PyArray_GetNDArrayCFeatureVersion_)(); + PyObject *(*PyArray_DescrFromType_)(int); + PyObject *(*PyArray_NewFromDescr_) + (PyTypeObject *, PyObject *, int, Py_intptr_t const *, + Py_intptr_t const *, void *, int, PyObject *); + // Unused. Not removed because that affects ABI of the class. + PyObject *(*PyArray_DescrNewFromType_)(int); + int (*PyArray_CopyInto_)(PyObject *, PyObject *); + PyObject *(*PyArray_NewCopy_)(PyObject *, int); + PyTypeObject *PyArray_Type_; + PyTypeObject *PyVoidArrType_Type_; + PyTypeObject *PyArrayDescr_Type_; + PyObject *(*PyArray_DescrFromScalar_)(PyObject *); + PyObject *(*PyArray_FromAny_) (PyObject *, PyObject *, int, int, int, PyObject *); + int (*PyArray_DescrConverter_) (PyObject *, PyObject **); + bool (*PyArray_EquivTypes_) (PyObject *, PyObject *); + int (*PyArray_GetArrayParamsFromObject_)(PyObject *, PyObject *, unsigned char, PyObject **, int *, + Py_intptr_t *, PyObject **, PyObject *); + PyObject *(*PyArray_Squeeze_)(PyObject *); + // Unused. Not removed because that affects ABI of the class. 
+ int (*PyArray_SetBaseObject_)(PyObject *, PyObject *); + PyObject* (*PyArray_Resize_)(PyObject*, PyArray_Dims*, int, int); + PyObject* (*PyArray_Newshape_)(PyObject*, PyArray_Dims*, int); + PyObject* (*PyArray_View_)(PyObject*, PyObject*, PyObject*); + +private: + enum functions { + API_PyArray_GetNDArrayCFeatureVersion = 211, + API_PyArray_Type = 2, + API_PyArrayDescr_Type = 3, + API_PyVoidArrType_Type = 39, + API_PyArray_DescrFromType = 45, + API_PyArray_DescrFromScalar = 57, + API_PyArray_FromAny = 69, + API_PyArray_Resize = 80, + API_PyArray_CopyInto = 82, + API_PyArray_NewCopy = 85, + API_PyArray_NewFromDescr = 94, + API_PyArray_DescrNewFromType = 96, + API_PyArray_Newshape = 135, + API_PyArray_Squeeze = 136, + API_PyArray_View = 137, + API_PyArray_DescrConverter = 174, + API_PyArray_EquivTypes = 182, + API_PyArray_GetArrayParamsFromObject = 278, + API_PyArray_SetBaseObject = 282 + }; + + static npy_api lookup() { + module_ m = module_::import("numpy.core.multiarray"); + auto c = m.attr("_ARRAY_API"); +#if PY_MAJOR_VERSION >= 3 + void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), NULL); +#else + void **api_ptr = (void **) PyCObject_AsVoidPtr(c.ptr()); +#endif + npy_api api; +#define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func]; + DECL_NPY_API(PyArray_GetNDArrayCFeatureVersion); + if (api.PyArray_GetNDArrayCFeatureVersion_() < 0x7) + pybind11_fail("pybind11 numpy support requires numpy >= 1.7.0"); + DECL_NPY_API(PyArray_Type); + DECL_NPY_API(PyVoidArrType_Type); + DECL_NPY_API(PyArrayDescr_Type); + DECL_NPY_API(PyArray_DescrFromType); + DECL_NPY_API(PyArray_DescrFromScalar); + DECL_NPY_API(PyArray_FromAny); + DECL_NPY_API(PyArray_Resize); + DECL_NPY_API(PyArray_CopyInto); + DECL_NPY_API(PyArray_NewCopy); + DECL_NPY_API(PyArray_NewFromDescr); + DECL_NPY_API(PyArray_DescrNewFromType); + DECL_NPY_API(PyArray_Newshape); + DECL_NPY_API(PyArray_Squeeze); + DECL_NPY_API(PyArray_View); + DECL_NPY_API(PyArray_DescrConverter); + 
DECL_NPY_API(PyArray_EquivTypes); + DECL_NPY_API(PyArray_GetArrayParamsFromObject); + DECL_NPY_API(PyArray_SetBaseObject); + +#undef DECL_NPY_API + return api; + } +}; + +inline PyArray_Proxy* array_proxy(void* ptr) { + return reinterpret_cast(ptr); +} + +inline const PyArray_Proxy* array_proxy(const void* ptr) { + return reinterpret_cast(ptr); +} + +inline PyArrayDescr_Proxy* array_descriptor_proxy(PyObject* ptr) { + return reinterpret_cast(ptr); +} + +inline const PyArrayDescr_Proxy* array_descriptor_proxy(const PyObject* ptr) { + return reinterpret_cast(ptr); +} + +inline bool check_flags(const void* ptr, int flag) { + return (flag == (array_proxy(ptr)->flags & flag)); +} + +template struct is_std_array : std::false_type { }; +template struct is_std_array> : std::true_type { }; +template struct is_complex : std::false_type { }; +template struct is_complex> : std::true_type { }; + +template struct array_info_scalar { + using type = T; + static constexpr bool is_array = false; + static constexpr bool is_empty = false; + static constexpr auto extents = _(""); + static void append_extents(list& /* shape */) { } +}; +// Computes underlying type and a comma-separated list of extents for array +// types (any mix of std::array and built-in arrays). An array of char is +// treated as scalar because it gets special handling. +template struct array_info : array_info_scalar { }; +template struct array_info> { + using type = typename array_info::type; + static constexpr bool is_array = true; + static constexpr bool is_empty = (N == 0) || array_info::is_empty; + static constexpr size_t extent = N; + + // appends the extents to shape + static void append_extents(list& shape) { + shape.append(N); + array_info::append_extents(shape); + } + + static constexpr auto extents = _::is_array>( + concat(_(), array_info::extents), _() + ); +}; +// For numpy we have special handling for arrays of characters, so we don't include +// the size in the array extents. 
+template struct array_info : array_info_scalar { }; +template struct array_info> : array_info_scalar> { }; +template struct array_info : array_info> { }; +template using remove_all_extents_t = typename array_info::type; + +template using is_pod_struct = all_of< + std::is_standard_layout, // since we're accessing directly in memory we need a standard layout type +#if defined(__GLIBCXX__) && (__GLIBCXX__ < 20150422 || __GLIBCXX__ == 20150426 || __GLIBCXX__ == 20150623 || __GLIBCXX__ == 20150626 || __GLIBCXX__ == 20160803) + // libstdc++ < 5 (including versions 4.8.5, 4.9.3 and 4.9.4 which were released after 5) + // don't implement is_trivially_copyable, so approximate it + std::is_trivially_destructible, + satisfies_any_of, +#else + std::is_trivially_copyable, +#endif + satisfies_none_of +>; + +// Replacement for std::is_pod (deprecated in C++20) +template using is_pod = all_of< + std::is_standard_layout, + std::is_trivial +>; + +template ssize_t byte_offset_unsafe(const Strides &) { return 0; } +template +ssize_t byte_offset_unsafe(const Strides &strides, ssize_t i, Ix... index) { + return i * strides[Dim] + byte_offset_unsafe(strides, index...); +} + +/** + * Proxy class providing unsafe, unchecked const access to array data. This is constructed through + * the `unchecked()` method of `array` or the `unchecked()` method of `array_t`. `Dims` + * will be -1 for dimensions determined at runtime. + */ +template +class unchecked_reference { +protected: + static constexpr bool Dynamic = Dims < 0; + const unsigned char *data_; + // Storing the shape & strides in local variables (i.e. 
these arrays) allows the compiler to + // make large performance gains on big, nested loops, but requires compile-time dimensions + conditional_t> + shape_, strides_; + const ssize_t dims_; + + friend class pybind11::array; + // Constructor for compile-time dimensions: + template + unchecked_reference(const void *data, const ssize_t *shape, const ssize_t *strides, enable_if_t) + : data_{reinterpret_cast(data)}, dims_{Dims} { + for (size_t i = 0; i < (size_t) dims_; i++) { + shape_[i] = shape[i]; + strides_[i] = strides[i]; + } + } + // Constructor for runtime dimensions: + template + unchecked_reference(const void *data, const ssize_t *shape, const ssize_t *strides, enable_if_t dims) + : data_{reinterpret_cast(data)}, shape_{shape}, strides_{strides}, dims_{dims} {} + +public: + /** + * Unchecked const reference access to data at the given indices. For a compile-time known + * number of dimensions, this requires the correct number of arguments; for run-time + * dimensionality, this is not checked (and so is up to the caller to use safely). + */ + template const T &operator()(Ix... index) const { + static_assert(ssize_t{sizeof...(Ix)} == Dims || Dynamic, + "Invalid number of indices for unchecked array reference"); + return *reinterpret_cast(data_ + byte_offset_unsafe(strides_, ssize_t(index)...)); + } + /** + * Unchecked const reference access to data; this operator only participates if the reference + * is to a 1-dimensional array. When present, this is exactly equivalent to `obj(index)`. + */ + template > + const T &operator[](ssize_t index) const { return operator()(index); } + + /// Pointer access to the data at the given indices. + template const T *data(Ix... ix) const { return &operator()(ssize_t(ix)...); } + + /// Returns the item size, i.e. sizeof(T) + constexpr static ssize_t itemsize() { return sizeof(T); } + + /// Returns the shape (i.e. 
size) of dimension `dim` + ssize_t shape(ssize_t dim) const { return shape_[(size_t) dim]; } + + /// Returns the number of dimensions of the array + ssize_t ndim() const { return dims_; } + + /// Returns the total number of elements in the referenced array, i.e. the product of the shapes + template + enable_if_t size() const { + return std::accumulate(shape_.begin(), shape_.end(), (ssize_t) 1, std::multiplies()); + } + template + enable_if_t size() const { + return std::accumulate(shape_, shape_ + ndim(), (ssize_t) 1, std::multiplies()); + } + + /// Returns the total number of bytes used by the referenced data. Note that the actual span in + /// memory may be larger if the referenced array has non-contiguous strides (e.g. for a slice). + ssize_t nbytes() const { + return size() * itemsize(); + } +}; + +template +class unchecked_mutable_reference : public unchecked_reference { + friend class pybind11::array; + using ConstBase = unchecked_reference; + using ConstBase::ConstBase; + using ConstBase::Dynamic; +public: + // Bring in const-qualified versions from base class + using ConstBase::operator(); + using ConstBase::operator[]; + + /// Mutable, unchecked access to data at the given indices. + template T& operator()(Ix... index) { + static_assert(ssize_t{sizeof...(Ix)} == Dims || Dynamic, + "Invalid number of indices for unchecked array reference"); + return const_cast(ConstBase::operator()(index...)); + } + /** + * Mutable, unchecked access data at the given index; this operator only participates if the + * reference is to a 1-dimensional array (or has runtime dimensions). When present, this is + * exactly equivalent to `obj(index)`. + */ + template > + T &operator[](ssize_t index) { return operator()(index); } + + /// Mutable pointer access to the data at the given indices. + template T *mutable_data(Ix... 
ix) { return &operator()(ssize_t(ix)...); } +}; + +template +struct type_caster> { + static_assert(Dim == 0 && Dim > 0 /* always fail */, "unchecked array proxy object is not castable"); +}; +template +struct type_caster> : type_caster> {}; + +PYBIND11_NAMESPACE_END(detail) + +class dtype : public object { +public: + PYBIND11_OBJECT_DEFAULT(dtype, object, detail::npy_api::get().PyArrayDescr_Check_); + + explicit dtype(const buffer_info &info) { + dtype descr(_dtype_from_pep3118()(PYBIND11_STR_TYPE(info.format))); + // If info.itemsize == 0, use the value calculated from the format string + m_ptr = descr.strip_padding(info.itemsize != 0 ? info.itemsize : descr.itemsize()) + .release() + .ptr(); + } + + explicit dtype(const std::string &format) { + m_ptr = from_args(pybind11::str(format)).release().ptr(); + } + + explicit dtype(const char *format) : dtype(std::string(format)) {} + + dtype(list names, list formats, list offsets, ssize_t itemsize) { + dict args; + args["names"] = std::move(names); + args["formats"] = std::move(formats); + args["offsets"] = std::move(offsets); + args["itemsize"] = pybind11::int_(itemsize); + m_ptr = from_args(std::move(args)).release().ptr(); + } + + /// This is essentially the same as calling numpy.dtype(args) in Python. + static dtype from_args(object args) { + PyObject *ptr = nullptr; + if ((detail::npy_api::get().PyArray_DescrConverter_(args.ptr(), &ptr) == 0) || !ptr) + throw error_already_set(); + return reinterpret_steal(ptr); + } + + /// Return dtype associated with a C++ type. + template static dtype of() { + return detail::npy_format_descriptor::type>::dtype(); + } + + /// Size of the data type in bytes. + ssize_t itemsize() const { + return detail::array_descriptor_proxy(m_ptr)->elsize; + } + + /// Returns true for structured data types. + bool has_fields() const { + return detail::array_descriptor_proxy(m_ptr)->names != nullptr; + } + + /// Single-character code for dtype's kind. 
+ /// For example, floating point types are 'f' and integral types are 'i'. + char kind() const { + return detail::array_descriptor_proxy(m_ptr)->kind; + } + + /// Single-character for dtype's type. + /// For example, ``float`` is 'f', ``double`` 'd', ``int`` 'i', and ``long`` 'l'. + char char_() const { + // Note: The signature, `dtype::char_` follows the naming of NumPy's + // public Python API (i.e., ``dtype.char``), rather than its internal + // C API (``PyArray_Descr::type``). + return detail::array_descriptor_proxy(m_ptr)->type; + } + +private: + static object _dtype_from_pep3118() { + static PyObject *obj = module_::import("numpy.core._internal") + .attr("_dtype_from_pep3118").cast().release().ptr(); + return reinterpret_borrow(obj); + } + + dtype strip_padding(ssize_t itemsize) { + // Recursively strip all void fields with empty names that are generated for + // padding fields (as of NumPy v1.11). + if (!has_fields()) + return *this; + + struct field_descr { PYBIND11_STR_TYPE name; object format; pybind11::int_ offset; }; + std::vector field_descriptors; + + for (auto field : attr("fields").attr("items")()) { + auto spec = field.cast(); + auto name = spec[0].cast(); + auto format = spec[1].cast()[0].cast(); + auto offset = spec[1].cast()[1].cast(); + if ((len(name) == 0u) && format.kind() == 'V') + continue; + field_descriptors.push_back({(PYBIND11_STR_TYPE) name, format.strip_padding(format.itemsize()), offset}); + } + + std::sort(field_descriptors.begin(), field_descriptors.end(), + [](const field_descr& a, const field_descr& b) { + return a.offset.cast() < b.offset.cast(); + }); + + list names, formats, offsets; + for (auto& descr : field_descriptors) { + names.append(descr.name); + formats.append(descr.format); + offsets.append(descr.offset); + } + return dtype(std::move(names), std::move(formats), std::move(offsets), itemsize); + } +}; + +class array : public buffer { +public: + PYBIND11_OBJECT_CVT(array, buffer, detail::npy_api::get().PyArray_Check_, 
raw_array) + + enum { + c_style = detail::npy_api::NPY_ARRAY_C_CONTIGUOUS_, + f_style = detail::npy_api::NPY_ARRAY_F_CONTIGUOUS_, + forcecast = detail::npy_api::NPY_ARRAY_FORCECAST_ + }; + + array() : array(0, static_cast(nullptr)) {} + + using ShapeContainer = detail::any_container; + using StridesContainer = detail::any_container; + + // Constructs an array taking shape/strides from arbitrary container types + array(const pybind11::dtype &dt, ShapeContainer shape, StridesContainer strides, + const void *ptr = nullptr, handle base = handle()) { + + if (strides->empty()) + *strides = detail::c_strides(*shape, dt.itemsize()); + + auto ndim = shape->size(); + if (ndim != strides->size()) + pybind11_fail("NumPy: shape ndim doesn't match strides ndim"); + auto descr = dt; + + int flags = 0; + if (base && ptr) { + if (isinstance(base)) + /* Copy flags from base (except ownership bit) */ + flags = reinterpret_borrow(base).flags() & ~detail::npy_api::NPY_ARRAY_OWNDATA_; + else + /* Writable by default, easy to downgrade later on if needed */ + flags = detail::npy_api::NPY_ARRAY_WRITEABLE_; + } + + auto &api = detail::npy_api::get(); + auto tmp = reinterpret_steal(api.PyArray_NewFromDescr_( + api.PyArray_Type_, descr.release().ptr(), (int) ndim, + // Use reinterpret_cast for PyPy on Windows (remove if fixed, checked on 7.3.1) + reinterpret_cast(shape->data()), + reinterpret_cast(strides->data()), + const_cast(ptr), flags, nullptr)); + if (!tmp) + throw error_already_set(); + if (ptr) { + if (base) { + api.PyArray_SetBaseObject_(tmp.ptr(), base.inc_ref().ptr()); + } else { + tmp = reinterpret_steal(api.PyArray_NewCopy_(tmp.ptr(), -1 /* any order */)); + } + } + m_ptr = tmp.release().ptr(); + } + + array(const pybind11::dtype &dt, ShapeContainer shape, const void *ptr = nullptr, handle base = handle()) + : array(dt, std::move(shape), {}, ptr, base) { } + + template ::value && !std::is_same::value>> + array(const pybind11::dtype &dt, T count, const void *ptr = nullptr, handle 
base = handle()) + : array(dt, {{count}}, ptr, base) { } + + template + array(ShapeContainer shape, StridesContainer strides, const T *ptr, handle base = handle()) + : array(pybind11::dtype::of(), std::move(shape), std::move(strides), ptr, base) { } + + template + array(ShapeContainer shape, const T *ptr, handle base = handle()) + : array(std::move(shape), {}, ptr, base) { } + + template + explicit array(ssize_t count, const T *ptr, handle base = handle()) : array({count}, {}, ptr, base) { } + + explicit array(const buffer_info &info, handle base = handle()) + : array(pybind11::dtype(info), info.shape, info.strides, info.ptr, base) { } + + /// Array descriptor (dtype) + pybind11::dtype dtype() const { + return reinterpret_borrow(detail::array_proxy(m_ptr)->descr); + } + + /// Total number of elements + ssize_t size() const { + return std::accumulate(shape(), shape() + ndim(), (ssize_t) 1, std::multiplies()); + } + + /// Byte size of a single element + ssize_t itemsize() const { + return detail::array_descriptor_proxy(detail::array_proxy(m_ptr)->descr)->elsize; + } + + /// Total number of bytes + ssize_t nbytes() const { + return size() * itemsize(); + } + + /// Number of dimensions + ssize_t ndim() const { + return detail::array_proxy(m_ptr)->nd; + } + + /// Base object + object base() const { + return reinterpret_borrow(detail::array_proxy(m_ptr)->base); + } + + /// Dimensions of the array + const ssize_t* shape() const { + return detail::array_proxy(m_ptr)->dimensions; + } + + /// Dimension along a given axis + ssize_t shape(ssize_t dim) const { + if (dim >= ndim()) + fail_dim_check(dim, "invalid axis"); + return shape()[dim]; + } + + /// Strides of the array + const ssize_t* strides() const { + return detail::array_proxy(m_ptr)->strides; + } + + /// Stride along a given axis + ssize_t strides(ssize_t dim) const { + if (dim >= ndim()) + fail_dim_check(dim, "invalid axis"); + return strides()[dim]; + } + + /// Return the NumPy array flags + int flags() const { + 
return detail::array_proxy(m_ptr)->flags; + } + + /// If set, the array is writeable (otherwise the buffer is read-only) + bool writeable() const { + return detail::check_flags(m_ptr, detail::npy_api::NPY_ARRAY_WRITEABLE_); + } + + /// If set, the array owns the data (will be freed when the array is deleted) + bool owndata() const { + return detail::check_flags(m_ptr, detail::npy_api::NPY_ARRAY_OWNDATA_); + } + + /// Pointer to the contained data. If index is not provided, points to the + /// beginning of the buffer. May throw if the index would lead to out of bounds access. + template const void* data(Ix... index) const { + return static_cast(detail::array_proxy(m_ptr)->data + offset_at(index...)); + } + + /// Mutable pointer to the contained data. If index is not provided, points to the + /// beginning of the buffer. May throw if the index would lead to out of bounds access. + /// May throw if the array is not writeable. + template void* mutable_data(Ix... index) { + check_writeable(); + return static_cast(detail::array_proxy(m_ptr)->data + offset_at(index...)); + } + + /// Byte offset from beginning of the array to a given index (full or partial). + /// May throw if the index would lead to out of bounds access. + template ssize_t offset_at(Ix... index) const { + if ((ssize_t) sizeof...(index) > ndim()) + fail_dim_check(sizeof...(index), "too many indices for an array"); + return byte_offset(ssize_t(index)...); + } + + ssize_t offset_at() const { return 0; } + + /// Item count from beginning of the array to a given index (full or partial). + /// May throw if the index would lead to out of bounds access. + template ssize_t index_at(Ix... index) const { + return offset_at(index...) / itemsize(); + } + + /** + * Returns a proxy object that provides access to the array's data without bounds or + * dimensionality checking. Will throw if the array is missing the `writeable` flag. 
Use with + * care: the array must not be destroyed or reshaped for the duration of the returned object, + * and the caller must take care not to access invalid dimensions or dimension indices. + */ + template detail::unchecked_mutable_reference mutable_unchecked() & { + if (PYBIND11_SILENCE_MSVC_C4127(Dims >= 0) && ndim() != Dims) + throw std::domain_error("array has incorrect number of dimensions: " + std::to_string(ndim()) + + "; expected " + std::to_string(Dims)); + return detail::unchecked_mutable_reference(mutable_data(), shape(), strides(), ndim()); + } + + /** + * Returns a proxy object that provides const access to the array's data without bounds or + * dimensionality checking. Unlike `mutable_unchecked()`, this does not require that the + * underlying array have the `writeable` flag. Use with care: the array must not be destroyed or + * reshaped for the duration of the returned object, and the caller must take care not to access + * invalid dimensions or dimension indices. + */ + template detail::unchecked_reference unchecked() const & { + if (PYBIND11_SILENCE_MSVC_C4127(Dims >= 0) && ndim() != Dims) + throw std::domain_error("array has incorrect number of dimensions: " + std::to_string(ndim()) + + "; expected " + std::to_string(Dims)); + return detail::unchecked_reference(data(), shape(), strides(), ndim()); + } + + /// Return a new view with all of the dimensions of length 1 removed + array squeeze() { + auto& api = detail::npy_api::get(); + return reinterpret_steal(api.PyArray_Squeeze_(m_ptr)); + } + + /// Resize array to given shape + /// If refcheck is true and more than one reference exists to this array + /// then resize will succeed only if it makes a reshape, i.e. 
original size doesn't change + void resize(ShapeContainer new_shape, bool refcheck = true) { + detail::npy_api::PyArray_Dims d = { + // Use reinterpret_cast for PyPy on Windows (remove if fixed, checked on 7.3.1) + reinterpret_cast(new_shape->data()), + int(new_shape->size()) + }; + // try to resize, set ordering param to -1 cause it's not used anyway + auto new_array = reinterpret_steal( + detail::npy_api::get().PyArray_Resize_(m_ptr, &d, int(refcheck), -1) + ); + if (!new_array) throw error_already_set(); + if (isinstance(new_array)) { *this = std::move(new_array); } + } + + /// Optional `order` parameter omitted, to be added as needed. + array reshape(ShapeContainer new_shape) { + detail::npy_api::PyArray_Dims d + = {reinterpret_cast(new_shape->data()), int(new_shape->size())}; + auto new_array + = reinterpret_steal(detail::npy_api::get().PyArray_Newshape_(m_ptr, &d, 0)); + if (!new_array) { + throw error_already_set(); + } + return new_array; + } + + /// Create a view of an array in a different data type. + /// This function may fundamentally reinterpret the data in the array. + /// It is the responsibility of the caller to ensure that this is safe. + /// Only supports the `dtype` argument, the `type` argument is omitted, + /// to be added as needed. + array view(const std::string &dtype) { + auto &api = detail::npy_api::get(); + auto new_view = reinterpret_steal(api.PyArray_View_( + m_ptr, dtype::from_args(pybind11::str(dtype)).release().ptr(), nullptr)); + if (!new_view) { + throw error_already_set(); + } + return new_view; + } + + /// Ensure that the argument is a NumPy array + /// In case of an error, nullptr is returned and the Python error is cleared. 
+ static array ensure(handle h, int ExtraFlags = 0) { + auto result = reinterpret_steal(raw_array(h.ptr(), ExtraFlags)); + if (!result) + PyErr_Clear(); + return result; + } + +protected: + template friend struct detail::npy_format_descriptor; + + void fail_dim_check(ssize_t dim, const std::string& msg) const { + throw index_error(msg + ": " + std::to_string(dim) + + " (ndim = " + std::to_string(ndim()) + ")"); + } + + template ssize_t byte_offset(Ix... index) const { + check_dimensions(index...); + return detail::byte_offset_unsafe(strides(), ssize_t(index)...); + } + + void check_writeable() const { + if (!writeable()) + throw std::domain_error("array is not writeable"); + } + + template void check_dimensions(Ix... index) const { + check_dimensions_impl(ssize_t(0), shape(), ssize_t(index)...); + } + + void check_dimensions_impl(ssize_t, const ssize_t*) const { } + + template void check_dimensions_impl(ssize_t axis, const ssize_t* shape, ssize_t i, Ix... index) const { + if (i >= *shape) { + throw index_error(std::string("index ") + std::to_string(i) + + " is out of bounds for axis " + std::to_string(axis) + + " with size " + std::to_string(*shape)); + } + check_dimensions_impl(axis + 1, shape + 1, index...); + } + + /// Create array from any object -- always returns a new reference + static PyObject *raw_array(PyObject *ptr, int ExtraFlags = 0) { + if (ptr == nullptr) { + PyErr_SetString(PyExc_ValueError, "cannot create a pybind11::array from a nullptr"); + return nullptr; + } + return detail::npy_api::get().PyArray_FromAny_( + ptr, nullptr, 0, 0, detail::npy_api::NPY_ARRAY_ENSUREARRAY_ | ExtraFlags, nullptr); + } +}; + +template class array_t : public array { +private: + struct private_ctor {}; + // Delegating constructor needed when both moving and accessing in the same constructor + array_t(private_ctor, ShapeContainer &&shape, StridesContainer &&strides, const T *ptr, handle base) + : array(std::move(shape), std::move(strides), ptr, base) {} +public: + 
static_assert(!detail::array_info::is_array, "Array types cannot be used with array_t"); + + using value_type = T; + + array_t() : array(0, static_cast(nullptr)) {} + array_t(handle h, borrowed_t) : array(h, borrowed_t{}) { } + array_t(handle h, stolen_t) : array(h, stolen_t{}) { } + + PYBIND11_DEPRECATED("Use array_t::ensure() instead") + array_t(handle h, bool is_borrowed) : array(raw_array_t(h.ptr()), stolen_t{}) { + if (!m_ptr) PyErr_Clear(); + if (!is_borrowed) Py_XDECREF(h.ptr()); + } + + // NOLINTNEXTLINE(google-explicit-constructor) + array_t(const object &o) : array(raw_array_t(o.ptr()), stolen_t{}) { + if (!m_ptr) throw error_already_set(); + } + + explicit array_t(const buffer_info& info, handle base = handle()) : array(info, base) { } + + array_t(ShapeContainer shape, StridesContainer strides, const T *ptr = nullptr, handle base = handle()) + : array(std::move(shape), std::move(strides), ptr, base) { } + + explicit array_t(ShapeContainer shape, const T *ptr = nullptr, handle base = handle()) + : array_t(private_ctor{}, + std::move(shape), + (ExtraFlags & f_style) != 0 ? detail::f_strides(*shape, itemsize()) + : detail::c_strides(*shape, itemsize()), + ptr, + base) {} + + explicit array_t(ssize_t count, const T *ptr = nullptr, handle base = handle()) + : array({count}, {}, ptr, base) { } + + constexpr ssize_t itemsize() const { + return sizeof(T); + } + + template ssize_t index_at(Ix... index) const { + return offset_at(index...) / itemsize(); + } + + template const T* data(Ix... index) const { + return static_cast(array::data(index...)); + } + + template T* mutable_data(Ix... index) { + return static_cast(array::mutable_data(index...)); + } + + // Reference to element at a given index + template const T& at(Ix... index) const { + if ((ssize_t) sizeof...(index) != ndim()) + fail_dim_check(sizeof...(index), "index dimension mismatch"); + return *(static_cast(array::data()) + byte_offset(ssize_t(index)...) 
/ itemsize()); + } + + // Mutable reference to element at a given index + template T& mutable_at(Ix... index) { + if ((ssize_t) sizeof...(index) != ndim()) + fail_dim_check(sizeof...(index), "index dimension mismatch"); + return *(static_cast(array::mutable_data()) + byte_offset(ssize_t(index)...) / itemsize()); + } + + /** + * Returns a proxy object that provides access to the array's data without bounds or + * dimensionality checking. Will throw if the array is missing the `writeable` flag. Use with + * care: the array must not be destroyed or reshaped for the duration of the returned object, + * and the caller must take care not to access invalid dimensions or dimension indices. + */ + template detail::unchecked_mutable_reference mutable_unchecked() & { + return array::mutable_unchecked(); + } + + /** + * Returns a proxy object that provides const access to the array's data without bounds or + * dimensionality checking. Unlike `mutable_unchecked()`, this does not require that the underlying + * array have the `writeable` flag. Use with care: the array must not be destroyed or reshaped + * for the duration of the returned object, and the caller must take care not to access invalid + * dimensions or dimension indices. + */ + template detail::unchecked_reference unchecked() const & { + return array::unchecked(); + } + + /// Ensure that the argument is a NumPy array of the correct dtype (and if not, try to convert + /// it). In case of an error, nullptr is returned and the Python error is cleared. 
+ static array_t ensure(handle h) { + auto result = reinterpret_steal(raw_array_t(h.ptr())); + if (!result) + PyErr_Clear(); + return result; + } + + static bool check_(handle h) { + const auto &api = detail::npy_api::get(); + return api.PyArray_Check_(h.ptr()) + && api.PyArray_EquivTypes_(detail::array_proxy(h.ptr())->descr, dtype::of().ptr()) + && detail::check_flags(h.ptr(), ExtraFlags & (array::c_style | array::f_style)); + } + +protected: + /// Create array from any object -- always returns a new reference + static PyObject *raw_array_t(PyObject *ptr) { + if (ptr == nullptr) { + PyErr_SetString(PyExc_ValueError, "cannot create a pybind11::array_t from a nullptr"); + return nullptr; + } + return detail::npy_api::get().PyArray_FromAny_( + ptr, dtype::of().release().ptr(), 0, 0, + detail::npy_api::NPY_ARRAY_ENSUREARRAY_ | ExtraFlags, nullptr); + } +}; + +template +struct format_descriptor::value>> { + static std::string format() { + return detail::npy_format_descriptor::type>::format(); + } +}; + +template struct format_descriptor { + static std::string format() { return std::to_string(N) + "s"; } +}; +template struct format_descriptor> { + static std::string format() { return std::to_string(N) + "s"; } +}; + +template +struct format_descriptor::value>> { + static std::string format() { + return format_descriptor< + typename std::remove_cv::type>::type>::format(); + } +}; + +template +struct format_descriptor::is_array>> { + static std::string format() { + using namespace detail; + static constexpr auto extents = _("(") + array_info::extents + _(")"); + return extents.text + format_descriptor>::format(); + } +}; + +PYBIND11_NAMESPACE_BEGIN(detail) +template +struct pyobject_caster> { + using type = array_t; + + bool load(handle src, bool convert) { + if (!convert && !type::check_(src)) + return false; + value = type::ensure(src); + return static_cast(value); + } + + static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) { + 
return src.inc_ref(); + } + PYBIND11_TYPE_CASTER(type, handle_type_name::name); +}; + +template +struct compare_buffer_info::value>> { + static bool compare(const buffer_info& b) { + return npy_api::get().PyArray_EquivTypes_(dtype::of().ptr(), dtype(b).ptr()); + } +}; + +template +struct npy_format_descriptor_name; + +template +struct npy_format_descriptor_name::value>> { + static constexpr auto name = _::value>( + _("bool"), _::value>("numpy.int", "numpy.uint") + _() + ); +}; + +template +struct npy_format_descriptor_name::value>> { + static constexpr auto name = _::value + || std::is_same::value + || std::is_same::value + || std::is_same::value>( + _("numpy.float") + _(), _("numpy.longdouble") + ); +}; + +template +struct npy_format_descriptor_name::value>> { + static constexpr auto name = _::value + || std::is_same::value + || std::is_same::value + || std::is_same::value>( + _("numpy.complex") + _(), _("numpy.longcomplex") + ); +}; + +template +struct npy_format_descriptor::value>> + : npy_format_descriptor_name { +private: + // NB: the order here must match the one in common.h + constexpr static const int values[15] = { + npy_api::NPY_BOOL_, + npy_api::NPY_BYTE_, npy_api::NPY_UBYTE_, npy_api::NPY_INT16_, npy_api::NPY_UINT16_, + npy_api::NPY_INT32_, npy_api::NPY_UINT32_, npy_api::NPY_INT64_, npy_api::NPY_UINT64_, + npy_api::NPY_FLOAT_, npy_api::NPY_DOUBLE_, npy_api::NPY_LONGDOUBLE_, + npy_api::NPY_CFLOAT_, npy_api::NPY_CDOUBLE_, npy_api::NPY_CLONGDOUBLE_ + }; + +public: + static constexpr int value = values[detail::is_fmt_numeric::index]; + + static pybind11::dtype dtype() { + if (auto ptr = npy_api::get().PyArray_DescrFromType_(value)) + return reinterpret_steal(ptr); + pybind11_fail("Unsupported buffer format!"); + } +}; + +#define PYBIND11_DECL_CHAR_FMT \ + static constexpr auto name = _("S") + _(); \ + static pybind11::dtype dtype() { return pybind11::dtype(std::string("S") + std::to_string(N)); } +template struct npy_format_descriptor { 
PYBIND11_DECL_CHAR_FMT }; +template struct npy_format_descriptor> { PYBIND11_DECL_CHAR_FMT }; +#undef PYBIND11_DECL_CHAR_FMT + +template struct npy_format_descriptor::is_array>> { +private: + using base_descr = npy_format_descriptor::type>; +public: + static_assert(!array_info::is_empty, "Zero-sized arrays are not supported"); + + static constexpr auto name = _("(") + array_info::extents + _(")") + base_descr::name; + static pybind11::dtype dtype() { + list shape; + array_info::append_extents(shape); + return pybind11::dtype::from_args(pybind11::make_tuple(base_descr::dtype(), shape)); + } +}; + +template struct npy_format_descriptor::value>> { +private: + using base_descr = npy_format_descriptor::type>; +public: + static constexpr auto name = base_descr::name; + static pybind11::dtype dtype() { return base_descr::dtype(); } +}; + +struct field_descriptor { + const char *name; + ssize_t offset; + ssize_t size; + std::string format; + dtype descr; +}; + +PYBIND11_NOINLINE void register_structured_dtype( + any_container fields, + const std::type_info& tinfo, ssize_t itemsize, + bool (*direct_converter)(PyObject *, void *&)) { + + auto& numpy_internals = get_numpy_internals(); + if (numpy_internals.get_type_info(tinfo, false)) + pybind11_fail("NumPy: dtype is already registered"); + + // Use ordered fields because order matters as of NumPy 1.14: + // https://docs.scipy.org/doc/numpy/release.html#multiple-field-indexing-assignment-of-structured-arrays + std::vector ordered_fields(std::move(fields)); + std::sort(ordered_fields.begin(), ordered_fields.end(), + [](const field_descriptor &a, const field_descriptor &b) { return a.offset < b.offset; }); + + list names, formats, offsets; + for (auto& field : ordered_fields) { + if (!field.descr) + pybind11_fail(std::string("NumPy: unsupported field dtype: `") + + field.name + "` @ " + tinfo.name()); + names.append(PYBIND11_STR_TYPE(field.name)); + formats.append(field.descr); + offsets.append(pybind11::int_(field.offset)); + 
} + auto dtype_ptr + = pybind11::dtype(std::move(names), std::move(formats), std::move(offsets), itemsize) + .release() + .ptr(); + + // There is an existing bug in NumPy (as of v1.11): trailing bytes are + // not encoded explicitly into the format string. This will supposedly + // get fixed in v1.12; for further details, see these: + // - https://github.com/numpy/numpy/issues/7797 + // - https://github.com/numpy/numpy/pull/7798 + // Because of this, we won't use numpy's logic to generate buffer format + // strings and will just do it ourselves. + ssize_t offset = 0; + std::ostringstream oss; + // mark the structure as unaligned with '^', because numpy and C++ don't + // always agree about alignment (particularly for complex), and we're + // explicitly listing all our padding. This depends on none of the fields + // overriding the endianness. Putting the ^ in front of individual fields + // isn't guaranteed to work due to https://github.com/numpy/numpy/issues/9049 + oss << "^T{"; + for (auto& field : ordered_fields) { + if (field.offset > offset) + oss << (field.offset - offset) << 'x'; + oss << field.format << ':' << field.name << ':'; + offset = field.offset + field.size; + } + if (itemsize > offset) + oss << (itemsize - offset) << 'x'; + oss << '}'; + auto format_str = oss.str(); + + // Sanity check: verify that NumPy properly parses our buffer format string + auto& api = npy_api::get(); + auto arr = array(buffer_info(nullptr, itemsize, format_str, 1)); + if (!api.PyArray_EquivTypes_(dtype_ptr, arr.dtype().ptr())) + pybind11_fail("NumPy: invalid buffer descriptor!"); + + auto tindex = std::type_index(tinfo); + numpy_internals.registered_dtypes[tindex] = { dtype_ptr, format_str }; + get_internals().direct_conversions[tindex].push_back(direct_converter); +} + +template struct npy_format_descriptor { + static_assert(is_pod_struct::value, "Attempt to use a non-POD or unimplemented POD type as a numpy dtype"); + + static constexpr auto name = make_caster::name; + + 
static pybind11::dtype dtype() { + return reinterpret_borrow(dtype_ptr()); + } + + static std::string format() { + static auto format_str = get_numpy_internals().get_type_info(true)->format_str; + return format_str; + } + + static void register_dtype(any_container fields) { + register_structured_dtype(std::move(fields), typeid(typename std::remove_cv::type), + sizeof(T), &direct_converter); + } + +private: + static PyObject* dtype_ptr() { + static PyObject* ptr = get_numpy_internals().get_type_info(true)->dtype_ptr; + return ptr; + } + + static bool direct_converter(PyObject *obj, void*& value) { + auto& api = npy_api::get(); + if (!PyObject_TypeCheck(obj, api.PyVoidArrType_Type_)) + return false; + if (auto descr = reinterpret_steal(api.PyArray_DescrFromScalar_(obj))) { + if (api.PyArray_EquivTypes_(dtype_ptr(), descr.ptr())) { + value = ((PyVoidScalarObject_Proxy *) obj)->obval; + return true; + } + } + return false; + } +}; + +#ifdef __CLION_IDE__ // replace heavy macro with dummy code for the IDE (doesn't affect code) +# define PYBIND11_NUMPY_DTYPE(Type, ...) ((void)0) +# define PYBIND11_NUMPY_DTYPE_EX(Type, ...) ((void)0) +#else + +#define PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, Name) \ + ::pybind11::detail::field_descriptor { \ + Name, offsetof(T, Field), sizeof(decltype(std::declval().Field)), \ + ::pybind11::format_descriptor().Field)>::format(), \ + ::pybind11::detail::npy_format_descriptor().Field)>::dtype() \ + } + +// Extract name, offset and format descriptor for a struct field +#define PYBIND11_FIELD_DESCRIPTOR(T, Field) PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, #Field) + +// The main idea of this macro is borrowed from https://github.com/swansontec/map-macro +// (C) William Swanson, Paul Fultz +#define PYBIND11_EVAL0(...) __VA_ARGS__ +#define PYBIND11_EVAL1(...) PYBIND11_EVAL0 (PYBIND11_EVAL0 (PYBIND11_EVAL0 (__VA_ARGS__))) +#define PYBIND11_EVAL2(...) PYBIND11_EVAL1 (PYBIND11_EVAL1 (PYBIND11_EVAL1 (__VA_ARGS__))) +#define PYBIND11_EVAL3(...) 
PYBIND11_EVAL2 (PYBIND11_EVAL2 (PYBIND11_EVAL2 (__VA_ARGS__))) +#define PYBIND11_EVAL4(...) PYBIND11_EVAL3 (PYBIND11_EVAL3 (PYBIND11_EVAL3 (__VA_ARGS__))) +#define PYBIND11_EVAL(...) PYBIND11_EVAL4 (PYBIND11_EVAL4 (PYBIND11_EVAL4 (__VA_ARGS__))) +#define PYBIND11_MAP_END(...) +#define PYBIND11_MAP_OUT +#define PYBIND11_MAP_COMMA , +#define PYBIND11_MAP_GET_END() 0, PYBIND11_MAP_END +#define PYBIND11_MAP_NEXT0(test, next, ...) next PYBIND11_MAP_OUT +#define PYBIND11_MAP_NEXT1(test, next) PYBIND11_MAP_NEXT0 (test, next, 0) +#define PYBIND11_MAP_NEXT(test, next) PYBIND11_MAP_NEXT1 (PYBIND11_MAP_GET_END test, next) +#if defined(_MSC_VER) && !defined(__clang__) // MSVC is not as eager to expand macros, hence this workaround +#define PYBIND11_MAP_LIST_NEXT1(test, next) \ + PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0)) +#else +#define PYBIND11_MAP_LIST_NEXT1(test, next) \ + PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0) +#endif +#define PYBIND11_MAP_LIST_NEXT(test, next) \ + PYBIND11_MAP_LIST_NEXT1 (PYBIND11_MAP_GET_END test, next) +#define PYBIND11_MAP_LIST0(f, t, x, peek, ...) \ + f(t, x) PYBIND11_MAP_LIST_NEXT (peek, PYBIND11_MAP_LIST1) (f, t, peek, __VA_ARGS__) +#define PYBIND11_MAP_LIST1(f, t, x, peek, ...) \ + f(t, x) PYBIND11_MAP_LIST_NEXT (peek, PYBIND11_MAP_LIST0) (f, t, peek, __VA_ARGS__) +// PYBIND11_MAP_LIST(f, t, a1, a2, ...) expands to f(t, a1), f(t, a2), ... +#define PYBIND11_MAP_LIST(f, t, ...) \ + PYBIND11_EVAL (PYBIND11_MAP_LIST1 (f, t, __VA_ARGS__, (), 0)) + +#define PYBIND11_NUMPY_DTYPE(Type, ...) 
\ + ::pybind11::detail::npy_format_descriptor::register_dtype \ + (::std::vector<::pybind11::detail::field_descriptor> \ + {PYBIND11_MAP_LIST (PYBIND11_FIELD_DESCRIPTOR, Type, __VA_ARGS__)}) + +#if defined(_MSC_VER) && !defined(__clang__) +#define PYBIND11_MAP2_LIST_NEXT1(test, next) \ + PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0)) +#else +#define PYBIND11_MAP2_LIST_NEXT1(test, next) \ + PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0) +#endif +#define PYBIND11_MAP2_LIST_NEXT(test, next) \ + PYBIND11_MAP2_LIST_NEXT1 (PYBIND11_MAP_GET_END test, next) +#define PYBIND11_MAP2_LIST0(f, t, x1, x2, peek, ...) \ + f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT (peek, PYBIND11_MAP2_LIST1) (f, t, peek, __VA_ARGS__) +#define PYBIND11_MAP2_LIST1(f, t, x1, x2, peek, ...) \ + f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT (peek, PYBIND11_MAP2_LIST0) (f, t, peek, __VA_ARGS__) +// PYBIND11_MAP2_LIST(f, t, a1, a2, ...) expands to f(t, a1, a2), f(t, a3, a4), ... +#define PYBIND11_MAP2_LIST(f, t, ...) \ + PYBIND11_EVAL (PYBIND11_MAP2_LIST1 (f, t, __VA_ARGS__, (), 0)) + +#define PYBIND11_NUMPY_DTYPE_EX(Type, ...) 
\ + ::pybind11::detail::npy_format_descriptor::register_dtype \ + (::std::vector<::pybind11::detail::field_descriptor> \ + {PYBIND11_MAP2_LIST (PYBIND11_FIELD_DESCRIPTOR_EX, Type, __VA_ARGS__)}) + +#endif // __CLION_IDE__ + +class common_iterator { +public: + using container_type = std::vector; + using value_type = container_type::value_type; + using size_type = container_type::size_type; + + common_iterator() : m_strides() {} + + common_iterator(void* ptr, const container_type& strides, const container_type& shape) + : p_ptr(reinterpret_cast(ptr)), m_strides(strides.size()) { + m_strides.back() = static_cast(strides.back()); + for (size_type i = m_strides.size() - 1; i != 0; --i) { + size_type j = i - 1; + auto s = static_cast(shape[i]); + m_strides[j] = strides[j] + m_strides[i] - strides[i] * s; + } + } + + void increment(size_type dim) { + p_ptr += m_strides[dim]; + } + + void* data() const { + return p_ptr; + } + +private: + char *p_ptr{0}; + container_type m_strides; +}; + +template class multi_array_iterator { +public: + using container_type = std::vector; + + multi_array_iterator(const std::array &buffers, + const container_type &shape) + : m_shape(shape.size()), m_index(shape.size(), 0), + m_common_iterator() { + + // Manual copy to avoid conversion warning if using std::copy + for (size_t i = 0; i < shape.size(); ++i) + m_shape[i] = shape[i]; + + container_type strides(shape.size()); + for (size_t i = 0; i < N; ++i) + init_common_iterator(buffers[i], shape, m_common_iterator[i], strides); + } + + multi_array_iterator& operator++() { + for (size_t j = m_index.size(); j != 0; --j) { + size_t i = j - 1; + if (++m_index[i] != m_shape[i]) { + increment_common_iterator(i); + break; + } + m_index[i] = 0; + } + return *this; + } + + template T* data() const { + return reinterpret_cast(m_common_iterator[K].data()); + } + +private: + + using common_iter = common_iterator; + + void init_common_iterator(const buffer_info &buffer, + const container_type &shape, + 
common_iter &iterator, + container_type &strides) { + auto buffer_shape_iter = buffer.shape.rbegin(); + auto buffer_strides_iter = buffer.strides.rbegin(); + auto shape_iter = shape.rbegin(); + auto strides_iter = strides.rbegin(); + + while (buffer_shape_iter != buffer.shape.rend()) { + if (*shape_iter == *buffer_shape_iter) + *strides_iter = *buffer_strides_iter; + else + *strides_iter = 0; + + ++buffer_shape_iter; + ++buffer_strides_iter; + ++shape_iter; + ++strides_iter; + } + + std::fill(strides_iter, strides.rend(), 0); + iterator = common_iter(buffer.ptr, strides, shape); + } + + void increment_common_iterator(size_t dim) { + for (auto &iter : m_common_iterator) + iter.increment(dim); + } + + container_type m_shape; + container_type m_index; + std::array m_common_iterator; +}; + +enum class broadcast_trivial { non_trivial, c_trivial, f_trivial }; + +// Populates the shape and number of dimensions for the set of buffers. Returns a broadcast_trivial +// enum value indicating whether the broadcast is "trivial"--that is, has each buffer being either a +// singleton or a full-size, C-contiguous (`c_trivial`) or Fortran-contiguous (`f_trivial`) storage +// buffer; returns `non_trivial` otherwise. +template +broadcast_trivial broadcast(const std::array &buffers, ssize_t &ndim, std::vector &shape) { + ndim = std::accumulate(buffers.begin(), buffers.end(), ssize_t(0), [](ssize_t res, const buffer_info &buf) { + return std::max(res, buf.ndim); + }); + + shape.clear(); + shape.resize((size_t) ndim, 1); + + // Figure out the output size, and make sure all input arrays conform (i.e. are either size 1 or + // the full size). 
+ for (size_t i = 0; i < N; ++i) { + auto res_iter = shape.rbegin(); + auto end = buffers[i].shape.rend(); + for (auto shape_iter = buffers[i].shape.rbegin(); shape_iter != end; ++shape_iter, ++res_iter) { + const auto &dim_size_in = *shape_iter; + auto &dim_size_out = *res_iter; + + // Each input dimension can either be 1 or `n`, but `n` values must match across buffers + if (dim_size_out == 1) + dim_size_out = dim_size_in; + else if (dim_size_in != 1 && dim_size_in != dim_size_out) + pybind11_fail("pybind11::vectorize: incompatible size/dimension of inputs!"); + } + } + + bool trivial_broadcast_c = true; + bool trivial_broadcast_f = true; + for (size_t i = 0; i < N && (trivial_broadcast_c || trivial_broadcast_f); ++i) { + if (buffers[i].size == 1) + continue; + + // Require the same number of dimensions: + if (buffers[i].ndim != ndim) + return broadcast_trivial::non_trivial; + + // Require all dimensions be full-size: + if (!std::equal(buffers[i].shape.cbegin(), buffers[i].shape.cend(), shape.cbegin())) + return broadcast_trivial::non_trivial; + + // Check for C contiguity (but only if previous inputs were also C contiguous) + if (trivial_broadcast_c) { + ssize_t expect_stride = buffers[i].itemsize; + auto end = buffers[i].shape.crend(); + for (auto shape_iter = buffers[i].shape.crbegin(), stride_iter = buffers[i].strides.crbegin(); + trivial_broadcast_c && shape_iter != end; ++shape_iter, ++stride_iter) { + if (expect_stride == *stride_iter) + expect_stride *= *shape_iter; + else + trivial_broadcast_c = false; + } + } + + // Check for Fortran contiguity (if previous inputs were also F contiguous) + if (trivial_broadcast_f) { + ssize_t expect_stride = buffers[i].itemsize; + auto end = buffers[i].shape.cend(); + for (auto shape_iter = buffers[i].shape.cbegin(), stride_iter = buffers[i].strides.cbegin(); + trivial_broadcast_f && shape_iter != end; ++shape_iter, ++stride_iter) { + if (expect_stride == *stride_iter) + expect_stride *= *shape_iter; + else + 
trivial_broadcast_f = false; + } + } + } + + return + trivial_broadcast_c ? broadcast_trivial::c_trivial : + trivial_broadcast_f ? broadcast_trivial::f_trivial : + broadcast_trivial::non_trivial; +} + +template +struct vectorize_arg { + static_assert(!std::is_rvalue_reference::value, "Functions with rvalue reference arguments cannot be vectorized"); + // The wrapped function gets called with this type: + using call_type = remove_reference_t; + // Is this a vectorized argument? + static constexpr bool vectorize = + satisfies_any_of::value && + satisfies_none_of::value && + (!std::is_reference::value || + (std::is_lvalue_reference::value && std::is_const::value)); + // Accept this type: an array for vectorized types, otherwise the type as-is: + using type = conditional_t, array::forcecast>, T>; +}; + + +// py::vectorize when a return type is present +template +struct vectorize_returned_array { + using Type = array_t; + + static Type create(broadcast_trivial trivial, const std::vector &shape) { + if (trivial == broadcast_trivial::f_trivial) + return array_t(shape); + return array_t(shape); + } + + static Return *mutable_data(Type &array) { + return array.mutable_data(); + } + + static Return call(Func &f, Args &... args) { + return f(args...); + } + + static void call(Return *out, size_t i, Func &f, Args &... args) { + out[i] = f(args...); + } +}; + +// py::vectorize when a return type is not present +template +struct vectorize_returned_array { + using Type = none; + + static Type create(broadcast_trivial, const std::vector &) { + return none(); + } + + static void *mutable_data(Type &) { + return nullptr; + } + + static detail::void_type call(Func &f, Args &... args) { + f(args...); + return {}; + } + + static void call(void *, size_t, Func &f, Args &... 
args) { + f(args...); + } +}; + + +template +struct vectorize_helper { + +// NVCC for some reason breaks if NVectorized is private +#ifdef __CUDACC__ +public: +#else +private: +#endif + + static constexpr size_t N = sizeof...(Args); + static constexpr size_t NVectorized = constexpr_sum(vectorize_arg::vectorize...); + static_assert(NVectorized >= 1, + "pybind11::vectorize(...) requires a function with at least one vectorizable argument"); + +public: + template ::type>::value>> + explicit vectorize_helper(T &&f) : f(std::forward(f)) {} + + object operator()(typename vectorize_arg::type... args) { + return run(args..., + make_index_sequence(), + select_indices::vectorize...>(), + make_index_sequence()); + } + +private: + remove_reference_t f; + + // Internal compiler error in MSVC 19.16.27025.1 (Visual Studio 2017 15.9.4), when compiling with "/permissive-" flag + // when arg_call_types is manually inlined. + using arg_call_types = std::tuple::call_type...>; + template using param_n_t = typename std::tuple_element::type; + + using returned_array = vectorize_returned_array; + + // Runs a vectorized function given arguments tuple and three index sequences: + // - Index is the full set of 0 ... (N-1) argument indices; + // - VIndex is the subset of argument indices with vectorized parameters, letting us access + // vectorized arguments (anything not in this sequence is passed through) + // - BIndex is a incremental sequence (beginning at 0) of the same size as VIndex, so that + // we can store vectorized buffer_infos in an array (argument VIndex has its buffer at + // index BIndex in the array). + template object run( + typename vectorize_arg::type &...args, + index_sequence i_seq, index_sequence vi_seq, index_sequence bi_seq) { + + // Pointers to values the function was called with; the vectorized ones set here will start + // out as array_t pointers, but they will be changed them to T pointers before we make + // call the wrapped function. 
Non-vectorized pointers are left as-is. + std::array params{{ &args... }}; + + // The array of `buffer_info`s of vectorized arguments: + std::array buffers{{ reinterpret_cast(params[VIndex])->request()... }}; + + /* Determine dimensions parameters of output array */ + ssize_t nd = 0; + std::vector shape(0); + auto trivial = broadcast(buffers, nd, shape); + auto ndim = (size_t) nd; + + size_t size = std::accumulate(shape.begin(), shape.end(), (size_t) 1, std::multiplies()); + + // If all arguments are 0-dimension arrays (i.e. single values) return a plain value (i.e. + // not wrapped in an array). + if (size == 1 && ndim == 0) { + PYBIND11_EXPAND_SIDE_EFFECTS(params[VIndex] = buffers[BIndex].ptr); + return cast(returned_array::call(f, *reinterpret_cast *>(params[Index])...)); + } + + auto result = returned_array::create(trivial, shape); + + if (size == 0) return std::move(result); + + /* Call the function */ + auto mutable_data = returned_array::mutable_data(result); + if (trivial == broadcast_trivial::non_trivial) + apply_broadcast(buffers, params, mutable_data, size, shape, i_seq, vi_seq, bi_seq); + else + apply_trivial(buffers, params, mutable_data, size, i_seq, vi_seq, bi_seq); + + return std::move(result); + } + + template + void apply_trivial(std::array &buffers, + std::array ¶ms, + Return *out, + size_t size, + index_sequence, index_sequence, index_sequence) { + + // Initialize an array of mutable byte references and sizes with references set to the + // appropriate pointer in `params`; as we iterate, we'll increment each pointer by its size + // (except for singletons, which get an increment of 0). + std::array, NVectorized> vecparams{{ + std::pair( + reinterpret_cast(params[VIndex] = buffers[BIndex].ptr), + buffers[BIndex].size == 1 ? 0 : sizeof(param_n_t) + )... 
+ }}; + + for (size_t i = 0; i < size; ++i) { + returned_array::call(out, i, f, *reinterpret_cast *>(params[Index])...); + for (auto &x : vecparams) x.first += x.second; + } + } + + template + void apply_broadcast(std::array &buffers, + std::array ¶ms, + Return *out, + size_t size, + const std::vector &output_shape, + index_sequence, index_sequence, index_sequence) { + + multi_array_iterator input_iter(buffers, output_shape); + + for (size_t i = 0; i < size; ++i, ++input_iter) { + PYBIND11_EXPAND_SIDE_EFFECTS(( + params[VIndex] = input_iter.template data() + )); + returned_array::call(out, i, f, *reinterpret_cast *>(std::get(params))...); + } + } +}; + +template +vectorize_helper +vectorize_extractor(const Func &f, Return (*) (Args ...)) { + return detail::vectorize_helper(f); +} + +template struct handle_type_name> { + static constexpr auto name = _("numpy.ndarray[") + npy_format_descriptor::name + _("]"); +}; + +PYBIND11_NAMESPACE_END(detail) + +// Vanilla pointer vectorizer: +template +detail::vectorize_helper +vectorize(Return (*f) (Args ...)) { + return detail::vectorize_helper(f); +} + +// lambda vectorizer: +template ::value, int> = 0> +auto vectorize(Func &&f) -> decltype( + detail::vectorize_extractor(std::forward(f), (detail::function_signature_t *) nullptr)) { + return detail::vectorize_extractor(std::forward(f), (detail::function_signature_t *) nullptr); +} + +// Vectorize a class method (non-const): +template ())), Return, Class *, Args...>> +Helper vectorize(Return (Class::*f)(Args...)) { + return Helper(std::mem_fn(f)); +} + +// Vectorize a class method (const): +template ())), Return, const Class *, Args...>> +Helper vectorize(Return (Class::*f)(Args...) 
const) { + return Helper(std::mem_fn(f)); +} + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/operators.h b/Libraries/pybind11-2.8.0/pybind11/operators.h new file mode 100644 index 00000000..2a615315 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/operators.h @@ -0,0 +1,163 @@ +/* + pybind11/operator.h: Metatemplates for operator overloading + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pybind11.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +/// Enumeration with all supported operator types +enum op_id : int { + op_add, op_sub, op_mul, op_div, op_mod, op_divmod, op_pow, op_lshift, + op_rshift, op_and, op_xor, op_or, op_neg, op_pos, op_abs, op_invert, + op_int, op_long, op_float, op_str, op_cmp, op_gt, op_ge, op_lt, op_le, + op_eq, op_ne, op_iadd, op_isub, op_imul, op_idiv, op_imod, op_ilshift, + op_irshift, op_iand, op_ixor, op_ior, op_complex, op_bool, op_nonzero, + op_repr, op_truediv, op_itruediv, op_hash +}; + +enum op_type : int { + op_l, /* base type on left */ + op_r, /* base type on right */ + op_u /* unary operator */ +}; + +struct self_t { }; +static const self_t self = self_t(); + +/// Type for an unused type slot +struct undefined_t { }; + +/// Don't warn about an unused variable +inline self_t __self() { return self; } + +/// base template of operator implementations +template struct op_impl { }; + +/// Operator implementation generator +template struct op_ { + template void execute(Class &cl, const Extra&... 
extra) const { + using Base = typename Class::type; + using L_type = conditional_t::value, Base, L>; + using R_type = conditional_t::value, Base, R>; + using op = op_impl; + cl.def(op::name(), &op::execute, is_operator(), extra...); + #if PY_MAJOR_VERSION < 3 + if (PYBIND11_SILENCE_MSVC_C4127(id == op_truediv) || + PYBIND11_SILENCE_MSVC_C4127(id == op_itruediv)) + cl.def(id == op_itruediv ? "__idiv__" : ot == op_l ? "__div__" : "__rdiv__", + &op::execute, is_operator(), extra...); + #endif + } + template void execute_cast(Class &cl, const Extra&... extra) const { + using Base = typename Class::type; + using L_type = conditional_t::value, Base, L>; + using R_type = conditional_t::value, Base, R>; + using op = op_impl; + cl.def(op::name(), &op::execute_cast, is_operator(), extra...); + #if PY_MAJOR_VERSION < 3 + if (id == op_truediv || id == op_itruediv) + cl.def(id == op_itruediv ? "__idiv__" : ot == op_l ? "__div__" : "__rdiv__", + &op::execute, is_operator(), extra...); + #endif + } +}; + +#define PYBIND11_BINARY_OPERATOR(id, rid, op, expr) \ +template struct op_impl { \ + static char const* name() { return "__" #id "__"; } \ + static auto execute(const L &l, const R &r) -> decltype(expr) { return (expr); } \ + static B execute_cast(const L &l, const R &r) { return B(expr); } \ +}; \ +template struct op_impl { \ + static char const* name() { return "__" #rid "__"; } \ + static auto execute(const R &r, const L &l) -> decltype(expr) { return (expr); } \ + static B execute_cast(const R &r, const L &l) { return B(expr); } \ +}; \ +inline op_ op(const self_t &, const self_t &) { \ + return op_(); \ +} \ +template op_ op(const self_t &, const T &) { \ + return op_(); \ +} \ +template op_ op(const T &, const self_t &) { \ + return op_(); \ +} + +#define PYBIND11_INPLACE_OPERATOR(id, op, expr) \ +template struct op_impl { \ + static char const* name() { return "__" #id "__"; } \ + static auto execute(L &l, const R &r) -> decltype(expr) { return expr; } \ + static B 
execute_cast(L &l, const R &r) { return B(expr); } \ +}; \ +template op_ op(const self_t &, const T &) { \ + return op_(); \ +} + +#define PYBIND11_UNARY_OPERATOR(id, op, expr) \ +template struct op_impl { \ + static char const* name() { return "__" #id "__"; } \ + static auto execute(const L &l) -> decltype(expr) { return expr; } \ + static B execute_cast(const L &l) { return B(expr); } \ +}; \ +inline op_ op(const self_t &) { \ + return op_(); \ +} + +PYBIND11_BINARY_OPERATOR(sub, rsub, operator-, l - r) +PYBIND11_BINARY_OPERATOR(add, radd, operator+, l + r) +PYBIND11_BINARY_OPERATOR(mul, rmul, operator*, l * r) +PYBIND11_BINARY_OPERATOR(truediv, rtruediv, operator/, l / r) +PYBIND11_BINARY_OPERATOR(mod, rmod, operator%, l % r) +PYBIND11_BINARY_OPERATOR(lshift, rlshift, operator<<, l << r) +PYBIND11_BINARY_OPERATOR(rshift, rrshift, operator>>, l >> r) +PYBIND11_BINARY_OPERATOR(and, rand, operator&, l & r) +PYBIND11_BINARY_OPERATOR(xor, rxor, operator^, l ^ r) +PYBIND11_BINARY_OPERATOR(eq, eq, operator==, l == r) +PYBIND11_BINARY_OPERATOR(ne, ne, operator!=, l != r) +PYBIND11_BINARY_OPERATOR(or, ror, operator|, l | r) +PYBIND11_BINARY_OPERATOR(gt, lt, operator>, l > r) +PYBIND11_BINARY_OPERATOR(ge, le, operator>=, l >= r) +PYBIND11_BINARY_OPERATOR(lt, gt, operator<, l < r) +PYBIND11_BINARY_OPERATOR(le, ge, operator<=, l <= r) +//PYBIND11_BINARY_OPERATOR(pow, rpow, pow, std::pow(l, r)) +PYBIND11_INPLACE_OPERATOR(iadd, operator+=, l += r) +PYBIND11_INPLACE_OPERATOR(isub, operator-=, l -= r) +PYBIND11_INPLACE_OPERATOR(imul, operator*=, l *= r) +PYBIND11_INPLACE_OPERATOR(itruediv, operator/=, l /= r) +PYBIND11_INPLACE_OPERATOR(imod, operator%=, l %= r) +PYBIND11_INPLACE_OPERATOR(ilshift, operator<<=, l <<= r) +PYBIND11_INPLACE_OPERATOR(irshift, operator>>=, l >>= r) +PYBIND11_INPLACE_OPERATOR(iand, operator&=, l &= r) +PYBIND11_INPLACE_OPERATOR(ixor, operator^=, l ^= r) +PYBIND11_INPLACE_OPERATOR(ior, operator|=, l |= r) +PYBIND11_UNARY_OPERATOR(neg, operator-, -l) 
+PYBIND11_UNARY_OPERATOR(pos, operator+, +l) +// WARNING: This usage of `abs` should only be done for existing STL overloads. +// Adding overloads directly in to the `std::` namespace is advised against: +// https://en.cppreference.com/w/cpp/language/extending_std +PYBIND11_UNARY_OPERATOR(abs, abs, std::abs(l)) +PYBIND11_UNARY_OPERATOR(hash, hash, std::hash()(l)) +PYBIND11_UNARY_OPERATOR(invert, operator~, (~l)) +PYBIND11_UNARY_OPERATOR(bool, operator!, !!l) +PYBIND11_UNARY_OPERATOR(int, int_, (int) l) +PYBIND11_UNARY_OPERATOR(float, float_, (double) l) + +#undef PYBIND11_BINARY_OPERATOR +#undef PYBIND11_INPLACE_OPERATOR +#undef PYBIND11_UNARY_OPERATOR +PYBIND11_NAMESPACE_END(detail) + +using detail::self; +// Add named operators so that they are accessible via `py::`. +using detail::hash; + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/options.h b/Libraries/pybind11-2.8.0/pybind11/options.h new file mode 100644 index 00000000..d74db1c6 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/options.h @@ -0,0 +1,65 @@ +/* + pybind11/options.h: global settings that are configurable at runtime. + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +class options { +public: + + // Default RAII constructor, which leaves settings as they currently are. + options() : previous_state(global_state()) {} + + // Class is non-copyable. + options(const options&) = delete; + options& operator=(const options&) = delete; + + // Destructor, which restores settings that were in effect before. 
+ ~options() { + global_state() = previous_state; + } + + // Setter methods (affect the global state): + + options& disable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = false; return *this; } + + options& enable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = true; return *this; } + + options& disable_function_signatures() & { global_state().show_function_signatures = false; return *this; } + + options& enable_function_signatures() & { global_state().show_function_signatures = true; return *this; } + + // Getter methods (return the global state): + + static bool show_user_defined_docstrings() { return global_state().show_user_defined_docstrings; } + + static bool show_function_signatures() { return global_state().show_function_signatures; } + + // This type is not meant to be allocated on the heap. + void* operator new(size_t) = delete; + +private: + + struct state { + bool show_user_defined_docstrings = true; //< Include user-supplied texts in docstrings. + bool show_function_signatures = true; //< Include auto-generated function signatures in docstrings. + }; + + static state &global_state() { + static state instance; + return instance; + } + + state previous_state; +}; + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/pybind11.h b/Libraries/pybind11-2.8.0/pybind11/pybind11.h new file mode 100644 index 00000000..370e52cf --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/pybind11.h @@ -0,0 +1,2463 @@ +/* + pybind11/pybind11.h: Main header file of the C++11 python + binding generator library + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "attr.h" +#include "gil.h" +#include "options.h" +#include "detail/class.h" +#include "detail/init.h" + +#include +#include +#include +#include +#include +#include + +#include + +#if defined(__cpp_lib_launder) && !(defined(_MSC_VER) && (_MSC_VER < 1914)) +# define PYBIND11_STD_LAUNDER std::launder +# define PYBIND11_HAS_STD_LAUNDER 1 +#else +# define PYBIND11_STD_LAUNDER +# define PYBIND11_HAS_STD_LAUNDER 0 +#endif +#if defined(__GNUG__) && !defined(__clang__) +# include +#endif + +/* https://stackoverflow.com/questions/46798456/handling-gccs-noexcept-type-warning + This warning is about ABI compatibility, not code health. + It is only actually needed in a couple places, but apparently GCC 7 "generates this warning if + and only if the first template instantiation ... involves noexcept" [stackoverflow], therefore + it could get triggered from seemingly random places, depending on user code. + No other GCC version generates this warning. + */ +#if defined(__GNUC__) && __GNUC__ == 7 +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wnoexcept-type" +#endif + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +PYBIND11_NAMESPACE_BEGIN(detail) + +// Apply all the extensions translators from a list +// Return true if one of the translators completed without raising an exception +// itself. Return of false indicates that if there are other translators +// available, they should be tried. +inline bool apply_exception_translators(std::forward_list& translators) { + auto last_exception = std::current_exception(); + + for (auto &translator : translators) { + try { + translator(last_exception); + return true; + } catch (...) { + last_exception = std::current_exception(); + } + } + return false; +} + +#if defined(_MSC_VER) +# define PYBIND11_COMPAT_STRDUP _strdup +#else +# define PYBIND11_COMPAT_STRDUP strdup +#endif + +PYBIND11_NAMESPACE_END(detail) + +/// Wraps an arbitrary C++ function/method/lambda function/.. 
into a callable Python object +class cpp_function : public function { +public: + cpp_function() = default; + // NOLINTNEXTLINE(google-explicit-constructor) + cpp_function(std::nullptr_t) { } + + /// Construct a cpp_function from a vanilla function pointer + template + // NOLINTNEXTLINE(google-explicit-constructor) + cpp_function(Return (*f)(Args...), const Extra&... extra) { + initialize(f, f, extra...); + } + + /// Construct a cpp_function from a lambda function (possibly with internal state) + template ::value>> + // NOLINTNEXTLINE(google-explicit-constructor) + cpp_function(Func &&f, const Extra&... extra) { + initialize(std::forward(f), + (detail::function_signature_t *) nullptr, extra...); + } + + /// Construct a cpp_function from a class method (non-const, no ref-qualifier) + template + // NOLINTNEXTLINE(google-explicit-constructor) + cpp_function(Return (Class::*f)(Arg...), const Extra&... extra) { + initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(std::forward(args)...); }, + (Return (*) (Class *, Arg...)) nullptr, extra...); + } + + /// Construct a cpp_function from a class method (non-const, lvalue ref-qualifier) + /// A copy of the overload for non-const functions without explicit ref-qualifier + /// but with an added `&`. + template + // NOLINTNEXTLINE(google-explicit-constructor) + cpp_function(Return (Class::*f)(Arg...)&, const Extra&... extra) { + initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(args...); }, + (Return (*) (Class *, Arg...)) nullptr, extra...); + } + + /// Construct a cpp_function from a class method (const, no ref-qualifier) + template + // NOLINTNEXTLINE(google-explicit-constructor) + cpp_function(Return (Class::*f)(Arg...) const, const Extra&... extra) { + initialize([f](const Class *c, Arg... 
args) -> Return { return (c->*f)(std::forward(args)...); }, + (Return (*)(const Class *, Arg ...)) nullptr, extra...); + } + + /// Construct a cpp_function from a class method (const, lvalue ref-qualifier) + /// A copy of the overload for const functions without explicit ref-qualifier + /// but with an added `&`. + template + // NOLINTNEXTLINE(google-explicit-constructor) + cpp_function(Return (Class::*f)(Arg...) const&, const Extra&... extra) { + initialize([f](const Class *c, Arg... args) -> Return { return (c->*f)(args...); }, + (Return (*)(const Class *, Arg ...)) nullptr, extra...); + } + + /// Return the function name + object name() const { return attr("__name__"); } + +protected: + struct InitializingFunctionRecordDeleter { + // `destruct(function_record, false)`: `initialize_generic` copies strings and + // takes care of cleaning up in case of exceptions. So pass `false` to `free_strings`. + void operator()(detail::function_record * rec) { destruct(rec, false); } + }; + using unique_function_record = std::unique_ptr; + + /// Space optimization: don't inline this frequently instantiated fragment + PYBIND11_NOINLINE unique_function_record make_function_record() { + return unique_function_record(new detail::function_record()); + } + + /// Special internal constructor for functors, lambda functions, etc. + template + void initialize(Func &&f, Return (*)(Args...), const Extra&... extra) { + using namespace detail; + struct capture { remove_reference_t f; }; + + /* Store the function including any extra state it might have (e.g. a lambda capture object) */ + // The unique_ptr makes sure nothing is leaked in case of an exception. 
+ auto unique_rec = make_function_record(); + auto rec = unique_rec.get(); + + /* Store the capture object directly in the function record if there is enough space */ + if (PYBIND11_SILENCE_MSVC_C4127(sizeof(capture) <= sizeof(rec->data))) { + /* Without these pragmas, GCC warns that there might not be + enough space to use the placement new operator. However, the + 'if' statement above ensures that this is the case. */ +#if defined(__GNUG__) && __GNUC__ >= 6 && !defined(__clang__) && !defined(__INTEL_COMPILER) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wplacement-new" +#endif + new ((capture *) &rec->data) capture { std::forward(f) }; +#if defined(__GNUG__) && __GNUC__ >= 6 && !defined(__clang__) && !defined(__INTEL_COMPILER) +# pragma GCC diagnostic pop +#endif +#if defined(__GNUG__) && !PYBIND11_HAS_STD_LAUNDER && !defined(__INTEL_COMPILER) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif + // UB without std::launder, but without breaking ABI and/or + // a significant refactoring it's "impossible" to solve. 
+ if (!std::is_trivially_destructible::value) + rec->free_data = [](function_record *r) { + auto data = PYBIND11_STD_LAUNDER((capture *) &r->data); + (void) data; + data->~capture(); + }; +#if defined(__GNUG__) && !PYBIND11_HAS_STD_LAUNDER && !defined(__INTEL_COMPILER) +# pragma GCC diagnostic pop +#endif + } else { + rec->data[0] = new capture { std::forward(f) }; + rec->free_data = [](function_record *r) { delete ((capture *) r->data[0]); }; + } + + /* Type casters for the function arguments and return value */ + using cast_in = argument_loader; + using cast_out = make_caster< + conditional_t::value, void_type, Return> + >; + + static_assert(expected_num_args(sizeof...(Args), cast_in::has_args, cast_in::has_kwargs), + "The number of argument annotations does not match the number of function arguments"); + + /* Dispatch code which converts function arguments and performs the actual function call */ + rec->impl = [](function_call &call) -> handle { + cast_in args_converter; + + /* Try to cast the function arguments into the C++ domain */ + if (!args_converter.load_args(call)) + return PYBIND11_TRY_NEXT_OVERLOAD; + + /* Invoke call policy pre-call hook */ + process_attributes::precall(call); + + /* Get a pointer to the capture object */ + auto data = (sizeof(capture) <= sizeof(call.func.data) + ? 
&call.func.data : call.func.data[0]); + auto *cap = const_cast(reinterpret_cast(data)); + + /* Override policy for rvalues -- usually to enforce rvp::move on an rvalue */ + return_value_policy policy = return_value_policy_override::policy(call.func.policy); + + /* Function scope guard -- defaults to the compile-to-nothing `void_type` */ + using Guard = extract_guard_t; + + /* Perform the function call */ + handle result = cast_out::cast( + std::move(args_converter).template call(cap->f), policy, call.parent); + + /* Invoke call policy post-call hook */ + process_attributes::postcall(call, result); + + return result; + }; + + /* Process any user-provided function attributes */ + process_attributes::init(extra..., rec); + + { + constexpr bool has_kw_only_args = any_of...>::value, + has_pos_only_args = any_of...>::value, + has_args = any_of...>::value, + has_arg_annotations = any_of...>::value; + static_assert(has_arg_annotations || !has_kw_only_args, "py::kw_only requires the use of argument annotations"); + static_assert(has_arg_annotations || !has_pos_only_args, "py::pos_only requires the use of argument annotations (for docstrings and aligning the annotations to the argument)"); + static_assert(!(has_args && has_kw_only_args), "py::kw_only cannot be combined with a py::args argument"); + } + + /* Generate a readable signature describing the function's arguments and return value types */ + static constexpr auto signature = _("(") + cast_in::arg_names + _(") -> ") + cast_out::name; + PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types(); + + /* Register the function with Python from generic (non-templated) code */ + // Pass on the ownership over the `unique_rec` to `initialize_generic`. `rec` stays valid. 
+ initialize_generic(std::move(unique_rec), signature.text, types.data(), sizeof...(Args)); + + if (cast_in::has_args) rec->has_args = true; + if (cast_in::has_kwargs) rec->has_kwargs = true; + + /* Stash some additional information used by an important optimization in 'functional.h' */ + using FunctionType = Return (*)(Args...); + constexpr bool is_function_ptr = + std::is_convertible::value && + sizeof(capture) == sizeof(void *); + if (is_function_ptr) { + rec->is_stateless = true; + rec->data[1] = const_cast(reinterpret_cast(&typeid(FunctionType))); + } + } + + // Utility class that keeps track of all duplicated strings, and cleans them up in its destructor, + // unless they are released. Basically a RAII-solution to deal with exceptions along the way. + class strdup_guard { + public: + ~strdup_guard() { + for (auto s : strings) + std::free(s); + } + char *operator()(const char *s) { + auto t = PYBIND11_COMPAT_STRDUP(s); + strings.push_back(t); + return t; + } + void release() { + strings.clear(); + } + private: + std::vector strings; + }; + + /// Register a function call with Python (generic non-templated code goes here) + void initialize_generic(unique_function_record &&unique_rec, const char *text, + const std::type_info *const *types, size_t args) { + // Do NOT receive `unique_rec` by value. If this function fails to move out the unique_ptr, + // we do not want this to destuct the pointer. `initialize` (the caller) still relies on the + // pointee being alive after this call. Only move out if a `capsule` is going to keep it alive. + auto rec = unique_rec.get(); + + // Keep track of strdup'ed strings, and clean them up as long as the function's capsule + // has not taken ownership yet (when `unique_rec.release()` is called). + // Note: This cannot easily be fixed by a `unique_ptr` with custom deleter, because the strings + // are only referenced before strdup'ing. 
So only *after* the following block could `destruct` + // safely be called, but even then, `repr` could still throw in the middle of copying all strings. + strdup_guard guarded_strdup; + + /* Create copies of all referenced C-style strings */ + rec->name = guarded_strdup(rec->name ? rec->name : ""); + if (rec->doc) rec->doc = guarded_strdup(rec->doc); + for (auto &a: rec->args) { + if (a.name) + a.name = guarded_strdup(a.name); + if (a.descr) + a.descr = guarded_strdup(a.descr); + else if (a.value) + a.descr = guarded_strdup(repr(a.value).cast().c_str()); + } + + rec->is_constructor + = (strcmp(rec->name, "__init__") == 0) || (strcmp(rec->name, "__setstate__") == 0); + +#if !defined(NDEBUG) && !defined(PYBIND11_DISABLE_NEW_STYLE_INIT_WARNING) + if (rec->is_constructor && !rec->is_new_style_constructor) { + const auto class_name = detail::get_fully_qualified_tp_name((PyTypeObject *) rec->scope.ptr()); + const auto func_name = std::string(rec->name); + PyErr_WarnEx( + PyExc_FutureWarning, + ("pybind11-bound class '" + class_name + "' is using an old-style " + "placement-new '" + func_name + "' which has been deprecated. See " + "the upgrade guide in pybind11's docs. This message is only visible " + "when compiled in debug mode.").c_str(), 0 + ); + } +#endif + + /* Generate a proper function signature */ + std::string signature; + size_t type_index = 0, arg_index = 0; + for (auto *pc = text; *pc != '\0'; ++pc) { + const auto c = *pc; + + if (c == '{') { + // Write arg name for everything except *args and **kwargs. 
+ if (*(pc + 1) == '*') + continue; + // Separator for keyword-only arguments, placed before the kw + // arguments start + if (rec->nargs_kw_only > 0 && arg_index + rec->nargs_kw_only == args) + signature += "*, "; + if (arg_index < rec->args.size() && rec->args[arg_index].name) { + signature += rec->args[arg_index].name; + } else if (arg_index == 0 && rec->is_method) { + signature += "self"; + } else { + signature += "arg" + std::to_string(arg_index - (rec->is_method ? 1 : 0)); + } + signature += ": "; + } else if (c == '}') { + // Write default value if available. + if (arg_index < rec->args.size() && rec->args[arg_index].descr) { + signature += " = "; + signature += rec->args[arg_index].descr; + } + // Separator for positional-only arguments (placed after the + // argument, rather than before like * + if (rec->nargs_pos_only > 0 && (arg_index + 1) == rec->nargs_pos_only) + signature += ", /"; + arg_index++; + } else if (c == '%') { + const std::type_info *t = types[type_index++]; + if (!t) + pybind11_fail("Internal error while parsing type signature (1)"); + if (auto tinfo = detail::get_type_info(*t)) { + handle th((PyObject *) tinfo->type); + signature += + th.attr("__module__").cast() + "." + + th.attr("__qualname__").cast(); // Python 3.3+, but we backport it to earlier versions + } else if (rec->is_new_style_constructor && arg_index == 0) { + // A new-style `__init__` takes `self` as `value_and_holder`. + // Rewrite it to the proper class type. + signature += + rec->scope.attr("__module__").cast() + "." 
+ + rec->scope.attr("__qualname__").cast(); + } else { + std::string tname(t->name()); + detail::clean_type_id(tname); + signature += tname; + } + } else { + signature += c; + } + } + + if (arg_index != args || types[type_index] != nullptr) + pybind11_fail("Internal error while parsing type signature (2)"); + +#if PY_MAJOR_VERSION < 3 + if (strcmp(rec->name, "__next__") == 0) { + std::free(rec->name); + rec->name = guarded_strdup("next"); + } else if (strcmp(rec->name, "__bool__") == 0) { + std::free(rec->name); + rec->name = guarded_strdup("__nonzero__"); + } +#endif + rec->signature = guarded_strdup(signature.c_str()); + rec->args.shrink_to_fit(); + rec->nargs = (std::uint16_t) args; + + if (rec->sibling && PYBIND11_INSTANCE_METHOD_CHECK(rec->sibling.ptr())) + rec->sibling = PYBIND11_INSTANCE_METHOD_GET_FUNCTION(rec->sibling.ptr()); + + detail::function_record *chain = nullptr, *chain_start = rec; + if (rec->sibling) { + if (PyCFunction_Check(rec->sibling.ptr())) { + auto *self = PyCFunction_GET_SELF(rec->sibling.ptr()); + capsule rec_capsule = isinstance(self) ? 
reinterpret_borrow(self) : capsule(self); + chain = (detail::function_record *) rec_capsule; + /* Never append a method to an overload chain of a parent class; + instead, hide the parent's overloads in this case */ + if (!chain->scope.is(rec->scope)) + chain = nullptr; + } + // Don't trigger for things like the default __init__, which are wrapper_descriptors that we are intentionally replacing + else if (!rec->sibling.is_none() && rec->name[0] != '_') + pybind11_fail("Cannot overload existing non-function object \"" + std::string(rec->name) + + "\" with a function of the same name"); + } + + if (!chain) { + /* No existing overload was found, create a new function object */ + rec->def = new PyMethodDef(); + std::memset(rec->def, 0, sizeof(PyMethodDef)); + rec->def->ml_name = rec->name; + rec->def->ml_meth + = reinterpret_cast(reinterpret_cast(dispatcher)); + rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS; + + capsule rec_capsule(unique_rec.release(), [](void *ptr) { + destruct((detail::function_record *) ptr); + }); + guarded_strdup.release(); + + object scope_module; + if (rec->scope) { + if (hasattr(rec->scope, "__module__")) { + scope_module = rec->scope.attr("__module__"); + } else if (hasattr(rec->scope, "__name__")) { + scope_module = rec->scope.attr("__name__"); + } + } + + m_ptr = PyCFunction_NewEx(rec->def, rec_capsule.ptr(), scope_module.ptr()); + if (!m_ptr) + pybind11_fail("cpp_function::cpp_function(): Could not allocate function object"); + } else { + /* Append at the beginning or end of the overload chain */ + m_ptr = rec->sibling.ptr(); + inc_ref(); + if (chain->is_method != rec->is_method) + pybind11_fail("overloading a method with both static and instance methods is not supported; " + #if defined(NDEBUG) + "compile in debug mode for more details" + #else + "error while attempting to bind " + std::string(rec->is_method ? "instance" : "static") + " method " + + std::string(pybind11::str(rec->scope.attr("__name__"))) + "." 
+ std::string(rec->name) + signature + #endif + ); + + if (rec->prepend) { + // Beginning of chain; we need to replace the capsule's current head-of-the-chain + // pointer with this one, then make this one point to the previous head of the + // chain. + chain_start = rec; + rec->next = chain; + auto rec_capsule = reinterpret_borrow(((PyCFunctionObject *) m_ptr)->m_self); + rec_capsule.set_pointer(unique_rec.release()); + guarded_strdup.release(); + } else { + // Or end of chain (normal behavior) + chain_start = chain; + while (chain->next) + chain = chain->next; + chain->next = unique_rec.release(); + guarded_strdup.release(); + } + } + + std::string signatures; + int index = 0; + /* Create a nice pydoc rec including all signatures and + docstrings of the functions in the overload chain */ + if (chain && options::show_function_signatures()) { + // First a generic signature + signatures += rec->name; + signatures += "(*args, **kwargs)\n"; + signatures += "Overloaded function.\n\n"; + } + // Then specific overload signatures + bool first_user_def = true; + for (auto it = chain_start; it != nullptr; it = it->next) { + if (options::show_function_signatures()) { + if (index > 0) signatures += "\n"; + if (chain) + signatures += std::to_string(++index) + ". 
"; + signatures += rec->name; + signatures += it->signature; + signatures += "\n"; + } + if (it->doc && it->doc[0] != '\0' && options::show_user_defined_docstrings()) { + // If we're appending another docstring, and aren't printing function signatures, we + // need to append a newline first: + if (!options::show_function_signatures()) { + if (first_user_def) first_user_def = false; + else signatures += "\n"; + } + if (options::show_function_signatures()) signatures += "\n"; + signatures += it->doc; + if (options::show_function_signatures()) signatures += "\n"; + } + } + + /* Install docstring */ + auto *func = (PyCFunctionObject *) m_ptr; + std::free(const_cast(func->m_ml->ml_doc)); + // Install docstring if it's non-empty (when at least one option is enabled) + func->m_ml->ml_doc + = signatures.empty() ? nullptr : PYBIND11_COMPAT_STRDUP(signatures.c_str()); + + if (rec->is_method) { + m_ptr = PYBIND11_INSTANCE_METHOD_NEW(m_ptr, rec->scope.ptr()); + if (!m_ptr) + pybind11_fail("cpp_function::cpp_function(): Could not allocate instance method object"); + Py_DECREF(func); + } + } + + /// When a cpp_function is GCed, release any memory allocated by pybind11 + static void destruct(detail::function_record *rec, bool free_strings = true) { + // If on Python 3.9, check the interpreter "MICRO" (patch) version. + // If this is running on 3.9.0, we have to work around a bug. + #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9 + static bool is_zero = Py_GetVersion()[4] == '0'; + #endif + + while (rec) { + detail::function_record *next = rec->next; + if (rec->free_data) + rec->free_data(rec); + // During initialization, these strings might not have been copied yet, + // so they cannot be freed. Once the function has been created, they can. + // Check `make_function_record` for more details. 
+ if (free_strings) { + std::free((char *) rec->name); + std::free((char *) rec->doc); + std::free((char *) rec->signature); + for (auto &arg: rec->args) { + std::free(const_cast(arg.name)); + std::free(const_cast(arg.descr)); + } + } + for (auto &arg: rec->args) + arg.value.dec_ref(); + if (rec->def) { + std::free(const_cast(rec->def->ml_doc)); + // Python 3.9.0 decref's these in the wrong order; rec->def + // If loaded on 3.9.0, let these leak (use Python 3.9.1 at runtime to fix) + // See https://github.com/python/cpython/pull/22670 + #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9 + if (!is_zero) + delete rec->def; + #else + delete rec->def; + #endif + } + delete rec; + rec = next; + } + } + + + /// Main dispatch logic for calls to functions bound using pybind11 + static PyObject *dispatcher(PyObject *self, PyObject *args_in, PyObject *kwargs_in) { + using namespace detail; + + /* Iterator over the list of potentially admissible overloads */ + const function_record *overloads = (function_record *) PyCapsule_GetPointer(self, nullptr), + *it = overloads; + + /* Need to know how many arguments + keyword arguments there are to pick the right overload */ + const auto n_args_in = (size_t) PyTuple_GET_SIZE(args_in); + + handle parent = n_args_in > 0 ? PyTuple_GET_ITEM(args_in, 0) : nullptr, + result = PYBIND11_TRY_NEXT_OVERLOAD; + + auto self_value_and_holder = value_and_holder(); + if (overloads->is_constructor) { + if (!parent || !PyObject_TypeCheck(parent.ptr(), (PyTypeObject *) overloads->scope.ptr())) { + PyErr_SetString(PyExc_TypeError, "__init__(self, ...) 
called with invalid or missing `self` argument"); + return nullptr; + } + + const auto tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr()); + const auto pi = reinterpret_cast(parent.ptr()); + self_value_and_holder = pi->get_value_and_holder(tinfo, true); + + // If this value is already registered it must mean __init__ is invoked multiple times; + // we really can't support that in C++, so just ignore the second __init__. + if (self_value_and_holder.instance_registered()) + return none().release().ptr(); + } + + try { + // We do this in two passes: in the first pass, we load arguments with `convert=false`; + // in the second, we allow conversion (except for arguments with an explicit + // py::arg().noconvert()). This lets us prefer calls without conversion, with + // conversion as a fallback. + std::vector second_pass; + + // However, if there are no overloads, we can just skip the no-convert pass entirely + const bool overloaded = it != nullptr && it->next != nullptr; + + for (; it != nullptr; it = it->next) { + + /* For each overload: + 1. Copy all positional arguments we were given, also checking to make sure that + named positional arguments weren't *also* specified via kwarg. + 2. If we weren't given enough, try to make up the omitted ones by checking + whether they were provided by a kwarg matching the `py::arg("name")` name. If + so, use it (and remove it from kwargs; if not, see if the function binding + provided a default that we can use. + 3. Ensure that either all keyword arguments were "consumed", or that the function + takes a kwargs argument to accept unconsumed kwargs. + 4. Any positional arguments still left get put into a tuple (for args), and any + leftover kwargs get put into a dict. + 5. Pack everything into a vector; if we have py::args or py::kwargs, they are an + extra tuple or dict at the end of the positional arguments. + 6. 
Call the function call dispatcher (function_record::impl) + + If one of these fail, move on to the next overload and keep trying until we get a + result other than PYBIND11_TRY_NEXT_OVERLOAD. + */ + + const function_record &func = *it; + size_t num_args = func.nargs; // Number of positional arguments that we need + if (func.has_args) --num_args; // (but don't count py::args + if (func.has_kwargs) --num_args; // or py::kwargs) + size_t pos_args = num_args - func.nargs_kw_only; + + if (!func.has_args && n_args_in > pos_args) + continue; // Too many positional arguments for this overload + + if (n_args_in < pos_args && func.args.size() < pos_args) + continue; // Not enough positional arguments given, and not enough defaults to fill in the blanks + + function_call call(func, parent); + + size_t args_to_copy = (std::min)(pos_args, n_args_in); // Protect std::min with parentheses + size_t args_copied = 0; + + // 0. Inject new-style `self` argument + if (func.is_new_style_constructor) { + // The `value` may have been preallocated by an old-style `__init__` + // if it was a preceding candidate for overload resolution. + if (self_value_and_holder) + self_value_and_holder.type->dealloc(self_value_and_holder); + + call.init_self = PyTuple_GET_ITEM(args_in, 0); + call.args.emplace_back(reinterpret_cast(&self_value_and_holder)); + call.args_convert.push_back(false); + ++args_copied; + } + + // 1. Copy any position arguments given. + bool bad_arg = false; + for (; args_copied < args_to_copy; ++args_copied) { + const argument_record *arg_rec = args_copied < func.args.size() ? &func.args[args_copied] : nullptr; + if (kwargs_in && arg_rec && arg_rec->name && dict_getitemstring(kwargs_in, arg_rec->name)) { + bad_arg = true; + break; + } + + handle arg(PyTuple_GET_ITEM(args_in, args_copied)); + if (arg_rec && !arg_rec->none && arg.is_none()) { + bad_arg = true; + break; + } + call.args.push_back(arg); + call.args_convert.push_back(arg_rec ? 
arg_rec->convert : true); + } + if (bad_arg) + continue; // Maybe it was meant for another overload (issue #688) + + // We'll need to copy this if we steal some kwargs for defaults + dict kwargs = reinterpret_borrow(kwargs_in); + + // 1.5. Fill in any missing pos_only args from defaults if they exist + if (args_copied < func.nargs_pos_only) { + for (; args_copied < func.nargs_pos_only; ++args_copied) { + const auto &arg_rec = func.args[args_copied]; + handle value; + + if (arg_rec.value) { + value = arg_rec.value; + } + if (value) { + call.args.push_back(value); + call.args_convert.push_back(arg_rec.convert); + } else + break; + } + + if (args_copied < func.nargs_pos_only) + continue; // Not enough defaults to fill the positional arguments + } + + // 2. Check kwargs and, failing that, defaults that may help complete the list + if (args_copied < num_args) { + bool copied_kwargs = false; + + for (; args_copied < num_args; ++args_copied) { + const auto &arg_rec = func.args[args_copied]; + + handle value; + if (kwargs_in && arg_rec.name) + value = dict_getitemstring(kwargs.ptr(), arg_rec.name); + + if (value) { + // Consume a kwargs value + if (!copied_kwargs) { + kwargs = reinterpret_steal(PyDict_Copy(kwargs.ptr())); + copied_kwargs = true; + } + if (PyDict_DelItemString(kwargs.ptr(), arg_rec.name) == -1) { + throw error_already_set(); + } + } else if (arg_rec.value) { + value = arg_rec.value; + } + + if (!arg_rec.none && value.is_none()) { + break; + } + + if (value) { + call.args.push_back(value); + call.args_convert.push_back(arg_rec.convert); + } + else + break; + } + + if (args_copied < num_args) + continue; // Not enough arguments, defaults, or kwargs to fill the positional arguments + } + + // 3. Check everything was consumed (unless we have a kwargs arg) + if (kwargs && !kwargs.empty() && !func.has_kwargs) + continue; // Unconsumed kwargs, but no py::kwargs argument to accept them + + // 4a. 
If we have a py::args argument, create a new tuple with leftovers + if (func.has_args) { + tuple extra_args; + if (args_to_copy == 0) { + // We didn't copy out any position arguments from the args_in tuple, so we + // can reuse it directly without copying: + extra_args = reinterpret_borrow(args_in); + } else if (args_copied >= n_args_in) { + extra_args = tuple(0); + } else { + size_t args_size = n_args_in - args_copied; + extra_args = tuple(args_size); + for (size_t i = 0; i < args_size; ++i) { + extra_args[i] = PyTuple_GET_ITEM(args_in, args_copied + i); + } + } + call.args.push_back(extra_args); + call.args_convert.push_back(false); + call.args_ref = std::move(extra_args); + } + + // 4b. If we have a py::kwargs, pass on any remaining kwargs + if (func.has_kwargs) { + if (!kwargs.ptr()) + kwargs = dict(); // If we didn't get one, send an empty one + call.args.push_back(kwargs); + call.args_convert.push_back(false); + call.kwargs_ref = std::move(kwargs); + } + + // 5. Put everything in a vector. Not technically step 5, we've been building it + // in `call.args` all along. + #if !defined(NDEBUG) + if (call.args.size() != func.nargs || call.args_convert.size() != func.nargs) + pybind11_fail("Internal error: function call dispatcher inserted wrong number of arguments!"); + #endif + + std::vector second_pass_convert; + if (overloaded) { + // We're in the first no-convert pass, so swap out the conversion flags for a + // set of all-false flags. If the call fails, we'll swap the flags back in for + // the conversion-allowed call below. + second_pass_convert.resize(func.nargs, false); + call.args_convert.swap(second_pass_convert); + } + + // 6. Call the function. 
+ try { + loader_life_support guard{}; + result = func.impl(call); + } catch (reference_cast_error &) { + result = PYBIND11_TRY_NEXT_OVERLOAD; + } + + if (result.ptr() != PYBIND11_TRY_NEXT_OVERLOAD) + break; + + if (overloaded) { + // The (overloaded) call failed; if the call has at least one argument that + // permits conversion (i.e. it hasn't been explicitly specified `.noconvert()`) + // then add this call to the list of second pass overloads to try. + for (size_t i = func.is_method ? 1 : 0; i < pos_args; i++) { + if (second_pass_convert[i]) { + // Found one: swap the converting flags back in and store the call for + // the second pass. + call.args_convert.swap(second_pass_convert); + second_pass.push_back(std::move(call)); + break; + } + } + } + } + + if (overloaded && !second_pass.empty() && result.ptr() == PYBIND11_TRY_NEXT_OVERLOAD) { + // The no-conversion pass finished without success, try again with conversion allowed + for (auto &call : second_pass) { + try { + loader_life_support guard{}; + result = call.func.impl(call); + } catch (reference_cast_error &) { + result = PYBIND11_TRY_NEXT_OVERLOAD; + } + + if (result.ptr() != PYBIND11_TRY_NEXT_OVERLOAD) { + // The error reporting logic below expects 'it' to be valid, as it would be + // if we'd encountered this failure in the first-pass loop. + if (!result) + it = &call.func; + break; + } + } + } + } catch (error_already_set &e) { + e.restore(); + return nullptr; +#ifdef __GLIBCXX__ + } catch ( abi::__forced_unwind& ) { + throw; +#endif + } catch (...) { + /* When an exception is caught, give each registered exception + translator a chance to translate it to a Python exception. First + all module-local translators will be tried in reverse order of + registration. If none of the module-locale translators handle + the exception (or there are no module-locale translators) then + the global translators will be tried, also in reverse order of + registration. 
+ + A translator may choose to do one of the following: + + - catch the exception and call PyErr_SetString or PyErr_SetObject + to set a standard (or custom) Python exception, or + - do nothing and let the exception fall through to the next translator, or + - delegate translation to the next translator by throwing a new type of exception. */ + + auto &local_exception_translators = get_local_internals().registered_exception_translators; + if (detail::apply_exception_translators(local_exception_translators)) { + return nullptr; + } + auto &exception_translators = get_internals().registered_exception_translators; + if (detail::apply_exception_translators(exception_translators)) { + return nullptr; + } + + PyErr_SetString(PyExc_SystemError, "Exception escaped from default exception translator!"); + return nullptr; + } + + auto append_note_if_missing_header_is_suspected = [](std::string &msg) { + if (msg.find("std::") != std::string::npos) { + msg += "\n\n" + "Did you forget to `#include `? Or ,\n" + ", , etc. Some automatic\n" + "conversions are optional and require extra headers to be included\n" + "when compiling your pybind11 module."; + } + }; + + if (result.ptr() == PYBIND11_TRY_NEXT_OVERLOAD) { + if (overloads->is_operator) + return handle(Py_NotImplemented).inc_ref().ptr(); + + std::string msg = std::string(overloads->name) + "(): incompatible " + + std::string(overloads->is_constructor ? "constructor" : "function") + + " arguments. The following argument types are supported:\n"; + + int ctr = 0; + for (const function_record *it2 = overloads; it2 != nullptr; it2 = it2->next) { + msg += " "+ std::to_string(++ctr) + ". "; + + bool wrote_sig = false; + if (overloads->is_constructor) { + // For a constructor, rewrite `(self: Object, arg0, ...) 
-> NoneType` as `Object(arg0, ...)` + std::string sig = it2->signature; + size_t start = sig.find('(') + 7; // skip "(self: " + if (start < sig.size()) { + // End at the , for the next argument + size_t end = sig.find(", "), next = end + 2; + size_t ret = sig.rfind(" -> "); + // Or the ), if there is no comma: + if (end >= sig.size()) next = end = sig.find(')'); + if (start < end && next < sig.size()) { + msg.append(sig, start, end - start); + msg += '('; + msg.append(sig, next, ret - next); + wrote_sig = true; + } + } + } + if (!wrote_sig) msg += it2->signature; + + msg += "\n"; + } + msg += "\nInvoked with: "; + auto args_ = reinterpret_borrow(args_in); + bool some_args = false; + for (size_t ti = overloads->is_constructor ? 1 : 0; ti < args_.size(); ++ti) { + if (!some_args) some_args = true; + else msg += ", "; + try { + msg += pybind11::repr(args_[ti]); + } catch (const error_already_set&) { + msg += ""; + } + } + if (kwargs_in) { + auto kwargs = reinterpret_borrow(kwargs_in); + if (!kwargs.empty()) { + if (some_args) msg += "; "; + msg += "kwargs: "; + bool first = true; + for (auto kwarg : kwargs) { + if (first) first = false; + else msg += ", "; + msg += pybind11::str("{}=").format(kwarg.first); + try { + msg += pybind11::repr(kwarg.second); + } catch (const error_already_set&) { + msg += ""; + } + } + } + } + + append_note_if_missing_header_is_suspected(msg); + PyErr_SetString(PyExc_TypeError, msg.c_str()); + return nullptr; + } + if (!result) { + std::string msg = "Unable to convert function return value to a " + "Python type! 
The signature was\n\t"; + msg += it->signature; + append_note_if_missing_header_is_suspected(msg); + PyErr_SetString(PyExc_TypeError, msg.c_str()); + return nullptr; + } + if (overloads->is_constructor && !self_value_and_holder.holder_constructed()) { + auto *pi = reinterpret_cast(parent.ptr()); + self_value_and_holder.type->init_instance(pi, nullptr); + } + return result.ptr(); + } +}; + + +/// Wrapper for Python extension modules +class module_ : public object { +public: + PYBIND11_OBJECT_DEFAULT(module_, object, PyModule_Check) + + /// Create a new top-level Python module with the given name and docstring + PYBIND11_DEPRECATED("Use PYBIND11_MODULE or module_::create_extension_module instead") + explicit module_(const char *name, const char *doc = nullptr) { +#if PY_MAJOR_VERSION >= 3 + *this = create_extension_module(name, doc, new PyModuleDef()); +#else + *this = create_extension_module(name, doc, nullptr); +#endif + } + + /** \rst + Create Python binding for a new function within the module scope. ``Func`` + can be a plain C++ function, a function pointer, or a lambda function. For + details on the ``Extra&& ... extra`` argument, see section :ref:`extras`. + \endrst */ + template + module_ &def(const char *name_, Func &&f, const Extra& ... extra) { + cpp_function func(std::forward(f), name(name_), scope(*this), + sibling(getattr(*this, name_, none())), extra...); + // NB: allow overwriting here because cpp_function sets up a chain with the intention of + // overwriting (and has already checked internally that it isn't overwriting non-functions). + add_object(name_, func, true /* overwrite */); + return *this; + } + + /** \rst + Create and return a new Python submodule with the given name and docstring. + This also works recursively, i.e. + + .. 
code-block:: cpp + + py::module_ m("example", "pybind11 example plugin"); + py::module_ m2 = m.def_submodule("sub", "A submodule of 'example'"); + py::module_ m3 = m2.def_submodule("subsub", "A submodule of 'example.sub'"); + \endrst */ + module_ def_submodule(const char *name, const char *doc = nullptr) { + std::string full_name = std::string(PyModule_GetName(m_ptr)) + + std::string(".") + std::string(name); + auto result = reinterpret_borrow(PyImport_AddModule(full_name.c_str())); + if (doc && options::show_user_defined_docstrings()) + result.attr("__doc__") = pybind11::str(doc); + attr(name) = result; + return result; + } + + /// Import and return a module or throws `error_already_set`. + static module_ import(const char *name) { + PyObject *obj = PyImport_ImportModule(name); + if (!obj) + throw error_already_set(); + return reinterpret_steal(obj); + } + + /// Reload the module or throws `error_already_set`. + void reload() { + PyObject *obj = PyImport_ReloadModule(ptr()); + if (!obj) + throw error_already_set(); + *this = reinterpret_steal(obj); + } + + /** \rst + Adds an object to the module using the given name. Throws if an object with the given name + already exists. + + ``overwrite`` should almost always be false: attempting to overwrite objects that pybind11 has + established will, in most cases, break things. + \endrst */ + PYBIND11_NOINLINE void add_object(const char *name, handle obj, bool overwrite = false) { + if (!overwrite && hasattr(*this, name)) + pybind11_fail("Error during initialization: multiple incompatible definitions with name \"" + + std::string(name) + "\""); + + PyModule_AddObject(ptr(), name, obj.inc_ref().ptr() /* steals a reference */); + } + +#if PY_MAJOR_VERSION >= 3 + using module_def = PyModuleDef; +#else + struct module_def {}; +#endif + + /** \rst + Create a new top-level module that can be used as the main module of a C extension. + + For Python 3, ``def`` should point to a statically allocated module_def. 
+ For Python 2, ``def`` can be a nullptr and is completely ignored. + \endrst */ + static module_ create_extension_module(const char *name, const char *doc, module_def *def) { +#if PY_MAJOR_VERSION >= 3 + // module_def is PyModuleDef + def = new (def) PyModuleDef { // Placement new (not an allocation). + /* m_base */ PyModuleDef_HEAD_INIT, + /* m_name */ name, + /* m_doc */ options::show_user_defined_docstrings() ? doc : nullptr, + /* m_size */ -1, + /* m_methods */ nullptr, + /* m_slots */ nullptr, + /* m_traverse */ nullptr, + /* m_clear */ nullptr, + /* m_free */ nullptr + }; + auto m = PyModule_Create(def); +#else + // Ignore module_def *def; only necessary for Python 3 + (void) def; + auto m = Py_InitModule3(name, nullptr, options::show_user_defined_docstrings() ? doc : nullptr); +#endif + if (m == nullptr) { + if (PyErr_Occurred()) + throw error_already_set(); + pybind11_fail("Internal error in module_::create_extension_module()"); + } + // TODO: Should be reinterpret_steal for Python 3, but Python also steals it again when returned from PyInit_... + // For Python 2, reinterpret_borrow is correct. + return reinterpret_borrow(m); + } +}; + +// When inside a namespace (or anywhere as long as it's not the first item on a line), +// C++20 allows "module" to be used. This is provided for backward compatibility, and for +// simplicity, if someone wants to use py::module for example, that is perfectly safe. +using module = module_; + +/// \ingroup python_builtins +/// Return a dictionary representing the global variables in the current execution frame, +/// or ``__main__.__dict__`` if there is no frame (usually when the interpreter is embedded). +inline dict globals() { + PyObject *p = PyEval_GetGlobals(); + return reinterpret_borrow(p ? 
p : module_::import("__main__").attr("__dict__").ptr()); +} + +PYBIND11_NAMESPACE_BEGIN(detail) +/// Generic support for creating new Python heap types +class generic_type : public object { +public: + PYBIND11_OBJECT_DEFAULT(generic_type, object, PyType_Check) +protected: + void initialize(const type_record &rec) { + if (rec.scope && hasattr(rec.scope, "__dict__") && rec.scope.attr("__dict__").contains(rec.name)) + pybind11_fail("generic_type: cannot initialize type \"" + std::string(rec.name) + + "\": an object with that name is already defined"); + + if ((rec.module_local ? get_local_type_info(*rec.type) : get_global_type_info(*rec.type)) + != nullptr) + pybind11_fail("generic_type: type \"" + std::string(rec.name) + + "\" is already registered!"); + + m_ptr = make_new_python_type(rec); + + /* Register supplemental type information in C++ dict */ + auto *tinfo = new detail::type_info(); + tinfo->type = (PyTypeObject *) m_ptr; + tinfo->cpptype = rec.type; + tinfo->type_size = rec.type_size; + tinfo->type_align = rec.type_align; + tinfo->operator_new = rec.operator_new; + tinfo->holder_size_in_ptrs = size_in_ptrs(rec.holder_size); + tinfo->init_instance = rec.init_instance; + tinfo->dealloc = rec.dealloc; + tinfo->simple_type = true; + tinfo->simple_ancestors = true; + tinfo->default_holder = rec.default_holder; + tinfo->module_local = rec.module_local; + + auto &internals = get_internals(); + auto tindex = std::type_index(*rec.type); + tinfo->direct_conversions = &internals.direct_conversions[tindex]; + if (rec.module_local) + get_local_internals().registered_types_cpp[tindex] = tinfo; + else + internals.registered_types_cpp[tindex] = tinfo; + internals.registered_types_py[(PyTypeObject *) m_ptr] = { tinfo }; + + if (rec.bases.size() > 1 || rec.multiple_inheritance) { + mark_parents_nonsimple(tinfo->type); + tinfo->simple_ancestors = false; + } + else if (rec.bases.size() == 1) { + auto parent_tinfo = get_type_info((PyTypeObject *) rec.bases[0].ptr()); + 
tinfo->simple_ancestors = parent_tinfo->simple_ancestors; + } + + if (rec.module_local) { + // Stash the local typeinfo and loader so that external modules can access it. + tinfo->module_local_load = &type_caster_generic::local_load; + setattr(m_ptr, PYBIND11_MODULE_LOCAL_ID, capsule(tinfo)); + } + } + + /// Helper function which tags all parents of a type using mult. inheritance + void mark_parents_nonsimple(PyTypeObject *value) { + auto t = reinterpret_borrow(value->tp_bases); + for (handle h : t) { + auto tinfo2 = get_type_info((PyTypeObject *) h.ptr()); + if (tinfo2) + tinfo2->simple_type = false; + mark_parents_nonsimple((PyTypeObject *) h.ptr()); + } + } + + void install_buffer_funcs( + buffer_info *(*get_buffer)(PyObject *, void *), + void *get_buffer_data) { + auto *type = (PyHeapTypeObject*) m_ptr; + auto tinfo = detail::get_type_info(&type->ht_type); + + if (!type->ht_type.tp_as_buffer) + pybind11_fail( + "To be able to register buffer protocol support for the type '" + + get_fully_qualified_tp_name(tinfo->type) + + "' the associated class<>(..) invocation must " + "include the pybind11::buffer_protocol() annotation!"); + + tinfo->get_buffer = get_buffer; + tinfo->get_buffer_data = get_buffer_data; + } + + // rec_func must be set for either fget or fset. + void def_property_static_impl(const char *name, + handle fget, handle fset, + detail::function_record *rec_func) { + const auto is_static = (rec_func != nullptr) && !(rec_func->is_method && rec_func->scope); + const auto has_doc = (rec_func != nullptr) && (rec_func->doc != nullptr) + && pybind11::options::show_user_defined_docstrings(); + auto property = handle((PyObject *) (is_static ? get_internals().static_property_type + : &PyProperty_Type)); + attr(name) = property(fget.ptr() ? fget : none(), + fset.ptr() ? fset : none(), + /*deleter*/none(), + pybind11::str(has_doc ? rec_func->doc : "")); + } +}; + +/// Set the pointer to operator new if it exists. The cast is needed because it can be overloaded. 
+template (T::operator new))>> +void set_operator_new(type_record *r) { r->operator_new = &T::operator new; } + +template void set_operator_new(...) { } + +template struct has_operator_delete : std::false_type { }; +template struct has_operator_delete(T::operator delete))>> + : std::true_type { }; +template struct has_operator_delete_size : std::false_type { }; +template struct has_operator_delete_size(T::operator delete))>> + : std::true_type { }; +/// Call class-specific delete if it exists or global otherwise. Can also be an overload set. +template ::value, int> = 0> +void call_operator_delete(T *p, size_t, size_t) { T::operator delete(p); } +template ::value && has_operator_delete_size::value, int> = 0> +void call_operator_delete(T *p, size_t s, size_t) { T::operator delete(p, s); } + +inline void call_operator_delete(void *p, size_t s, size_t a) { + (void)s; (void)a; + #if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912) + if (a > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { + #ifdef __cpp_sized_deallocation + ::operator delete(p, s, std::align_val_t(a)); + #else + ::operator delete(p, std::align_val_t(a)); + #endif + return; + } + #endif + #ifdef __cpp_sized_deallocation + ::operator delete(p, s); + #else + ::operator delete(p); + #endif +} + +inline void add_class_method(object& cls, const char *name_, const cpp_function &cf) { + cls.attr(cf.name()) = cf; + if (strcmp(name_, "__eq__") == 0 && !cls.attr("__dict__").contains("__hash__")) { + cls.attr("__hash__") = none(); + } +} + +PYBIND11_NAMESPACE_END(detail) + +/// Given a pointer to a member function, cast it to its `Derived` version. +/// Forward everything else unchanged. +template +auto method_adaptor(F &&f) -> decltype(std::forward(f)) { return std::forward(f); } + +template +auto method_adaptor(Return (Class::*pmf)(Args...)) -> Return (Derived::*)(Args...) 
{ + static_assert(detail::is_accessible_base_of::value, + "Cannot bind an inaccessible base class method; use a lambda definition instead"); + return pmf; +} + +template +auto method_adaptor(Return (Class::*pmf)(Args...) const) -> Return (Derived::*)(Args...) const { + static_assert(detail::is_accessible_base_of::value, + "Cannot bind an inaccessible base class method; use a lambda definition instead"); + return pmf; +} + +template +class class_ : public detail::generic_type { + template using is_holder = detail::is_holder_type; + template using is_subtype = detail::is_strict_base_of; + template using is_base = detail::is_strict_base_of; + // struct instead of using here to help MSVC: + template struct is_valid_class_option : + detail::any_of, is_subtype, is_base> {}; + +public: + using type = type_; + using type_alias = detail::exactly_one_t; + constexpr static bool has_alias = !std::is_void::value; + using holder_type = detail::exactly_one_t, options...>; + + static_assert(detail::all_of...>::value, + "Unknown/invalid class_ template parameters provided"); + + static_assert(!has_alias || std::is_polymorphic::value, + "Cannot use an alias class with a non-polymorphic type"); + + PYBIND11_OBJECT(class_, generic_type, PyType_Check) + + template + class_(handle scope, const char *name, const Extra &... extra) { + using namespace detail; + + // MI can only be specified via class_ template options, not constructor parameters + static_assert( + none_of...>::value || // no base class arguments, or: + ( constexpr_sum(is_pyobject::value...) == 1 && // Exactly one base + constexpr_sum(is_base::value...) 
== 0 && // no template option bases + none_of...>::value), // no multiple_inheritance attr + "Error: multiple inheritance bases must be specified via class_ template options"); + + type_record record; + record.scope = scope; + record.name = name; + record.type = &typeid(type); + record.type_size = sizeof(conditional_t); + record.type_align = alignof(conditional_t&); + record.holder_size = sizeof(holder_type); + record.init_instance = init_instance; + record.dealloc = dealloc; + record.default_holder = detail::is_instantiation::value; + + set_operator_new(&record); + + /* Register base classes specified via template arguments to class_, if any */ + PYBIND11_EXPAND_SIDE_EFFECTS(add_base(record)); + + /* Process optional arguments, if any */ + process_attributes::init(extra..., &record); + + generic_type::initialize(record); + + if (has_alias) { + auto &instances = record.module_local ? get_local_internals().registered_types_cpp : get_internals().registered_types_cpp; + instances[std::type_index(typeid(type_alias))] = instances[std::type_index(typeid(type))]; + } + } + + template ::value, int> = 0> + static void add_base(detail::type_record &rec) { + rec.add_base(typeid(Base), [](void *src) -> void * { + return static_cast(reinterpret_cast(src)); + }); + } + + template ::value, int> = 0> + static void add_base(detail::type_record &) { } + + template + class_ &def(const char *name_, Func&& f, const Extra&... extra) { + cpp_function cf(method_adaptor(std::forward(f)), name(name_), is_method(*this), + sibling(getattr(*this, name_, none())), extra...); + add_class_method(*this, name_, cf); + return *this; + } + + template class_ & + def_static(const char *name_, Func &&f, const Extra&... extra) { + static_assert(!std::is_member_function_pointer::value, + "def_static(...) 
called with a non-static member function pointer"); + cpp_function cf(std::forward(f), name(name_), scope(*this), + sibling(getattr(*this, name_, none())), extra...); + attr(cf.name()) = staticmethod(cf); + return *this; + } + + template + class_ &def(const detail::op_ &op, const Extra&... extra) { + op.execute(*this, extra...); + return *this; + } + + template + class_ & def_cast(const detail::op_ &op, const Extra&... extra) { + op.execute_cast(*this, extra...); + return *this; + } + + template + class_ &def(const detail::initimpl::constructor &init, const Extra&... extra) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(init); + init.execute(*this, extra...); + return *this; + } + + template + class_ &def(const detail::initimpl::alias_constructor &init, const Extra&... extra) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(init); + init.execute(*this, extra...); + return *this; + } + + template + class_ &def(detail::initimpl::factory &&init, const Extra&... extra) { + std::move(init).execute(*this, extra...); + return *this; + } + + template + class_ &def(detail::initimpl::pickle_factory &&pf, const Extra &...extra) { + std::move(pf).execute(*this, extra...); + return *this; + } + + template + class_& def_buffer(Func &&func) { + struct capture { Func func; }; + auto *ptr = new capture { std::forward(func) }; + install_buffer_funcs([](PyObject *obj, void *ptr) -> buffer_info* { + detail::make_caster caster; + if (!caster.load(obj, false)) + return nullptr; + return new buffer_info(((capture *) ptr)->func(caster)); + }, ptr); + weakref(m_ptr, cpp_function([ptr](handle wr) { + delete ptr; + wr.dec_ref(); + })).release(); + return *this; + } + + template + class_ &def_buffer(Return (Class::*func)(Args...)) { + return def_buffer([func] (type &obj) { return (obj.*func)(); }); + } + + template + class_ &def_buffer(Return (Class::*func)(Args...) 
const) { + return def_buffer([func] (const type &obj) { return (obj.*func)(); }); + } + + template + class_ &def_readwrite(const char *name, D C::*pm, const Extra&... extra) { + static_assert(std::is_same::value || std::is_base_of::value, "def_readwrite() requires a class member (or base class member)"); + cpp_function fget([pm](const type &c) -> const D &{ return c.*pm; }, is_method(*this)), + fset([pm](type &c, const D &value) { c.*pm = value; }, is_method(*this)); + def_property(name, fget, fset, return_value_policy::reference_internal, extra...); + return *this; + } + + template + class_ &def_readonly(const char *name, const D C::*pm, const Extra& ...extra) { + static_assert(std::is_same::value || std::is_base_of::value, "def_readonly() requires a class member (or base class member)"); + cpp_function fget([pm](const type &c) -> const D &{ return c.*pm; }, is_method(*this)); + def_property_readonly(name, fget, return_value_policy::reference_internal, extra...); + return *this; + } + + template + class_ &def_readwrite_static(const char *name, D *pm, const Extra& ...extra) { + cpp_function fget([pm](const object &) -> const D & { return *pm; }, scope(*this)), + fset([pm](const object &, const D &value) { *pm = value; }, scope(*this)); + def_property_static(name, fget, fset, return_value_policy::reference, extra...); + return *this; + } + + template + class_ &def_readonly_static(const char *name, const D *pm, const Extra& ...extra) { + cpp_function fget([pm](const object &) -> const D & { return *pm; }, scope(*this)); + def_property_readonly_static(name, fget, return_value_policy::reference, extra...); + return *this; + } + + /// Uses return_value_policy::reference_internal by default + template + class_ &def_property_readonly(const char *name, const Getter &fget, const Extra& ...extra) { + return def_property_readonly(name, cpp_function(method_adaptor(fget)), + return_value_policy::reference_internal, extra...); + } + + /// Uses cpp_function's return_value_policy 
by default + template + class_ &def_property_readonly(const char *name, const cpp_function &fget, const Extra& ...extra) { + return def_property(name, fget, nullptr, extra...); + } + + /// Uses return_value_policy::reference by default + template + class_ &def_property_readonly_static(const char *name, const Getter &fget, const Extra& ...extra) { + return def_property_readonly_static(name, cpp_function(fget), return_value_policy::reference, extra...); + } + + /// Uses cpp_function's return_value_policy by default + template + class_ &def_property_readonly_static(const char *name, const cpp_function &fget, const Extra& ...extra) { + return def_property_static(name, fget, nullptr, extra...); + } + + /// Uses return_value_policy::reference_internal by default + template + class_ &def_property(const char *name, const Getter &fget, const Setter &fset, const Extra& ...extra) { + return def_property(name, fget, cpp_function(method_adaptor(fset)), extra...); + } + template + class_ &def_property(const char *name, const Getter &fget, const cpp_function &fset, const Extra& ...extra) { + return def_property(name, cpp_function(method_adaptor(fget)), fset, + return_value_policy::reference_internal, extra...); + } + + /// Uses cpp_function's return_value_policy by default + template + class_ &def_property(const char *name, const cpp_function &fget, const cpp_function &fset, const Extra& ...extra) { + return def_property_static(name, fget, fset, is_method(*this), extra...); + } + + /// Uses return_value_policy::reference by default + template + class_ &def_property_static(const char *name, const Getter &fget, const cpp_function &fset, const Extra& ...extra) { + return def_property_static(name, cpp_function(fget), fset, return_value_policy::reference, extra...); + } + + /// Uses cpp_function's return_value_policy by default + template + class_ &def_property_static(const char *name, const cpp_function &fget, const cpp_function &fset, const Extra& ...extra) { + static_assert( 0 == 
detail::constexpr_sum(std::is_base_of::value...), + "Argument annotations are not allowed for properties"); + auto rec_fget = get_function_record(fget), rec_fset = get_function_record(fset); + auto *rec_active = rec_fget; + if (rec_fget) { + char *doc_prev = rec_fget->doc; /* 'extra' field may include a property-specific documentation string */ + detail::process_attributes::init(extra..., rec_fget); + if (rec_fget->doc && rec_fget->doc != doc_prev) { + std::free(doc_prev); + rec_fget->doc = PYBIND11_COMPAT_STRDUP(rec_fget->doc); + } + } + if (rec_fset) { + char *doc_prev = rec_fset->doc; + detail::process_attributes::init(extra..., rec_fset); + if (rec_fset->doc && rec_fset->doc != doc_prev) { + std::free(doc_prev); + rec_fset->doc = PYBIND11_COMPAT_STRDUP(rec_fset->doc); + } + if (! rec_active) rec_active = rec_fset; + } + def_property_static_impl(name, fget, fset, rec_active); + return *this; + } + +private: + /// Initialize holder object, variant 1: object derives from enable_shared_from_this + template + static void init_holder(detail::instance *inst, detail::value_and_holder &v_h, + const holder_type * /* unused */, const std::enable_shared_from_this * /* dummy */) { + + auto sh = std::dynamic_pointer_cast( + detail::try_get_shared_from_this(v_h.value_ptr())); + if (sh) { + new (std::addressof(v_h.holder())) holder_type(std::move(sh)); + v_h.set_holder_constructed(); + } + + if (!v_h.holder_constructed() && inst->owned) { + new (std::addressof(v_h.holder())) holder_type(v_h.value_ptr()); + v_h.set_holder_constructed(); + } + } + + static void init_holder_from_existing(const detail::value_and_holder &v_h, + const holder_type *holder_ptr, std::true_type /*is_copy_constructible*/) { + new (std::addressof(v_h.holder())) holder_type(*reinterpret_cast(holder_ptr)); + } + + static void init_holder_from_existing(const detail::value_and_holder &v_h, + const holder_type *holder_ptr, std::false_type /*is_copy_constructible*/) { + new (std::addressof(v_h.holder())) 
holder_type(std::move(*const_cast(holder_ptr))); + } + + /// Initialize holder object, variant 2: try to construct from existing holder object, if possible + static void init_holder(detail::instance *inst, detail::value_and_holder &v_h, + const holder_type *holder_ptr, const void * /* dummy -- not enable_shared_from_this) */) { + if (holder_ptr) { + init_holder_from_existing(v_h, holder_ptr, std::is_copy_constructible()); + v_h.set_holder_constructed(); + } else if (inst->owned || detail::always_construct_holder::value) { + new (std::addressof(v_h.holder())) holder_type(v_h.value_ptr()); + v_h.set_holder_constructed(); + } + } + + /// Performs instance initialization including constructing a holder and registering the known + /// instance. Should be called as soon as the `type` value_ptr is set for an instance. Takes an + /// optional pointer to an existing holder to use; if not specified and the instance is + /// `.owned`, a new holder will be constructed to manage the value pointer. + static void init_instance(detail::instance *inst, const void *holder_ptr) { + auto v_h = inst->get_value_and_holder(detail::get_type_info(typeid(type))); + if (!v_h.instance_registered()) { + register_instance(inst, v_h.value_ptr(), v_h.type); + v_h.set_instance_registered(); + } + init_holder(inst, v_h, (const holder_type *) holder_ptr, v_h.value_ptr()); + } + + /// Deallocates an instance; via holder, if constructed; otherwise via operator delete. + static void dealloc(detail::value_and_holder &v_h) { + // We could be deallocating because we are cleaning up after a Python exception. + // If so, the Python error indicator will be set. We need to clear that before + // running the destructor, in case the destructor code calls more Python. + // If we don't, the Python API will exit with an exception, and pybind11 will + // throw error_already_set from the C++ destructor which is forbidden and triggers + // std::terminate(). 
+ error_scope scope; + if (v_h.holder_constructed()) { + v_h.holder().~holder_type(); + v_h.set_holder_constructed(false); + } + else { + detail::call_operator_delete(v_h.value_ptr(), + v_h.type->type_size, + v_h.type->type_align + ); + } + v_h.value_ptr() = nullptr; + } + + static detail::function_record *get_function_record(handle h) { + h = detail::get_function(h); + return h ? (detail::function_record *) reinterpret_borrow(PyCFunction_GET_SELF(h.ptr())) + : nullptr; + } +}; + +/// Binds an existing constructor taking arguments Args... +template detail::initimpl::constructor init() { return {}; } +/// Like `init()`, but the instance is always constructed through the alias class (even +/// when not inheriting on the Python side). +template detail::initimpl::alias_constructor init_alias() { return {}; } + +/// Binds a factory function as a constructor +template > +Ret init(Func &&f) { return {std::forward(f)}; } + +/// Dual-argument factory function: the first function is called when no alias is needed, the second +/// when an alias is needed (i.e. due to python-side inheritance). Arguments must be identical. +template > +Ret init(CFunc &&c, AFunc &&a) { + return {std::forward(c), std::forward(a)}; +} + +/// Binds pickling functions `__getstate__` and `__setstate__` and ensures that the type +/// returned by `__getstate__` is the same as the argument accepted by `__setstate__`. 
+template +detail::initimpl::pickle_factory pickle(GetState &&g, SetState &&s) { + return {std::forward(g), std::forward(s)}; +} + +PYBIND11_NAMESPACE_BEGIN(detail) + +inline str enum_name(handle arg) { + dict entries = arg.get_type().attr("__entries"); + for (auto kv : entries) { + if (handle(kv.second[int_(0)]).equal(arg)) + return pybind11::str(kv.first); + } + return "???"; +} + +struct enum_base { + enum_base(const handle &base, const handle &parent) : m_base(base), m_parent(parent) { } + + PYBIND11_NOINLINE void init(bool is_arithmetic, bool is_convertible) { + m_base.attr("__entries") = dict(); + auto property = handle((PyObject *) &PyProperty_Type); + auto static_property = handle((PyObject *) get_internals().static_property_type); + + m_base.attr("__repr__") = cpp_function( + [](const object &arg) -> str { + handle type = type::handle_of(arg); + object type_name = type.attr("__name__"); + return pybind11::str("<{}.{}: {}>").format(type_name, enum_name(arg), int_(arg)); + }, + name("__repr__"), + is_method(m_base)); + + m_base.attr("name") = property(cpp_function(&enum_name, name("name"), is_method(m_base))); + + m_base.attr("__str__") = cpp_function( + [](handle arg) -> str { + object type_name = type::handle_of(arg).attr("__name__"); + return pybind11::str("{}.{}").format(type_name, enum_name(arg)); + }, name("name"), is_method(m_base) + ); + + m_base.attr("__doc__") = static_property(cpp_function( + [](handle arg) -> std::string { + std::string docstring; + dict entries = arg.attr("__entries"); + if (((PyTypeObject *) arg.ptr())->tp_doc) + docstring += std::string(((PyTypeObject *) arg.ptr())->tp_doc) + "\n\n"; + docstring += "Members:"; + for (auto kv : entries) { + auto key = std::string(pybind11::str(kv.first)); + auto comment = kv.second[int_(1)]; + docstring += "\n\n " + key; + if (!comment.is_none()) + docstring += " : " + (std::string) pybind11::str(comment); + } + return docstring; + }, name("__doc__") + ), none(), none(), ""); + + 
m_base.attr("__members__") = static_property(cpp_function( + [](handle arg) -> dict { + dict entries = arg.attr("__entries"), m; + for (auto kv : entries) + m[kv.first] = kv.second[int_(0)]; + return m; + }, name("__members__")), none(), none(), "" + ); + +#define PYBIND11_ENUM_OP_STRICT(op, expr, strict_behavior) \ + m_base.attr(op) = cpp_function( \ + [](const object &a, const object &b) { \ + if (!type::handle_of(a).is(type::handle_of(b))) \ + strict_behavior; /* NOLINT(bugprone-macro-parentheses) */ \ + return expr; \ + }, \ + name(op), \ + is_method(m_base), \ + arg("other")) + +#define PYBIND11_ENUM_OP_CONV(op, expr) \ + m_base.attr(op) = cpp_function( \ + [](const object &a_, const object &b_) { \ + int_ a(a_), b(b_); \ + return expr; \ + }, \ + name(op), \ + is_method(m_base), \ + arg("other")) + +#define PYBIND11_ENUM_OP_CONV_LHS(op, expr) \ + m_base.attr(op) = cpp_function( \ + [](const object &a_, const object &b) { \ + int_ a(a_); \ + return expr; \ + }, \ + name(op), \ + is_method(m_base), \ + arg("other")) + + if (is_convertible) { + PYBIND11_ENUM_OP_CONV_LHS("__eq__", !b.is_none() && a.equal(b)); + PYBIND11_ENUM_OP_CONV_LHS("__ne__", b.is_none() || !a.equal(b)); + + if (is_arithmetic) { + PYBIND11_ENUM_OP_CONV("__lt__", a < b); + PYBIND11_ENUM_OP_CONV("__gt__", a > b); + PYBIND11_ENUM_OP_CONV("__le__", a <= b); + PYBIND11_ENUM_OP_CONV("__ge__", a >= b); + PYBIND11_ENUM_OP_CONV("__and__", a & b); + PYBIND11_ENUM_OP_CONV("__rand__", a & b); + PYBIND11_ENUM_OP_CONV("__or__", a | b); + PYBIND11_ENUM_OP_CONV("__ror__", a | b); + PYBIND11_ENUM_OP_CONV("__xor__", a ^ b); + PYBIND11_ENUM_OP_CONV("__rxor__", a ^ b); + m_base.attr("__invert__") + = cpp_function([](const object &arg) { return ~(int_(arg)); }, + name("__invert__"), + is_method(m_base)); + } + } else { + PYBIND11_ENUM_OP_STRICT("__eq__", int_(a).equal(int_(b)), return false); + PYBIND11_ENUM_OP_STRICT("__ne__", !int_(a).equal(int_(b)), return true); + + if (is_arithmetic) { + #define 
PYBIND11_THROW throw type_error("Expected an enumeration of matching type!"); + PYBIND11_ENUM_OP_STRICT("__lt__", int_(a) < int_(b), PYBIND11_THROW); + PYBIND11_ENUM_OP_STRICT("__gt__", int_(a) > int_(b), PYBIND11_THROW); + PYBIND11_ENUM_OP_STRICT("__le__", int_(a) <= int_(b), PYBIND11_THROW); + PYBIND11_ENUM_OP_STRICT("__ge__", int_(a) >= int_(b), PYBIND11_THROW); + #undef PYBIND11_THROW + } + } + + #undef PYBIND11_ENUM_OP_CONV_LHS + #undef PYBIND11_ENUM_OP_CONV + #undef PYBIND11_ENUM_OP_STRICT + + m_base.attr("__getstate__") = cpp_function( + [](const object &arg) { return int_(arg); }, name("__getstate__"), is_method(m_base)); + + m_base.attr("__hash__") = cpp_function( + [](const object &arg) { return int_(arg); }, name("__hash__"), is_method(m_base)); + } + + PYBIND11_NOINLINE void value(char const* name_, object value, const char *doc = nullptr) { + dict entries = m_base.attr("__entries"); + str name(name_); + if (entries.contains(name)) { + std::string type_name = (std::string) str(m_base.attr("__name__")); + throw value_error(type_name + ": element \"" + std::string(name_) + "\" already exists!"); + } + + entries[name] = std::make_pair(value, doc); + m_base.attr(name) = value; + } + + PYBIND11_NOINLINE void export_values() { + dict entries = m_base.attr("__entries"); + for (auto kv : entries) + m_parent.attr(kv.first) = kv.second[int_(0)]; + } + + handle m_base; + handle m_parent; +}; + +template struct equivalent_integer {}; +template <> struct equivalent_integer { using type = int8_t; }; +template <> struct equivalent_integer { using type = uint8_t; }; +template <> struct equivalent_integer { using type = int16_t; }; +template <> struct equivalent_integer { using type = uint16_t; }; +template <> struct equivalent_integer { using type = int32_t; }; +template <> struct equivalent_integer { using type = uint32_t; }; +template <> struct equivalent_integer { using type = int64_t; }; +template <> struct equivalent_integer { using type = uint64_t; }; + +template 
+using equivalent_integer_t = typename equivalent_integer::value, sizeof(IntLike)>::type; + +PYBIND11_NAMESPACE_END(detail) + +/// Binds C++ enumerations and enumeration classes to Python +template class enum_ : public class_ { +public: + using Base = class_; + using Base::def; + using Base::attr; + using Base::def_property_readonly; + using Base::def_property_readonly_static; + using Underlying = typename std::underlying_type::type; + // Scalar is the integer representation of underlying type + using Scalar = detail::conditional_t, std::is_same + >::value, detail::equivalent_integer_t, Underlying>; + + template + enum_(const handle &scope, const char *name, const Extra&... extra) + : class_(scope, name, extra...), m_base(*this, scope) { + constexpr bool is_arithmetic = detail::any_of...>::value; + constexpr bool is_convertible = std::is_convertible::value; + m_base.init(is_arithmetic, is_convertible); + + def(init([](Scalar i) { return static_cast(i); }), arg("value")); + def_property_readonly("value", [](Type value) { return (Scalar) value; }); + def("__int__", [](Type value) { return (Scalar) value; }); + #if PY_MAJOR_VERSION < 3 + def("__long__", [](Type value) { return (Scalar) value; }); + #endif + #if PY_MAJOR_VERSION > 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8) + def("__index__", [](Type value) { return (Scalar) value; }); + #endif + + attr("__setstate__") = cpp_function( + [](detail::value_and_holder &v_h, Scalar arg) { + detail::initimpl::setstate(v_h, static_cast(arg), + Py_TYPE(v_h.inst) != v_h.type->type); }, + detail::is_new_style_constructor(), + pybind11::name("__setstate__"), is_method(*this), arg("state")); + } + + /// Export enumeration entries into the parent scope + enum_& export_values() { + m_base.export_values(); + return *this; + } + + /// Add an enumeration entry + enum_& value(char const* name, Type value, const char *doc = nullptr) { + m_base.value(name, pybind11::cast(value, return_value_policy::copy), doc); + return *this; + 
} + +private: + detail::enum_base m_base; +}; + +PYBIND11_NAMESPACE_BEGIN(detail) + + +PYBIND11_NOINLINE void keep_alive_impl(handle nurse, handle patient) { + if (!nurse || !patient) + pybind11_fail("Could not activate keep_alive!"); + + if (patient.is_none() || nurse.is_none()) + return; /* Nothing to keep alive or nothing to be kept alive by */ + + auto tinfo = all_type_info(Py_TYPE(nurse.ptr())); + if (!tinfo.empty()) { + /* It's a pybind-registered type, so we can store the patient in the + * internal list. */ + add_patient(nurse.ptr(), patient.ptr()); + } + else { + /* Fall back to clever approach based on weak references taken from + * Boost.Python. This is not used for pybind-registered types because + * the objects can be destroyed out-of-order in a GC pass. */ + cpp_function disable_lifesupport( + [patient](handle weakref) { patient.dec_ref(); weakref.dec_ref(); }); + + weakref wr(nurse, disable_lifesupport); + + patient.inc_ref(); /* reference patient and leak the weak reference */ + (void) wr.release(); + } +} + +PYBIND11_NOINLINE void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret) { + auto get_arg = [&](size_t n) { + if (n == 0) + return ret; + if (n == 1 && call.init_self) + return call.init_self; + if (n <= call.args.size()) + return call.args[n - 1]; + return handle(); + }; + + keep_alive_impl(get_arg(Nurse), get_arg(Patient)); +} + +inline std::pair all_type_info_get_cache(PyTypeObject *type) { + auto res = get_internals().registered_types_py +#ifdef __cpp_lib_unordered_map_try_emplace + .try_emplace(type); +#else + .emplace(type, std::vector()); +#endif + if (res.second) { + // New cache entry created; set up a weak reference to automatically remove it if the type + // gets destroyed: + weakref((PyObject *) type, cpp_function([type](handle wr) { + get_internals().registered_types_py.erase(type); + wr.dec_ref(); + })).release(); + } + + return res; +} + +/* There are a large number of apparently unused template 
arguments because + * each combination requires a separate py::class_ registration. + */ +template +struct iterator_state { + Iterator it; + Sentinel end; + bool first_or_done; +}; + +// Note: these helpers take the iterator by non-const reference because some +// iterators in the wild can't be dereferenced when const. C++ needs the extra parens in decltype +// to enforce an lvalue. The & after Iterator is required for MSVC < 16.9. SFINAE cannot be +// reused for result_type due to bugs in ICC, NVCC, and PGI compilers. See PR #3293. +template ()))> +struct iterator_access { + using result_type = decltype((*std::declval())); + // NOLINTNEXTLINE(readability-const-return-type) // PR #3263 + result_type operator()(Iterator &it) const { + return *it; + } +}; + +template ()).first)) > +struct iterator_key_access { + using result_type = decltype(((*std::declval()).first)); + result_type operator()(Iterator &it) const { + return (*it).first; + } +}; + +template ()).second))> +struct iterator_value_access { + using result_type = decltype(((*std::declval()).second)); + result_type operator()(Iterator &it) const { + return (*it).second; + } +}; + +template +iterator make_iterator_impl(Iterator first, Sentinel last, Extra &&... 
extra) { + using state = detail::iterator_state; + // TODO: state captures only the types of Extra, not the values + + if (!detail::get_type_info(typeid(state), false)) { + class_(handle(), "iterator", pybind11::module_local()) + .def("__iter__", [](state &s) -> state& { return s; }) + .def("__next__", [](state &s) -> ValueType { + if (!s.first_or_done) + ++s.it; + else + s.first_or_done = false; + if (s.it == s.end) { + s.first_or_done = true; + throw stop_iteration(); + } + return Access()(s.it); + // NOLINTNEXTLINE(readability-const-return-type) // PR #3263 + }, std::forward(extra)..., Policy); + } + + return cast(state{first, last, true}); +} + +PYBIND11_NAMESPACE_END(detail) + +/// Makes a python iterator from a first and past-the-end C++ InputIterator. +template ::result_type, + typename... Extra> +iterator make_iterator(Iterator first, Sentinel last, Extra &&... extra) { + return detail::make_iterator_impl< + detail::iterator_access, + Policy, + Iterator, + Sentinel, + ValueType, + Extra...>(first, last, std::forward(extra)...); +} + +/// Makes a python iterator over the keys (`.first`) of a iterator over pairs from a +/// first and past-the-end InputIterator. +template ::result_type, + typename... Extra> +iterator make_key_iterator(Iterator first, Sentinel last, Extra &&...extra) { + return detail::make_iterator_impl< + detail::iterator_key_access, + Policy, + Iterator, + Sentinel, + KeyType, + Extra...>(first, last, std::forward(extra)...); +} + +/// Makes a python iterator over the values (`.second`) of a iterator over pairs from a +/// first and past-the-end InputIterator. +template ::result_type, + typename... 
Extra> +iterator make_value_iterator(Iterator first, Sentinel last, Extra &&...extra) { + return detail::make_iterator_impl< + detail::iterator_value_access, + Policy, Iterator, + Sentinel, + ValueType, + Extra...>(first, last, std::forward(extra)...); +} + +/// Makes an iterator over values of an stl container or other container supporting +/// `std::begin()`/`std::end()` +template iterator make_iterator(Type &value, Extra&&... extra) { + return make_iterator(std::begin(value), std::end(value), extra...); +} + +/// Makes an iterator over the keys (`.first`) of a stl map-like container supporting +/// `std::begin()`/`std::end()` +template iterator make_key_iterator(Type &value, Extra&&... extra) { + return make_key_iterator(std::begin(value), std::end(value), extra...); +} + +/// Makes an iterator over the values (`.second`) of a stl map-like container supporting +/// `std::begin()`/`std::end()` +template iterator make_value_iterator(Type &value, Extra&&... extra) { + return make_value_iterator(std::begin(value), std::end(value), extra...); +} + +template void implicitly_convertible() { + struct set_flag { + bool &flag; + explicit set_flag(bool &flag_) : flag(flag_) { flag_ = true; } + ~set_flag() { flag = false; } + }; + auto implicit_caster = [](PyObject *obj, PyTypeObject *type) -> PyObject * { + static bool currently_used = false; + if (currently_used) // implicit conversions are non-reentrant + return nullptr; + set_flag flag_helper(currently_used); + if (!detail::make_caster().load(obj, false)) + return nullptr; + tuple args(1); + args[0] = obj; + PyObject *result = PyObject_Call((PyObject *) type, args.ptr(), nullptr); + if (result == nullptr) + PyErr_Clear(); + return result; + }; + + if (auto tinfo = detail::get_type_info(typeid(OutputType))) + tinfo->implicit_conversions.push_back(implicit_caster); + else + pybind11_fail("implicitly_convertible: Unable to find type " + type_id()); +} + + +inline void register_exception_translator(ExceptionTranslator 
&&translator) { + detail::get_internals().registered_exception_translators.push_front( + std::forward(translator)); +} + + +/** + * Add a new module-local exception translator. Locally registered functions + * will be tried before any globally registered exception translators, which + * will only be invoked if the module-local handlers do not deal with + * the exception. + */ +inline void register_local_exception_translator(ExceptionTranslator &&translator) { + detail::get_local_internals().registered_exception_translators.push_front( + std::forward(translator)); +} + +/** + * Wrapper to generate a new Python exception type. + * + * This should only be used with PyErr_SetString for now. + * It is not (yet) possible to use as a py::base. + * Template type argument is reserved for future use. + */ +template +class exception : public object { +public: + exception() = default; + exception(handle scope, const char *name, handle base = PyExc_Exception) { + std::string full_name = scope.attr("__name__").cast() + + std::string(".") + name; + m_ptr = PyErr_NewException(const_cast(full_name.c_str()), base.ptr(), NULL); + if (hasattr(scope, "__dict__") && scope.attr("__dict__").contains(name)) + pybind11_fail("Error during initialization: multiple incompatible " + "definitions with name \"" + std::string(name) + "\""); + scope.attr(name) = *this; + } + + // Sets the current python exception to this exception object with the given message + void operator()(const char *message) { + PyErr_SetString(m_ptr, message); + } +}; + +PYBIND11_NAMESPACE_BEGIN(detail) +// Returns a reference to a function-local static exception object used in the simple +// register_exception approach below. (It would be simpler to have the static local variable +// directly in register_exception, but that makes clang <3.5 segfault - issue #1349). 
+template +exception &get_exception_object() { static exception ex; return ex; } + +// Helper function for register_exception and register_local_exception +template +exception ®ister_exception_impl(handle scope, + const char *name, + handle base, + bool isLocal) { + auto &ex = detail::get_exception_object(); + if (!ex) ex = exception(scope, name, base); + + auto register_func = isLocal ? ®ister_local_exception_translator + : ®ister_exception_translator; + + register_func([](std::exception_ptr p) { + if (!p) return; + try { + std::rethrow_exception(p); + } catch (const CppException &e) { + detail::get_exception_object()(e.what()); + } + }); + return ex; +} + +PYBIND11_NAMESPACE_END(detail) + +/** + * Registers a Python exception in `m` of the given `name` and installs a translator to + * translate the C++ exception to the created Python exception using the what() method. + * This is intended for simple exception translations; for more complex translation, register the + * exception object and translator directly. + */ +template +exception ®ister_exception(handle scope, + const char *name, + handle base = PyExc_Exception) { + return detail::register_exception_impl(scope, name, base, false /* isLocal */); +} + +/** + * Registers a Python exception in `m` of the given `name` and installs a translator to + * translate the C++ exception to the created Python exception using the what() method. + * This translator will only be used for exceptions that are thrown in this module and will be + * tried before global exception translators, including those registered with register_exception. + * This is intended for simple exception translations; for more complex translation, register the + * exception object and translator directly. 
+ */ +template +exception ®ister_local_exception(handle scope, + const char *name, + handle base = PyExc_Exception) { + return detail::register_exception_impl(scope, name, base, true /* isLocal */); +} + +PYBIND11_NAMESPACE_BEGIN(detail) +PYBIND11_NOINLINE void print(const tuple &args, const dict &kwargs) { + auto strings = tuple(args.size()); + for (size_t i = 0; i < args.size(); ++i) { + strings[i] = str(args[i]); + } + auto sep = kwargs.contains("sep") ? kwargs["sep"] : cast(" "); + auto line = sep.attr("join")(strings); + + object file; + if (kwargs.contains("file")) { + file = kwargs["file"].cast(); + } else { + try { + file = module_::import("sys").attr("stdout"); + } catch (const error_already_set &) { + /* If print() is called from code that is executed as + part of garbage collection during interpreter shutdown, + importing 'sys' can fail. Give up rather than crashing the + interpreter in this case. */ + return; + } + } + + auto write = file.attr("write"); + write(line); + write(kwargs.contains("end") ? 
kwargs["end"] : cast("\n")); + + if (kwargs.contains("flush") && kwargs["flush"].cast()) + file.attr("flush")(); +} +PYBIND11_NAMESPACE_END(detail) + +template +void print(Args &&...args) { + auto c = detail::collect_arguments(std::forward(args)...); + detail::print(c.args(), c.kwargs()); +} + +error_already_set::~error_already_set() { + if (m_type) { + gil_scoped_acquire gil; + error_scope scope; + m_type.release().dec_ref(); + m_value.release().dec_ref(); + m_trace.release().dec_ref(); + } +} + +PYBIND11_NAMESPACE_BEGIN(detail) +inline function get_type_override(const void *this_ptr, const type_info *this_type, const char *name) { + handle self = get_object_handle(this_ptr, this_type); + if (!self) + return function(); + handle type = type::handle_of(self); + auto key = std::make_pair(type.ptr(), name); + + /* Cache functions that aren't overridden in Python to avoid + many costly Python dictionary lookups below */ + auto &cache = get_internals().inactive_override_cache; + if (cache.find(key) != cache.end()) + return function(); + + function override = getattr(self, name, function()); + if (override.is_cpp_function()) { + cache.insert(key); + return function(); + } + + /* Don't call dispatch code if invoked from overridden function. + Unfortunately this doesn't work on PyPy. */ +#if !defined(PYPY_VERSION) + PyFrameObject *frame = PyThreadState_Get()->frame; + if (frame != nullptr && (std::string) str(frame->f_code->co_name) == name + && frame->f_code->co_argcount > 0) { + PyFrame_FastToLocals(frame); + PyObject *self_caller = dict_getitem( + frame->f_locals, PyTuple_GET_ITEM(frame->f_code->co_varnames, 0)); + if (self_caller == self.ptr()) + return function(); + } +#else + /* PyPy currently doesn't provide a detailed cpyext emulation of + frame objects, so we have to emulate this using Python. 
This + is going to be slow..*/ + dict d; d["self"] = self; d["name"] = pybind11::str(name); + PyObject *result = PyRun_String( + "import inspect\n" + "frame = inspect.currentframe()\n" + "if frame is not None:\n" + " frame = frame.f_back\n" + " if frame is not None and str(frame.f_code.co_name) == name and " + "frame.f_code.co_argcount > 0:\n" + " self_caller = frame.f_locals[frame.f_code.co_varnames[0]]\n" + " if self_caller == self:\n" + " self = None\n", + Py_file_input, d.ptr(), d.ptr()); + if (result == nullptr) + throw error_already_set(); + if (d["self"].is_none()) + return function(); + Py_DECREF(result); +#endif + + return override; +} +PYBIND11_NAMESPACE_END(detail) + +/** \rst + Try to retrieve a python method by the provided name from the instance pointed to by the this_ptr. + + :this_ptr: The pointer to the object the overridden method should be retrieved for. This should be + the first non-trampoline class encountered in the inheritance chain. + :name: The name of the overridden Python method to retrieve. + :return: The Python method by this name from the object or an empty function wrapper. + \endrst */ +template function get_override(const T *this_ptr, const char *name) { + auto tinfo = detail::get_type_info(typeid(T)); + return tinfo ? detail::get_type_override(this_ptr, tinfo, name) : function(); +} + +#define PYBIND11_OVERRIDE_IMPL(ret_type, cname, name, ...) \ + do { \ + pybind11::gil_scoped_acquire gil; \ + pybind11::function override \ + = pybind11::get_override(static_cast(this), name); \ + if (override) { \ + auto o = override(__VA_ARGS__); \ + if (pybind11::detail::cast_is_temporary_value_reference::value) { \ + static pybind11::detail::override_caster_t caster; \ + return pybind11::detail::cast_ref(std::move(o), caster); \ + } \ + return pybind11::detail::cast_safe(std::move(o)); \ + } \ + } while (false) + +/** \rst + Macro to populate the virtual method in the trampoline class. 
This macro tries to look up a method named 'fn' + from the Python side, deals with the :ref:`gil` and necessary argument conversions to call this method and return + the appropriate type. See :ref:`overriding_virtuals` for more information. This macro should be used when the method + name in C is not the same as the method name in Python. For example with `__str__`. + + .. code-block:: cpp + + std::string toString() override { + PYBIND11_OVERRIDE_NAME( + std::string, // Return type (ret_type) + Animal, // Parent class (cname) + "__str__", // Name of method in Python (name) + toString, // Name of function in C++ (fn) + ); + } +\endrst */ +#define PYBIND11_OVERRIDE_NAME(ret_type, cname, name, fn, ...) \ + do { \ + PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__); \ + return cname::fn(__VA_ARGS__); \ + } while (false) + +/** \rst + Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERRIDE_NAME`, except that it + throws if no override can be found. +\endrst */ +#define PYBIND11_OVERRIDE_PURE_NAME(ret_type, cname, name, fn, ...) \ + do { \ + PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__); \ + pybind11::pybind11_fail("Tried to call pure virtual function \"" PYBIND11_STRINGIFY(cname) "::" name "\""); \ + } while (false) + +/** \rst + Macro to populate the virtual method in the trampoline class. This macro tries to look up the method + from the Python side, deals with the :ref:`gil` and necessary argument conversions to call this method and return + the appropriate type. This macro should be used if the method name in C and in Python are identical. + See :ref:`overriding_virtuals` for more information. + + .. 
code-block:: cpp + + class PyAnimal : public Animal { + public: + // Inherit the constructors + using Animal::Animal; + + // Trampoline (need one for each virtual function) + std::string go(int n_times) override { + PYBIND11_OVERRIDE_PURE( + std::string, // Return type (ret_type) + Animal, // Parent class (cname) + go, // Name of function in C++ (must match Python name) (fn) + n_times // Argument(s) (...) + ); + } + }; +\endrst */ +#define PYBIND11_OVERRIDE(ret_type, cname, fn, ...) \ + PYBIND11_OVERRIDE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__) + +/** \rst + Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERRIDE`, except that it throws + if no override can be found. +\endrst */ +#define PYBIND11_OVERRIDE_PURE(ret_type, cname, fn, ...) \ + PYBIND11_OVERRIDE_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__) + + +// Deprecated versions + +PYBIND11_DEPRECATED("get_type_overload has been deprecated") +inline function get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name) { + return detail::get_type_override(this_ptr, this_type, name); +} + +template +inline function get_overload(const T *this_ptr, const char *name) { + return get_override(this_ptr, name); +} + +#define PYBIND11_OVERLOAD_INT(ret_type, cname, name, ...) \ + PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__) +#define PYBIND11_OVERLOAD_NAME(ret_type, cname, name, fn, ...) \ + PYBIND11_OVERRIDE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, fn, __VA_ARGS__) +#define PYBIND11_OVERLOAD_PURE_NAME(ret_type, cname, name, fn, ...) \ + PYBIND11_OVERRIDE_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, fn, __VA_ARGS__); +#define PYBIND11_OVERLOAD(ret_type, cname, fn, ...) \ + PYBIND11_OVERRIDE(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), fn, __VA_ARGS__) +#define PYBIND11_OVERLOAD_PURE(ret_type, cname, fn, ...) 
\ + PYBIND11_OVERRIDE_PURE(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), fn, __VA_ARGS__); + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) + +#if defined(__GNUC__) && __GNUC__ == 7 +# pragma GCC diagnostic pop // -Wnoexcept-type +#endif diff --git a/Libraries/pybind11-2.8.0/pybind11/pytypes.h b/Libraries/pybind11-2.8.0/pybind11/pytypes.h new file mode 100644 index 00000000..f54d5fad --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/pytypes.h @@ -0,0 +1,1879 @@ +/* + pybind11/pytypes.h: Convenience wrapper classes for basic Python types + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" +#include "buffer_info.h" +#include +#include + +#if defined(PYBIND11_HAS_OPTIONAL) +# include +#endif + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +/* A few forward declarations */ +class handle; class object; +class str; class iterator; +class type; +struct arg; struct arg_v; + +PYBIND11_NAMESPACE_BEGIN(detail) +class args_proxy; +bool isinstance_generic(handle obj, const std::type_info &tp); + +// Accessor forward declarations +template class accessor; +namespace accessor_policies { + struct obj_attr; + struct str_attr; + struct generic_item; + struct sequence_item; + struct list_item; + struct tuple_item; +} // namespace accessor_policies +using obj_attr_accessor = accessor; +using str_attr_accessor = accessor; +using item_accessor = accessor; +using sequence_accessor = accessor; +using list_accessor = accessor; +using tuple_accessor = accessor; + +/// Tag and check to identify a class which implements the Python object API +class pyobject_tag { }; +template using is_pyobject = std::is_base_of>; + +/** \rst + A mixin class which adds common functions to `handle`, `object` and various accessors. + The only requirement for `Derived` is to implement ``PyObject *Derived::ptr() const``. 
+\endrst */ +template +class object_api : public pyobject_tag { + const Derived &derived() const { return static_cast(*this); } + +public: + /** \rst + Return an iterator equivalent to calling ``iter()`` in Python. The object + must be a collection which supports the iteration protocol. + \endrst */ + iterator begin() const; + /// Return a sentinel which ends iteration. + iterator end() const; + + /** \rst + Return an internal functor to invoke the object's sequence protocol. Casting + the returned ``detail::item_accessor`` instance to a `handle` or `object` + subclass causes a corresponding call to ``__getitem__``. Assigning a `handle` + or `object` subclass causes a call to ``__setitem__``. + \endrst */ + item_accessor operator[](handle key) const; + /// See above (the only difference is that they key is provided as a string literal) + item_accessor operator[](const char *key) const; + + /** \rst + Return an internal functor to access the object's attributes. Casting the + returned ``detail::obj_attr_accessor`` instance to a `handle` or `object` + subclass causes a corresponding call to ``getattr``. Assigning a `handle` + or `object` subclass causes a call to ``setattr``. + \endrst */ + obj_attr_accessor attr(handle key) const; + /// See above (the only difference is that they key is provided as a string literal) + str_attr_accessor attr(const char *key) const; + + /** \rst + Matches * unpacking in Python, e.g. to unpack arguments out of a ``tuple`` + or ``list`` for a function call. Applying another * to the result yields + ** unpacking, e.g. to unpack a dict as function keyword arguments. + See :ref:`calling_python_functions`. + \endrst */ + args_proxy operator*() const; + + /// Check if the given item is contained within this object, i.e. ``item in obj``. 
+ template bool contains(T &&item) const; + + /** \rst + Assuming the Python object is a function or implements the ``__call__`` + protocol, ``operator()`` invokes the underlying function, passing an + arbitrary set of parameters. The result is returned as a `object` and + may need to be converted back into a Python object using `handle::cast()`. + + When some of the arguments cannot be converted to Python objects, the + function will throw a `cast_error` exception. When the Python function + call fails, a `error_already_set` exception is thrown. + \endrst */ + template + object operator()(Args &&...args) const; + template + PYBIND11_DEPRECATED("call(...) was deprecated in favor of operator()(...)") + object call(Args&&... args) const; + + /// Equivalent to ``obj is other`` in Python. + bool is(object_api const& other) const { return derived().ptr() == other.derived().ptr(); } + /// Equivalent to ``obj is None`` in Python. + bool is_none() const { return derived().ptr() == Py_None; } + /// Equivalent to obj == other in Python + bool equal(object_api const &other) const { return rich_compare(other, Py_EQ); } + bool not_equal(object_api const &other) const { return rich_compare(other, Py_NE); } + bool operator<(object_api const &other) const { return rich_compare(other, Py_LT); } + bool operator<=(object_api const &other) const { return rich_compare(other, Py_LE); } + bool operator>(object_api const &other) const { return rich_compare(other, Py_GT); } + bool operator>=(object_api const &other) const { return rich_compare(other, Py_GE); } + + object operator-() const; + object operator~() const; + object operator+(object_api const &other) const; + object operator+=(object_api const &other) const; + object operator-(object_api const &other) const; + object operator-=(object_api const &other) const; + object operator*(object_api const &other) const; + object operator*=(object_api const &other) const; + object operator/(object_api const &other) const; + object 
operator/=(object_api const &other) const; + object operator|(object_api const &other) const; + object operator|=(object_api const &other) const; + object operator&(object_api const &other) const; + object operator&=(object_api const &other) const; + object operator^(object_api const &other) const; + object operator^=(object_api const &other) const; + object operator<<(object_api const &other) const; + object operator<<=(object_api const &other) const; + object operator>>(object_api const &other) const; + object operator>>=(object_api const &other) const; + + PYBIND11_DEPRECATED("Use py::str(obj) instead") + pybind11::str str() const; + + /// Get or set the object's docstring, i.e. ``obj.__doc__``. + str_attr_accessor doc() const; + + /// Return the object's current reference count + int ref_count() const { return static_cast(Py_REFCNT(derived().ptr())); } + + // TODO PYBIND11_DEPRECATED("Call py::type::handle_of(h) or py::type::of(h) instead of h.get_type()") + handle get_type() const; + +private: + bool rich_compare(object_api const &other, int value) const; +}; + +PYBIND11_NAMESPACE_END(detail) + +/** \rst + Holds a reference to a Python object (no reference counting) + + The `handle` class is a thin wrapper around an arbitrary Python object (i.e. a + ``PyObject *`` in Python's C API). It does not perform any automatic reference + counting and merely provides a basic C++ interface to various Python API functions. + + .. seealso:: + The `object` class inherits from `handle` and adds automatic reference + counting features. 
+\endrst */ +class handle : public detail::object_api { +public: + /// The default constructor creates a handle with a ``nullptr``-valued pointer + handle() = default; + /// Creates a ``handle`` from the given raw Python object pointer + // NOLINTNEXTLINE(google-explicit-constructor) + handle(PyObject *ptr) : m_ptr(ptr) { } // Allow implicit conversion from PyObject* + + /// Return the underlying ``PyObject *`` pointer + PyObject *ptr() const { return m_ptr; } + PyObject *&ptr() { return m_ptr; } + + /** \rst + Manually increase the reference count of the Python object. Usually, it is + preferable to use the `object` class which derives from `handle` and calls + this function automatically. Returns a reference to itself. + \endrst */ + const handle& inc_ref() const & { Py_XINCREF(m_ptr); return *this; } + + /** \rst + Manually decrease the reference count of the Python object. Usually, it is + preferable to use the `object` class which derives from `handle` and calls + this function automatically. Returns a reference to itself. + \endrst */ + const handle& dec_ref() const & { Py_XDECREF(m_ptr); return *this; } + + /** \rst + Attempt to cast the Python object into the given C++ type. A `cast_error` + will be throw upon failure. + \endrst */ + template T cast() const; + /// Return ``true`` when the `handle` wraps a valid Python object + explicit operator bool() const { return m_ptr != nullptr; } + /** \rst + Deprecated: Check that the underlying pointers are the same. + Equivalent to ``obj1 is obj2`` in Python. 
+ \endrst */ + PYBIND11_DEPRECATED("Use obj1.is(obj2) instead") + bool operator==(const handle &h) const { return m_ptr == h.m_ptr; } + PYBIND11_DEPRECATED("Use !obj1.is(obj2) instead") + bool operator!=(const handle &h) const { return m_ptr != h.m_ptr; } + PYBIND11_DEPRECATED("Use handle::operator bool() instead") + bool check() const { return m_ptr != nullptr; } +protected: + PyObject *m_ptr = nullptr; +}; + +/** \rst + Holds a reference to a Python object (with reference counting) + + Like `handle`, the `object` class is a thin wrapper around an arbitrary Python + object (i.e. a ``PyObject *`` in Python's C API). In contrast to `handle`, it + optionally increases the object's reference count upon construction, and it + *always* decreases the reference count when the `object` instance goes out of + scope and is destructed. When using `object` instances consistently, it is much + easier to get reference counting right at the first attempt. +\endrst */ +class object : public handle { +public: + object() = default; + PYBIND11_DEPRECATED("Use reinterpret_borrow() or reinterpret_steal()") + object(handle h, bool is_borrowed) : handle(h) { if (is_borrowed) inc_ref(); } + /// Copy constructor; always increases the reference count + object(const object &o) : handle(o) { inc_ref(); } + /// Move constructor; steals the object from ``other`` and preserves its reference count + object(object &&other) noexcept { m_ptr = other.m_ptr; other.m_ptr = nullptr; } + /// Destructor; automatically calls `handle::dec_ref()` + ~object() { dec_ref(); } + + /** \rst + Resets the internal pointer to ``nullptr`` without decreasing the + object's reference count. The function returns a raw handle to the original + Python object. 
+ \endrst */ + handle release() { + PyObject *tmp = m_ptr; + m_ptr = nullptr; + return handle(tmp); + } + + object& operator=(const object &other) { + other.inc_ref(); + // Use temporary variable to ensure `*this` remains valid while + // `Py_XDECREF` executes, in case `*this` is accessible from Python. + handle temp(m_ptr); + m_ptr = other.m_ptr; + temp.dec_ref(); + return *this; + } + + object& operator=(object &&other) noexcept { + if (this != &other) { + handle temp(m_ptr); + m_ptr = other.m_ptr; + other.m_ptr = nullptr; + temp.dec_ref(); + } + return *this; + } + + // Calling cast() on an object lvalue just copies (via handle::cast) + template T cast() const &; + // Calling on an object rvalue does a move, if needed and/or possible + template T cast() &&; + +protected: + // Tags for choosing constructors from raw PyObject * + struct borrowed_t { }; + struct stolen_t { }; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Issue in breathe 4.26.1 + template friend T reinterpret_borrow(handle); + template friend T reinterpret_steal(handle); +#endif + +public: + // Only accessible from derived classes and the reinterpret_* functions + object(handle h, borrowed_t) : handle(h) { inc_ref(); } + object(handle h, stolen_t) : handle(h) { } +}; + +/** \rst + Declare that a `handle` or ``PyObject *`` is a certain type and borrow the reference. + The target type ``T`` must be `object` or one of its derived classes. The function + doesn't do any conversions or checks. It's up to the user to make sure that the + target type is correct. + + .. code-block:: cpp + + PyObject *p = PyList_GetItem(obj, index); + py::object o = reinterpret_borrow(p); + // or + py::tuple t = reinterpret_borrow(p); // <-- `p` must be already be a `tuple` +\endrst */ +template T reinterpret_borrow(handle h) { return {h, object::borrowed_t{}}; } + +/** \rst + Like `reinterpret_borrow`, but steals the reference. + + .. 
code-block:: cpp + + PyObject *p = PyObject_Str(obj); + py::str s = reinterpret_steal(p); // <-- `p` must be already be a `str` +\endrst */ +template T reinterpret_steal(handle h) { return {h, object::stolen_t{}}; } + +PYBIND11_NAMESPACE_BEGIN(detail) +std::string error_string(); +PYBIND11_NAMESPACE_END(detail) + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4275 4251) // warning C4275: An exported class was derived from a class that wasn't exported. Can be ignored when derived from a STL class. +#endif +/// Fetch and hold an error which was already set in Python. An instance of this is typically +/// thrown to propagate python-side errors back through C++ which can either be caught manually or +/// else falls back to the function dispatcher (which then raises the captured error back to +/// python). +class PYBIND11_EXPORT_EXCEPTION error_already_set : public std::runtime_error { +public: + /// Constructs a new exception from the current Python error indicator, if any. The current + /// Python error indicator will be cleared. + error_already_set() : std::runtime_error(detail::error_string()) { + PyErr_Fetch(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr()); + } + + error_already_set(const error_already_set &) = default; + error_already_set(error_already_set &&) = default; + + inline ~error_already_set() override; + + /// Give the currently-held error back to Python, if any. If there is currently a Python error + /// already set it is cleared first. After this call, the current object no longer stores the + /// error variables (but the `.what()` string is still available). + void restore() { PyErr_Restore(m_type.release().ptr(), m_value.release().ptr(), m_trace.release().ptr()); } + + /// If it is impossible to raise the currently-held error, such as in a destructor, we can write + /// it out using Python's unraisable hook (`sys.unraisablehook`). 
The error context should be + /// some object whose `repr()` helps identify the location of the error. Python already knows the + /// type and value of the error, so there is no need to repeat that. After this call, the current + /// object no longer stores the error variables, and neither does Python. + void discard_as_unraisable(object err_context) { + restore(); + PyErr_WriteUnraisable(err_context.ptr()); + } + /// An alternate version of `discard_as_unraisable()`, where a string provides information on the + /// location of the error. For example, `__func__` could be helpful. + void discard_as_unraisable(const char *err_context) { + discard_as_unraisable(reinterpret_steal(PYBIND11_FROM_STRING(err_context))); + } + + // Does nothing; provided for backwards compatibility. + PYBIND11_DEPRECATED("Use of error_already_set.clear() is deprecated") + void clear() {} + + /// Check if the currently trapped error type matches the given Python exception class (or a + /// subclass thereof). May also be passed a tuple to search for any exception class matches in + /// the given tuple. + bool matches(handle exc) const { + return (PyErr_GivenExceptionMatches(m_type.ptr(), exc.ptr()) != 0); + } + + const object& type() const { return m_type; } + const object& value() const { return m_value; } + const object& trace() const { return m_trace; } + +private: + object m_type, m_value, m_trace; +}; +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#if PY_VERSION_HEX >= 0x03030000 + +/// Replaces the current Python error indicator with the chosen error, performing a +/// 'raise from' to indicate that the chosen error was caused by the original error. +inline void raise_from(PyObject *type, const char *message) { + // Based on _PyErr_FormatVFromCause: + // https://github.com/python/cpython/blob/467ab194fc6189d9f7310c89937c51abeac56839/Python/errors.c#L405 + // See https://github.com/pybind/pybind11/pull/2112 for details. 
+ PyObject *exc = nullptr, *val = nullptr, *val2 = nullptr, *tb = nullptr; + + assert(PyErr_Occurred()); + PyErr_Fetch(&exc, &val, &tb); + PyErr_NormalizeException(&exc, &val, &tb); + if (tb != nullptr) { + PyException_SetTraceback(val, tb); + Py_DECREF(tb); + } + Py_DECREF(exc); + assert(!PyErr_Occurred()); + + PyErr_SetString(type, message); + + PyErr_Fetch(&exc, &val2, &tb); + PyErr_NormalizeException(&exc, &val2, &tb); + Py_INCREF(val); + PyException_SetCause(val2, val); + PyException_SetContext(val2, val); + PyErr_Restore(exc, val2, tb); +} + +/// Sets the current Python error indicator with the chosen error, performing a 'raise from' +/// from the error contained in error_already_set to indicate that the chosen error was +/// caused by the original error. After this function is called error_already_set will +/// no longer contain an error. +inline void raise_from(error_already_set& err, PyObject *type, const char *message) { + err.restore(); + raise_from(type, message); +} + +#endif + +/** \defgroup python_builtins _ + Unless stated otherwise, the following C++ functions behave the same + as their Python counterparts. + */ + +/** \ingroup python_builtins + \rst + Return true if ``obj`` is an instance of ``T``. Type ``T`` must be a subclass of + `object` or a class which was exposed to Python as ``py::class_``. +\endrst */ +template ::value, int> = 0> +bool isinstance(handle obj) { return T::check_(obj); } + +template ::value, int> = 0> +bool isinstance(handle obj) { return detail::isinstance_generic(obj, typeid(T)); } + +template <> inline bool isinstance(handle) = delete; +template <> inline bool isinstance(handle obj) { return obj.ptr() != nullptr; } + +/// \ingroup python_builtins +/// Return true if ``obj`` is an instance of the ``type``. 
+inline bool isinstance(handle obj, handle type) { + const auto result = PyObject_IsInstance(obj.ptr(), type.ptr()); + if (result == -1) + throw error_already_set(); + return result != 0; +} + +/// \addtogroup python_builtins +/// @{ +inline bool hasattr(handle obj, handle name) { + return PyObject_HasAttr(obj.ptr(), name.ptr()) == 1; +} + +inline bool hasattr(handle obj, const char *name) { + return PyObject_HasAttrString(obj.ptr(), name) == 1; +} + +inline void delattr(handle obj, handle name) { + if (PyObject_DelAttr(obj.ptr(), name.ptr()) != 0) { throw error_already_set(); } +} + +inline void delattr(handle obj, const char *name) { + if (PyObject_DelAttrString(obj.ptr(), name) != 0) { throw error_already_set(); } +} + +inline object getattr(handle obj, handle name) { + PyObject *result = PyObject_GetAttr(obj.ptr(), name.ptr()); + if (!result) { throw error_already_set(); } + return reinterpret_steal(result); +} + +inline object getattr(handle obj, const char *name) { + PyObject *result = PyObject_GetAttrString(obj.ptr(), name); + if (!result) { throw error_already_set(); } + return reinterpret_steal(result); +} + +inline object getattr(handle obj, handle name, handle default_) { + if (PyObject *result = PyObject_GetAttr(obj.ptr(), name.ptr())) { + return reinterpret_steal(result); + } + PyErr_Clear(); + return reinterpret_borrow(default_); +} + +inline object getattr(handle obj, const char *name, handle default_) { + if (PyObject *result = PyObject_GetAttrString(obj.ptr(), name)) { + return reinterpret_steal(result); + } + PyErr_Clear(); + return reinterpret_borrow(default_); +} + +inline void setattr(handle obj, handle name, handle value) { + if (PyObject_SetAttr(obj.ptr(), name.ptr(), value.ptr()) != 0) { throw error_already_set(); } +} + +inline void setattr(handle obj, const char *name, handle value) { + if (PyObject_SetAttrString(obj.ptr(), name, value.ptr()) != 0) { throw error_already_set(); } +} + +inline ssize_t hash(handle obj) { + auto h = 
PyObject_Hash(obj.ptr()); + if (h == -1) { throw error_already_set(); } + return h; +} + +/// @} python_builtins + +PYBIND11_NAMESPACE_BEGIN(detail) +inline handle get_function(handle value) { + if (value) { +#if PY_MAJOR_VERSION >= 3 + if (PyInstanceMethod_Check(value.ptr())) + value = PyInstanceMethod_GET_FUNCTION(value.ptr()); + else +#endif + if (PyMethod_Check(value.ptr())) + value = PyMethod_GET_FUNCTION(value.ptr()); + } + return value; +} + +// Reimplementation of python's dict helper functions to ensure that exceptions +// aren't swallowed (see #2862) + +// copied from cpython _PyDict_GetItemStringWithError +inline PyObject * dict_getitemstring(PyObject *v, const char *key) +{ +#if PY_MAJOR_VERSION >= 3 + PyObject *kv = nullptr, *rv = nullptr; + kv = PyUnicode_FromString(key); + if (kv == NULL) { + throw error_already_set(); + } + + rv = PyDict_GetItemWithError(v, kv); + Py_DECREF(kv); + if (rv == NULL && PyErr_Occurred()) { + throw error_already_set(); + } + return rv; +#else + return PyDict_GetItemString(v, key); +#endif +} + +inline PyObject * dict_getitem(PyObject *v, PyObject *key) +{ +#if PY_MAJOR_VERSION >= 3 + PyObject *rv = PyDict_GetItemWithError(v, key); + if (rv == NULL && PyErr_Occurred()) { + throw error_already_set(); + } + return rv; +#else + return PyDict_GetItem(v, key); +#endif +} + +// Helper aliases/functions to support implicit casting of values given to python accessors/methods. +// When given a pyobject, this simply returns the pyobject as-is; for other C++ type, the value goes +// through pybind11::cast(obj) to convert it to an `object`. 
+template ::value, int> = 0> +auto object_or_cast(T &&o) -> decltype(std::forward(o)) { return std::forward(o); } +// The following casting version is implemented in cast.h: +template ::value, int> = 0> +object object_or_cast(T &&o); +// Match a PyObject*, which we want to convert directly to handle via its converting constructor +inline handle object_or_cast(PyObject *ptr) { return ptr; } + +#if defined(_MSC_VER) && _MSC_VER < 1920 +# pragma warning(push) +# pragma warning(disable: 4522) // warning C4522: multiple assignment operators specified +#endif +template +class accessor : public object_api> { + using key_type = typename Policy::key_type; + +public: + accessor(handle obj, key_type key) : obj(obj), key(std::move(key)) { } + accessor(const accessor &) = default; + accessor(accessor &&) noexcept = default; + + // accessor overload required to override default assignment operator (templates are not allowed + // to replace default compiler-generated assignments). + void operator=(const accessor &a) && { std::move(*this).operator=(handle(a)); } + void operator=(const accessor &a) & { operator=(handle(a)); } + + template void operator=(T &&value) && { + Policy::set(obj, key, object_or_cast(std::forward(value))); + } + template void operator=(T &&value) & { + get_cache() = reinterpret_borrow(object_or_cast(std::forward(value))); + } + + template + PYBIND11_DEPRECATED("Use of obj.attr(...) 
as bool is deprecated in favor of pybind11::hasattr(obj, ...)") + explicit operator enable_if_t::value || + std::is_same::value, bool>() const { + return hasattr(obj, key); + } + template + PYBIND11_DEPRECATED("Use of obj[key] as bool is deprecated in favor of obj.contains(key)") + explicit operator enable_if_t::value, bool>() const { + return obj.contains(key); + } + + // NOLINTNEXTLINE(google-explicit-constructor) + operator object() const { return get_cache(); } + PyObject *ptr() const { return get_cache().ptr(); } + template T cast() const { return get_cache().template cast(); } + +private: + object &get_cache() const { + if (!cache) { cache = Policy::get(obj, key); } + return cache; + } + +private: + handle obj; + key_type key; + mutable object cache; +}; +#if defined(_MSC_VER) && _MSC_VER < 1920 +# pragma warning(pop) +#endif + +PYBIND11_NAMESPACE_BEGIN(accessor_policies) +struct obj_attr { + using key_type = object; + static object get(handle obj, handle key) { return getattr(obj, key); } + static void set(handle obj, handle key, handle val) { setattr(obj, key, val); } +}; + +struct str_attr { + using key_type = const char *; + static object get(handle obj, const char *key) { return getattr(obj, key); } + static void set(handle obj, const char *key, handle val) { setattr(obj, key, val); } +}; + +struct generic_item { + using key_type = object; + + static object get(handle obj, handle key) { + PyObject *result = PyObject_GetItem(obj.ptr(), key.ptr()); + if (!result) { throw error_already_set(); } + return reinterpret_steal(result); + } + + static void set(handle obj, handle key, handle val) { + if (PyObject_SetItem(obj.ptr(), key.ptr(), val.ptr()) != 0) { throw error_already_set(); } + } +}; + +struct sequence_item { + using key_type = size_t; + + template ::value, int> = 0> + static object get(handle obj, const IdxType &index) { + PyObject *result = PySequence_GetItem(obj.ptr(), ssize_t_cast(index)); + if (!result) { throw error_already_set(); } + return 
reinterpret_steal(result); + } + + template ::value, int> = 0> + static void set(handle obj, const IdxType &index, handle val) { + // PySequence_SetItem does not steal a reference to 'val' + if (PySequence_SetItem(obj.ptr(), ssize_t_cast(index), val.ptr()) != 0) { + throw error_already_set(); + } + } +}; + +struct list_item { + using key_type = size_t; + + template ::value, int> = 0> + static object get(handle obj, const IdxType &index) { + PyObject *result = PyList_GetItem(obj.ptr(), ssize_t_cast(index)); + if (!result) { throw error_already_set(); } + return reinterpret_borrow(result); + } + + template ::value, int> = 0> + static void set(handle obj, const IdxType &index, handle val) { + // PyList_SetItem steals a reference to 'val' + if (PyList_SetItem(obj.ptr(), ssize_t_cast(index), val.inc_ref().ptr()) != 0) { + throw error_already_set(); + } + } +}; + +struct tuple_item { + using key_type = size_t; + + template ::value, int> = 0> + static object get(handle obj, const IdxType &index) { + PyObject *result = PyTuple_GetItem(obj.ptr(), ssize_t_cast(index)); + if (!result) { throw error_already_set(); } + return reinterpret_borrow(result); + } + + template ::value, int> = 0> + static void set(handle obj, const IdxType &index, handle val) { + // PyTuple_SetItem steals a reference to 'val' + if (PyTuple_SetItem(obj.ptr(), ssize_t_cast(index), val.inc_ref().ptr()) != 0) { + throw error_already_set(); + } + } +}; +PYBIND11_NAMESPACE_END(accessor_policies) + +/// STL iterator template used for tuple, list, sequence and dict +template +class generic_iterator : public Policy { + using It = generic_iterator; + +public: + using difference_type = ssize_t; + using iterator_category = typename Policy::iterator_category; + using value_type = typename Policy::value_type; + using reference = typename Policy::reference; + using pointer = typename Policy::pointer; + + generic_iterator() = default; + generic_iterator(handle seq, ssize_t index) : Policy(seq, index) { } + + // 
NOLINTNEXTLINE(readability-const-return-type) // PR #3263 + reference operator*() const { return Policy::dereference(); } + // NOLINTNEXTLINE(readability-const-return-type) // PR #3263 + reference operator[](difference_type n) const { return *(*this + n); } + pointer operator->() const { return **this; } + + It &operator++() { Policy::increment(); return *this; } + It operator++(int) { auto copy = *this; Policy::increment(); return copy; } + It &operator--() { Policy::decrement(); return *this; } + It operator--(int) { auto copy = *this; Policy::decrement(); return copy; } + It &operator+=(difference_type n) { Policy::advance(n); return *this; } + It &operator-=(difference_type n) { Policy::advance(-n); return *this; } + + friend It operator+(const It &a, difference_type n) { auto copy = a; return copy += n; } + friend It operator+(difference_type n, const It &b) { return b + n; } + friend It operator-(const It &a, difference_type n) { auto copy = a; return copy -= n; } + friend difference_type operator-(const It &a, const It &b) { return a.distance_to(b); } + + friend bool operator==(const It &a, const It &b) { return a.equal(b); } + friend bool operator!=(const It &a, const It &b) { return !(a == b); } + friend bool operator< (const It &a, const It &b) { return b - a > 0; } + friend bool operator> (const It &a, const It &b) { return b < a; } + friend bool operator>=(const It &a, const It &b) { return !(a < b); } + friend bool operator<=(const It &a, const It &b) { return !(a > b); } +}; + +PYBIND11_NAMESPACE_BEGIN(iterator_policies) +/// Quick proxy class needed to implement ``operator->`` for iterators which can't return pointers +template +struct arrow_proxy { + T value; + + // NOLINTNEXTLINE(google-explicit-constructor) + arrow_proxy(T &&value) noexcept : value(std::move(value)) { } + T *operator->() const { return &value; } +}; + +/// Lightweight iterator policy using just a simple pointer: see ``PySequence_Fast_ITEMS`` +class sequence_fast_readonly { 
+protected: + using iterator_category = std::random_access_iterator_tag; + using value_type = handle; + using reference = const handle; // PR #3263 + using pointer = arrow_proxy; + + sequence_fast_readonly(handle obj, ssize_t n) : ptr(PySequence_Fast_ITEMS(obj.ptr()) + n) { } + + // NOLINTNEXTLINE(readability-const-return-type) // PR #3263 + reference dereference() const { return *ptr; } + void increment() { ++ptr; } + void decrement() { --ptr; } + void advance(ssize_t n) { ptr += n; } + bool equal(const sequence_fast_readonly &b) const { return ptr == b.ptr; } + ssize_t distance_to(const sequence_fast_readonly &b) const { return ptr - b.ptr; } + +private: + PyObject **ptr; +}; + +/// Full read and write access using the sequence protocol: see ``detail::sequence_accessor`` +class sequence_slow_readwrite { +protected: + using iterator_category = std::random_access_iterator_tag; + using value_type = object; + using reference = sequence_accessor; + using pointer = arrow_proxy; + + sequence_slow_readwrite(handle obj, ssize_t index) : obj(obj), index(index) { } + + reference dereference() const { return {obj, static_cast(index)}; } + void increment() { ++index; } + void decrement() { --index; } + void advance(ssize_t n) { index += n; } + bool equal(const sequence_slow_readwrite &b) const { return index == b.index; } + ssize_t distance_to(const sequence_slow_readwrite &b) const { return index - b.index; } + +private: + handle obj; + ssize_t index; +}; + +/// Python's dictionary protocol permits this to be a forward iterator +class dict_readonly { +protected: + using iterator_category = std::forward_iterator_tag; + using value_type = std::pair; + using reference = const value_type; // PR #3263 + using pointer = arrow_proxy; + + dict_readonly() = default; + dict_readonly(handle obj, ssize_t pos) : obj(obj), pos(pos) { increment(); } + + // NOLINTNEXTLINE(readability-const-return-type) // PR #3263 + reference dereference() const { return {key, value}; } + void increment() { 
+ if (PyDict_Next(obj.ptr(), &pos, &key, &value) == 0) { + pos = -1; + } + } + bool equal(const dict_readonly &b) const { return pos == b.pos; } + +private: + handle obj; + PyObject *key = nullptr, *value = nullptr; + ssize_t pos = -1; +}; +PYBIND11_NAMESPACE_END(iterator_policies) + +#if !defined(PYPY_VERSION) +using tuple_iterator = generic_iterator; +using list_iterator = generic_iterator; +#else +using tuple_iterator = generic_iterator; +using list_iterator = generic_iterator; +#endif + +using sequence_iterator = generic_iterator; +using dict_iterator = generic_iterator; + +inline bool PyIterable_Check(PyObject *obj) { + PyObject *iter = PyObject_GetIter(obj); + if (iter) { + Py_DECREF(iter); + return true; + } + PyErr_Clear(); + return false; +} + +inline bool PyNone_Check(PyObject *o) { return o == Py_None; } +inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; } + +#ifdef PYBIND11_STR_LEGACY_PERMISSIVE +inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); } +#define PYBIND11_STR_CHECK_FUN detail::PyUnicode_Check_Permissive +#else +#define PYBIND11_STR_CHECK_FUN PyUnicode_Check +#endif + +inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; } + +class kwargs_proxy : public handle { +public: + explicit kwargs_proxy(handle h) : handle(h) { } +}; + +class args_proxy : public handle { +public: + explicit args_proxy(handle h) : handle(h) { } + kwargs_proxy operator*() const { return kwargs_proxy(*this); } +}; + +/// Python argument categories (using PEP 448 terms) +template using is_keyword = std::is_base_of; +template using is_s_unpacking = std::is_same; // * unpacking +template using is_ds_unpacking = std::is_same; // ** unpacking +template using is_positional = satisfies_none_of; +template using is_keyword_or_ds = satisfies_any_of; + +// Call argument collector forward declarations +template +class simple_collector; +template +class unpacking_collector; 
+ +PYBIND11_NAMESPACE_END(detail) + +// TODO: After the deprecated constructors are removed, this macro can be simplified by +// inheriting ctors: `using Parent::Parent`. It's not an option right now because +// the `using` statement triggers the parent deprecation warning even if the ctor +// isn't even used. +#define PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ + public: \ + PYBIND11_DEPRECATED("Use reinterpret_borrow<"#Name">() or reinterpret_steal<"#Name">()") \ + Name(handle h, bool is_borrowed) : Parent(is_borrowed ? Parent(h, borrowed_t{}) : Parent(h, stolen_t{})) { } \ + Name(handle h, borrowed_t) : Parent(h, borrowed_t{}) { } \ + Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \ + PYBIND11_DEPRECATED("Use py::isinstance(obj) instead") \ + bool check() const { return m_ptr != nullptr && (CheckFun(m_ptr) != 0); } \ + static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \ + template \ + /* NOLINTNEXTLINE(google-explicit-constructor) */ \ + Name(const ::pybind11::detail::accessor &a) : Name(object(a)) { } + +#define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \ + PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ + /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \ + /* NOLINTNEXTLINE(google-explicit-constructor) */ \ + Name(const object &o) \ + : Parent(check_(o) ? o.inc_ref().ptr() : ConvertFun(o.ptr()), stolen_t{}) \ + { if (!m_ptr) throw error_already_set(); } \ + /* NOLINTNEXTLINE(google-explicit-constructor) */ \ + Name(object &&o) \ + : Parent(check_(o) ? 
o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \ + { if (!m_ptr) throw error_already_set(); } + +#define PYBIND11_OBJECT_CVT_DEFAULT(Name, Parent, CheckFun, ConvertFun) \ + PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \ + Name() : Parent() { } + +#define PYBIND11_OBJECT_CHECK_FAILED(Name, o_ptr) \ + ::pybind11::type_error("Object of type '" + \ + ::pybind11::detail::get_fully_qualified_tp_name(Py_TYPE(o_ptr)) + \ + "' is not an instance of '" #Name "'") + +#define PYBIND11_OBJECT(Name, Parent, CheckFun) \ + PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ + /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \ + /* NOLINTNEXTLINE(google-explicit-constructor) */ \ + Name(const object &o) : Parent(o) \ + { if (m_ptr && !check_(m_ptr)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr); } \ + /* NOLINTNEXTLINE(google-explicit-constructor) */ \ + Name(object &&o) : Parent(std::move(o)) \ + { if (m_ptr && !check_(m_ptr)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr); } + +#define PYBIND11_OBJECT_DEFAULT(Name, Parent, CheckFun) \ + PYBIND11_OBJECT(Name, Parent, CheckFun) \ + Name() : Parent() { } + +/// \addtogroup pytypes +/// @{ + +/** \rst + Wraps a Python iterator so that it can also be used as a C++ input iterator + + Caveat: copying an iterator does not (and cannot) clone the internal + state of the Python iterable. This also applies to the post-increment + operator. This iterator should only be used to retrieve the current + value using ``operator*()``. 
+\endrst */ +class iterator : public object { +public: + using iterator_category = std::input_iterator_tag; + using difference_type = ssize_t; + using value_type = handle; + using reference = const handle; // PR #3263 + using pointer = const handle *; + + PYBIND11_OBJECT_DEFAULT(iterator, object, PyIter_Check) + + iterator& operator++() { + advance(); + return *this; + } + + iterator operator++(int) { + auto rv = *this; + advance(); + return rv; + } + + // NOLINTNEXTLINE(readability-const-return-type) // PR #3263 + reference operator*() const { + if (m_ptr && !value.ptr()) { + auto& self = const_cast(*this); + self.advance(); + } + return value; + } + + pointer operator->() const { operator*(); return &value; } + + /** \rst + The value which marks the end of the iteration. ``it == iterator::sentinel()`` + is equivalent to catching ``StopIteration`` in Python. + + .. code-block:: cpp + + void foo(py::iterator it) { + while (it != py::iterator::sentinel()) { + // use `*it` + ++it; + } + } + \endrst */ + static iterator sentinel() { return {}; } + + friend bool operator==(const iterator &a, const iterator &b) { return a->ptr() == b->ptr(); } + friend bool operator!=(const iterator &a, const iterator &b) { return a->ptr() != b->ptr(); } + +private: + void advance() { + value = reinterpret_steal(PyIter_Next(m_ptr)); + if (PyErr_Occurred()) { throw error_already_set(); } + } + +private: + object value = {}; +}; + + + +class type : public object { +public: + PYBIND11_OBJECT(type, object, PyType_Check) + + /// Return a type handle from a handle or an object + static handle handle_of(handle h) { return handle((PyObject*) Py_TYPE(h.ptr())); } + + /// Return a type object from a handle or an object + static type of(handle h) { return type(type::handle_of(h), borrowed_t{}); } + + // Defined in pybind11/cast.h + /// Convert C++ type to handle if previously registered. Does not convert + /// standard types, like int, float. etc. yet. 
+ /// See https://github.com/pybind/pybind11/issues/2486 + template + static handle handle_of(); + + /// Convert C++ type to type if previously registered. Does not convert + /// standard types, like int, float. etc. yet. + /// See https://github.com/pybind/pybind11/issues/2486 + template + static type of() {return type(type::handle_of(), borrowed_t{}); } +}; + +class iterable : public object { +public: + PYBIND11_OBJECT_DEFAULT(iterable, object, detail::PyIterable_Check) +}; + +class bytes; + +class str : public object { +public: + PYBIND11_OBJECT_CVT(str, object, PYBIND11_STR_CHECK_FUN, raw_str) + + template ::value, int> = 0> + str(const char *c, const SzType &n) + : object(PyUnicode_FromStringAndSize(c, ssize_t_cast(n)), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate string object!"); + } + + // 'explicit' is explicitly omitted from the following constructors to allow implicit conversion to py::str from C++ string-like objects + // NOLINTNEXTLINE(google-explicit-constructor) + str(const char *c = "") + : object(PyUnicode_FromString(c), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate string object!"); + } + + // NOLINTNEXTLINE(google-explicit-constructor) + str(const std::string &s) : str(s.data(), s.size()) { } + + explicit str(const bytes &b); + + /** \rst + Return a string representation of the object. This is analogous to + the ``str()`` function in Python. + \endrst */ + explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { if (!m_ptr) throw error_already_set(); } + + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::string() const { + object temp = *this; + if (PyUnicode_Check(m_ptr)) { + temp = reinterpret_steal(PyUnicode_AsUTF8String(m_ptr)); + if (!temp) + throw error_already_set(); + } + char *buffer = nullptr; + ssize_t length = 0; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(temp.ptr(), &buffer, &length)) + pybind11_fail("Unable to extract string contents! 
(invalid type)"); + return std::string(buffer, (size_t) length); + } + + template + str format(Args &&...args) const { + return attr("format")(std::forward(args)...); + } + +private: + /// Return string representation -- always returns a new reference, even if already a str + static PyObject *raw_str(PyObject *op) { + PyObject *str_value = PyObject_Str(op); +#if PY_MAJOR_VERSION < 3 + if (!str_value) throw error_already_set(); + PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr); + Py_XDECREF(str_value); str_value = unicode; +#endif + return str_value; + } +}; +/// @} pytypes + +inline namespace literals { +/** \rst + String literal version of `str` + \endrst */ +inline str operator"" _s(const char *s, size_t size) { return {s, size}; } +} // namespace literals + +/// \addtogroup pytypes +/// @{ +class bytes : public object { +public: + PYBIND11_OBJECT(bytes, object, PYBIND11_BYTES_CHECK) + + // Allow implicit conversion: + // NOLINTNEXTLINE(google-explicit-constructor) + bytes(const char *c = "") + : object(PYBIND11_BYTES_FROM_STRING(c), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate bytes object!"); + } + + template ::value, int> = 0> + bytes(const char *c, const SzType &n) + : object(PYBIND11_BYTES_FROM_STRING_AND_SIZE(c, ssize_t_cast(n)), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate bytes object!"); + } + + // Allow implicit conversion: + // NOLINTNEXTLINE(google-explicit-constructor) + bytes(const std::string &s) : bytes(s.data(), s.size()) { } + + explicit bytes(const pybind11::str &s); + + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::string() const { + char *buffer = nullptr; + ssize_t length = 0; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length)) + pybind11_fail("Unable to extract bytes contents!"); + return std::string(buffer, (size_t) length); + } +}; +// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors +// are included in the doxygen 
group; close here and reopen after as a workaround +/// @} pytypes + +inline bytes::bytes(const pybind11::str &s) { + object temp = s; + if (PyUnicode_Check(s.ptr())) { + temp = reinterpret_steal(PyUnicode_AsUTF8String(s.ptr())); + if (!temp) + pybind11_fail("Unable to extract string contents! (encoding issue)"); + } + char *buffer = nullptr; + ssize_t length = 0; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(temp.ptr(), &buffer, &length)) + pybind11_fail("Unable to extract string contents! (invalid type)"); + auto obj = reinterpret_steal(PYBIND11_BYTES_FROM_STRING_AND_SIZE(buffer, length)); + if (!obj) + pybind11_fail("Could not allocate bytes object!"); + m_ptr = obj.release().ptr(); +} + +inline str::str(const bytes& b) { + char *buffer = nullptr; + ssize_t length = 0; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(b.ptr(), &buffer, &length)) + pybind11_fail("Unable to extract bytes contents!"); + auto obj = reinterpret_steal(PyUnicode_FromStringAndSize(buffer, length)); + if (!obj) + pybind11_fail("Could not allocate string object!"); + m_ptr = obj.release().ptr(); +} + +/// \addtogroup pytypes +/// @{ +class bytearray : public object { +public: + PYBIND11_OBJECT_CVT(bytearray, object, PyByteArray_Check, PyByteArray_FromObject) + + template ::value, int> = 0> + bytearray(const char *c, const SzType &n) + : object(PyByteArray_FromStringAndSize(c, ssize_t_cast(n)), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate bytearray object!"); + } + + bytearray() + : bytearray("", 0) {} + + explicit bytearray(const std::string &s) : bytearray(s.data(), s.size()) { } + + size_t size() const { return static_cast(PyByteArray_Size(m_ptr)); } + + explicit operator std::string() const { + char *buffer = PyByteArray_AS_STRING(m_ptr); + ssize_t size = PyByteArray_GET_SIZE(m_ptr); + return std::string(buffer, static_cast(size)); + } +}; +// Note: breathe >= 4.17.0 will fail to build docs if the below two constructors +// are included in the doxygen group; close here and reopen 
after as a workaround +/// @} pytypes + +/// \addtogroup pytypes +/// @{ +class none : public object { +public: + PYBIND11_OBJECT(none, object, detail::PyNone_Check) + none() : object(Py_None, borrowed_t{}) { } +}; + +class ellipsis : public object { +public: + PYBIND11_OBJECT(ellipsis, object, detail::PyEllipsis_Check) + ellipsis() : object(Py_Ellipsis, borrowed_t{}) { } +}; + +class bool_ : public object { +public: + PYBIND11_OBJECT_CVT(bool_, object, PyBool_Check, raw_bool) + bool_() : object(Py_False, borrowed_t{}) { } + // Allow implicit conversion from and to `bool`: + // NOLINTNEXTLINE(google-explicit-constructor) + bool_(bool value) : object(value ? Py_True : Py_False, borrowed_t{}) { } + // NOLINTNEXTLINE(google-explicit-constructor) + operator bool() const { return (m_ptr != nullptr) && PyLong_AsLong(m_ptr) != 0; } + +private: + /// Return the truth value of an object -- always returns a new reference + static PyObject *raw_bool(PyObject *op) { + const auto value = PyObject_IsTrue(op); + if (value == -1) return nullptr; + return handle(value != 0 ? Py_True : Py_False).inc_ref().ptr(); + } +}; + +PYBIND11_NAMESPACE_BEGIN(detail) +// Converts a value to the given unsigned type. If an error occurs, you get back (Unsigned) -1; +// otherwise you get back the unsigned long or unsigned long long value cast to (Unsigned). +// (The distinction is critically important when casting a returned -1 error value to some other +// unsigned type: (A)-1 != (B)-1 when A and B are unsigned types of different sizes). +template +Unsigned as_unsigned(PyObject *o) { + if (PYBIND11_SILENCE_MSVC_C4127(sizeof(Unsigned) <= sizeof(unsigned long)) +#if PY_VERSION_HEX < 0x03000000 + || PyInt_Check(o) +#endif + ) { + unsigned long v = PyLong_AsUnsignedLong(o); + return v == (unsigned long) -1 && PyErr_Occurred() ? (Unsigned) -1 : (Unsigned) v; + } + unsigned long long v = PyLong_AsUnsignedLongLong(o); + return v == (unsigned long long) -1 && PyErr_Occurred() ? 
(Unsigned) -1 : (Unsigned) v; +} +PYBIND11_NAMESPACE_END(detail) + +class int_ : public object { +public: + PYBIND11_OBJECT_CVT(int_, object, PYBIND11_LONG_CHECK, PyNumber_Long) + int_() : object(PyLong_FromLong(0), stolen_t{}) { } + // Allow implicit conversion from C++ integral types: + template ::value, int> = 0> + // NOLINTNEXTLINE(google-explicit-constructor) + int_(T value) { + if (PYBIND11_SILENCE_MSVC_C4127(sizeof(T) <= sizeof(long))) { + if (std::is_signed::value) + m_ptr = PyLong_FromLong((long) value); + else + m_ptr = PyLong_FromUnsignedLong((unsigned long) value); + } else { + if (std::is_signed::value) + m_ptr = PyLong_FromLongLong((long long) value); + else + m_ptr = PyLong_FromUnsignedLongLong((unsigned long long) value); + } + if (!m_ptr) pybind11_fail("Could not allocate int object!"); + } + + template ::value, int> = 0> + // NOLINTNEXTLINE(google-explicit-constructor) + operator T() const { + return std::is_unsigned::value + ? detail::as_unsigned(m_ptr) + : sizeof(T) <= sizeof(long) + ? 
(T) PyLong_AsLong(m_ptr) + : (T) PYBIND11_LONG_AS_LONGLONG(m_ptr); + } +}; + +class float_ : public object { +public: + PYBIND11_OBJECT_CVT(float_, object, PyFloat_Check, PyNumber_Float) + // Allow implicit conversion from float/double: + // NOLINTNEXTLINE(google-explicit-constructor) + float_(float value) : object(PyFloat_FromDouble((double) value), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate float object!"); + } + // NOLINTNEXTLINE(google-explicit-constructor) + float_(double value = .0) : object(PyFloat_FromDouble((double) value), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate float object!"); + } + // NOLINTNEXTLINE(google-explicit-constructor) + operator float() const { return (float) PyFloat_AsDouble(m_ptr); } + // NOLINTNEXTLINE(google-explicit-constructor) + operator double() const { return (double) PyFloat_AsDouble(m_ptr); } +}; + +class weakref : public object { +public: + PYBIND11_OBJECT_CVT_DEFAULT(weakref, object, PyWeakref_Check, raw_weakref) + explicit weakref(handle obj, handle callback = {}) + : object(PyWeakref_NewRef(obj.ptr(), callback.ptr()), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate weak reference!"); + } + +private: + static PyObject *raw_weakref(PyObject *o) { + return PyWeakref_NewRef(o, nullptr); + } +}; + +class slice : public object { +public: + PYBIND11_OBJECT_DEFAULT(slice, object, PySlice_Check) + slice(handle start, handle stop, handle step) { + m_ptr = PySlice_New(start.ptr(), stop.ptr(), step.ptr()); + if (!m_ptr) + pybind11_fail("Could not allocate slice object!"); + } + +#ifdef PYBIND11_HAS_OPTIONAL + slice(std::optional start, std::optional stop, std::optional step) + : slice(index_to_object(start), index_to_object(stop), index_to_object(step)) {} +#else + slice(ssize_t start_, ssize_t stop_, ssize_t step_) + : slice(int_(start_), int_(stop_), int_(step_)) {} +#endif + + bool compute(size_t length, size_t *start, size_t *stop, size_t *step, + size_t *slicelength) const { + 
return PySlice_GetIndicesEx((PYBIND11_SLICE_OBJECT *) m_ptr, + (ssize_t) length, (ssize_t *) start, + (ssize_t *) stop, (ssize_t *) step, + (ssize_t *) slicelength) == 0; + } + bool compute(ssize_t length, ssize_t *start, ssize_t *stop, ssize_t *step, + ssize_t *slicelength) const { + return PySlice_GetIndicesEx((PYBIND11_SLICE_OBJECT *) m_ptr, + length, start, + stop, step, + slicelength) == 0; + } + +private: + template + static object index_to_object(T index) { + return index ? object(int_(*index)) : object(none()); + } +}; + +class capsule : public object { +public: + PYBIND11_OBJECT_DEFAULT(capsule, object, PyCapsule_CheckExact) + PYBIND11_DEPRECATED("Use reinterpret_borrow() or reinterpret_steal()") + capsule(PyObject *ptr, bool is_borrowed) : object(is_borrowed ? object(ptr, borrowed_t{}) : object(ptr, stolen_t{})) { } + + explicit capsule(const void *value, const char *name = nullptr, void (*destructor)(PyObject *) = nullptr) + : object(PyCapsule_New(const_cast(value), name, destructor), stolen_t{}) { + if (!m_ptr) + pybind11_fail("Could not allocate capsule object!"); + } + + PYBIND11_DEPRECATED("Please pass a destructor that takes a void pointer as input") + capsule(const void *value, void (*destruct)(PyObject *)) + : object(PyCapsule_New(const_cast(value), nullptr, destruct), stolen_t{}) { + if (!m_ptr) + pybind11_fail("Could not allocate capsule object!"); + } + + capsule(const void *value, void (*destructor)(void *)) { + m_ptr = PyCapsule_New(const_cast(value), nullptr, [](PyObject *o) { + auto destructor = reinterpret_cast(PyCapsule_GetContext(o)); + void *ptr = PyCapsule_GetPointer(o, nullptr); + destructor(ptr); + }); + + if (!m_ptr) + pybind11_fail("Could not allocate capsule object!"); + + if (PyCapsule_SetContext(m_ptr, (void *) destructor) != 0) + pybind11_fail("Could not set capsule context!"); + } + + explicit capsule(void (*destructor)()) { + m_ptr = PyCapsule_New(reinterpret_cast(destructor), nullptr, [](PyObject *o) { + auto destructor = 
reinterpret_cast(PyCapsule_GetPointer(o, nullptr)); + destructor(); + }); + + if (!m_ptr) + pybind11_fail("Could not allocate capsule object!"); + } + + // NOLINTNEXTLINE(google-explicit-constructor) + template operator T *() const { + return get_pointer(); + } + + /// Get the pointer the capsule holds. + template + T* get_pointer() const { + auto name = this->name(); + T *result = static_cast(PyCapsule_GetPointer(m_ptr, name)); + if (!result) { + PyErr_Clear(); + pybind11_fail("Unable to extract capsule contents!"); + } + return result; + } + + /// Replaces a capsule's pointer *without* calling the destructor on the existing one. + void set_pointer(const void *value) { + if (PyCapsule_SetPointer(m_ptr, const_cast(value)) != 0) { + PyErr_Clear(); + pybind11_fail("Could not set capsule pointer"); + } + } + + const char *name() const { return PyCapsule_GetName(m_ptr); } +}; + +class tuple : public object { +public: + PYBIND11_OBJECT_CVT(tuple, object, PyTuple_Check, PySequence_Tuple) + template ::value, int> = 0> + // Some compilers generate link errors when using `const SzType &` here: + explicit tuple(SzType size = 0) : object(PyTuple_New(ssize_t_cast(size)), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate tuple object!"); + } + size_t size() const { return (size_t) PyTuple_Size(m_ptr); } + bool empty() const { return size() == 0; } + detail::tuple_accessor operator[](size_t index) const { return {*this, index}; } + detail::item_accessor operator[](handle h) const { return object::operator[](h); } + detail::tuple_iterator begin() const { return {*this, 0}; } + detail::tuple_iterator end() const { return {*this, PyTuple_GET_SIZE(m_ptr)}; } +}; + +// We need to put this into a separate function because the Intel compiler +// fails to compile enable_if_t...>::value> part below +// (tested with ICC 2021.1 Beta 20200827). 
+template +constexpr bool args_are_all_keyword_or_ds() +{ + return detail::all_of...>::value; +} + +class dict : public object { +public: + PYBIND11_OBJECT_CVT(dict, object, PyDict_Check, raw_dict) + dict() : object(PyDict_New(), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate dict object!"); + } + template ()>, + // MSVC workaround: it can't compile an out-of-line definition, so defer the collector + typename collector = detail::deferred_t, Args...>> + explicit dict(Args &&...args) : dict(collector(std::forward(args)...).kwargs()) { } + + size_t size() const { return (size_t) PyDict_Size(m_ptr); } + bool empty() const { return size() == 0; } + detail::dict_iterator begin() const { return {*this, 0}; } + detail::dict_iterator end() const { return {}; } + void clear() /* py-non-const */ { PyDict_Clear(ptr()); } + template bool contains(T &&key) const { + return PyDict_Contains(m_ptr, detail::object_or_cast(std::forward(key)).ptr()) == 1; + } + +private: + /// Call the `dict` Python type -- always returns a new reference + static PyObject *raw_dict(PyObject *op) { + if (PyDict_Check(op)) + return handle(op).inc_ref().ptr(); + return PyObject_CallFunctionObjArgs((PyObject *) &PyDict_Type, op, nullptr); + } +}; + +class sequence : public object { +public: + PYBIND11_OBJECT_DEFAULT(sequence, object, PySequence_Check) + size_t size() const { + ssize_t result = PySequence_Size(m_ptr); + if (result == -1) + throw error_already_set(); + return (size_t) result; + } + bool empty() const { return size() == 0; } + detail::sequence_accessor operator[](size_t index) const { return {*this, index}; } + detail::item_accessor operator[](handle h) const { return object::operator[](h); } + detail::sequence_iterator begin() const { return {*this, 0}; } + detail::sequence_iterator end() const { return {*this, PySequence_Size(m_ptr)}; } +}; + +class list : public object { +public: + PYBIND11_OBJECT_CVT(list, object, PyList_Check, PySequence_List) + template ::value, int> = 
0> + // Some compilers generate link errors when using `const SzType &` here: + explicit list(SzType size = 0) : object(PyList_New(ssize_t_cast(size)), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate list object!"); + } + size_t size() const { return (size_t) PyList_Size(m_ptr); } + bool empty() const { return size() == 0; } + detail::list_accessor operator[](size_t index) const { return {*this, index}; } + detail::item_accessor operator[](handle h) const { return object::operator[](h); } + detail::list_iterator begin() const { return {*this, 0}; } + detail::list_iterator end() const { return {*this, PyList_GET_SIZE(m_ptr)}; } + template void append(T &&val) /* py-non-const */ { + PyList_Append(m_ptr, detail::object_or_cast(std::forward(val)).ptr()); + } + template ::value, int> = 0> + void insert(const IdxType &index, ValType &&val) /* py-non-const */ { + PyList_Insert( + m_ptr, ssize_t_cast(index), detail::object_or_cast(std::forward(val)).ptr()); + } +}; + +class args : public tuple { PYBIND11_OBJECT_DEFAULT(args, tuple, PyTuple_Check) }; +class kwargs : public dict { PYBIND11_OBJECT_DEFAULT(kwargs, dict, PyDict_Check) }; + +class set : public object { +public: + PYBIND11_OBJECT_CVT(set, object, PySet_Check, PySet_New) + set() : object(PySet_New(nullptr), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate set object!"); + } + size_t size() const { return (size_t) PySet_Size(m_ptr); } + bool empty() const { return size() == 0; } + template bool add(T &&val) /* py-non-const */ { + return PySet_Add(m_ptr, detail::object_or_cast(std::forward(val)).ptr()) == 0; + } + void clear() /* py-non-const */ { PySet_Clear(m_ptr); } + template bool contains(T &&val) const { + return PySet_Contains(m_ptr, detail::object_or_cast(std::forward(val)).ptr()) == 1; + } +}; + +class function : public object { +public: + PYBIND11_OBJECT_DEFAULT(function, object, PyCallable_Check) + handle cpp_function() const { + handle fun = detail::get_function(m_ptr); + if 
(fun && PyCFunction_Check(fun.ptr())) + return fun; + return handle(); + } + bool is_cpp_function() const { return (bool) cpp_function(); } +}; + +class staticmethod : public object { +public: + PYBIND11_OBJECT_CVT(staticmethod, object, detail::PyStaticMethod_Check, PyStaticMethod_New) +}; + +class buffer : public object { +public: + PYBIND11_OBJECT_DEFAULT(buffer, object, PyObject_CheckBuffer) + + buffer_info request(bool writable = false) const { + int flags = PyBUF_STRIDES | PyBUF_FORMAT; + if (writable) flags |= PyBUF_WRITABLE; + auto *view = new Py_buffer(); + if (PyObject_GetBuffer(m_ptr, view, flags) != 0) { + delete view; + throw error_already_set(); + } + return buffer_info(view); + } +}; + +class memoryview : public object { +public: + PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject) + + /** \rst + Creates ``memoryview`` from ``buffer_info``. + + ``buffer_info`` must be created from ``buffer::request()``. Otherwise + throws an exception. + + For creating a ``memoryview`` from objects that support buffer protocol, + use ``memoryview(const object& obj)`` instead of this constructor. + \endrst */ + explicit memoryview(const buffer_info& info) { + if (!info.view()) + pybind11_fail("Prohibited to create memoryview without Py_buffer"); + // Note: PyMemoryView_FromBuffer never increments obj reference. + m_ptr = (info.view()->obj) ? + PyMemoryView_FromObject(info.view()->obj) : + PyMemoryView_FromBuffer(info.view()); + if (!m_ptr) + pybind11_fail("Unable to create memoryview from buffer descriptor"); + } + + /** \rst + Creates ``memoryview`` from static buffer. + + This method is meant for providing a ``memoryview`` for C/C++ buffer not + managed by Python. The caller is responsible for managing the lifetime + of ``ptr`` and ``format``, which MUST outlive the memoryview constructed + here. + + See also: Python C API documentation for `PyMemoryView_FromBuffer`_. + + .. 
_PyMemoryView_FromBuffer: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromBuffer + + :param ptr: Pointer to the buffer. + :param itemsize: Byte size of an element. + :param format: Pointer to the null-terminated format string. For + homogeneous Buffers, this should be set to + ``format_descriptor::value``. + :param shape: Shape of the tensor (1 entry per dimension). + :param strides: Number of bytes between adjacent entries (for each + per dimension). + :param readonly: Flag to indicate if the underlying storage may be + written to. + \endrst */ + static memoryview from_buffer( + void *ptr, ssize_t itemsize, const char *format, + detail::any_container shape, + detail::any_container strides, bool readonly = false); + + static memoryview from_buffer( + const void *ptr, ssize_t itemsize, const char *format, + detail::any_container shape, + detail::any_container strides) { + return memoryview::from_buffer( + const_cast(ptr), itemsize, format, std::move(shape), std::move(strides), true); + } + + template + static memoryview from_buffer( + T *ptr, detail::any_container shape, + detail::any_container strides, bool readonly = false) { + return memoryview::from_buffer( + reinterpret_cast(ptr), sizeof(T), + format_descriptor::value, shape, strides, readonly); + } + + template + static memoryview from_buffer( + const T *ptr, detail::any_container shape, + detail::any_container strides) { + return memoryview::from_buffer( + const_cast(ptr), shape, strides, true); + } + +#if PY_MAJOR_VERSION >= 3 + /** \rst + Creates ``memoryview`` from static memory. + + This method is meant for providing a ``memoryview`` for C/C++ buffer not + managed by Python. The caller is responsible for managing the lifetime + of ``mem``, which MUST outlive the memoryview constructed here. + + This method is not available in Python 2. + + See also: Python C API documentation for `PyMemoryView_FromBuffer`_. + + .. 
_PyMemoryView_FromMemory: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromMemory + \endrst */ + static memoryview from_memory(void *mem, ssize_t size, bool readonly = false) { + PyObject* ptr = PyMemoryView_FromMemory( + reinterpret_cast(mem), size, + (readonly) ? PyBUF_READ : PyBUF_WRITE); + if (!ptr) + pybind11_fail("Could not allocate memoryview object!"); + return memoryview(object(ptr, stolen_t{})); + } + + static memoryview from_memory(const void *mem, ssize_t size) { + return memoryview::from_memory(const_cast(mem), size, true); + } +#endif +}; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +inline memoryview memoryview::from_buffer( + void *ptr, ssize_t itemsize, const char* format, + detail::any_container shape, + detail::any_container strides, bool readonly) { + size_t ndim = shape->size(); + if (ndim != strides->size()) + pybind11_fail("memoryview: shape length doesn't match strides length"); + ssize_t size = ndim != 0u ? 1 : 0; + for (size_t i = 0; i < ndim; ++i) + size *= (*shape)[i]; + Py_buffer view; + view.buf = ptr; + view.obj = nullptr; + view.len = size * itemsize; + view.readonly = static_cast(readonly); + view.itemsize = itemsize; + view.format = const_cast(format); + view.ndim = static_cast(ndim); + view.shape = shape->data(); + view.strides = strides->data(); + view.suboffsets = nullptr; + view.internal = nullptr; + PyObject* obj = PyMemoryView_FromBuffer(&view); + if (!obj) + throw error_already_set(); + return memoryview(object(obj, stolen_t{})); +} +#endif // DOXYGEN_SHOULD_SKIP_THIS +/// @} pytypes + +/// \addtogroup python_builtins +/// @{ + +/// Get the length of a Python object. +inline size_t len(handle h) { + ssize_t result = PyObject_Length(h.ptr()); + if (result < 0) + throw error_already_set(); + return (size_t) result; +} + +/// Get the length hint of a Python object. +/// Returns 0 when this cannot be determined. 
+inline size_t len_hint(handle h) { +#if PY_VERSION_HEX >= 0x03040000 + ssize_t result = PyObject_LengthHint(h.ptr(), 0); +#else + ssize_t result = PyObject_Length(h.ptr()); +#endif + if (result < 0) { + // Sometimes a length can't be determined at all (eg generators) + // In which case simply return 0 + PyErr_Clear(); + return 0; + } + return (size_t) result; +} + +inline str repr(handle h) { + PyObject *str_value = PyObject_Repr(h.ptr()); + if (!str_value) throw error_already_set(); +#if PY_MAJOR_VERSION < 3 + PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr); + Py_XDECREF(str_value); str_value = unicode; + if (!str_value) throw error_already_set(); +#endif + return reinterpret_steal(str_value); +} + +inline iterator iter(handle obj) { + PyObject *result = PyObject_GetIter(obj.ptr()); + if (!result) { throw error_already_set(); } + return reinterpret_steal(result); +} +/// @} python_builtins + +PYBIND11_NAMESPACE_BEGIN(detail) +template iterator object_api::begin() const { return iter(derived()); } +template iterator object_api::end() const { return iterator::sentinel(); } +template item_accessor object_api::operator[](handle key) const { + return {derived(), reinterpret_borrow(key)}; +} +template item_accessor object_api::operator[](const char *key) const { + return {derived(), pybind11::str(key)}; +} +template obj_attr_accessor object_api::attr(handle key) const { + return {derived(), reinterpret_borrow(key)}; +} +template str_attr_accessor object_api::attr(const char *key) const { + return {derived(), key}; +} +template args_proxy object_api::operator*() const { + return args_proxy(derived().ptr()); +} +template template bool object_api::contains(T &&item) const { + return attr("__contains__")(std::forward(item)).template cast(); +} + +template +pybind11::str object_api::str() const { return pybind11::str(derived()); } + +template +str_attr_accessor object_api::doc() const { return attr("__doc__"); } + +template +handle 
object_api::get_type() const { return type::handle_of(derived()); } + +template +bool object_api::rich_compare(object_api const &other, int value) const { + int rv = PyObject_RichCompareBool(derived().ptr(), other.derived().ptr(), value); + if (rv == -1) + throw error_already_set(); + return rv == 1; +} + +#define PYBIND11_MATH_OPERATOR_UNARY(op, fn) \ + template object object_api::op() const { \ + object result = reinterpret_steal(fn(derived().ptr())); \ + if (!result.ptr()) \ + throw error_already_set(); \ + return result; \ + } + +#define PYBIND11_MATH_OPERATOR_BINARY(op, fn) \ + template \ + object object_api::op(object_api const &other) const { \ + object result = reinterpret_steal( \ + fn(derived().ptr(), other.derived().ptr())); \ + if (!result.ptr()) \ + throw error_already_set(); \ + return result; \ + } + +PYBIND11_MATH_OPERATOR_UNARY (operator~, PyNumber_Invert) +PYBIND11_MATH_OPERATOR_UNARY (operator-, PyNumber_Negative) +PYBIND11_MATH_OPERATOR_BINARY(operator+, PyNumber_Add) +PYBIND11_MATH_OPERATOR_BINARY(operator+=, PyNumber_InPlaceAdd) +PYBIND11_MATH_OPERATOR_BINARY(operator-, PyNumber_Subtract) +PYBIND11_MATH_OPERATOR_BINARY(operator-=, PyNumber_InPlaceSubtract) +PYBIND11_MATH_OPERATOR_BINARY(operator*, PyNumber_Multiply) +PYBIND11_MATH_OPERATOR_BINARY(operator*=, PyNumber_InPlaceMultiply) +PYBIND11_MATH_OPERATOR_BINARY(operator/, PyNumber_TrueDivide) +PYBIND11_MATH_OPERATOR_BINARY(operator/=, PyNumber_InPlaceTrueDivide) +PYBIND11_MATH_OPERATOR_BINARY(operator|, PyNumber_Or) +PYBIND11_MATH_OPERATOR_BINARY(operator|=, PyNumber_InPlaceOr) +PYBIND11_MATH_OPERATOR_BINARY(operator&, PyNumber_And) +PYBIND11_MATH_OPERATOR_BINARY(operator&=, PyNumber_InPlaceAnd) +PYBIND11_MATH_OPERATOR_BINARY(operator^, PyNumber_Xor) +PYBIND11_MATH_OPERATOR_BINARY(operator^=, PyNumber_InPlaceXor) +PYBIND11_MATH_OPERATOR_BINARY(operator<<, PyNumber_Lshift) +PYBIND11_MATH_OPERATOR_BINARY(operator<<=, PyNumber_InPlaceLshift) +PYBIND11_MATH_OPERATOR_BINARY(operator>>, 
PyNumber_Rshift) +PYBIND11_MATH_OPERATOR_BINARY(operator>>=, PyNumber_InPlaceRshift) + +#undef PYBIND11_MATH_OPERATOR_UNARY +#undef PYBIND11_MATH_OPERATOR_BINARY + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/stl.h b/Libraries/pybind11-2.8.0/pybind11/stl.h new file mode 100644 index 00000000..2c017b4f --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/stl.h @@ -0,0 +1,375 @@ +/* + pybind11/stl.h: Transparent conversion for STL data types + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" +#include "pybind11.h" +#include +#include +#include +#include +#include +#include +#include +#include + +// See `detail/common.h` for implementation of these guards. +#if defined(PYBIND11_HAS_OPTIONAL) +# include +#elif defined(PYBIND11_HAS_EXP_OPTIONAL) +# include +#endif + +#if defined(PYBIND11_HAS_VARIANT) +# include +#endif + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +/// Extracts an const lvalue reference or rvalue reference for U based on the type of T (e.g. for +/// forwarding a container element). Typically used indirect via forwarded_type(), below. +template +using forwarded_type = conditional_t< + std::is_lvalue_reference::value, remove_reference_t &, remove_reference_t &&>; + +/// Forwards a value U as rvalue or lvalue according to whether T is rvalue or lvalue; typically +/// used for forwarding a container's elements. 
+template +forwarded_type forward_like(U &&u) { + return std::forward>(std::forward(u)); +} + +template struct set_caster { + using type = Type; + using key_conv = make_caster; + + bool load(handle src, bool convert) { + if (!isinstance(src)) + return false; + auto s = reinterpret_borrow(src); + value.clear(); + for (auto entry : s) { + key_conv conv; + if (!conv.load(entry, convert)) + return false; + value.insert(cast_op(std::move(conv))); + } + return true; + } + + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + if (!std::is_lvalue_reference::value) + policy = return_value_policy_override::policy(policy); + pybind11::set s; + for (auto &&value : src) { + auto value_ = reinterpret_steal(key_conv::cast(forward_like(value), policy, parent)); + if (!value_ || !s.add(value_)) + return handle(); + } + return s.release(); + } + + PYBIND11_TYPE_CASTER(type, _("Set[") + key_conv::name + _("]")); +}; + +template struct map_caster { + using key_conv = make_caster; + using value_conv = make_caster; + + bool load(handle src, bool convert) { + if (!isinstance(src)) + return false; + auto d = reinterpret_borrow(src); + value.clear(); + for (auto it : d) { + key_conv kconv; + value_conv vconv; + if (!kconv.load(it.first.ptr(), convert) || + !vconv.load(it.second.ptr(), convert)) + return false; + value.emplace(cast_op(std::move(kconv)), cast_op(std::move(vconv))); + } + return true; + } + + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + dict d; + return_value_policy policy_key = policy; + return_value_policy policy_value = policy; + if (!std::is_lvalue_reference::value) { + policy_key = return_value_policy_override::policy(policy_key); + policy_value = return_value_policy_override::policy(policy_value); + } + for (auto &&kv : src) { + auto key = reinterpret_steal(key_conv::cast(forward_like(kv.first), policy_key, parent)); + auto value = reinterpret_steal(value_conv::cast(forward_like(kv.second), 
policy_value, parent)); + if (!key || !value) + return handle(); + d[key] = value; + } + return d.release(); + } + + PYBIND11_TYPE_CASTER(Type, _("Dict[") + key_conv::name + _(", ") + value_conv::name + _("]")); +}; + +template struct list_caster { + using value_conv = make_caster; + + bool load(handle src, bool convert) { + if (!isinstance(src) || isinstance(src) || isinstance(src)) + return false; + auto s = reinterpret_borrow(src); + value.clear(); + reserve_maybe(s, &value); + for (auto it : s) { + value_conv conv; + if (!conv.load(it, convert)) + return false; + value.push_back(cast_op(std::move(conv))); + } + return true; + } + +private: + template < + typename T = Type, + enable_if_t().reserve(0)), void>::value, int> = 0> + void reserve_maybe(const sequence &s, Type *) { + value.reserve(s.size()); + } + void reserve_maybe(const sequence &, void *) {} + +public: + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + if (!std::is_lvalue_reference::value) + policy = return_value_policy_override::policy(policy); + list l(src.size()); + ssize_t index = 0; + for (auto &&value : src) { + auto value_ = reinterpret_steal(value_conv::cast(forward_like(value), policy, parent)); + if (!value_) + return handle(); + PyList_SET_ITEM(l.ptr(), index++, value_.release().ptr()); // steals a reference + } + return l.release(); + } + + PYBIND11_TYPE_CASTER(Type, _("List[") + value_conv::name + _("]")); +}; + +template struct type_caster> + : list_caster, Type> { }; + +template struct type_caster> + : list_caster, Type> { }; + +template struct type_caster> + : list_caster, Type> { }; + +template struct array_caster { + using value_conv = make_caster; + +private: + template + bool require_size(enable_if_t size) { + if (value.size() != size) + value.resize(size); + return true; + } + template + bool require_size(enable_if_t size) { + return size == Size; + } + +public: + bool load(handle src, bool convert) { + if (!isinstance(src)) + return false; + 
auto l = reinterpret_borrow(src); + if (!require_size(l.size())) + return false; + size_t ctr = 0; + for (auto it : l) { + value_conv conv; + if (!conv.load(it, convert)) + return false; + value[ctr++] = cast_op(std::move(conv)); + } + return true; + } + + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + list l(src.size()); + ssize_t index = 0; + for (auto &&value : src) { + auto value_ = reinterpret_steal(value_conv::cast(forward_like(value), policy, parent)); + if (!value_) + return handle(); + PyList_SET_ITEM(l.ptr(), index++, value_.release().ptr()); // steals a reference + } + return l.release(); + } + + PYBIND11_TYPE_CASTER(ArrayType, _("List[") + value_conv::name + _(_(""), _("[") + _() + _("]")) + _("]")); +}; + +template struct type_caster> + : array_caster, Type, false, Size> { }; + +template struct type_caster> + : array_caster, Type, true> { }; + +template struct type_caster> + : set_caster, Key> { }; + +template struct type_caster> + : set_caster, Key> { }; + +template struct type_caster> + : map_caster, Key, Value> { }; + +template struct type_caster> + : map_caster, Key, Value> { }; + +// This type caster is intended to be used for std::optional and std::experimental::optional +template struct optional_caster { + using value_conv = make_caster; + + template + static handle cast(T_ &&src, return_value_policy policy, handle parent) { + if (!src) + return none().inc_ref(); + if (!std::is_lvalue_reference::value) { + policy = return_value_policy_override::policy(policy); + } + return value_conv::cast(*std::forward(src), policy, parent); + } + + bool load(handle src, bool convert) { + if (!src) { + return false; + } + if (src.is_none()) { + return true; // default-constructed value is already empty + } + value_conv inner_caster; + if (!inner_caster.load(src, convert)) + return false; + + value.emplace(cast_op(std::move(inner_caster))); + return true; + } + + PYBIND11_TYPE_CASTER(T, _("Optional[") + value_conv::name + 
_("]")); +}; + +#if defined(PYBIND11_HAS_OPTIONAL) +template struct type_caster> + : public optional_caster> {}; + +template<> struct type_caster + : public void_caster {}; +#endif + +#if defined(PYBIND11_HAS_EXP_OPTIONAL) +template struct type_caster> + : public optional_caster> {}; + +template<> struct type_caster + : public void_caster {}; +#endif + +/// Visit a variant and cast any found type to Python +struct variant_caster_visitor { + return_value_policy policy; + handle parent; + + using result_type = handle; // required by boost::variant in C++11 + + template + result_type operator()(T &&src) const { + return make_caster::cast(std::forward(src), policy, parent); + } +}; + +/// Helper class which abstracts away variant's `visit` function. `std::variant` and similar +/// `namespace::variant` types which provide a `namespace::visit()` function are handled here +/// automatically using argument-dependent lookup. Users can provide specializations for other +/// variant-like classes, e.g. `boost::variant` and `boost::apply_visitor`. +template class Variant> +struct visit_helper { + template + static auto call(Args &&...args) -> decltype(visit(std::forward(args)...)) { + return visit(std::forward(args)...); + } +}; + +/// Generic variant caster +template struct variant_caster; + +template class V, typename... Ts> +struct variant_caster> { + static_assert(sizeof...(Ts) > 0, "Variant must consist of at least one alternative."); + + template + bool load_alternative(handle src, bool convert, type_list) { + auto caster = make_caster(); + if (caster.load(src, convert)) { + value = cast_op(caster); + return true; + } + return load_alternative(src, convert, type_list{}); + } + + bool load_alternative(handle, bool, type_list<>) { return false; } + + bool load(handle src, bool convert) { + // Do a first pass without conversions to improve constructor resolution. + // E.g. `py::int_(1).cast>()` needs to fill the `int` + // slot of the variant. 
Without two-pass loading `double` would be filled + // because it appears first and a conversion is possible. + if (convert && load_alternative(src, false, type_list{})) + return true; + return load_alternative(src, convert, type_list{}); + } + + template + static handle cast(Variant &&src, return_value_policy policy, handle parent) { + return visit_helper::call(variant_caster_visitor{policy, parent}, + std::forward(src)); + } + + using Type = V; + PYBIND11_TYPE_CASTER(Type, _("Union[") + detail::concat(make_caster::name...) + _("]")); +}; + +#if defined(PYBIND11_HAS_VARIANT) +template +struct type_caster> : variant_caster> { }; +#endif + +PYBIND11_NAMESPACE_END(detail) + +inline std::ostream &operator<<(std::ostream &os, const handle &obj) { +#ifdef PYBIND11_HAS_STRING_VIEW + os << str(obj).cast(); +#else + os << (std::string) str(obj); +#endif + return os; +} + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/stl/filesystem.h b/Libraries/pybind11-2.8.0/pybind11/stl/filesystem.h new file mode 100644 index 00000000..431b94b4 --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/stl/filesystem.h @@ -0,0 +1,103 @@ +// Copyright (c) 2021 The Pybind Development Team. +// All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +#pragma once + +#include "../cast.h" +#include "../pybind11.h" +#include "../pytypes.h" + +#include "../detail/common.h" +#include "../detail/descr.h" + +#include + +#ifdef __has_include +# if defined(PYBIND11_CPP17) && __has_include() && \ + PY_VERSION_HEX >= 0x03060000 +# include +# define PYBIND11_HAS_FILESYSTEM 1 +# endif +#endif + +#if !defined(PYBIND11_HAS_FILESYSTEM) && !defined(PYBIND11_HAS_FILESYSTEM_IS_OPTIONAL) +# error \ + "#include is not available. 
(Use -DPYBIND11_HAS_FILESYSTEM_IS_OPTIONAL to ignore.)" +#endif + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +#if defined(PYBIND11_HAS_FILESYSTEM) +template struct path_caster { + +private: + static PyObject* unicode_from_fs_native(const std::string& w) { +#if !defined(PYPY_VERSION) + return PyUnicode_DecodeFSDefaultAndSize(w.c_str(), ssize_t(w.size())); +#else + // PyPy mistakenly declares the first parameter as non-const. + return PyUnicode_DecodeFSDefaultAndSize( + const_cast(w.c_str()), ssize_t(w.size())); +#endif + } + + static PyObject* unicode_from_fs_native(const std::wstring& w) { + return PyUnicode_FromWideChar(w.c_str(), ssize_t(w.size())); + } + +public: + static handle cast(const T& path, return_value_policy, handle) { + if (auto py_str = unicode_from_fs_native(path.native())) { + return module_::import("pathlib").attr("Path")(reinterpret_steal(py_str)) + .release(); + } + return nullptr; + } + + bool load(handle handle, bool) { + // PyUnicode_FSConverter and PyUnicode_FSDecoder normally take care of + // calling PyOS_FSPath themselves, but that's broken on PyPy (PyPy + // issue #3168) so we do it ourselves instead. + PyObject* buf = PyOS_FSPath(handle.ptr()); + if (!buf) { + PyErr_Clear(); + return false; + } + PyObject* native = nullptr; + if constexpr (std::is_same_v) { + if (PyUnicode_FSConverter(buf, &native) != 0) { + if (auto c_str = PyBytes_AsString(native)) { + // AsString returns a pointer to the internal buffer, which + // must not be free'd. + value = c_str; + } + } + } else if constexpr (std::is_same_v) { + if (PyUnicode_FSDecoder(buf, &native) != 0) { + if (auto c_str = PyUnicode_AsWideCharString(native, nullptr)) { + // AsWideCharString returns a new string that must be free'd. + value = c_str; // Copies the string. 
+ PyMem_Free(c_str); + } + } + } + Py_XDECREF(native); + Py_DECREF(buf); + if (PyErr_Occurred()) { + PyErr_Clear(); + return false; + } + return true; + } + + PYBIND11_TYPE_CASTER(T, _("os.PathLike")); +}; + +template<> struct type_caster + : public path_caster {}; +#endif // PYBIND11_HAS_FILESYSTEM + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/stl_bind.h b/Libraries/pybind11-2.8.0/pybind11/stl_bind.h new file mode 100644 index 00000000..050be83c --- /dev/null +++ b/Libraries/pybind11-2.8.0/pybind11/stl_bind.h @@ -0,0 +1,747 @@ +/* + pybind11/std_bind.h: Binding generators for STL data types + + Copyright (c) 2016 Sergey Lyskov and Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" +#include "operators.h" + +#include +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +/* SFINAE helper class used by 'is_comparable */ +template struct container_traits { + template static std::true_type test_comparable(decltype(std::declval() == std::declval())*); + template static std::false_type test_comparable(...); + template static std::true_type test_value(typename T2::value_type *); + template static std::false_type test_value(...); + template static std::true_type test_pair(typename T2::first_type *, typename T2::second_type *); + template static std::false_type test_pair(...); + + static constexpr const bool is_comparable = std::is_same(nullptr))>::value; + static constexpr const bool is_pair = std::is_same(nullptr, nullptr))>::value; + static constexpr const bool is_vector = std::is_same(nullptr))>::value; + static constexpr const bool is_element = !is_pair && !is_vector; +}; + +/* Default: is_comparable -> std::false_type */ +template +struct is_comparable : std::false_type { }; + +/* For non-map data structures, check 
whether operator== can be instantiated */ +template +struct is_comparable< + T, enable_if_t::is_element && + container_traits::is_comparable>> + : std::true_type { }; + +/* For a vector/map data structure, recursively check the value type (which is std::pair for maps) */ +template +struct is_comparable::is_vector>> { + static constexpr const bool value = + is_comparable::value; +}; + +/* For pairs, recursively check the two data types */ +template +struct is_comparable::is_pair>> { + static constexpr const bool value = + is_comparable::value && + is_comparable::value; +}; + +/* Fallback functions */ +template void vector_if_copy_constructible(const Args &...) { } +template void vector_if_equal_operator(const Args &...) { } +template void vector_if_insertion_operator(const Args &...) { } +template void vector_modifiers(const Args &...) { } + +template +void vector_if_copy_constructible(enable_if_t::value, Class_> &cl) { + cl.def(init(), "Copy constructor"); +} + +template +void vector_if_equal_operator(enable_if_t::value, Class_> &cl) { + using T = typename Vector::value_type; + + cl.def(self == self); + cl.def(self != self); + + cl.def("count", + [](const Vector &v, const T &x) { + return std::count(v.begin(), v.end(), x); + }, + arg("x"), + "Return the number of times ``x`` appears in the list" + ); + + cl.def("remove", [](Vector &v, const T &x) { + auto p = std::find(v.begin(), v.end(), x); + if (p != v.end()) + v.erase(p); + else + throw value_error(); + }, + arg("x"), + "Remove the first item from the list whose value is x. " + "It is an error if there is no such item." 
+ ); + + cl.def("__contains__", + [](const Vector &v, const T &x) { + return std::find(v.begin(), v.end(), x) != v.end(); + }, + arg("x"), + "Return true the container contains ``x``" + ); +} + +// Vector modifiers -- requires a copyable vector_type: +// (Technically, some of these (pop and __delitem__) don't actually require copyability, but it seems +// silly to allow deletion but not insertion, so include them here too.) +template +void vector_modifiers(enable_if_t::value, Class_> &cl) { + using T = typename Vector::value_type; + using SizeType = typename Vector::size_type; + using DiffType = typename Vector::difference_type; + + auto wrap_i = [](DiffType i, SizeType n) { + if (i < 0) + i += n; + if (i < 0 || (SizeType)i >= n) + throw index_error(); + return i; + }; + + cl.def("append", + [](Vector &v, const T &value) { v.push_back(value); }, + arg("x"), + "Add an item to the end of the list"); + + cl.def(init([](const iterable &it) { + auto v = std::unique_ptr(new Vector()); + v->reserve(len_hint(it)); + for (handle h : it) + v->push_back(h.cast()); + return v.release(); + })); + + cl.def("clear", + [](Vector &v) { + v.clear(); + }, + "Clear the contents" + ); + + cl.def("extend", + [](Vector &v, const Vector &src) { + v.insert(v.end(), src.begin(), src.end()); + }, + arg("L"), + "Extend the list by appending all the items in the given list" + ); + + cl.def( + "extend", + [](Vector &v, const iterable &it) { + const size_t old_size = v.size(); + v.reserve(old_size + len_hint(it)); + try { + for (handle h : it) { + v.push_back(h.cast()); + } + } catch (const cast_error &) { + v.erase(v.begin() + static_cast(old_size), + v.end()); + try { + v.shrink_to_fit(); + } catch (const std::exception &) { + // Do nothing + } + throw; + } + }, + arg("L"), + "Extend the list by appending all the items in the given list"); + + cl.def("insert", + [](Vector &v, DiffType i, const T &x) { + // Can't use wrap_i; i == v.size() is OK + if (i < 0) + i += v.size(); + if (i < 0 || 
(SizeType)i > v.size()) + throw index_error(); + v.insert(v.begin() + i, x); + }, + arg("i") , arg("x"), + "Insert an item at a given position." + ); + + cl.def("pop", + [](Vector &v) { + if (v.empty()) + throw index_error(); + T t = std::move(v.back()); + v.pop_back(); + return t; + }, + "Remove and return the last item" + ); + + cl.def("pop", + [wrap_i](Vector &v, DiffType i) { + i = wrap_i(i, v.size()); + T t = std::move(v[(SizeType) i]); + v.erase(std::next(v.begin(), i)); + return t; + }, + arg("i"), + "Remove and return the item at index ``i``" + ); + + cl.def("__setitem__", + [wrap_i](Vector &v, DiffType i, const T &t) { + i = wrap_i(i, v.size()); + v[(SizeType)i] = t; + } + ); + + /// Slicing protocol + cl.def( + "__getitem__", + [](const Vector &v, slice slice) -> Vector * { + size_t start = 0, stop = 0, step = 0, slicelength = 0; + + if (!slice.compute(v.size(), &start, &stop, &step, &slicelength)) + throw error_already_set(); + + auto *seq = new Vector(); + seq->reserve((size_t) slicelength); + + for (size_t i=0; ipush_back(v[start]); + start += step; + } + return seq; + }, + arg("s"), + "Retrieve list elements using a slice object"); + + cl.def( + "__setitem__", + [](Vector &v, slice slice, const Vector &value) { + size_t start = 0, stop = 0, step = 0, slicelength = 0; + if (!slice.compute(v.size(), &start, &stop, &step, &slicelength)) + throw error_already_set(); + + if (slicelength != value.size()) + throw std::runtime_error("Left and right hand size of slice assignment have different sizes!"); + + for (size_t i=0; i), +// we have to access by copying; otherwise we return by reference. 
+template using vector_needs_copy = negation< + std::is_same()[typename Vector::size_type()]), typename Vector::value_type &>>; + +// The usual case: access and iterate by reference +template +void vector_accessor(enable_if_t::value, Class_> &cl) { + using T = typename Vector::value_type; + using SizeType = typename Vector::size_type; + using DiffType = typename Vector::difference_type; + using ItType = typename Vector::iterator; + + auto wrap_i = [](DiffType i, SizeType n) { + if (i < 0) + i += n; + if (i < 0 || (SizeType)i >= n) + throw index_error(); + return i; + }; + + cl.def("__getitem__", + [wrap_i](Vector &v, DiffType i) -> T & { + i = wrap_i(i, v.size()); + return v[(SizeType)i]; + }, + return_value_policy::reference_internal // ref + keepalive + ); + + cl.def("__iter__", + [](Vector &v) { + return make_iterator< + return_value_policy::reference_internal, ItType, ItType, T&>( + v.begin(), v.end()); + }, + keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ + ); +} + +// The case for special objects, like std::vector, that have to be returned-by-copy: +template +void vector_accessor(enable_if_t::value, Class_> &cl) { + using T = typename Vector::value_type; + using SizeType = typename Vector::size_type; + using DiffType = typename Vector::difference_type; + using ItType = typename Vector::iterator; + cl.def("__getitem__", + [](const Vector &v, DiffType i) -> T { + if (i < 0 && (i += v.size()) < 0) + throw index_error(); + if ((SizeType)i >= v.size()) + throw index_error(); + return v[(SizeType)i]; + } + ); + + cl.def("__iter__", + [](Vector &v) { + return make_iterator< + return_value_policy::copy, ItType, ItType, T>( + v.begin(), v.end()); + }, + keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ + ); +} + +template auto vector_if_insertion_operator(Class_ &cl, std::string const &name) + -> decltype(std::declval() << std::declval(), void()) { + using size_type = typename Vector::size_type; + + cl.def("__repr__", 
+ [name](Vector &v) { + std::ostringstream s; + s << name << '['; + for (size_type i=0; i < v.size(); ++i) { + s << v[i]; + if (i != v.size() - 1) + s << ", "; + } + s << ']'; + return s.str(); + }, + "Return the canonical string representation of this list." + ); +} + +// Provide the buffer interface for vectors if we have data() and we have a format for it +// GCC seems to have "void std::vector::data()" - doing SFINAE on the existence of data() is insufficient, we need to check it returns an appropriate pointer +template +struct vector_has_data_and_format : std::false_type {}; +template +struct vector_has_data_and_format::format(), std::declval().data()), typename Vector::value_type*>::value>> : std::true_type {}; + +// [workaround(intel)] Separate function required here +// Workaround as the Intel compiler does not compile the enable_if_t part below +// (tested with icc (ICC) 2021.1 Beta 20200827) +template +constexpr bool args_any_are_buffer() { + return detail::any_of...>::value; +} + +// [workaround(intel)] Separate function required here +// [workaround(msvc)] Can't use constexpr bool in return type + +// Add the buffer interface to a vector +template +void vector_buffer_impl(Class_& cl, std::true_type) { + using T = typename Vector::value_type; + + static_assert(vector_has_data_and_format::value, "There is not an appropriate format descriptor for this vector"); + + // numpy.h declares this for arbitrary types, but it may raise an exception and crash hard at runtime if PYBIND11_NUMPY_DTYPE hasn't been called, so check here + format_descriptor::format(); + + cl.def_buffer([](Vector& v) -> buffer_info { + return buffer_info(v.data(), static_cast(sizeof(T)), format_descriptor::format(), 1, {v.size()}, {sizeof(T)}); + }); + + cl.def(init([](const buffer &buf) { + auto info = buf.request(); + if (info.ndim != 1 || info.strides[0] % static_cast(sizeof(T))) + throw type_error("Only valid 1D buffers can be copied to a vector"); + if 
(!detail::compare_buffer_info::compare(info) || (ssize_t) sizeof(T) != info.itemsize) + throw type_error("Format mismatch (Python: " + info.format + " C++: " + format_descriptor::format() + ")"); + + T *p = static_cast(info.ptr); + ssize_t step = info.strides[0] / static_cast(sizeof(T)); + T *end = p + info.shape[0] * step; + if (step == 1) { + return Vector(p, end); + } + Vector vec; + vec.reserve((size_t) info.shape[0]); + for (; p != end; p += step) + vec.push_back(*p); + return vec; + + })); + + return; +} + +template +void vector_buffer_impl(Class_&, std::false_type) {} + +template +void vector_buffer(Class_& cl) { + vector_buffer_impl(cl, detail::any_of...>{}); +} + +PYBIND11_NAMESPACE_END(detail) + +// +// std::vector +// +template , typename... Args> +class_ bind_vector(handle scope, std::string const &name, Args&&... args) { + using Class_ = class_; + + // If the value_type is unregistered (e.g. a converting type) or is itself registered + // module-local then make the vector binding module-local as well: + using vtype = typename Vector::value_type; + auto vtype_info = detail::get_type_info(typeid(vtype)); + bool local = !vtype_info || vtype_info->module_local; + + Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward(args)...); + + // Declare the buffer interface if a buffer_protocol() is passed in + detail::vector_buffer(cl); + + cl.def(init<>()); + + // Register copy constructor (if possible) + detail::vector_if_copy_constructible(cl); + + // Register comparison-related operators and functions (if possible) + detail::vector_if_equal_operator(cl); + + // Register stream insertion operator (if possible) + detail::vector_if_insertion_operator(cl, name); + + // Modifiers require copyable vector value type + detail::vector_modifiers(cl); + + // Accessor and iterator; return by value if copyable, otherwise we return by ref + keep-alive + detail::vector_accessor(cl); + + cl.def("__bool__", + [](const Vector &v) -> bool { + return 
!v.empty(); + }, + "Check whether the list is nonempty" + ); + + cl.def("__len__", &Vector::size); + + + + +#if 0 + // C++ style functions deprecated, leaving it here as an example + cl.def(init()); + + cl.def("resize", + (void (Vector::*) (size_type count)) & Vector::resize, + "changes the number of elements stored"); + + cl.def("erase", + [](Vector &v, SizeType i) { + if (i >= v.size()) + throw index_error(); + v.erase(v.begin() + i); + }, "erases element at index ``i``"); + + cl.def("empty", &Vector::empty, "checks whether the container is empty"); + cl.def("size", &Vector::size, "returns the number of elements"); + cl.def("push_back", (void (Vector::*)(const T&)) &Vector::push_back, "adds an element to the end"); + cl.def("pop_back", &Vector::pop_back, "removes the last element"); + + cl.def("max_size", &Vector::max_size, "returns the maximum possible number of elements"); + cl.def("reserve", &Vector::reserve, "reserves storage"); + cl.def("capacity", &Vector::capacity, "returns the number of elements that can be held in currently allocated storage"); + cl.def("shrink_to_fit", &Vector::shrink_to_fit, "reduces memory usage by freeing unused memory"); + + cl.def("clear", &Vector::clear, "clears the contents"); + cl.def("swap", &Vector::swap, "swaps the contents"); + + cl.def("front", [](Vector &v) { + if (v.size()) return v.front(); + else throw index_error(); + }, "access the first element"); + + cl.def("back", [](Vector &v) { + if (v.size()) return v.back(); + else throw index_error(); + }, "access the last element "); + +#endif + + return cl; +} + + + +// +// std::map, std::unordered_map +// + +PYBIND11_NAMESPACE_BEGIN(detail) + +/* Fallback functions */ +template void map_if_insertion_operator(const Args &...) { } +template void map_assignment(const Args &...) 
{ } + +// Map assignment when copy-assignable: just copy the value +template +void map_assignment(enable_if_t::value, Class_> &cl) { + using KeyType = typename Map::key_type; + using MappedType = typename Map::mapped_type; + + cl.def("__setitem__", + [](Map &m, const KeyType &k, const MappedType &v) { + auto it = m.find(k); + if (it != m.end()) it->second = v; + else m.emplace(k, v); + } + ); +} + +// Not copy-assignable, but still copy-constructible: we can update the value by erasing and reinserting +template +void map_assignment(enable_if_t< + !is_copy_assignable::value && + is_copy_constructible::value, + Class_> &cl) { + using KeyType = typename Map::key_type; + using MappedType = typename Map::mapped_type; + + cl.def("__setitem__", + [](Map &m, const KeyType &k, const MappedType &v) { + // We can't use m[k] = v; because value type might not be default constructable + auto r = m.emplace(k, v); + if (!r.second) { + // value type is not copy assignable so the only way to insert it is to erase it first... + m.erase(r.first); + m.emplace(k, v); + } + } + ); +} + + +template auto map_if_insertion_operator(Class_ &cl, std::string const &name) +-> decltype(std::declval() << std::declval() << std::declval(), void()) { + + cl.def("__repr__", + [name](Map &m) { + std::ostringstream s; + s << name << '{'; + bool f = false; + for (auto const &kv : m) { + if (f) + s << ", "; + s << kv.first << ": " << kv.second; + f = true; + } + s << '}'; + return s.str(); + }, + "Return the canonical string representation of this map." + ); +} + +template +struct keys_view +{ + Map &map; +}; + +template +struct values_view +{ + Map &map; +}; + +template +struct items_view +{ + Map &map; +}; + +PYBIND11_NAMESPACE_END(detail) + +template , typename... Args> +class_ bind_map(handle scope, const std::string &name, Args&&... 
args) { + using KeyType = typename Map::key_type; + using MappedType = typename Map::mapped_type; + using KeysView = detail::keys_view; + using ValuesView = detail::values_view; + using ItemsView = detail::items_view; + using Class_ = class_; + + // If either type is a non-module-local bound type then make the map binding non-local as well; + // otherwise (e.g. both types are either module-local or converting) the map will be + // module-local. + auto tinfo = detail::get_type_info(typeid(MappedType)); + bool local = !tinfo || tinfo->module_local; + if (local) { + tinfo = detail::get_type_info(typeid(KeyType)); + local = !tinfo || tinfo->module_local; + } + + Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward(args)...); + class_ keys_view( + scope, ("KeysView[" + name + "]").c_str(), pybind11::module_local(local)); + class_ values_view( + scope, ("ValuesView[" + name + "]").c_str(), pybind11::module_local(local)); + class_ items_view( + scope, ("ItemsView[" + name + "]").c_str(), pybind11::module_local(local)); + + cl.def(init<>()); + + // Register stream insertion operator (if possible) + detail::map_if_insertion_operator(cl, name); + + cl.def("__bool__", + [](const Map &m) -> bool { return !m.empty(); }, + "Check whether the map is nonempty" + ); + + cl.def("__iter__", + [](Map &m) { return make_key_iterator(m.begin(), m.end()); }, + keep_alive<0, 1>() /* Essential: keep map alive while iterator exists */ + ); + + cl.def("keys", + [](Map &m) { return KeysView{m}; }, + keep_alive<0, 1>() /* Essential: keep map alive while view exists */ + ); + + cl.def("values", + [](Map &m) { return ValuesView{m}; }, + keep_alive<0, 1>() /* Essential: keep map alive while view exists */ + ); + + cl.def("items", + [](Map &m) { return ItemsView{m}; }, + keep_alive<0, 1>() /* Essential: keep map alive while view exists */ + ); + + cl.def("__getitem__", + [](Map &m, const KeyType &k) -> MappedType & { + auto it = m.find(k); + if (it == m.end()) + throw 
key_error(); + return it->second; + }, + return_value_policy::reference_internal // ref + keepalive + ); + + cl.def("__contains__", + [](Map &m, const KeyType &k) -> bool { + auto it = m.find(k); + if (it == m.end()) + return false; + return true; + } + ); + // Fallback for when the object is not of the key type + cl.def("__contains__", [](Map &, const object &) -> bool { return false; }); + + // Assignment provided only if the type is copyable + detail::map_assignment(cl); + + cl.def("__delitem__", + [](Map &m, const KeyType &k) { + auto it = m.find(k); + if (it == m.end()) + throw key_error(); + m.erase(it); + } + ); + + cl.def("__len__", &Map::size); + + keys_view.def("__len__", [](KeysView &view) { return view.map.size(); }); + keys_view.def("__iter__", + [](KeysView &view) { + return make_key_iterator(view.map.begin(), view.map.end()); + }, + keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */ + ); + keys_view.def("__contains__", + [](KeysView &view, const KeyType &k) -> bool { + auto it = view.map.find(k); + if (it == view.map.end()) + return false; + return true; + } + ); + // Fallback for when the object is not of the key type + keys_view.def("__contains__", [](KeysView &, const object &) -> bool { return false; }); + + values_view.def("__len__", [](ValuesView &view) { return view.map.size(); }); + values_view.def("__iter__", + [](ValuesView &view) { + return make_value_iterator(view.map.begin(), view.map.end()); + }, + keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */ + ); + + items_view.def("__len__", [](ItemsView &view) { return view.map.size(); }); + items_view.def("__iter__", + [](ItemsView &view) { + return make_iterator(view.map.begin(), view.map.end()); + }, + keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */ + ); + + return cl; +} + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp new file mode 100644 index 
00000000..d941b310 --- /dev/null +++ b/Source/Python/PythonModule.cpp @@ -0,0 +1,71 @@ +#ifdef KTT_PYTHON + +#include +#include +#include + +#include + +namespace py = pybind11; + +PYBIND11_MODULE(ktt, module) +{ + module.doc() = "Python bindings for KTT auto-tuning framework (https://github.com/HiPerCoRe/KTT)"; + + module.attr("InvalidQueueId") = ktt::InvalidQueueId; + module.attr("InvalidKernelDefinitionId") = ktt::InvalidKernelDefinitionId; + module.attr("InvalidKernelId") = ktt::InvalidKernelId; + module.attr("InvalidArgumentId") = ktt::InvalidArgumentId; + module.attr("InvalidDuration") = ktt::InvalidDuration; + + module.def("GetKttVersion", &ktt::GetKttVersion, "Returns the current KTT framework version in integer format."); + module.def("GetKttVersionString", &ktt::GetKttVersionString, "Returns the current KTT framework version in string format."); + + py::enum_(module, "ComputeApi") + .value("OpenCL", ktt::ComputeApi::OpenCL) + .value("CUDA", ktt::ComputeApi::CUDA) + .value("Vulkan", ktt::ComputeApi::Vulkan); + + py::enum_(module, "ModifierAction") + .value("Add", ktt::ModifierAction::Add) + .value("Subtract", ktt::ModifierAction::Subtract) + .value("Multiply", ktt::ModifierAction::Multiply) + .value("Divide", ktt::ModifierAction::Divide) + .value("DivideCeil", ktt::ModifierAction::DivideCeil); + + py::enum_(module, "ModifierDimension") + .value("X", ktt::ModifierDimension::X) + .value("Y", ktt::ModifierDimension::Y) + .value("Z", ktt::ModifierDimension::Z); + + py::class_(module, "DimensionVector") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def(py::init()) + .def(py::init&>()) + .def("SetSizeX", &ktt::DimensionVector::SetSizeX) + .def("SetSizeY", &ktt::DimensionVector::SetSizeY) + .def("SetSizeZ", &ktt::DimensionVector::SetSizeZ) + .def("SetSize", &ktt::DimensionVector::SetSize) + .def("Multiply", &ktt::DimensionVector::Multiply) + .def("Divide", &ktt::DimensionVector::Divide) + .def("RoundUp", &ktt::DimensionVector::RoundUp) + 
.def("ModifyByValue", &ktt::DimensionVector::ModifyByValue) + .def("GetSizeX", &ktt::DimensionVector::GetSizeX) + .def("GetSizeY", &ktt::DimensionVector::GetSizeY) + .def("GetSizeZ", &ktt::DimensionVector::GetSizeZ) + .def("GetSize", &ktt::DimensionVector::GetSize) + .def("GetTotalSize", &ktt::DimensionVector::GetTotalSize) + .def("GetVector", &ktt::DimensionVector::GetVector) + .def("GetString", &ktt::DimensionVector::GetString) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__repr__", &ktt::DimensionVector::GetString); + + py::class_(module, "Tuner") + .def(py::init()) + .def("RemoveKernelDefinition", &ktt::Tuner::RemoveKernelDefinition); +} + +#endif // KTT_PYTHON diff --git a/premake5.lua b/premake5.lua index 43bd5796..78fc772b 100644 --- a/premake5.lua +++ b/premake5.lua @@ -188,6 +188,23 @@ function linkVulkan() return true end +function linkPython() + local path = os.getenv("PYTHON_PATH") + + if not path then + return false + end + + defines {"KTT_PYTHON"} + includedirs {"$(PYTHON_PATH)/include", "Libraries/pybind11-2.8.0"} + files {"Libraries/pybind11-2.8.0/**"} + + libdirs {"$(PYTHON_PATH)/libs"} + links {"python3"} + + return true +end + function linkAllLibraries() local librariesFound = linkComputeLibraries() @@ -203,6 +220,14 @@ function linkAllLibraries() error("Vulkan SDK was not found. Please ensure that path to the SDK is correctly set in the environment variables under VULKAN_SDK.") end end + + if _OPTIONS["python"] then + local pythonFound = linkPython() + + if not pythonFound then + error("Python installation was not found. 
Please ensure that path to Python is correctly set in the environment variables under PYTHON_PATH.") + end + end end -- Command line arguments definition @@ -239,6 +264,12 @@ newoption } } +newoption +{ + trigger = "python", + description = "Enables compilation of Python bindings" +} + newoption { trigger = "outdir", @@ -298,7 +329,7 @@ workspace "Ktt" symbols "Off" filter "action:vs*" - buildoptions {"/Zc:__cplusplus"} + buildoptions {"/Zc:__cplusplus", "/permissive-"} filter {} @@ -308,8 +339,32 @@ workspace "Ktt" -- Library configuration project "Ktt" kind "SharedLib" - files {"Source/**", "Libraries/CTPL-Ahajha/**", "Libraries/date-3/**", "Libraries/Json-3.9.1/**", "Libraries/pugixml-1.11.4/**"} - includedirs {"Source", "Libraries/CTPL-Ahajha", "Libraries/date-3", "Libraries/Json-3.9.1", "Libraries/pugixml-1.11.4"} + + files + { + "Source/**", + "Libraries/CTPL-Ahajha/**", + "Libraries/date-3/**", + "Libraries/Json-3.9.1/**", + "Libraries/pugixml-1.11.4/**" + } + + includedirs + { + "Source", + "Libraries/CTPL-Ahajha", + "Libraries/date-3", + "Libraries/Json-3.9.1", + "Libraries/pugixml-1.11.4" + } + + filter "system:windows" + if _OPTIONS["python"] then + postbuildcommands {"{COPYFILE} %{cfg.targetdir}/ktt.dll %{cfg.targetdir}/ktt.pyd"} + end + + filter {} + defines {"KTT_LIBRARY"} targetname("ktt") linkAllLibraries() From 24fb225830241a6b9111ed02e153a2da85c2946b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 19 Oct 2021 16:26:56 +0200 Subject: [PATCH 13/63] * Further work on Python bindings - added data classes and searchers --- Source/Python/PythonModule.cpp | 235 ++++++++++++++++++++++++++++++++- 1 file changed, 233 insertions(+), 2 deletions(-) diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index d941b310..fe4b29b1 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -8,24 +8,66 @@ namespace py = pybind11; +class PySearcher : public ktt::Searcher +{ +public: + using 
Searcher::Searcher; + + void OnInitialize() override + { + PYBIND11_OVERRIDE(void, ktt::Searcher, OnInitialize); + } + + void OnReset() override + { + PYBIND11_OVERRIDE(void, ktt::Searcher, OnReset); + } + + bool CalculateNextConfiguration(const ktt::KernelResult& previousResult) override + { + PYBIND11_OVERRIDE_PURE(bool, ktt::Searcher, CalculateNextConfiguration, previousResult); + } + + ktt::KernelConfiguration GetCurrentConfiguration() const override + { + PYBIND11_OVERRIDE_PURE(ktt::KernelConfiguration, ktt::Searcher, GetCurrentConfiguration); + } +}; + PYBIND11_MODULE(ktt, module) { module.doc() = "Python bindings for KTT auto-tuning framework (https://github.com/HiPerCoRe/KTT)"; + module.attr("KTT_VERSION_MAJOR") = KTT_VERSION_MAJOR; + module.attr("KTT_VERSION_MINOR") = KTT_VERSION_MINOR; + module.attr("KTT_VERSION_PATCH") = KTT_VERSION_PATCH; + + module.def("GetKttVersion", &ktt::GetKttVersion); + module.def("GetKttVersionString", &ktt::GetKttVersionString); + module.attr("InvalidQueueId") = ktt::InvalidQueueId; module.attr("InvalidKernelDefinitionId") = ktt::InvalidKernelDefinitionId; module.attr("InvalidKernelId") = ktt::InvalidKernelId; module.attr("InvalidArgumentId") = ktt::InvalidArgumentId; module.attr("InvalidDuration") = ktt::InvalidDuration; - module.def("GetKttVersion", &ktt::GetKttVersion, "Returns the current KTT framework version in integer format."); - module.def("GetKttVersionString", &ktt::GetKttVersionString, "Returns the current KTT framework version in string format."); + py::register_exception(module, "KttException", PyExc_Exception); py::enum_(module, "ComputeApi") .value("OpenCL", ktt::ComputeApi::OpenCL) .value("CUDA", ktt::ComputeApi::CUDA) .value("Vulkan", ktt::ComputeApi::Vulkan); + py::enum_(module, "DeviceType") + .value("CPU", ktt::DeviceType::CPU) + .value("GPU", ktt::DeviceType::GPU) + .value("Custom", ktt::DeviceType::Custom); + + py::enum_(module, "ExceptionReason") + .value("General", ktt::ExceptionReason::General) + 
.value("CompilerError", ktt::ExceptionReason::CompilerError) + .value("DeviceLimitsExceeded", ktt::ExceptionReason::DeviceLimitsExceeded); + py::enum_(module, "ModifierAction") .value("Add", ktt::ModifierAction::Add) .value("Subtract", ktt::ModifierAction::Subtract) @@ -38,6 +80,21 @@ PYBIND11_MODULE(ktt, module) .value("Y", ktt::ModifierDimension::Y) .value("Z", ktt::ModifierDimension::Z); + py::enum_(module, "ProfilingCounterType") + .value("Int", ktt::ProfilingCounterType::Int) + .value("UnsignedInt", ktt::ProfilingCounterType::UnsignedInt) + .value("Double", ktt::ProfilingCounterType::Double) + .value("Percent", ktt::ProfilingCounterType::Percent) + .value("Throughput", ktt::ProfilingCounterType::Throughput) + .value("UtilizationLevel", ktt::ProfilingCounterType::UtilizationLevel); + + py::enum_(module, "ResultStatus") + .value("Ok", ktt::ResultStatus::Ok) + .value("ComputationFailed", ktt::ResultStatus::ComputationFailed) + .value("ValidationFailed", ktt::ResultStatus::ValidationFailed) + .value("CompilationFailed", ktt::ResultStatus::CompilationFailed) + .value("DeviceLimitsExceeded", ktt::ResultStatus::DeviceLimitsExceeded); + py::class_(module, "DimensionVector") .def(py::init<>()) .def(py::init()) @@ -63,6 +120,180 @@ PYBIND11_MODULE(ktt, module) .def(py::self != py::self) .def("__repr__", &ktt::DimensionVector::GetString); + py::class_(module, "ParameterPair") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def("SetValue", py::overload_cast(&ktt::ParameterPair::SetValue)) + .def("SetValue", py::overload_cast(&ktt::ParameterPair::SetValue)) + .def("GetName", &ktt::ParameterPair::GetName) + .def("GetString", &ktt::ParameterPair::GetString) + .def("GetValueString", &ktt::ParameterPair::GetValueString) + .def("GetValue", &ktt::ParameterPair::GetValue) + .def("GetValueDouble", &ktt::ParameterPair::GetValueDouble) + .def("HasValueDouble", &ktt::ParameterPair::HasValueDouble) + .def("HasSameValue", &ktt::ParameterPair::HasSameValue) + 
.def_static("GetParameterValue", &ktt::ParameterPair::GetParameterValue) + .def_static("GetParameterValueDouble", &ktt::ParameterPair::GetParameterValue) + .def_static("GetParameterValues", &ktt::ParameterPair::GetParameterValues) + .def_static("GetParameterValuesDouble", &ktt::ParameterPair::GetParameterValues) + .def("__repr__", &ktt::ParameterPair::GetString); + + py::class_(module, "KernelConfiguration") + .def(py::init<>()) + .def(py::init&>()) + .def("GetPairs", &ktt::KernelConfiguration::GetPairs) + .def("IsValid", &ktt::KernelConfiguration::IsValid) + .def("GeneratePrefix", &ktt::KernelConfiguration::GeneratePrefix) + .def("GetString", &ktt::KernelConfiguration::GetString) + .def("Merge", &ktt::KernelConfiguration::Merge) + .def("GenerateNeighbours", &ktt::KernelConfiguration::GenerateNeighbours) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__repr__", &ktt::KernelConfiguration::GetString); + + py::class_(module, "DeviceInfo") + .def(py::init()) + .def("GetIndex", &ktt::DeviceInfo::GetIndex) + .def("GetName", &ktt::DeviceInfo::GetName) + .def("GetVendor", &ktt::DeviceInfo::GetVendor) + .def("GetExtensions", &ktt::DeviceInfo::GetExtensions) + .def("GetDeviceType", &ktt::DeviceInfo::GetDeviceType) + .def("GetDeviceTypeString", &ktt::DeviceInfo::GetDeviceTypeString) + .def("GetGlobalMemorySize", &ktt::DeviceInfo::GetGlobalMemorySize) + .def("GetLocalMemorySize", &ktt::DeviceInfo::GetLocalMemorySize) + .def("GetMaxConstantBufferSize", &ktt::DeviceInfo::GetMaxConstantBufferSize) + .def("GetMaxWorkGroupSize", &ktt::DeviceInfo::GetMaxWorkGroupSize) + .def("GetMaxComputeUnits", &ktt::DeviceInfo::GetMaxComputeUnits) + .def("GetString", &ktt::DeviceInfo::GetString) + .def("SetVendor", &ktt::DeviceInfo::SetVendor) + .def("SetExtensions", &ktt::DeviceInfo::SetExtensions) + .def("SetDeviceType", &ktt::DeviceInfo::SetDeviceType) + .def("SetGlobalMemorySize", &ktt::DeviceInfo::SetGlobalMemorySize) + .def("SetLocalMemorySize", 
&ktt::DeviceInfo::SetLocalMemorySize) + .def("SetMaxConstantBufferSize", &ktt::DeviceInfo::SetMaxConstantBufferSize) + .def("SetMaxWorkGroupSize", &ktt::DeviceInfo::SetMaxWorkGroupSize) + .def("SetMaxComputeUnits", &ktt::DeviceInfo::SetMaxComputeUnits) + .def("__repr__", &ktt::DeviceInfo::GetString); + + py::class_(module, "PlatformInfo") + .def(py::init()) + .def("GetIndex", &ktt::PlatformInfo::GetIndex) + .def("GetName", &ktt::PlatformInfo::GetName) + .def("GetVendor", &ktt::PlatformInfo::GetVendor) + .def("GetVersion", &ktt::PlatformInfo::GetVersion) + .def("GetExtensions", &ktt::PlatformInfo::GetExtensions) + .def("GetString", &ktt::PlatformInfo::GetString) + .def("SetVendor", &ktt::PlatformInfo::SetVendor) + .def("SetVersion", &ktt::PlatformInfo::SetVersion) + .def("SetExtensions", &ktt::PlatformInfo::SetExtensions) + .def("__repr__", &ktt::PlatformInfo::GetString); + + py::class_(module, "BufferOutputDescriptor") + .def(py::init()) + .def(py::init()) + .def("GetArgumentId", &ktt::BufferOutputDescriptor::GetArgumentId) + .def("GetOutputDestination", &ktt::BufferOutputDescriptor::GetOutputDestination, py::return_value_policy::reference) + .def("GetOutputSize", &ktt::BufferOutputDescriptor::GetOutputSize); + + py::class_(module, "KernelCompilationData") + .def(py::init<>()) + .def_readwrite("m_MaxWorkGroupSize", &ktt::KernelCompilationData::m_MaxWorkGroupSize) + .def_readwrite("m_LocalMemorySize", &ktt::KernelCompilationData::m_LocalMemorySize) + .def_readwrite("m_PrivateMemorySize", &ktt::KernelCompilationData::m_PrivateMemorySize) + .def_readwrite("m_ConstantMemorySize", &ktt::KernelCompilationData::m_ConstantMemorySize) + .def_readwrite("m_RegistersCount", &ktt::KernelCompilationData::m_RegistersCount); + + py::class_(module, "KernelProfilingCounter") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def(py::init()) + .def("GetName", &ktt::KernelProfilingCounter::GetName) + .def("GetType", &ktt::KernelProfilingCounter::GetType) + 
.def("GetValueInt", &ktt::KernelProfilingCounter::GetValueInt) + .def("GetValueUint", &ktt::KernelProfilingCounter::GetValueUint) + .def("GetValueDouble", &ktt::KernelProfilingCounter::GetValueDouble) + .def(py::self == py::self) + .def(py::self != py::self) + .def(py::self < py::self); + + py::class_(module, "KernelProfilingData") + .def(py::init<>()) + .def(py::init()) + .def(py::init&>()) + .def("IsValid", &ktt::KernelProfilingData::IsValid) + .def("HasCounter", &ktt::KernelProfilingData::HasCounter) + .def("GetCounter", &ktt::KernelProfilingData::GetCounter) + .def("GetCounters", &ktt::KernelProfilingData::GetCounters) + .def("SetCounters", &ktt::KernelProfilingData::SetCounters) + .def("AddCounter", &ktt::KernelProfilingData::AddCounter) + .def("HasRemainingProfilingRuns", &ktt::KernelProfilingData::HasRemainingProfilingRuns) + .def("GetRemainingProfilingRuns", &ktt::KernelProfilingData::GetRemainingProfilingRuns) + .def("DecreaseRemainingProfilingRuns", &ktt::KernelProfilingData::DecreaseRemainingProfilingRuns); + + py::class_(module, "ComputationResult") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def("SetDurationData", &ktt::ComputationResult::SetDurationData) + .def("SetSizeData", &ktt::ComputationResult::SetSizeData) + // Todo: check pybind11 smart_holder branch for unique_ptr argument passing support + //.def("SetCompilationData", &ktt::ComputationResult::SetCompilationData) + //.def("SetProfilingData", &ktt::ComputationResult::SetProfilingData) + .def("GetKernelFunction", &ktt::ComputationResult::GetKernelFunction) + .def("GetGlobalSize", &ktt::ComputationResult::GetGlobalSize) + .def("GetLocalSize", &ktt::ComputationResult::GetLocalSize) + .def("GetDuration", &ktt::ComputationResult::GetDuration) + .def("GetOverhead", &ktt::ComputationResult::GetOverhead) + .def("HasCompilationData", &ktt::ComputationResult::HasCompilationData) + .def("GetCompilationData", &ktt::ComputationResult::GetCompilationData) + .def("HasProfilingData", 
&ktt::ComputationResult::HasProfilingData) + .def("GetProfilingData", &ktt::ComputationResult::GetProfilingData) + .def("HasRemainingProfilingRuns", &ktt::ComputationResult::HasRemainingProfilingRuns) + .def("assign", &ktt::ComputationResult::operator=); + + py::class_(module, "KernelResult") + .def(py::init<>()) + .def(py::init()) + .def(py::init&>()) + .def("SetStatus", &ktt::KernelResult::SetStatus) + .def("SetExtraDuration", &ktt::KernelResult::SetExtraDuration) + .def("SetExtraOverhead", &ktt::KernelResult::SetExtraOverhead) + .def("GetKernelName", &ktt::KernelResult::GetKernelName) + .def("GetConfiguration", &ktt::KernelResult::GetConfiguration) + .def("GetStatus", &ktt::KernelResult::GetStatus) + .def("GetKernelDuration", &ktt::KernelResult::GetKernelDuration) + .def("GetKernelOverhead", &ktt::KernelResult::GetKernelOverhead) + .def("GetExtraDuration", &ktt::KernelResult::GetExtraDuration) + .def("GetExtraOverhead", &ktt::KernelResult::GetExtraOverhead) + .def("GetTotalDuration", &ktt::KernelResult::GetTotalDuration) + .def("GetTotalOverhead", &ktt::KernelResult::GetTotalOverhead) + .def("IsValid", &ktt::KernelResult::IsValid) + .def("HasRemainingProfilingRuns", &ktt::KernelResult::HasRemainingProfilingRuns); + + py::class_(module, "Searcher") + .def(py::init<>()) + .def("OnInitialize", &ktt::Searcher::OnInitialize) + .def("OnReset", &ktt::Searcher::OnReset) + .def("CalculateNextConfiguration", &ktt::Searcher::CalculateNextConfiguration) + .def("GetCurrentConfiguration", &ktt::Searcher::GetCurrentConfiguration) + .def("GetIndex", &ktt::Searcher::GetIndex) + .def("GetRandomConfiguration", &ktt::Searcher::GetRandomConfiguration) + .def("GetNeighbourConfigurations", &ktt::Searcher::GetNeighbourConfigurations) + .def("GetConfigurationsCount", &ktt::Searcher::GetConfigurationsCount) + .def("GetExploredIndices", &ktt::Searcher::GetExploredIndices) + .def("IsInitialized", &ktt::Searcher::IsInitialized); + + py::class_(module, "DeterministicSearcher") + 
.def(py::init<>()); + + py::class_(module, "McmcSearcher") + .def(py::init&>()); + + py::class_(module, "RandomSearcher") + .def(py::init<>()); + py::class_(module, "Tuner") .def(py::init()) .def("RemoveKernelDefinition", &ktt::Tuner::RemoveKernelDefinition); From 1787fed3d6e50ea745a84447dfc0de3faff49439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 20 Oct 2021 16:12:53 +0200 Subject: [PATCH 14/63] * More work on Python bindings - added stop conditions, compute interface and tuner methods --- Source/Python/PythonDataHolders.cpp | 197 ++++++++++++ Source/Python/PythonEnums.cpp | 129 ++++++++ Source/Python/PythonModule.cpp | 427 ++++++++++--------------- Source/Python/PythonSearchers.cpp | 61 ++++ Source/Python/PythonStopConditions.cpp | 58 ++++ 5 files changed, 606 insertions(+), 266 deletions(-) create mode 100644 Source/Python/PythonDataHolders.cpp create mode 100644 Source/Python/PythonEnums.cpp create mode 100644 Source/Python/PythonSearchers.cpp create mode 100644 Source/Python/PythonStopConditions.cpp diff --git a/Source/Python/PythonDataHolders.cpp b/Source/Python/PythonDataHolders.cpp new file mode 100644 index 00000000..cc92c624 --- /dev/null +++ b/Source/Python/PythonDataHolders.cpp @@ -0,0 +1,197 @@ +#ifdef KTT_PYTHON + +#include +#include +#include +#include + +#include + +namespace py = pybind11; + +void InitializePythonDataHolders(py::module_& module) +{ + py::class_(module, "DimensionVector") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def(py::init()) + .def(py::init&>()) + .def("SetSizeX", &ktt::DimensionVector::SetSizeX) + .def("SetSizeY", &ktt::DimensionVector::SetSizeY) + .def("SetSizeZ", &ktt::DimensionVector::SetSizeZ) + .def("SetSize", &ktt::DimensionVector::SetSize) + .def("Multiply", &ktt::DimensionVector::Multiply) + .def("Divide", &ktt::DimensionVector::Divide) + .def("RoundUp", &ktt::DimensionVector::RoundUp) + .def("ModifyByValue", &ktt::DimensionVector::ModifyByValue) + .def("GetSizeX", 
&ktt::DimensionVector::GetSizeX) + .def("GetSizeY", &ktt::DimensionVector::GetSizeY) + .def("GetSizeZ", &ktt::DimensionVector::GetSizeZ) + .def("GetSize", &ktt::DimensionVector::GetSize) + .def("GetTotalSize", &ktt::DimensionVector::GetTotalSize) + .def("GetVector", &ktt::DimensionVector::GetVector) + .def("GetString", &ktt::DimensionVector::GetString) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__repr__", &ktt::DimensionVector::GetString); + + py::class_(module, "ParameterPair") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def("SetValue", py::overload_cast(&ktt::ParameterPair::SetValue)) + .def("SetValue", py::overload_cast(&ktt::ParameterPair::SetValue)) + .def("GetName", &ktt::ParameterPair::GetName) + .def("GetString", &ktt::ParameterPair::GetString) + .def("GetValueString", &ktt::ParameterPair::GetValueString) + .def("GetValue", &ktt::ParameterPair::GetValue) + .def("GetValueDouble", &ktt::ParameterPair::GetValueDouble) + .def("HasValueDouble", &ktt::ParameterPair::HasValueDouble) + .def("HasSameValue", &ktt::ParameterPair::HasSameValue) + .def_static("GetParameterValue", &ktt::ParameterPair::GetParameterValue) + .def_static("GetParameterValueDouble", &ktt::ParameterPair::GetParameterValue) + .def_static("GetParameterValues", &ktt::ParameterPair::GetParameterValues) + .def_static("GetParameterValuesDouble", &ktt::ParameterPair::GetParameterValues) + .def("__repr__", &ktt::ParameterPair::GetString); + + py::class_(module, "KernelConfiguration") + .def(py::init<>()) + .def(py::init&>()) + .def("GetPairs", &ktt::KernelConfiguration::GetPairs) + .def("IsValid", &ktt::KernelConfiguration::IsValid) + .def("GeneratePrefix", &ktt::KernelConfiguration::GeneratePrefix) + .def("GetString", &ktt::KernelConfiguration::GetString) + .def("Merge", &ktt::KernelConfiguration::Merge) + .def("GenerateNeighbours", &ktt::KernelConfiguration::GenerateNeighbours) + .def(py::self == py::self) + .def(py::self != py::self) + .def("__repr__", 
&ktt::KernelConfiguration::GetString); + + py::class_(module, "DeviceInfo") + .def(py::init()) + .def("GetIndex", &ktt::DeviceInfo::GetIndex) + .def("GetName", &ktt::DeviceInfo::GetName) + .def("GetVendor", &ktt::DeviceInfo::GetVendor) + .def("GetExtensions", &ktt::DeviceInfo::GetExtensions) + .def("GetDeviceType", &ktt::DeviceInfo::GetDeviceType) + .def("GetDeviceTypeString", &ktt::DeviceInfo::GetDeviceTypeString) + .def("GetGlobalMemorySize", &ktt::DeviceInfo::GetGlobalMemorySize) + .def("GetLocalMemorySize", &ktt::DeviceInfo::GetLocalMemorySize) + .def("GetMaxConstantBufferSize", &ktt::DeviceInfo::GetMaxConstantBufferSize) + .def("GetMaxWorkGroupSize", &ktt::DeviceInfo::GetMaxWorkGroupSize) + .def("GetMaxComputeUnits", &ktt::DeviceInfo::GetMaxComputeUnits) + .def("GetString", &ktt::DeviceInfo::GetString) + .def("SetVendor", &ktt::DeviceInfo::SetVendor) + .def("SetExtensions", &ktt::DeviceInfo::SetExtensions) + .def("SetDeviceType", &ktt::DeviceInfo::SetDeviceType) + .def("SetGlobalMemorySize", &ktt::DeviceInfo::SetGlobalMemorySize) + .def("SetLocalMemorySize", &ktt::DeviceInfo::SetLocalMemorySize) + .def("SetMaxConstantBufferSize", &ktt::DeviceInfo::SetMaxConstantBufferSize) + .def("SetMaxWorkGroupSize", &ktt::DeviceInfo::SetMaxWorkGroupSize) + .def("SetMaxComputeUnits", &ktt::DeviceInfo::SetMaxComputeUnits) + .def("__repr__", &ktt::DeviceInfo::GetString); + + py::class_(module, "PlatformInfo") + .def(py::init()) + .def("GetIndex", &ktt::PlatformInfo::GetIndex) + .def("GetName", &ktt::PlatformInfo::GetName) + .def("GetVendor", &ktt::PlatformInfo::GetVendor) + .def("GetVersion", &ktt::PlatformInfo::GetVersion) + .def("GetExtensions", &ktt::PlatformInfo::GetExtensions) + .def("GetString", &ktt::PlatformInfo::GetString) + .def("SetVendor", &ktt::PlatformInfo::SetVendor) + .def("SetVersion", &ktt::PlatformInfo::SetVersion) + .def("SetExtensions", &ktt::PlatformInfo::SetExtensions) + .def("__repr__", &ktt::PlatformInfo::GetString); + + py::class_(module, 
"BufferOutputDescriptor") + .def(py::init()) + .def(py::init()) + .def("GetArgumentId", &ktt::BufferOutputDescriptor::GetArgumentId) + .def("GetOutputDestination", &ktt::BufferOutputDescriptor::GetOutputDestination, py::return_value_policy::reference) + .def("GetOutputSize", &ktt::BufferOutputDescriptor::GetOutputSize); + + py::class_(module, "KernelCompilationData") + .def(py::init<>()) + .def_readwrite("m_MaxWorkGroupSize", &ktt::KernelCompilationData::m_MaxWorkGroupSize) + .def_readwrite("m_LocalMemorySize", &ktt::KernelCompilationData::m_LocalMemorySize) + .def_readwrite("m_PrivateMemorySize", &ktt::KernelCompilationData::m_PrivateMemorySize) + .def_readwrite("m_ConstantMemorySize", &ktt::KernelCompilationData::m_ConstantMemorySize) + .def_readwrite("m_RegistersCount", &ktt::KernelCompilationData::m_RegistersCount); + + py::class_(module, "KernelProfilingCounter") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def(py::init()) + .def("GetName", &ktt::KernelProfilingCounter::GetName) + .def("GetType", &ktt::KernelProfilingCounter::GetType) + .def("GetValueInt", &ktt::KernelProfilingCounter::GetValueInt) + .def("GetValueUint", &ktt::KernelProfilingCounter::GetValueUint) + .def("GetValueDouble", &ktt::KernelProfilingCounter::GetValueDouble) + .def(py::self == py::self) + .def(py::self != py::self) + .def(py::self < py::self); + + py::class_(module, "KernelProfilingData") + .def(py::init<>()) + .def(py::init()) + .def(py::init&>()) + .def("IsValid", &ktt::KernelProfilingData::IsValid) + .def("HasCounter", &ktt::KernelProfilingData::HasCounter) + .def("GetCounter", &ktt::KernelProfilingData::GetCounter) + .def("GetCounters", &ktt::KernelProfilingData::GetCounters) + .def("SetCounters", &ktt::KernelProfilingData::SetCounters) + .def("AddCounter", &ktt::KernelProfilingData::AddCounter) + .def("HasRemainingProfilingRuns", &ktt::KernelProfilingData::HasRemainingProfilingRuns) + .def("GetRemainingProfilingRuns", 
&ktt::KernelProfilingData::GetRemainingProfilingRuns) + .def("DecreaseRemainingProfilingRuns", &ktt::KernelProfilingData::DecreaseRemainingProfilingRuns); + + py::class_(module, "ComputationResult") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def("SetDurationData", &ktt::ComputationResult::SetDurationData) + .def("SetSizeData", &ktt::ComputationResult::SetSizeData) + // Todo: check pybind11 smart_holder branch for unique_ptr argument passing support + //.def("SetCompilationData", &ktt::ComputationResult::SetCompilationData) + //.def("SetProfilingData", &ktt::ComputationResult::SetProfilingData) + .def("GetKernelFunction", &ktt::ComputationResult::GetKernelFunction) + .def("GetGlobalSize", &ktt::ComputationResult::GetGlobalSize) + .def("GetLocalSize", &ktt::ComputationResult::GetLocalSize) + .def("GetDuration", &ktt::ComputationResult::GetDuration) + .def("GetOverhead", &ktt::ComputationResult::GetOverhead) + .def("HasCompilationData", &ktt::ComputationResult::HasCompilationData) + .def("GetCompilationData", &ktt::ComputationResult::GetCompilationData) + .def("HasProfilingData", &ktt::ComputationResult::HasProfilingData) + .def("GetProfilingData", &ktt::ComputationResult::GetProfilingData) + .def("HasRemainingProfilingRuns", &ktt::ComputationResult::HasRemainingProfilingRuns) + .def("assign", &ktt::ComputationResult::operator=); + + py::class_(module, "KernelResult") + .def(py::init<>()) + .def(py::init()) + .def(py::init&>()) + .def("SetStatus", &ktt::KernelResult::SetStatus) + .def("SetExtraDuration", &ktt::KernelResult::SetExtraDuration) + .def("SetExtraOverhead", &ktt::KernelResult::SetExtraOverhead) + .def("GetKernelName", &ktt::KernelResult::GetKernelName) + .def("GetConfiguration", &ktt::KernelResult::GetConfiguration) + .def("GetStatus", &ktt::KernelResult::GetStatus) + .def("GetKernelDuration", &ktt::KernelResult::GetKernelDuration) + .def("GetKernelOverhead", &ktt::KernelResult::GetKernelOverhead) + .def("GetExtraDuration", 
&ktt::KernelResult::GetExtraDuration) + .def("GetExtraOverhead", &ktt::KernelResult::GetExtraOverhead) + .def("GetTotalDuration", &ktt::KernelResult::GetTotalDuration) + .def("GetTotalOverhead", &ktt::KernelResult::GetTotalOverhead) + .def("IsValid", &ktt::KernelResult::IsValid) + .def("HasRemainingProfilingRuns", &ktt::KernelResult::HasRemainingProfilingRuns); + + py::class_(module, "ComputeApiInitializer") + .def(py::init&>()) + .def("GetContext", &ktt::ComputeApiInitializer::GetContext) + .def("GetQueues", &ktt::ComputeApiInitializer::GetQueues); +} + +#endif // KTT_PYTHON diff --git a/Source/Python/PythonEnums.cpp b/Source/Python/PythonEnums.cpp new file mode 100644 index 00000000..57546ff2 --- /dev/null +++ b/Source/Python/PythonEnums.cpp @@ -0,0 +1,129 @@ +#ifdef KTT_PYTHON + +#include + +#include + +namespace py = pybind11; + +void InitializePythonEnums(py::module_& module) +{ + py::enum_(module, "ArgumentAccessType") + .value("Undefined", ktt::ArgumentAccessType::Undefined) + .value("ReadOnly", ktt::ArgumentAccessType::ReadOnly) + .value("WriteOnly", ktt::ArgumentAccessType::WriteOnly) + .value("ReadWrite", ktt::ArgumentAccessType::ReadWrite); + + py::enum_(module, "ArgumentDataType") + .value("Char", ktt::ArgumentDataType::Char) + .value("UnsignedChar", ktt::ArgumentDataType::UnsignedChar) + .value("Short", ktt::ArgumentDataType::Short) + .value("UnsignedShort", ktt::ArgumentDataType::UnsignedShort) + .value("Int", ktt::ArgumentDataType::Int) + .value("UnsignedInt", ktt::ArgumentDataType::UnsignedInt) + .value("Long", ktt::ArgumentDataType::Long) + .value("UnsignedLong", ktt::ArgumentDataType::UnsignedLong) + .value("Half", ktt::ArgumentDataType::Half) + .value("Float", ktt::ArgumentDataType::Float) + .value("Double", ktt::ArgumentDataType::Double) + .value("Custom", ktt::ArgumentDataType::Custom); + + py::enum_(module, "ArgumentManagementType") + .value("Framework", ktt::ArgumentManagementType::Framework) + .value("User", 
ktt::ArgumentManagementType::User); + + py::enum_(module, "ArgumentMemoryLocation") + .value("Undefined", ktt::ArgumentMemoryLocation::Undefined) + .value("Device", ktt::ArgumentMemoryLocation::Device) + .value("Host", ktt::ArgumentMemoryLocation::Host) + .value("HostZeroCopy", ktt::ArgumentMemoryLocation::HostZeroCopy) + .value("Unified", ktt::ArgumentMemoryLocation::Unified); + + py::enum_(module, "ArgumentMemoryType") + .value("Scalar", ktt::ArgumentMemoryType::Scalar) + .value("Vector", ktt::ArgumentMemoryType::Vector) + .value("Local", ktt::ArgumentMemoryType::Local) + .value("Symbol", ktt::ArgumentMemoryType::Symbol); + + py::enum_(module, "ComputeApi") + .value("OpenCL", ktt::ComputeApi::OpenCL) + .value("CUDA", ktt::ComputeApi::CUDA) + .value("Vulkan", ktt::ComputeApi::Vulkan); + + py::enum_(module, "DeviceType") + .value("CPU", ktt::DeviceType::CPU) + .value("GPU", ktt::DeviceType::GPU) + .value("Custom", ktt::DeviceType::Custom); + + py::enum_(module, "ExceptionReason") + .value("General", ktt::ExceptionReason::General) + .value("CompilerError", ktt::ExceptionReason::CompilerError) + .value("DeviceLimitsExceeded", ktt::ExceptionReason::DeviceLimitsExceeded); + + py::enum_(module, "GlobalSizeType") + .value("OpenCL", ktt::GlobalSizeType::OpenCL) + .value("CUDA", ktt::GlobalSizeType::CUDA) + .value("Vulkan", ktt::GlobalSizeType::Vulkan); + + py::enum_(module, "LoggingLevel") + .value("Off", ktt::LoggingLevel::Off) + .value("Error", ktt::LoggingLevel::Error) + .value("Warning", ktt::LoggingLevel::Warning) + .value("Info", ktt::LoggingLevel::Info) + .value("Debug", ktt::LoggingLevel::Debug); + + py::enum_(module, "ModifierAction") + .value("Add", ktt::ModifierAction::Add) + .value("Subtract", ktt::ModifierAction::Subtract) + .value("Multiply", ktt::ModifierAction::Multiply) + .value("Divide", ktt::ModifierAction::Divide) + .value("DivideCeil", ktt::ModifierAction::DivideCeil); + + py::enum_(module, "ModifierDimension") + .value("X", ktt::ModifierDimension::X) 
+ .value("Y", ktt::ModifierDimension::Y) + .value("Z", ktt::ModifierDimension::Z); + + py::enum_(module, "ModifierType") + .value("Global", ktt::ModifierType::Global) + .value("Local", ktt::ModifierType::Local); + + py::enum_(module, "OutputFormat") + .value("JSON", ktt::OutputFormat::JSON) + .value("XML", ktt::OutputFormat::XML); + + py::enum_(module, "ProfilingCounterType") + .value("Int", ktt::ProfilingCounterType::Int) + .value("UnsignedInt", ktt::ProfilingCounterType::UnsignedInt) + .value("Double", ktt::ProfilingCounterType::Double) + .value("Percent", ktt::ProfilingCounterType::Percent) + .value("Throughput", ktt::ProfilingCounterType::Throughput) + .value("UtilizationLevel", ktt::ProfilingCounterType::UtilizationLevel); + + py::enum_(module, "ResultStatus") + .value("Ok", ktt::ResultStatus::Ok) + .value("ComputationFailed", ktt::ResultStatus::ComputationFailed) + .value("ValidationFailed", ktt::ResultStatus::ValidationFailed) + .value("CompilationFailed", ktt::ResultStatus::CompilationFailed) + .value("DeviceLimitsExceeded", ktt::ResultStatus::DeviceLimitsExceeded); + + py::enum_(module, "TimeUnit") + .value("Nanoseconds", ktt::TimeUnit::Nanoseconds) + .value("Microseconds", ktt::TimeUnit::Microseconds) + .value("Milliseconds", ktt::TimeUnit::Milliseconds) + .value("Seconds", ktt::TimeUnit::Seconds); + + py::enum_(module, "ValidationMethod") + .value("AbsoluteDifference", ktt::ValidationMethod::AbsoluteDifference) + .value("SideBySideComparison", ktt::ValidationMethod::SideBySideComparison) + .value("SideBySideRelativeComparison", ktt::ValidationMethod::SideBySideRelativeComparison); + + py::enum_(module, "ValidationMode", py::arithmetic()) + .value("None", ktt::ValidationMode::None) + .value("Running", ktt::ValidationMode::Running) + .value("OfflineTuning", ktt::ValidationMode::OfflineTuning) + .value("OnlineTuning", ktt::ValidationMode::OnlineTuning) + .value("All", ktt::ValidationMode::All); +} + +#endif // KTT_PYTHON diff --git 
a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index fe4b29b1..e706057f 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -1,5 +1,6 @@ #ifdef KTT_PYTHON +#include #include #include #include @@ -8,31 +9,10 @@ namespace py = pybind11; -class PySearcher : public ktt::Searcher -{ -public: - using Searcher::Searcher; - - void OnInitialize() override - { - PYBIND11_OVERRIDE(void, ktt::Searcher, OnInitialize); - } - - void OnReset() override - { - PYBIND11_OVERRIDE(void, ktt::Searcher, OnReset); - } - - bool CalculateNextConfiguration(const ktt::KernelResult& previousResult) override - { - PYBIND11_OVERRIDE_PURE(bool, ktt::Searcher, CalculateNextConfiguration, previousResult); - } - - ktt::KernelConfiguration GetCurrentConfiguration() const override - { - PYBIND11_OVERRIDE_PURE(ktt::KernelConfiguration, ktt::Searcher, GetCurrentConfiguration); - } -}; +void InitializePythonEnums(py::module_& module); +void InitializePythonDataHolders(py::module_& module); +void InitializePythonSearchers(py::module_& module); +void InitializePythonStopConditions(py::module_& module); PYBIND11_MODULE(ktt, module) { @@ -51,252 +31,167 @@ PYBIND11_MODULE(ktt, module) module.attr("InvalidArgumentId") = ktt::InvalidArgumentId; module.attr("InvalidDuration") = ktt::InvalidDuration; - py::register_exception(module, "KttException", PyExc_Exception); - - py::enum_(module, "ComputeApi") - .value("OpenCL", ktt::ComputeApi::OpenCL) - .value("CUDA", ktt::ComputeApi::CUDA) - .value("Vulkan", ktt::ComputeApi::Vulkan); - - py::enum_(module, "DeviceType") - .value("CPU", ktt::DeviceType::CPU) - .value("GPU", ktt::DeviceType::GPU) - .value("Custom", ktt::DeviceType::Custom); - - py::enum_(module, "ExceptionReason") - .value("General", ktt::ExceptionReason::General) - .value("CompilerError", ktt::ExceptionReason::CompilerError) - .value("DeviceLimitsExceeded", ktt::ExceptionReason::DeviceLimitsExceeded); - - py::enum_(module, "ModifierAction") - 
.value("Add", ktt::ModifierAction::Add) - .value("Subtract", ktt::ModifierAction::Subtract) - .value("Multiply", ktt::ModifierAction::Multiply) - .value("Divide", ktt::ModifierAction::Divide) - .value("DivideCeil", ktt::ModifierAction::DivideCeil); - - py::enum_(module, "ModifierDimension") - .value("X", ktt::ModifierDimension::X) - .value("Y", ktt::ModifierDimension::Y) - .value("Z", ktt::ModifierDimension::Z); - - py::enum_(module, "ProfilingCounterType") - .value("Int", ktt::ProfilingCounterType::Int) - .value("UnsignedInt", ktt::ProfilingCounterType::UnsignedInt) - .value("Double", ktt::ProfilingCounterType::Double) - .value("Percent", ktt::ProfilingCounterType::Percent) - .value("Throughput", ktt::ProfilingCounterType::Throughput) - .value("UtilizationLevel", ktt::ProfilingCounterType::UtilizationLevel); - - py::enum_(module, "ResultStatus") - .value("Ok", ktt::ResultStatus::Ok) - .value("ComputationFailed", ktt::ResultStatus::ComputationFailed) - .value("ValidationFailed", ktt::ResultStatus::ValidationFailed) - .value("CompilationFailed", ktt::ResultStatus::CompilationFailed) - .value("DeviceLimitsExceeded", ktt::ResultStatus::DeviceLimitsExceeded); - - py::class_(module, "DimensionVector") - .def(py::init<>()) - .def(py::init()) - .def(py::init()) - .def(py::init()) - .def(py::init&>()) - .def("SetSizeX", &ktt::DimensionVector::SetSizeX) - .def("SetSizeY", &ktt::DimensionVector::SetSizeY) - .def("SetSizeZ", &ktt::DimensionVector::SetSizeZ) - .def("SetSize", &ktt::DimensionVector::SetSize) - .def("Multiply", &ktt::DimensionVector::Multiply) - .def("Divide", &ktt::DimensionVector::Divide) - .def("RoundUp", &ktt::DimensionVector::RoundUp) - .def("ModifyByValue", &ktt::DimensionVector::ModifyByValue) - .def("GetSizeX", &ktt::DimensionVector::GetSizeX) - .def("GetSizeY", &ktt::DimensionVector::GetSizeY) - .def("GetSizeZ", &ktt::DimensionVector::GetSizeZ) - .def("GetSize", &ktt::DimensionVector::GetSize) - .def("GetTotalSize", &ktt::DimensionVector::GetTotalSize) 
- .def("GetVector", &ktt::DimensionVector::GetVector) - .def("GetString", &ktt::DimensionVector::GetString) - .def(py::self == py::self) - .def(py::self != py::self) - .def("__repr__", &ktt::DimensionVector::GetString); - - py::class_(module, "ParameterPair") - .def(py::init<>()) - .def(py::init()) - .def(py::init()) - .def("SetValue", py::overload_cast(&ktt::ParameterPair::SetValue)) - .def("SetValue", py::overload_cast(&ktt::ParameterPair::SetValue)) - .def("GetName", &ktt::ParameterPair::GetName) - .def("GetString", &ktt::ParameterPair::GetString) - .def("GetValueString", &ktt::ParameterPair::GetValueString) - .def("GetValue", &ktt::ParameterPair::GetValue) - .def("GetValueDouble", &ktt::ParameterPair::GetValueDouble) - .def("HasValueDouble", &ktt::ParameterPair::HasValueDouble) - .def("HasSameValue", &ktt::ParameterPair::HasSameValue) - .def_static("GetParameterValue", &ktt::ParameterPair::GetParameterValue) - .def_static("GetParameterValueDouble", &ktt::ParameterPair::GetParameterValue) - .def_static("GetParameterValues", &ktt::ParameterPair::GetParameterValues) - .def_static("GetParameterValuesDouble", &ktt::ParameterPair::GetParameterValues) - .def("__repr__", &ktt::ParameterPair::GetString); + InitializePythonEnums(module); + InitializePythonDataHolders(module); + InitializePythonSearchers(module); + InitializePythonStopConditions(module); - py::class_(module, "KernelConfiguration") - .def(py::init<>()) - .def(py::init&>()) - .def("GetPairs", &ktt::KernelConfiguration::GetPairs) - .def("IsValid", &ktt::KernelConfiguration::IsValid) - .def("GeneratePrefix", &ktt::KernelConfiguration::GeneratePrefix) - .def("GetString", &ktt::KernelConfiguration::GetString) - .def("Merge", &ktt::KernelConfiguration::Merge) - .def("GenerateNeighbours", &ktt::KernelConfiguration::GenerateNeighbours) - .def(py::self == py::self) - .def(py::self != py::self) - .def("__repr__", &ktt::KernelConfiguration::GetString); - - py::class_(module, "DeviceInfo") - .def(py::init()) - 
.def("GetIndex", &ktt::DeviceInfo::GetIndex) - .def("GetName", &ktt::DeviceInfo::GetName) - .def("GetVendor", &ktt::DeviceInfo::GetVendor) - .def("GetExtensions", &ktt::DeviceInfo::GetExtensions) - .def("GetDeviceType", &ktt::DeviceInfo::GetDeviceType) - .def("GetDeviceTypeString", &ktt::DeviceInfo::GetDeviceTypeString) - .def("GetGlobalMemorySize", &ktt::DeviceInfo::GetGlobalMemorySize) - .def("GetLocalMemorySize", &ktt::DeviceInfo::GetLocalMemorySize) - .def("GetMaxConstantBufferSize", &ktt::DeviceInfo::GetMaxConstantBufferSize) - .def("GetMaxWorkGroupSize", &ktt::DeviceInfo::GetMaxWorkGroupSize) - .def("GetMaxComputeUnits", &ktt::DeviceInfo::GetMaxComputeUnits) - .def("GetString", &ktt::DeviceInfo::GetString) - .def("SetVendor", &ktt::DeviceInfo::SetVendor) - .def("SetExtensions", &ktt::DeviceInfo::SetExtensions) - .def("SetDeviceType", &ktt::DeviceInfo::SetDeviceType) - .def("SetGlobalMemorySize", &ktt::DeviceInfo::SetGlobalMemorySize) - .def("SetLocalMemorySize", &ktt::DeviceInfo::SetLocalMemorySize) - .def("SetMaxConstantBufferSize", &ktt::DeviceInfo::SetMaxConstantBufferSize) - .def("SetMaxWorkGroupSize", &ktt::DeviceInfo::SetMaxWorkGroupSize) - .def("SetMaxComputeUnits", &ktt::DeviceInfo::SetMaxComputeUnits) - .def("__repr__", &ktt::DeviceInfo::GetString); - - py::class_(module, "PlatformInfo") - .def(py::init()) - .def("GetIndex", &ktt::PlatformInfo::GetIndex) - .def("GetName", &ktt::PlatformInfo::GetName) - .def("GetVendor", &ktt::PlatformInfo::GetVendor) - .def("GetVersion", &ktt::PlatformInfo::GetVersion) - .def("GetExtensions", &ktt::PlatformInfo::GetExtensions) - .def("GetString", &ktt::PlatformInfo::GetString) - .def("SetVendor", &ktt::PlatformInfo::SetVendor) - .def("SetVersion", &ktt::PlatformInfo::SetVersion) - .def("SetExtensions", &ktt::PlatformInfo::SetExtensions) - .def("__repr__", &ktt::PlatformInfo::GetString); - - py::class_(module, "BufferOutputDescriptor") - .def(py::init()) - .def(py::init()) - .def("GetArgumentId", 
&ktt::BufferOutputDescriptor::GetArgumentId) - .def("GetOutputDestination", &ktt::BufferOutputDescriptor::GetOutputDestination, py::return_value_policy::reference) - .def("GetOutputSize", &ktt::BufferOutputDescriptor::GetOutputSize); - - py::class_(module, "KernelCompilationData") - .def(py::init<>()) - .def_readwrite("m_MaxWorkGroupSize", &ktt::KernelCompilationData::m_MaxWorkGroupSize) - .def_readwrite("m_LocalMemorySize", &ktt::KernelCompilationData::m_LocalMemorySize) - .def_readwrite("m_PrivateMemorySize", &ktt::KernelCompilationData::m_PrivateMemorySize) - .def_readwrite("m_ConstantMemorySize", &ktt::KernelCompilationData::m_ConstantMemorySize) - .def_readwrite("m_RegistersCount", &ktt::KernelCompilationData::m_RegistersCount); - - py::class_(module, "KernelProfilingCounter") - .def(py::init<>()) - .def(py::init()) - .def(py::init()) - .def(py::init()) - .def("GetName", &ktt::KernelProfilingCounter::GetName) - .def("GetType", &ktt::KernelProfilingCounter::GetType) - .def("GetValueInt", &ktt::KernelProfilingCounter::GetValueInt) - .def("GetValueUint", &ktt::KernelProfilingCounter::GetValueUint) - .def("GetValueDouble", &ktt::KernelProfilingCounter::GetValueDouble) - .def(py::self == py::self) - .def(py::self != py::self) - .def(py::self < py::self); - - py::class_(module, "KernelProfilingData") - .def(py::init<>()) - .def(py::init()) - .def(py::init&>()) - .def("IsValid", &ktt::KernelProfilingData::IsValid) - .def("HasCounter", &ktt::KernelProfilingData::HasCounter) - .def("GetCounter", &ktt::KernelProfilingData::GetCounter) - .def("GetCounters", &ktt::KernelProfilingData::GetCounters) - .def("SetCounters", &ktt::KernelProfilingData::SetCounters) - .def("AddCounter", &ktt::KernelProfilingData::AddCounter) - .def("HasRemainingProfilingRuns", &ktt::KernelProfilingData::HasRemainingProfilingRuns) - .def("GetRemainingProfilingRuns", &ktt::KernelProfilingData::GetRemainingProfilingRuns) - .def("DecreaseRemainingProfilingRuns", 
&ktt::KernelProfilingData::DecreaseRemainingProfilingRuns); - - py::class_(module, "ComputationResult") - .def(py::init<>()) - .def(py::init()) - .def(py::init()) - .def("SetDurationData", &ktt::ComputationResult::SetDurationData) - .def("SetSizeData", &ktt::ComputationResult::SetSizeData) - // Todo: check pybind11 smart_holder branch for unique_ptr argument passing support - //.def("SetCompilationData", &ktt::ComputationResult::SetCompilationData) - //.def("SetProfilingData", &ktt::ComputationResult::SetProfilingData) - .def("GetKernelFunction", &ktt::ComputationResult::GetKernelFunction) - .def("GetGlobalSize", &ktt::ComputationResult::GetGlobalSize) - .def("GetLocalSize", &ktt::ComputationResult::GetLocalSize) - .def("GetDuration", &ktt::ComputationResult::GetDuration) - .def("GetOverhead", &ktt::ComputationResult::GetOverhead) - .def("HasCompilationData", &ktt::ComputationResult::HasCompilationData) - .def("GetCompilationData", &ktt::ComputationResult::GetCompilationData) - .def("HasProfilingData", &ktt::ComputationResult::HasProfilingData) - .def("GetProfilingData", &ktt::ComputationResult::GetProfilingData) - .def("HasRemainingProfilingRuns", &ktt::ComputationResult::HasRemainingProfilingRuns) - .def("assign", &ktt::ComputationResult::operator=); - - py::class_(module, "KernelResult") - .def(py::init<>()) - .def(py::init()) - .def(py::init&>()) - .def("SetStatus", &ktt::KernelResult::SetStatus) - .def("SetExtraDuration", &ktt::KernelResult::SetExtraDuration) - .def("SetExtraOverhead", &ktt::KernelResult::SetExtraOverhead) - .def("GetKernelName", &ktt::KernelResult::GetKernelName) - .def("GetConfiguration", &ktt::KernelResult::GetConfiguration) - .def("GetStatus", &ktt::KernelResult::GetStatus) - .def("GetKernelDuration", &ktt::KernelResult::GetKernelDuration) - .def("GetKernelOverhead", &ktt::KernelResult::GetKernelOverhead) - .def("GetExtraDuration", &ktt::KernelResult::GetExtraDuration) - .def("GetExtraOverhead", &ktt::KernelResult::GetExtraOverhead) - 
.def("GetTotalDuration", &ktt::KernelResult::GetTotalDuration) - .def("GetTotalOverhead", &ktt::KernelResult::GetTotalOverhead) - .def("IsValid", &ktt::KernelResult::IsValid) - .def("HasRemainingProfilingRuns", &ktt::KernelResult::HasRemainingProfilingRuns); - - py::class_(module, "Searcher") - .def(py::init<>()) - .def("OnInitialize", &ktt::Searcher::OnInitialize) - .def("OnReset", &ktt::Searcher::OnReset) - .def("CalculateNextConfiguration", &ktt::Searcher::CalculateNextConfiguration) - .def("GetCurrentConfiguration", &ktt::Searcher::GetCurrentConfiguration) - .def("GetIndex", &ktt::Searcher::GetIndex) - .def("GetRandomConfiguration", &ktt::Searcher::GetRandomConfiguration) - .def("GetNeighbourConfigurations", &ktt::Searcher::GetNeighbourConfigurations) - .def("GetConfigurationsCount", &ktt::Searcher::GetConfigurationsCount) - .def("GetExploredIndices", &ktt::Searcher::GetExploredIndices) - .def("IsInitialized", &ktt::Searcher::IsInitialized); - - py::class_(module, "DeterministicSearcher") - .def(py::init<>()); - - py::class_(module, "McmcSearcher") - .def(py::init&>()); + py::register_exception(module, "KttException", PyExc_Exception); - py::class_(module, "RandomSearcher") - .def(py::init<>()); + py::class_(module, "ComputeInterface") + .def("RunKernel", py::overload_cast(&ktt::ComputeInterface::RunKernel)) + .def("RunKernel", py::overload_cast(&ktt::ComputeInterface::RunKernel)) + .def("RunKernelAsync", py::overload_cast(&ktt::ComputeInterface::RunKernelAsync)) + .def("RunKernelAsync", py::overload_cast(&ktt::ComputeInterface::RunKernelAsync)) + .def("WaitForComputeAction", &ktt::ComputeInterface::WaitForComputeAction) + .def("RunKernelWithProfiling", py::overload_cast(&ktt::ComputeInterface::RunKernelWithProfiling)) + .def("RunKernelWithProfiling", py::overload_cast(&ktt::ComputeInterface::RunKernelWithProfiling)) + // Todo: these overloads do not work for some reason + //.def("GetRemainingProfilingRuns", 
py::overload_cast(&ktt::ComputeInterface::GetRemainingProfilingRuns)) + //.def("GetRemainingProfilingRuns", py::overload_cast<>(&ktt::ComputeInterface::GetRemainingProfilingRuns)) + .def("GetDefaultQueue", &ktt::ComputeInterface::GetDefaultQueue) + .def("GetAllQueues", &ktt::ComputeInterface::GetAllQueues) + .def("SynchronizeQueue", &ktt::ComputeInterface::SynchronizeQueue) + .def("SynchronizeDevice", &ktt::ComputeInterface::SynchronizeDevice) + .def("GetCurrentGlobalSize", &ktt::ComputeInterface::GetCurrentGlobalSize) + .def("GetCurrentLocalSize", &ktt::ComputeInterface::GetCurrentLocalSize) + .def("GetCurrentConfiguration", &ktt::ComputeInterface::GetCurrentConfiguration) + .def("ChangeArguments", &ktt::ComputeInterface::ChangeArguments) + .def("SwapArguments", &ktt::ComputeInterface::SwapArguments) + .def("UpdateScalarArgument", &ktt::ComputeInterface::UpdateScalarArgument) + .def("UpdateLocalArgument", &ktt::ComputeInterface::UpdateLocalArgument) + .def("UploadBuffer", &ktt::ComputeInterface::UploadBuffer) + .def("UploadBufferAsync", &ktt::ComputeInterface::UploadBufferAsync) + .def("DownloadBuffer", &ktt::ComputeInterface::DownloadBuffer) + .def("DownloadBufferAsync", &ktt::ComputeInterface::DownloadBufferAsync) + .def("UpdateBuffer", &ktt::ComputeInterface::UpdateBuffer) + .def("UpdateBufferAsync", &ktt::ComputeInterface::UpdateBufferAsync) + .def("CopyBuffer", &ktt::ComputeInterface::CopyBuffer) + .def("CopyBufferAsync", &ktt::ComputeInterface::CopyBufferAsync) + .def("WaitForTransferAction", &ktt::ComputeInterface::WaitForTransferAction) + .def("ResizeBuffer", &ktt::ComputeInterface::ResizeBuffer) + .def("ClearBuffer", &ktt::ComputeInterface::ClearBuffer) + .def("HasBuffer", &ktt::ComputeInterface::HasBuffer) + .def("GetUnifiedMemoryBufferHandle", &ktt::ComputeInterface::GetUnifiedMemoryBufferHandle); py::class_(module, "Tuner") .def(py::init()) - .def("RemoveKernelDefinition", &ktt::Tuner::RemoveKernelDefinition); + .def(py::init()) + .def(py::init()) + 
.def(py::init&>()) + .def("AddKernelDefinition", &ktt::Tuner::AddKernelDefinition) + .def("AddKernelDefinitionFromFile", &ktt::Tuner::AddKernelDefinitionFromFile) + .def("GetKernelDefinitionId", &ktt::Tuner::GetKernelDefinitionId) + .def("RemoveKernelDefinition", &ktt::Tuner::RemoveKernelDefinition) + .def("SetArguments", &ktt::Tuner::SetArguments) + .def("CreateSimpleKernel", &ktt::Tuner::CreateSimpleKernel) + .def("CreateCompositeKernel", &ktt::Tuner::CreateCompositeKernel) + .def("RemoveKernel", &ktt::Tuner::RemoveKernel) + .def("SetLauncher", &ktt::Tuner::SetLauncher) + .def("AddParameter", py::overload_cast&, + const std::string&>(&ktt::Tuner::AddParameter)) + .def("AddParameter", py::overload_cast&, + const std::string&>(&ktt::Tuner::AddParameter)) + .def("AddThreadModifier", py::overload_cast&, const ktt::ModifierType, + const ktt::ModifierDimension, const std::vector&, ktt::ModifierFunction>(&ktt::Tuner::AddThreadModifier)) + .def("AddThreadModifier", py::overload_cast&, const ktt::ModifierType, + const ktt::ModifierDimension, const std::string&, const ktt::ModifierAction>(&ktt::Tuner::AddThreadModifier)) + .def("AddConstraint", &ktt::Tuner::AddConstraint) + .def("SetProfiledDefinitions", &ktt::Tuner::SetProfiledDefinitions) + .def("AddArgumentVectorChar", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorShort", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorInt", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorLong", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorFloat", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorDouble", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorChar", 
py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorShort", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorInt", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorLong", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorFloat", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorDouble", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorChar", py::overload_cast(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorShort", py::overload_cast(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorInt", py::overload_cast(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorLong", py::overload_cast(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorFloat", py::overload_cast(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorDouble", py::overload_cast(&ktt::Tuner::AddArgumentVector)) + //.def("AddArgumentVector", &ktt::Tuner::AddArgumentVector) + .def("AddArgumentScalarChar", &ktt::Tuner::AddArgumentScalar) + .def("AddArgumentScalarShort", &ktt::Tuner::AddArgumentScalar) + .def("AddArgumentScalarInt", &ktt::Tuner::AddArgumentScalar) + .def("AddArgumentScalarLong", &ktt::Tuner::AddArgumentScalar) + 
.def("AddArgumentScalarFloat", &ktt::Tuner::AddArgumentScalar) + .def("AddArgumentScalarDouble", &ktt::Tuner::AddArgumentScalar) + //.def("AddArgumentScalar", py::overload_cast(&ktt::Tuner::AddArgumentScalar)) + .def("AddArgumentLocalChar", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalShort", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalInt", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalLong", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalFloat", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalDouble", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentSymbolChar", &ktt::Tuner::AddArgumentSymbol) + .def("AddArgumentSymbolShort", &ktt::Tuner::AddArgumentSymbol) + .def("AddArgumentSymbolInt", &ktt::Tuner::AddArgumentSymbol) + .def("AddArgumentSymbolLong", &ktt::Tuner::AddArgumentSymbol) + .def("AddArgumentSymbolFloat", &ktt::Tuner::AddArgumentSymbol) + .def("AddArgumentSymbolDouble", &ktt::Tuner::AddArgumentSymbol) + .def("RemoveArgument", &ktt::Tuner::RemoveArgument) + .def("SetReadOnlyArgumentCache", &ktt::Tuner::SetReadOnlyArgumentCache) + .def("Run", &ktt::Tuner::Run) + .def("SetProfiling", &ktt::Tuner::SetProfiling) + .def("SetProfilingCounters", &ktt::Tuner::SetProfilingCounters) + .def("SetValidationMethod", &ktt::Tuner::SetValidationMethod) + .def("SetValidationMode", &ktt::Tuner::SetValidationMode) + .def("SetValidationRange", &ktt::Tuner::SetValidationRange) + .def("SetValueComparator", &ktt::Tuner::SetValueComparator) + .def("SetReferenceComputation", &ktt::Tuner::SetReferenceComputation) + .def("SetReferenceKernel", &ktt::Tuner::SetReferenceKernel) + .def("Tune", py::overload_cast(&ktt::Tuner::Tune)) + // Todo: check pybind11 smart_holder branch for unique_ptr argument passing support + //.def("Tune", py::overload_cast>(&ktt::Tuner::Tune)) + .def("TuneIteration", &ktt::Tuner::TuneIteration) + .def("SimulateKernelTuning", &ktt::Tuner::SimulateKernelTuning) + // Todo: check pybind11 smart_holder branch for 
unique_ptr argument passing support + //.def("SetSearcher", &ktt::Tuner::SetSearcher) + .def("ClearData", &ktt::Tuner::ClearData) + .def("GetBestConfiguration", &ktt::Tuner::GetBestConfiguration) + .def("CreateConfiguration", &ktt::Tuner::CreateConfiguration) + .def("GetKernelSource", &ktt::Tuner::GetKernelSource) + .def("GetKernelDefinitionSource", &ktt::Tuner::GetKernelDefinitionSource) + .def_static("SetTimeUnit", &ktt::Tuner::SetTimeUnit) + .def("SaveResults", &ktt::Tuner::SaveResults) + // Todo: these overloads do not work for some reason + /*.def("LoadResults", py::overload_cast(&ktt::Tuner::LoadResults)) + .def("LoadResults", py::overload_cast(&ktt::Tuner::LoadResults))*/ + .def("AddComputeQueue", &ktt::Tuner::AddComputeQueue) + .def("RemoveComputeQueue", &ktt::Tuner::RemoveComputeQueue) + .def("Synchronize", &ktt::Tuner::Synchronize) + .def("SetCompilerOptions", &ktt::Tuner::SetCompilerOptions) + .def("SetGlobalSizeType", &ktt::Tuner::SetGlobalSizeType) + .def("SetAutomaticGlobalSizeCorrection", &ktt::Tuner::SetAutomaticGlobalSizeCorrection) + .def("SetKernelCacheCapacity", &ktt::Tuner::SetKernelCacheCapacity) + .def("GetPlatformInfo", &ktt::Tuner::GetPlatformInfo) + .def("GetDeviceInfo", &ktt::Tuner::GetDeviceInfo) + .def("GetCurrentDeviceInfo", &ktt::Tuner::GetCurrentDeviceInfo) + .def_static("SetLoggingLevel", &ktt::Tuner::SetLoggingLevel) + .def_static("SetLoggingTarget", py::overload_cast(&ktt::Tuner::SetLoggingTarget)) + .def_static("SetLoggingTarget", py::overload_cast(&ktt::Tuner::SetLoggingTarget)); } #endif // KTT_PYTHON diff --git a/Source/Python/PythonSearchers.cpp b/Source/Python/PythonSearchers.cpp new file mode 100644 index 00000000..788a07c0 --- /dev/null +++ b/Source/Python/PythonSearchers.cpp @@ -0,0 +1,61 @@ +#ifdef KTT_PYTHON + +#include +#include + +#include + +namespace py = pybind11; + +class PySearcher : public ktt::Searcher +{ +public: + using Searcher::Searcher; + + void OnInitialize() override + { + PYBIND11_OVERRIDE(void, 
ktt::Searcher, OnInitialize); + } + + void OnReset() override + { + PYBIND11_OVERRIDE(void, ktt::Searcher, OnReset); + } + + bool CalculateNextConfiguration(const ktt::KernelResult& previousResult) override + { + PYBIND11_OVERRIDE_PURE(bool, ktt::Searcher, CalculateNextConfiguration, previousResult); + } + + ktt::KernelConfiguration GetCurrentConfiguration() const override + { + PYBIND11_OVERRIDE_PURE(ktt::KernelConfiguration, ktt::Searcher, GetCurrentConfiguration); + } +}; + +void InitializePythonSearchers(py::module_& module) +{ + py::class_(module, "Searcher") + .def(py::init<>()) + .def("OnInitialize", &ktt::Searcher::OnInitialize) + .def("OnReset", &ktt::Searcher::OnReset) + .def("CalculateNextConfiguration", &ktt::Searcher::CalculateNextConfiguration) + .def("GetCurrentConfiguration", &ktt::Searcher::GetCurrentConfiguration) + .def("GetIndex", &ktt::Searcher::GetIndex) + .def("GetRandomConfiguration", &ktt::Searcher::GetRandomConfiguration) + .def("GetNeighbourConfigurations", &ktt::Searcher::GetNeighbourConfigurations) + .def("GetConfigurationsCount", &ktt::Searcher::GetConfigurationsCount) + .def("GetExploredIndices", &ktt::Searcher::GetExploredIndices) + .def("IsInitialized", &ktt::Searcher::IsInitialized); + + py::class_(module, "DeterministicSearcher") + .def(py::init<>()); + + py::class_(module, "McmcSearcher") + .def(py::init&>()); + + py::class_(module, "RandomSearcher") + .def(py::init<>()); +} + +#endif // KTT_PYTHON diff --git a/Source/Python/PythonStopConditions.cpp b/Source/Python/PythonStopConditions.cpp new file mode 100644 index 00000000..1b7ee24f --- /dev/null +++ b/Source/Python/PythonStopConditions.cpp @@ -0,0 +1,58 @@ +#ifdef KTT_PYTHON + +#include +#include + +#include + +namespace py = pybind11; + +class PyStopCondition : public ktt::StopCondition +{ +public: + using StopCondition::StopCondition; + + bool IsFulfilled() const override + { + PYBIND11_OVERRIDE_PURE(bool, ktt::StopCondition, IsFulfilled); + } + + void Initialize(const 
uint64_t configurationsCount) override + { + PYBIND11_OVERRIDE_PURE(void, ktt::StopCondition, Initialize, configurationsCount); + } + + void Update(const ktt::KernelResult& result) override + { + PYBIND11_OVERRIDE_PURE(void, ktt::StopCondition, Update, result); + } + + std::string GetStatusString() const override + { + PYBIND11_OVERRIDE_PURE(std::string, ktt::StopCondition, GetStatusString); + } +}; + +void InitializePythonStopConditions(py::module_& module) +{ + py::class_(module, "StopCondition") + .def(py::init<>()) + .def("IsFulfilled", &ktt::StopCondition::IsFulfilled) + .def("Initialize", &ktt::StopCondition::Initialize) + .def("Update", &ktt::StopCondition::Update) + .def("GetStatusString", &ktt::StopCondition::GetStatusString); + + py::class_(module, "ConfigurationCount") + .def(py::init()); + + py::class_(module, "ConfigurationDuration") + .def(py::init()); + + py::class_(module, "ConfigurationFraction") + .def(py::init()); + + py::class_(module, "TuningDuration") + .def(py::init()); +} + +#endif // KTT_PYTHON From be9bd376adc6f45242bd4a95c619363a382664bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Thu, 21 Oct 2021 13:20:46 +0200 Subject: [PATCH 15/63] * Added default arguments for relevant methods inside Python bindings --- Source/Python/PythonModule.cpp | 194 ++++++++++++++++++++++++++---- Source/Python/PythonSearchers.cpp | 9 +- premake5.lua | 2 +- 3 files changed, 180 insertions(+), 25 deletions(-) diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index e706057f..ec3573db 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -65,12 +65,57 @@ PYBIND11_MODULE(ktt, module) .def("UpdateLocalArgument", &ktt::ComputeInterface::UpdateLocalArgument) .def("UploadBuffer", &ktt::ComputeInterface::UploadBuffer) .def("UploadBufferAsync", &ktt::ComputeInterface::UploadBufferAsync) - .def("DownloadBuffer", &ktt::ComputeInterface::DownloadBuffer) - .def("DownloadBufferAsync", 
&ktt::ComputeInterface::DownloadBufferAsync) - .def("UpdateBuffer", &ktt::ComputeInterface::UpdateBuffer) - .def("UpdateBufferAsync", &ktt::ComputeInterface::UpdateBufferAsync) - .def("CopyBuffer", &ktt::ComputeInterface::CopyBuffer) - .def("CopyBufferAsync", &ktt::ComputeInterface::CopyBufferAsync) + .def + ( + "DownloadBuffer", + &ktt::ComputeInterface::DownloadBuffer, + py::arg("id"), + py::arg("destination"), + py::arg("dataSize") = 0 + ) + .def + ( + "DownloadBufferAsync", + &ktt::ComputeInterface::DownloadBufferAsync, + py::arg("id"), + py::arg("queue"), + py::arg("destination"), + py::arg("dataSize") = 0 + ) + .def + ( + "UpdateBuffer", + &ktt::ComputeInterface::UpdateBuffer, + py::arg("id"), + py::arg("data"), + py::arg("dataSize") = 0 + ) + .def + ( + "UpdateBufferAsync", + &ktt::ComputeInterface::UpdateBufferAsync, + py::arg("id"), + py::arg("queue"), + py::arg("data"), + py::arg("dataSize") = 0 + ) + .def + ( + "CopyBuffer", + &ktt::ComputeInterface::CopyBuffer, + py::arg("destination"), + py::arg("source"), + py::arg("dataSize") = 0 + ) + .def + ( + "CopyBufferAsync", + &ktt::ComputeInterface::CopyBufferAsync, + py::arg("destination"), + py::arg("source"), + py::arg("queue"), + py::arg("dataSize") = 0 + ) .def("WaitForTransferAction", &ktt::ComputeInterface::WaitForTransferAction) .def("ResizeBuffer", &ktt::ComputeInterface::ResizeBuffer) .def("ClearBuffer", &ktt::ComputeInterface::ClearBuffer) @@ -82,19 +127,64 @@ PYBIND11_MODULE(ktt, module) .def(py::init()) .def(py::init()) .def(py::init&>()) - .def("AddKernelDefinition", &ktt::Tuner::AddKernelDefinition) - .def("AddKernelDefinitionFromFile", &ktt::Tuner::AddKernelDefinitionFromFile) - .def("GetKernelDefinitionId", &ktt::Tuner::GetKernelDefinitionId) + .def + ( + "AddKernelDefinition", + &ktt::Tuner::AddKernelDefinition, + py::arg("name"), + py::arg("source"), + py::arg("globalSize"), + py::arg("localSize"), + py::arg("typeNames") = std::vector{} + ) + .def + ( + "AddKernelDefinitionFromFile", + 
&ktt::Tuner::AddKernelDefinitionFromFile, + py::arg("name"), + py::arg("filePath"), + py::arg("globalSize"), + py::arg("localSize"), + py::arg("typeNames") = std::vector{} + ) + .def + ( + "GetKernelDefinitionId", + &ktt::Tuner::GetKernelDefinitionId, + py::arg("name"), + py::arg("typeNames") = std::vector{} + ) .def("RemoveKernelDefinition", &ktt::Tuner::RemoveKernelDefinition) .def("SetArguments", &ktt::Tuner::SetArguments) .def("CreateSimpleKernel", &ktt::Tuner::CreateSimpleKernel) - .def("CreateCompositeKernel", &ktt::Tuner::CreateCompositeKernel) + .def + ( + "CreateCompositeKernel", + &ktt::Tuner::CreateCompositeKernel, + py::arg("name"), + py::arg("definitionIds"), + py::arg("launcher") = static_cast(nullptr) + ) .def("RemoveKernel", &ktt::Tuner::RemoveKernel) .def("SetLauncher", &ktt::Tuner::SetLauncher) - .def("AddParameter", py::overload_cast&, - const std::string&>(&ktt::Tuner::AddParameter)) - .def("AddParameter", py::overload_cast&, - const std::string&>(&ktt::Tuner::AddParameter)) + .def + ( + "AddParameter", + py::overload_cast&, const std::string&>(&ktt::Tuner::AddParameter), + py::arg("id"), + py::arg("name"), + py::arg("values"), + py::arg("group") = std::string() + ) + .def + ( + "AddParameter", + py::overload_cast&, const std::string&>(&ktt::Tuner::AddParameter), + py::arg("id"), + py::arg("name"), + py::arg("values"), + py::arg("group") = std::string() + ) .def("AddThreadModifier", py::overload_cast&, const ktt::ModifierType, const ktt::ModifierDimension, const std::vector&, ktt::ModifierFunction>(&ktt::Tuner::AddThreadModifier)) .def("AddThreadModifier", py::overload_cast&, const ktt::ModifierType, @@ -145,12 +235,48 @@ PYBIND11_MODULE(ktt, module) .def("AddArgumentLocalLong", &ktt::Tuner::AddArgumentLocal) .def("AddArgumentLocalFloat", &ktt::Tuner::AddArgumentLocal) .def("AddArgumentLocalDouble", &ktt::Tuner::AddArgumentLocal) - .def("AddArgumentSymbolChar", &ktt::Tuner::AddArgumentSymbol) - .def("AddArgumentSymbolShort", 
&ktt::Tuner::AddArgumentSymbol) - .def("AddArgumentSymbolInt", &ktt::Tuner::AddArgumentSymbol) - .def("AddArgumentSymbolLong", &ktt::Tuner::AddArgumentSymbol) - .def("AddArgumentSymbolFloat", &ktt::Tuner::AddArgumentSymbol) - .def("AddArgumentSymbolDouble", &ktt::Tuner::AddArgumentSymbol) + .def + ( + "AddArgumentSymbolChar", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolShort", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolInt", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolLong", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolFloat", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolDouble", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) .def("RemoveArgument", &ktt::Tuner::RemoveArgument) .def("SetReadOnlyArgumentCache", &ktt::Tuner::SetReadOnlyArgumentCache) .def("Run", &ktt::Tuner::Run) @@ -165,8 +291,22 @@ PYBIND11_MODULE(ktt, module) .def("Tune", py::overload_cast(&ktt::Tuner::Tune)) // Todo: check pybind11 smart_holder branch for unique_ptr argument passing support //.def("Tune", py::overload_cast>(&ktt::Tuner::Tune)) - .def("TuneIteration", &ktt::Tuner::TuneIteration) - .def("SimulateKernelTuning", &ktt::Tuner::SimulateKernelTuning) + .def + ( + "TuneIteration", + &ktt::Tuner::TuneIteration, + py::arg("id"), + py::arg("output"), + py::arg("recomputeReference") = false + ) + .def + ( + "SimulateKernelTuning", + &ktt::Tuner::SimulateKernelTuning, + py::arg("id"), + py::arg("results"), + py::arg("iterations") = 0 + ) // Todo: check pybind11 smart_holder branch for unique_ptr argument passing support 
//.def("SetSearcher", &ktt::Tuner::SetSearcher) .def("ClearData", &ktt::Tuner::ClearData) @@ -175,7 +315,15 @@ PYBIND11_MODULE(ktt, module) .def("GetKernelSource", &ktt::Tuner::GetKernelSource) .def("GetKernelDefinitionSource", &ktt::Tuner::GetKernelDefinitionSource) .def_static("SetTimeUnit", &ktt::Tuner::SetTimeUnit) - .def("SaveResults", &ktt::Tuner::SaveResults) + .def + ( + "SaveResults", + &ktt::Tuner::SaveResults, + py::arg("results"), + py::arg("filePath"), + py::arg("format"), + py::arg("data") = ktt::UserData{} + ) // Todo: these overloads do not work for some reason /*.def("LoadResults", py::overload_cast(&ktt::Tuner::LoadResults)) .def("LoadResults", py::overload_cast(&ktt::Tuner::LoadResults))*/ diff --git a/Source/Python/PythonSearchers.cpp b/Source/Python/PythonSearchers.cpp index 788a07c0..76caa450 100644 --- a/Source/Python/PythonSearchers.cpp +++ b/Source/Python/PythonSearchers.cpp @@ -43,7 +43,14 @@ void InitializePythonSearchers(py::module_& module) .def("GetCurrentConfiguration", &ktt::Searcher::GetCurrentConfiguration) .def("GetIndex", &ktt::Searcher::GetIndex) .def("GetRandomConfiguration", &ktt::Searcher::GetRandomConfiguration) - .def("GetNeighbourConfigurations", &ktt::Searcher::GetNeighbourConfigurations) + .def + ( + "GetNeighbourConfigurations", + &ktt::Searcher::GetNeighbourConfigurations, + py::arg("configuration"), + py::arg("maxDifferences"), + py::arg("maxNeighbours") = 3 + ) .def("GetConfigurationsCount", &ktt::Searcher::GetConfigurationsCount) .def("GetExploredIndices", &ktt::Searcher::GetExploredIndices) .def("IsInitialized", &ktt::Searcher::IsInitialized); diff --git a/premake5.lua b/premake5.lua index 78fc772b..ff228e5e 100644 --- a/premake5.lua +++ b/premake5.lua @@ -361,7 +361,7 @@ project "Ktt" filter "system:windows" if _OPTIONS["python"] then postbuildcommands {"{COPYFILE} %{cfg.targetdir}/ktt.dll %{cfg.targetdir}/ktt.pyd"} - end + end filter {} From d62c104b34f9cb4d8a9d4e93ac6fd3f99b809dbf Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 1 Nov 2021 11:21:03 +0100 Subject: [PATCH 16/63] * Added Python version for some basic tutorials --- Tutorials/01ComputeApiInfo/ComputeApiInfo.py | 18 +++++ Tutorials/03KernelTuning/KernelTuningCuda.py | 83 ++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 Tutorials/01ComputeApiInfo/ComputeApiInfo.py create mode 100644 Tutorials/03KernelTuning/KernelTuningCuda.py diff --git a/Tutorials/01ComputeApiInfo/ComputeApiInfo.py b/Tutorials/01ComputeApiInfo/ComputeApiInfo.py new file mode 100644 index 00000000..ab796c20 --- /dev/null +++ b/Tutorials/01ComputeApiInfo/ComputeApiInfo.py @@ -0,0 +1,18 @@ +import ktt + +def main(): + # Create new tuner which uses CUDA as compute API. + tuner = ktt.Tuner(0, 0, ktt.ComputeApi.CUDA) + + # Print information about platforms and devices to standard output. + platforms = tuner.GetPlatformInfo() + + for i in range(len(platforms)): + print(platforms[i]) + devices = tuner.GetDeviceInfo(i); + + for device in devices: + print(device) + +if __name__ == "__main__": + main() diff --git a/Tutorials/03KernelTuning/KernelTuningCuda.py b/Tutorials/03KernelTuning/KernelTuningCuda.py new file mode 100644 index 00000000..9c115d9f --- /dev/null +++ b/Tutorials/03KernelTuning/KernelTuningCuda.py @@ -0,0 +1,83 @@ +import ctypes +import sys +import ktt + +def computeReference(a, b, scalar, buffer): + ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.POINTER(ctypes.c_float) + ctypes.pythonapi.PyCapsule_GetPointer.argtypes = [ctypes.py_object, ctypes.c_void_p] + floatList = ctypes.pythonapi.PyCapsule_GetPointer(buffer, None) + + for i in range(len(a)): + floatList[i] = a[i] + b[i] + scalar + +def main(): + deviceIndex = 0; + kernelFile = "./CudaKernel.cu"; + + argc = len(sys.argv) + + if argc >= 2: + deviceIndex = sys.argv[1] + + if argc >= 3: + kernelFile = sys.argv[2] + + numberOfElements = 1024 * 1024; + gridDimensions = ktt.DimensionVector(numberOfElements); + # Block size is 
initialized to one in this case, it will be controlled with tuning parameter which is added later. + blockDimensions = ktt.DimensionVector(); + + a = [i * 1.0 for i in range(numberOfElements)] + b = [i * 1.0 for i in range(numberOfElements)] + result = [0.0 for i in range(numberOfElements)] + scalarValue = 3.0 + + tuner = ktt.Tuner(0, deviceIndex, ktt.ComputeApi.CUDA) + + definition = tuner.AddKernelDefinitionFromFile("vectorAddition", kernelFile, gridDimensions, blockDimensions) + + aId = tuner.AddArgumentVectorFloat(a, ktt.ArgumentAccessType.ReadOnly) + bId = tuner.AddArgumentVectorFloat(b, ktt.ArgumentAccessType.ReadOnly) + resultId = tuner.AddArgumentVectorFloat(result, ktt.ArgumentAccessType.WriteOnly) + scalarId = tuner.AddArgumentScalarFloat(scalarValue) + tuner.SetArguments(definition, [aId, bId, resultId, scalarId]) + + kernel = tuner.CreateSimpleKernel("Addition", definition) + + # Set reference computation for the result argument which will be used by the tuner to automatically validate kernel output. + # The computation function receives buffer on input, where the reference result should be saved. The size of buffer corresponds + # to the validated argument size. + reference = lambda buffer : computeReference(a, b, scalarValue, buffer) + tuner.SetReferenceComputation(resultId, reference) + + # Add new kernel parameter. Specify parameter name and possible values. When kernel is tuned, the parameter value is added + # to the beginning of kernel source as preprocessor definition. E.g., for value of this parameter equal to 32, it is added + # as "#define multiply_block_size 32". + tuner.AddParameter(kernel, "multiply_block_size", [32, 64, 128, 256]) + + # In this case, the parameter also affects block size. This is specified by adding a thread modifier. 
ModifierType specifies + # that parameter affects block size of a kernel, ModifierAction specifies that block size is multiplied by value of the + # parameter, ModifierDimension specifies that dimension X of a thread block is affected by the parameter. It is also possible + # to specify which definitions are affected by the modifier. In this case, only one definition is affected. The default block + # size inside kernel definition was set to one. This means that the block size of the definition is controlled explicitly by + # value of this parameter. E.g., size of one is multiplied by 32, which means that result size is 32. + tuner.AddThreadModifier(kernel, [definition], ktt.ModifierType.Local, ktt.ModifierDimension.X, "multiply_block_size", + ktt.ModifierAction.Multiply) + + # Previously added parameter affects thread block size of kernel. However, when block size is changed, grid size has to be + # modified as well, so that grid size multiplied by block size remains constant. This means that another modifier which divides + # grid size has to be added. + tuner.AddThreadModifier(kernel, [definition], ktt.ModifierType.Global, ktt.ModifierDimension.X, "multiply_block_size", + ktt.ModifierAction.Divide) + + tuner.SetTimeUnit(ktt.TimeUnit.Microseconds) + + # Perform tuning for the specified kernel. This generates multiple versions of the kernel based on provided tuning parameters + # and their values. In this case, 4 different versions of kernel will be run. + results = tuner.Tune(kernel) + + # Save tuning results to JSON file. 
+ tuner.SaveResults(results, "TuningOutput", ktt.OutputFormat.JSON) + +if __name__ == "__main__": + main() From eda3b5510cb4d99aa707f6b18f82fc17edf9d92e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 1 Nov 2021 13:49:43 +0100 Subject: [PATCH 17/63] * Updated Python bindings library to version which supports C++ smart holders such as unique_ptr --- .../pybind11/attr.h | 0 .../pybind11/buffer_info.h | 0 .../pybind11/cast.h | 55 +- .../pybind11/chrono.h | 0 .../pybind11/common.h | 0 .../pybind11/complex.h | 0 .../pybind11/detail/class.h | 2 + .../pybind11/detail/common.h | 5 +- .../pybind11/detail/descr.h | 0 .../detail/dynamic_raw_ptr_cast_if_possible.h | 39 + .../pybind11/detail/init.h | 76 +- .../pybind11/detail/internals.h | 15 +- .../pybind11/detail/smart_holder_poc.h | 343 +++++++ .../detail/smart_holder_sfinae_hooks_only.h | 33 + .../detail/smart_holder_type_casters.h | 924 ++++++++++++++++++ .../pybind11/detail/type_caster_base.h | 11 +- .../pybind11/detail/typeid.h | 0 .../pybind11/eigen.h | 18 +- .../pybind11/embed.h | 0 .../pybind11/eval.h | 2 +- .../pybind11/functional.h | 2 +- .../pybind11/gil.h | 0 .../pybind11/iostream.h | 0 .../pybind11/numpy.h | 2 +- .../pybind11/operators.h | 0 .../pybind11/options.h | 0 .../pybind11/pybind11.h | 205 +++- .../pybind11/pytypes.h | 0 .../pybind11/smart_holder.h | 29 + .../pybind11/stl.h | 16 +- .../pybind11/stl/filesystem.h | 0 .../pybind11/stl_bind.h | 4 +- .../pybind11/trampoline_self_life_support.h | 61 ++ premake5.lua | 6 +- 34 files changed, 1789 insertions(+), 59 deletions(-) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/attr.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/buffer_info.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/cast.h (97%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/chrono.h (100%) rename Libraries/{pybind11-2.8.0 => 
pybind11-2.8.1-smart_holder}/pybind11/common.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/complex.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/detail/class.h (99%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/detail/common.h (99%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/detail/descr.h (100%) create mode 100644 Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/detail/init.h (82%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/detail/internals.h (97%) create mode 100644 Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_poc.h create mode 100644 Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_sfinae_hooks_only.h create mode 100644 Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_type_casters.h rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/detail/type_caster_base.h (99%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/detail/typeid.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/eigen.h (97%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/embed.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/eval.h (98%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/functional.h (99%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/gil.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/iostream.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/numpy.h (99%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/operators.h (100%) rename Libraries/{pybind11-2.8.0 => 
pybind11-2.8.1-smart_holder}/pybind11/options.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/pybind11.h (92%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/pytypes.h (100%) create mode 100644 Libraries/pybind11-2.8.1-smart_holder/pybind11/smart_holder.h rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/stl.h (96%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/stl/filesystem.h (100%) rename Libraries/{pybind11-2.8.0 => pybind11-2.8.1-smart_holder}/pybind11/stl_bind.h (99%) create mode 100644 Libraries/pybind11-2.8.1-smart_holder/pybind11/trampoline_self_life_support.h diff --git a/Libraries/pybind11-2.8.0/pybind11/attr.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/attr.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/attr.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/attr.h diff --git a/Libraries/pybind11-2.8.0/pybind11/buffer_info.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/buffer_info.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/buffer_info.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/buffer_info.h diff --git a/Libraries/pybind11-2.8.0/pybind11/cast.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/cast.h similarity index 97% rename from Libraries/pybind11-2.8.0/pybind11/cast.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/cast.h index 1ec2080f..dd19a74e 100644 --- a/Libraries/pybind11-2.8.0/pybind11/cast.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/cast.h @@ -1,3 +1,4 @@ +// clang-format off /* pybind11/cast.h: Partial template specializations to cast between C++ and Python types @@ -13,6 +14,7 @@ #include "pytypes.h" #include "detail/common.h" #include "detail/descr.h" +#include "detail/smart_holder_sfinae_hooks_only.h" #include "detail/type_caster_base.h" #include "detail/typeid.h" #include @@ -27,6 +29,10 @@ #include #include +#ifdef 
PYBIND11_USE_SMART_HOLDER_AS_DEFAULT +#include "detail/smart_holder_type_casters.h" +#endif + #if defined(PYBIND11_CPP17) # if defined(__has_include) # if __has_include() @@ -47,8 +53,24 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) PYBIND11_NAMESPACE_BEGIN(detail) -template class type_caster : public type_caster_base { }; -template using make_caster = type_caster>; +// clang-format on +#ifndef PYBIND11_USE_SMART_HOLDER_AS_DEFAULT +template +class type_caster_for_class_ : public type_caster_base {}; +#endif + +template +class type_caster : public type_caster_for_class_ {}; + +template +using make_caster = type_caster>; + +template +struct type_uses_smart_holder_type_caster { + static constexpr bool value + = std::is_base_of>::value; +}; +// clang-format off // Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T template typename make_caster::template cast_op_type cast_op(make_caster &caster) { @@ -733,9 +755,11 @@ struct copyable_holder_caster : public type_caster_base { holder_type holder; }; +#ifndef PYBIND11_USE_SMART_HOLDER_AS_DEFAULT /// Specialize for the common std::shared_ptr, so users don't need to template class type_caster> : public copyable_holder_caster> { }; +#endif /// Type caster for holder types like std::unique_ptr. 
/// Please consider the SFINAE hook an implementation detail, as explained @@ -752,9 +776,11 @@ struct move_only_holder_caster { static constexpr auto name = type_caster_base::name; }; +#ifndef PYBIND11_USE_SMART_HOLDER_AS_DEFAULT template class type_caster> : public move_only_holder_caster> { }; +#endif template using type_caster_holder = conditional_t::value, @@ -857,6 +883,7 @@ template using move_never = none_of, move_if_unrefer template using cast_is_temporary_value_reference = bool_constant< (std::is_reference::value || std::is_pointer::value) && !std::is_base_of>::value && + !type_uses_smart_holder_type_caster>::value && !std::is_same, void>::value >; @@ -868,7 +895,9 @@ template struct return_value_policy_ov }; template struct return_value_policy_override>::value, void>> { + detail::enable_if_t< + std::is_base_of>::value || + type_uses_smart_holder_type_caster>::value, void>> { static return_value_policy policy(return_value_policy p) { return !std::is_lvalue_reference::value && !std::is_pointer::value @@ -1036,16 +1065,6 @@ template = 0x03030000 -template ()>> -object make_simple_namespace(Args&&... args_) { - PyObject *ns = _PyNamespace_New(dict(std::forward(args_)...).ptr()); - if (!ns) throw error_already_set(); - return reinterpret_steal(ns); -} -#endif - /// \ingroup annotations /// Annotation for arguments struct arg { @@ -1421,10 +1440,10 @@ PYBIND11_NAMESPACE_END(detail) template handle type::handle_of() { - static_assert( - std::is_base_of>::value, - "py::type::of only supports the case where T is a registered C++ types." - ); + static_assert( + detail::any_of>, + detail::type_uses_smart_holder_type_caster>::value, + "py::type::of only supports the case where T is a registered C++ types."); return detail::get_type_handle(typeid(T), true); } @@ -1432,7 +1451,7 @@ handle type::handle_of() { #define PYBIND11_MAKE_OPAQUE(...) 
\ namespace pybind11 { namespace detail { \ - template<> class type_caster<__VA_ARGS__> : public type_caster_base<__VA_ARGS__> { }; \ + template<> class type_caster<__VA_ARGS__> : public type_caster_for_class_<__VA_ARGS__> { }; \ }} /// Lets you pass a type containing a `,` through a macro parameter without needing a separate diff --git a/Libraries/pybind11-2.8.0/pybind11/chrono.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/chrono.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/chrono.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/chrono.h diff --git a/Libraries/pybind11-2.8.0/pybind11/common.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/common.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/common.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/common.h diff --git a/Libraries/pybind11-2.8.0/pybind11/complex.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/complex.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/complex.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/complex.h diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/class.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/class.h similarity index 99% rename from Libraries/pybind11-2.8.0/pybind11/detail/class.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/class.h index b9376b4c..1cc1e578 100644 --- a/Libraries/pybind11-2.8.0/pybind11/detail/class.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/class.h @@ -397,6 +397,8 @@ inline void clear_instance(PyObject *self) { if (instance->owned || v_h.holder_constructed()) v_h.type->dealloc(v_h); + } else if (v_h.holder_constructed()) { + v_h.type->dealloc(v_h); // Disowned instance. 
} } // Deallocate the value/holder layout internals: diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/common.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/common.h similarity index 99% rename from Libraries/pybind11-2.8.0/pybind11/detail/common.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/common.h index 31716e5b..713de94b 100644 --- a/Libraries/pybind11-2.8.0/pybind11/detail/common.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/common.h @@ -11,11 +11,11 @@ #define PYBIND11_VERSION_MAJOR 2 #define PYBIND11_VERSION_MINOR 8 -#define PYBIND11_VERSION_PATCH 0 +#define PYBIND11_VERSION_PATCH 1 // Similar to Python's convention: https://docs.python.org/3/c-api/apiabiversion.html // Additional convention: 0xD = dev -#define PYBIND11_VERSION_HEX 0x02080000 +#define PYBIND11_VERSION_HEX 0x02080100 #define PYBIND11_NAMESPACE_BEGIN(name) namespace name { #define PYBIND11_NAMESPACE_END(name) } @@ -831,6 +831,7 @@ PYBIND11_RUNTIME_EXCEPTION(value_error, PyExc_ValueError) PYBIND11_RUNTIME_EXCEPTION(type_error, PyExc_TypeError) PYBIND11_RUNTIME_EXCEPTION(buffer_error, PyExc_BufferError) PYBIND11_RUNTIME_EXCEPTION(import_error, PyExc_ImportError) +PYBIND11_RUNTIME_EXCEPTION(attribute_error, PyExc_AttributeError) PYBIND11_RUNTIME_EXCEPTION(cast_error, PyExc_RuntimeError) /// Thrown when pybind11::cast or handle::call fail due to a type casting error PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used internally diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/descr.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/descr.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/detail/descr.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/descr.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h new file mode 100644 index 
00000000..7c00fe98 --- /dev/null +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h @@ -0,0 +1,39 @@ +// Copyright (c) 2021 The Pybind Development Team. +// All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +#pragma once + +#include "common.h" + +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +template +struct dynamic_raw_ptr_cast_is_possible : std::false_type {}; + +template +struct dynamic_raw_ptr_cast_is_possible< + To, + From, + detail::enable_if_t::value && std::is_polymorphic::value>> + : std::true_type {}; + +template ::value, int> = 0> +To *dynamic_raw_ptr_cast_if_possible(From * /*ptr*/) { + return nullptr; +} + +template ::value, int> = 0> +To *dynamic_raw_ptr_cast_if_possible(From *ptr) { + return dynamic_cast(ptr); +} + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/init.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/init.h similarity index 82% rename from Libraries/pybind11-2.8.0/pybind11/detail/init.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/init.h index cace3529..a7bda462 100644 --- a/Libraries/pybind11-2.8.0/pybind11/detail/init.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/init.h @@ -1,3 +1,4 @@ +// clang-format off /* pybind11/detail/init.h: init factory function implementation and support code. @@ -10,6 +11,7 @@ #pragma once #include "class.h" +#include "smart_holder_sfinae_hooks_only.h" PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) PYBIND11_NAMESPACE_BEGIN(detail) @@ -106,11 +108,13 @@ void construct(value_and_holder &v_h, Cpp *ptr, bool need_alias) { // the holder and destruction happens when we leave the C++ scope, and the holder // class gets to handle the destruction however it likes. 
v_h.value_ptr() = ptr; - v_h.set_instance_registered(true); // To prevent init_instance from registering it + v_h.set_instance_registered(true); // SHORTCUT To prevent init_instance from registering it + // DANGER ZONE BEGIN: exceptions will leave v_h in an invalid state. v_h.type->init_instance(v_h.inst, nullptr); // Set up the holder Holder temp_holder(std::move(v_h.holder>())); // Steal the holder v_h.type->dealloc(v_h); // Destroys the moved-out holder remains, resets value ptr to null v_h.set_instance_registered(false); + // DANGER ZONE END. construct_alias_from_cpp(is_alias_constructible{}, v_h, std::move(*ptr)); } else { @@ -130,7 +134,8 @@ void construct(value_and_holder &v_h, Alias *alias_ptr, bool) { // Holder return: copy its pointer, and move or copy the returned holder into the new instance's // holder. This also handles types like std::shared_ptr and std::unique_ptr where T is a // derived type (through those holder's implicit conversion from derived class holder constructors). 
-template +template >::value, int> = 0> void construct(value_and_holder &v_h, Holder holder, bool need_alias) { PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(need_alias); auto *ptr = holder_helper>::get(holder); @@ -169,6 +174,73 @@ void construct(value_and_holder &v_h, Alias &&result, bool) { v_h.value_ptr() = new Alias(std::move(result)); } +// clang-format on +template < + typename Class, + typename D = std::default_delete>, + detail::enable_if_t>::value, int> = 0> +void construct(value_and_holder &v_h, std::unique_ptr, D> &&unq_ptr, bool need_alias) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(need_alias); + auto *ptr = unq_ptr.get(); + no_nullptr(ptr); + if (PYBIND11_SILENCE_MSVC_C4127(Class::has_alias) && need_alias && !is_alias(ptr)) + throw type_error("pybind11::init(): construction failed: returned std::unique_ptr pointee " + "is not an alias instance"); + // Here and below: if the new object is a trampoline, the shared_from_this mechanism needs + // to be prevented from accessing the smart_holder vptr, because it does not keep the + // trampoline Python object alive. For types that don't inherit from enable_shared_from_this + // it does not matter if void_cast_raw_ptr is true or false, therefore it's not necessary + // to also inspect the type. 
+ auto smhldr = type_caster>::template smart_holder_from_unique_ptr( + std::move(unq_ptr), /*void_cast_raw_ptr*/ Class::has_alias && is_alias(ptr)); + v_h.value_ptr() = ptr; + v_h.type->init_instance(v_h.inst, &smhldr); +} + +template < + typename Class, + typename D = std::default_delete>, + detail::enable_if_t>::value, int> = 0> +void construct(value_and_holder &v_h, + std::unique_ptr, D> &&unq_ptr, + bool /*need_alias*/) { + auto *ptr = unq_ptr.get(); + no_nullptr(ptr); + auto smhldr = type_caster>::template smart_holder_from_unique_ptr( + std::move(unq_ptr), /*void_cast_raw_ptr*/ true); + v_h.value_ptr() = ptr; + v_h.type->init_instance(v_h.inst, &smhldr); +} + +template < + typename Class, + detail::enable_if_t>::value, int> = 0> +void construct(value_and_holder &v_h, std::shared_ptr> &&shd_ptr, bool need_alias) { + PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(need_alias); + auto *ptr = shd_ptr.get(); + no_nullptr(ptr); + if (PYBIND11_SILENCE_MSVC_C4127(Class::has_alias) && need_alias && !is_alias(ptr)) + throw type_error("pybind11::init(): construction failed: returned std::shared_ptr pointee " + "is not an alias instance"); + auto smhldr = type_caster>::template smart_holder_from_shared_ptr(shd_ptr); + v_h.value_ptr() = ptr; + v_h.type->init_instance(v_h.inst, &smhldr); +} + +template < + typename Class, + detail::enable_if_t>::value, int> = 0> +void construct(value_and_holder &v_h, + std::shared_ptr> &&shd_ptr, + bool /*need_alias*/) { + auto *ptr = shd_ptr.get(); + no_nullptr(ptr); + auto smhldr = type_caster>::template smart_holder_from_shared_ptr(shd_ptr); + v_h.value_ptr() = ptr; + v_h.type->init_instance(v_h.inst, &smhldr); +} +// clang-format off + // Implementing class for py::init<...>() template struct constructor { diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/internals.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/internals.h similarity index 97% rename from Libraries/pybind11-2.8.0/pybind11/detail/internals.h rename to 
Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/internals.h index 98d21eb9..b6c87160 100644 --- a/Libraries/pybind11-2.8.0/pybind11/detail/internals.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/internals.h @@ -10,6 +10,7 @@ #pragma once #include "../pytypes.h" +#include "smart_holder_sfinae_hooks_only.h" /// Tracks the `internals` and `type_info` ABI version independent of the main library version. /// @@ -267,11 +268,21 @@ struct type_info { # endif #endif +/// See README_smart_holder.rst: +/// Classic / Conservative / Progressive cross-module compatibility +#ifndef PYBIND11_INTERNALS_SH_DEF +# if defined(PYBIND11_USE_SMART_HOLDER_AS_DEFAULT) +# define PYBIND11_INTERNALS_SH_DEF "" +# else +# define PYBIND11_INTERNALS_SH_DEF "_sh_def" +# endif +#endif + #define PYBIND11_INTERNALS_ID "__pybind11_internals_v" \ - PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__" + PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE PYBIND11_INTERNALS_SH_DEF "__" #define PYBIND11_MODULE_LOCAL_ID "__pybind11_module_local_v" \ - PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__" + PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE PYBIND11_INTERNALS_SH_DEF "__" /// Each module locally stores a pointer to the `internals` data. The data /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. 
diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_poc.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_poc.h new file mode 100644 index 00000000..4a2dc034 --- /dev/null +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_poc.h @@ -0,0 +1,343 @@ +// Copyright (c) 2020-2021 The Pybind Development Team. +// All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +/* Proof-of-Concept for smart pointer interoperability. + +High-level aspects: + +* Support all `unique_ptr`, `shared_ptr` interops that are feasible. + +* Cleanly and clearly report all interops that are infeasible. + +* Meant to fit into a `PyObject`, as a holder for C++ objects. + +* Support a system design that makes it impossible to trigger + C++ Undefined Behavior, especially from Python. + +* Support a system design with clean runtime inheritance casting. From this + it follows that the `smart_holder` needs to be type-erased (`void*`). + +* Handling of RTTI for the type-erased held pointer is NOT implemented here. + It is the responsibility of the caller to ensure that `static_cast` + is well-formed when calling `as_*` member functions. Inheritance casting + needs to be handled in a different layer (similar to the code organization + in boost/python/object/inheritance.hpp). + +Details: + +* The "root holder" chosen here is a `shared_ptr` (named `vptr` in this + implementation). This choice is practically inevitable because `shared_ptr` + has only very limited support for inspecting and accessing its deleter. + +* If created from a raw pointer, or a `unique_ptr` without a custom deleter, + `vptr` always uses a custom deleter, to support `unique_ptr`-like disowning. + The custom deleters could be extended to included life-time management for + external objects (e.g. `PyObject`). 
+ +* If created from an external `shared_ptr`, or a `unique_ptr` with a custom + deleter, including life-time management for external objects is infeasible. + +* By choice, the smart_holder is movable but not copyable, to keep the design + simple, and to guard against accidental copying overhead. + +* The `void_cast_raw_ptr` option is needed to make the `smart_holder` `vptr` + member invisible to the `shared_from_this` mechanism, in case the lifetime + of a `PyObject` is tied to the pointee. +*/ + +#pragma once + +#include +#include +#include +#include +#include + +// pybindit = Python Bindings Innovation Track. +// Currently not in pybind11 namespace to signal that this POC does not depend +// on any existing pybind11 functionality. +namespace pybindit { +namespace memory { + +static constexpr bool type_has_shared_from_this(...) { return false; } + +template +static constexpr bool type_has_shared_from_this(const std::enable_shared_from_this *) { + return true; +} + +struct guarded_delete { + std::weak_ptr released_ptr; // Trick to keep the smart_holder memory footprint small. + void (*del_ptr)(void *); + bool armed_flag; + guarded_delete(void (*del_ptr)(void *), bool armed_flag) + : del_ptr{del_ptr}, armed_flag{armed_flag} {} + void operator()(void *raw_ptr) const { + if (armed_flag) + (*del_ptr)(raw_ptr); + } +}; + +template ::value, int>::type = 0> +inline void builtin_delete_if_destructible(void *raw_ptr) { + delete static_cast(raw_ptr); +} + +template ::value, int>::type = 0> +inline void builtin_delete_if_destructible(void *) { + // This noop operator is needed to avoid a compilation error (for `delete raw_ptr;`), but + // throwing an exception from a destructor will std::terminate the process. Therefore the + // runtime check for lifetime-management correctness is implemented elsewhere (in + // ensure_pointee_is_destructible()). 
+} + +template +guarded_delete make_guarded_builtin_delete(bool armed_flag) { + return guarded_delete(builtin_delete_if_destructible, armed_flag); +} + +template +inline void custom_delete(void *raw_ptr) { + D()(static_cast(raw_ptr)); +} + +template +guarded_delete make_guarded_custom_deleter(bool armed_flag) { + return guarded_delete(custom_delete, armed_flag); +}; + +template +inline bool is_std_default_delete(const std::type_info &rtti_deleter) { + return rtti_deleter == typeid(std::default_delete) + || rtti_deleter == typeid(std::default_delete); +} + +struct smart_holder { + const std::type_info *rtti_uqp_del = nullptr; + std::shared_ptr vptr; + bool vptr_is_using_noop_deleter : 1; + bool vptr_is_using_builtin_delete : 1; + bool vptr_is_external_shared_ptr : 1; + bool is_populated : 1; + bool is_disowned : 1; + bool pointee_depends_on_holder_owner : 1; // SMART_HOLDER_WIP: See PR #2839. + + // Design choice: smart_holder is movable but not copyable. + smart_holder(smart_holder &&) = default; + smart_holder(const smart_holder &) = delete; + smart_holder &operator=(smart_holder &&) = delete; + smart_holder &operator=(const smart_holder &) = delete; + + smart_holder() + : vptr_is_using_noop_deleter{false}, vptr_is_using_builtin_delete{false}, + vptr_is_external_shared_ptr{false}, is_populated{false}, is_disowned{false}, + pointee_depends_on_holder_owner{false} {} + + bool has_pointee() const { return vptr != nullptr; } + + template + static void ensure_pointee_is_destructible(const char *context) { + if (!std::is_destructible::value) + throw std::invalid_argument(std::string("Pointee is not destructible (") + context + + ")."); + } + + void ensure_is_populated(const char *context) const { + if (!is_populated) { + throw std::runtime_error(std::string("Unpopulated holder (") + context + ")."); + } + } + void ensure_is_not_disowned(const char *context) const { + if (is_disowned) { + throw std::runtime_error(std::string("Holder was disowned already (") + context + + 
")."); + } + } + + void ensure_vptr_is_using_builtin_delete(const char *context) const { + if (vptr_is_external_shared_ptr) { + throw std::invalid_argument(std::string("Cannot disown external shared_ptr (") + + context + ")."); + } + if (vptr_is_using_noop_deleter) { + throw std::invalid_argument(std::string("Cannot disown non-owning holder (") + context + + ")."); + } + if (!vptr_is_using_builtin_delete) { + throw std::invalid_argument(std::string("Cannot disown custom deleter (") + context + + ")."); + } + } + + template + void ensure_compatible_rtti_uqp_del(const char *context) const { + const std::type_info *rtti_requested = &typeid(D); + if (!rtti_uqp_del) { + if (!is_std_default_delete(*rtti_requested)) { + throw std::invalid_argument(std::string("Missing unique_ptr deleter (") + context + + ")."); + } + ensure_vptr_is_using_builtin_delete(context); + } else if (!(*rtti_requested == *rtti_uqp_del)) { + throw std::invalid_argument(std::string("Incompatible unique_ptr deleter (") + context + + ")."); + } + } + + void ensure_has_pointee(const char *context) const { + if (!has_pointee()) { + throw std::invalid_argument(std::string("Disowned holder (") + context + ")."); + } + } + + void ensure_use_count_1(const char *context) const { + if (vptr == nullptr) { + throw std::invalid_argument(std::string("Cannot disown nullptr (") + context + ")."); + } + // In multithreaded environments accessing use_count can lead to + // race conditions, but in the context of Python it is a bug (elsewhere) + // if the Global Interpreter Lock (GIL) is not being held when this code + // is reached. + // SMART_HOLDER_WIP: IMPROVABLE: assert(GIL is held). 
+ if (vptr.use_count() != 1) { + throw std::invalid_argument(std::string("Cannot disown use_count != 1 (") + context + + ")."); + } + } + + void reset_vptr_deleter_armed_flag(bool armed_flag) const { + auto vptr_del_ptr = std::get_deleter(vptr); + if (vptr_del_ptr == nullptr) { + throw std::runtime_error( + "smart_holder::reset_vptr_deleter_armed_flag() called in an invalid context."); + } + vptr_del_ptr->armed_flag = armed_flag; + } + + static smart_holder from_raw_ptr_unowned(void *raw_ptr) { + smart_holder hld; + hld.vptr.reset(raw_ptr, [](void *) {}); + hld.vptr_is_using_noop_deleter = true; + hld.is_populated = true; + return hld; + } + + template + T *as_raw_ptr_unowned() const { + return static_cast(vptr.get()); + } + + template + T &as_lvalue_ref() const { + static const char *context = "as_lvalue_ref"; + ensure_is_populated(context); + ensure_has_pointee(context); + return *as_raw_ptr_unowned(); + } + + template + T &&as_rvalue_ref() const { + static const char *context = "as_rvalue_ref"; + ensure_is_populated(context); + ensure_has_pointee(context); + return std::move(*as_raw_ptr_unowned()); + } + + template + static smart_holder from_raw_ptr_take_ownership(T *raw_ptr, bool void_cast_raw_ptr = false) { + ensure_pointee_is_destructible("from_raw_ptr_take_ownership"); + smart_holder hld; + auto gd = make_guarded_builtin_delete(true); + if (void_cast_raw_ptr) + hld.vptr.reset(static_cast(raw_ptr), std::move(gd)); + else + hld.vptr.reset(raw_ptr, std::move(gd)); + hld.vptr_is_using_builtin_delete = true; + hld.is_populated = true; + return hld; + } + + // Caller is responsible for ensuring preconditions (SMART_HOLDER_WIP: details). + void disown() { + reset_vptr_deleter_armed_flag(false); + is_disowned = true; + } + + // Caller is responsible for ensuring preconditions (SMART_HOLDER_WIP: details). 
+ void reclaim_disowned() { + reset_vptr_deleter_armed_flag(true); + is_disowned = false; + } + + // Caller is responsible for ensuring preconditions (SMART_HOLDER_WIP: details). + void release_disowned() { vptr.reset(); } + + // SMART_HOLDER_WIP: review this function. + void ensure_can_release_ownership(const char *context = "ensure_can_release_ownership") const { + ensure_is_not_disowned(context); + ensure_vptr_is_using_builtin_delete(context); + ensure_use_count_1(context); + } + + // Caller is responsible for ensuring preconditions (SMART_HOLDER_WIP: details). + void release_ownership() { + reset_vptr_deleter_armed_flag(false); + release_disowned(); + } + + template + T *as_raw_ptr_release_ownership(const char *context = "as_raw_ptr_release_ownership") { + ensure_can_release_ownership(context); + T *raw_ptr = as_raw_ptr_unowned(); + release_ownership(); + return raw_ptr; + } + + template + static smart_holder from_unique_ptr(std::unique_ptr &&unq_ptr, + bool void_cast_raw_ptr = false) { + smart_holder hld; + hld.rtti_uqp_del = &typeid(D); + hld.vptr_is_using_builtin_delete = is_std_default_delete(*hld.rtti_uqp_del); + guarded_delete gd{nullptr, false}; + if (hld.vptr_is_using_builtin_delete) + gd = make_guarded_builtin_delete(true); + else + gd = make_guarded_custom_deleter(true); + if (void_cast_raw_ptr) + hld.vptr.reset(static_cast(unq_ptr.get()), std::move(gd)); + else + hld.vptr.reset(unq_ptr.get(), std::move(gd)); + (void) unq_ptr.release(); + hld.is_populated = true; + return hld; + } + + template > + std::unique_ptr as_unique_ptr() { + static const char *context = "as_unique_ptr"; + ensure_compatible_rtti_uqp_del(context); + ensure_use_count_1(context); + T *raw_ptr = as_raw_ptr_unowned(); + release_ownership(); + return std::unique_ptr(raw_ptr); + } + + template + static smart_holder from_shared_ptr(std::shared_ptr shd_ptr) { + smart_holder hld; + hld.vptr = std::static_pointer_cast(shd_ptr); + hld.vptr_is_external_shared_ptr = true; + hld.is_populated 
= true; + return hld; + } + + template + std::shared_ptr as_shared_ptr() const { + return std::static_pointer_cast(vptr); + } +}; + +} // namespace memory +} // namespace pybindit diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_sfinae_hooks_only.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_sfinae_hooks_only.h new file mode 100644 index 00000000..f3248547 --- /dev/null +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_sfinae_hooks_only.h @@ -0,0 +1,33 @@ +// Copyright (c) 2021 The Pybind Development Team. +// All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +#pragma once + +#include "common.h" + +#include + +#ifndef PYBIND11_USE_SMART_HOLDER_AS_DEFAULT +// #define PYBIND11_USE_SMART_HOLDER_AS_DEFAULT +// Currently the main purpose of this switch is to enable non-intrusive comprehensive testing. If +// and when `smart_holder` will actually become the released default is currently open. In the +// meantime, the full functionality is easily available by using `py::classh`, which is just a +// handy shortcut for `py::class_` (see `pybind11/smart_holder.h`). Classes +// wrapped in this way are fully compatible with everything existing. +#endif + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +PYBIND11_NAMESPACE_BEGIN(detail) + +template +struct is_smart_holder_type : std::false_type {}; + +// Tag to be used as base class, inspected by type_uses_smart_holder_type_caster test. 
+struct smart_holder_type_caster_base_tag {}; + +template +struct type_uses_smart_holder_type_caster; + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_type_casters.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_type_casters.h new file mode 100644 index 00000000..98e322cd --- /dev/null +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_type_casters.h @@ -0,0 +1,924 @@ +// Copyright (c) 2021 The Pybind Development Team. +// All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +#pragma once + +#include "../gil.h" +#include "../pytypes.h" +#include "../trampoline_self_life_support.h" +#include "common.h" +#include "descr.h" +#include "dynamic_raw_ptr_cast_if_possible.h" +#include "internals.h" +#include "smart_holder_poc.h" +#include "smart_holder_sfinae_hooks_only.h" +#include "type_caster_base.h" +#include "typeid.h" + +#include +#include +#include +#include +#include +#include +#include + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +using pybindit::memory::smart_holder; + +PYBIND11_NAMESPACE_BEGIN(detail) + +template <> +struct is_smart_holder_type : std::true_type {}; + +// SMART_HOLDER_WIP: Needs refactoring of existing pybind11 code. +inline void register_instance(instance *self, void *valptr, const type_info *tinfo); +inline bool deregister_instance(instance *self, void *valptr, const type_info *tinfo); + +// The modified_type_caster_generic_load_impl could replace type_caster_generic::load_impl but not +// vice versa. The main difference is that the original code only propagates a reference to the +// held value, while the modified implementation propagates value_and_holder. 
+// clang-format off +class modified_type_caster_generic_load_impl { +public: + PYBIND11_NOINLINE explicit modified_type_caster_generic_load_impl(const std::type_info &type_info) + : typeinfo(get_type_info(type_info)), cpptype(&type_info) { } + + explicit modified_type_caster_generic_load_impl(const type_info *typeinfo = nullptr) + : typeinfo(typeinfo), cpptype(typeinfo ? typeinfo->cpptype : nullptr) { } + + bool load(handle src, bool convert) { + return load_impl(src, convert); + } + + // Base methods for generic caster; there are overridden in copyable_holder_caster + void load_value_and_holder(value_and_holder &&v_h) { + if (!v_h.holder_constructed()) { + // This is needed for old-style __init__. + // type_caster_generic::load_value BEGIN + auto *&vptr = v_h.value_ptr(); + // Lazy allocation for unallocated values: + if (vptr == nullptr) { + // Lazy allocation for unallocated values: + auto *type = v_h.type ? v_h.type : typeinfo; + if (type->operator_new) { + vptr = type->operator_new(type->type_size); + } else { + #if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912) + if (type->type_align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) + vptr = ::operator new(type->type_size, + std::align_val_t(type->type_align)); + else + #endif + vptr = ::operator new(type->type_size); + } + } + // type_caster_generic::load_value END + } + loaded_v_h = v_h; + loaded_v_h.type = typeinfo; + } + + bool try_implicit_casts(handle src, bool convert) { + for (auto &cast : typeinfo->implicit_casts) { + modified_type_caster_generic_load_impl sub_caster(*cast.first); + if (sub_caster.load(src, convert)) { + if (loaded_v_h_cpptype != nullptr) { + pybind11_fail("smart_holder_type_casters: try_implicit_casts failure."); + } + loaded_v_h = sub_caster.loaded_v_h; + loaded_v_h_cpptype = cast.first; + implicit_cast = cast.second; + return true; + } + } + return false; + } + + bool try_direct_conversions(handle src) { + for (auto &converter : *typeinfo->direct_conversions) { + if 
(converter(src.ptr(), unowned_void_ptr_from_direct_conversion)) { + return true; + } + } + return false; + } + + PYBIND11_NOINLINE static void *local_load(PyObject *src, const type_info *ti) { + std::unique_ptr loader( + new modified_type_caster_generic_load_impl(ti)); + if (loader->load(src, false)) { + // Trick to work with the existing pybind11 internals. + // The void pointer is immediately captured in a new unique_ptr in + // try_load_foreign_module_local. If this assumption is violated sanitizers + // will most likely flag a leak (verified to be the case with ASAN). + return static_cast(loader.release()); + } + return nullptr; + } + + /// Try to load with foreign typeinfo, if available. Used when there is no + /// native typeinfo, or when the native one wasn't able to produce a value. + PYBIND11_NOINLINE bool try_load_foreign_module_local(handle src) { + constexpr auto *local_key = PYBIND11_MODULE_LOCAL_ID; + const auto pytype = type::handle_of(src); + if (!hasattr(pytype, local_key)) + return false; + + type_info *foreign_typeinfo = reinterpret_borrow(getattr(pytype, local_key)); + // Only consider this foreign loader if actually foreign and is a loader of the correct cpp type + if (foreign_typeinfo->module_local_load == &local_load + || (cpptype && !same_type(*cpptype, *foreign_typeinfo->cpptype))) + return false; + + void* foreign_loader_void_ptr = + foreign_typeinfo->module_local_load(src.ptr(), foreign_typeinfo); + if (foreign_loader_void_ptr != nullptr) { + auto foreign_loader = std::unique_ptr( + static_cast(foreign_loader_void_ptr)); + // Magic number intentionally hard-coded for simplicity and maximum robustness. 
+ if (foreign_loader->local_load_safety_guard != 1887406645) { + pybind11_fail( + "smart_holder_type_casters: Unexpected local_load_safety_guard," + " possibly due to py::class_ holder mixup."); + } + if (loaded_v_h_cpptype != nullptr) { + pybind11_fail("smart_holder_type_casters: try_load_foreign_module_local failure."); + } + loaded_v_h = foreign_loader->loaded_v_h; + loaded_v_h_cpptype = foreign_loader->loaded_v_h_cpptype; + implicit_cast = foreign_loader->implicit_cast; + return true; + } + return false; + } + + // Implementation of `load`; this takes the type of `this` so that it can dispatch the relevant + // bits of code between here and copyable_holder_caster where the two classes need different + // logic (without having to resort to virtual inheritance). + template + PYBIND11_NOINLINE bool load_impl(handle src, bool convert) { + if (!src) return false; + if (!typeinfo) return try_load_foreign_module_local(src); + + auto &this_ = static_cast(*this); + + PyTypeObject *srctype = Py_TYPE(src.ptr()); + + // Case 1: If src is an exact type match for the target type then we can reinterpret_cast + // the instance's value pointer to the target type: + if (srctype == typeinfo->type) { + this_.load_value_and_holder(reinterpret_cast(src.ptr())->get_value_and_holder()); + return true; + } + // Case 2: We have a derived class + if (PyType_IsSubtype(srctype, typeinfo->type)) { + auto &bases = all_type_info(srctype); // subtype bases + bool no_cpp_mi = typeinfo->simple_type; + + // Case 2a: the python type is a Python-inherited derived class that inherits from just + // one simple (no MI) pybind11 class, or is an exact match, so the C++ instance is of + // the right type and we can use reinterpret_cast. 
+ // (This is essentially the same as case 2b, but because not using multiple inheritance + // is extremely common, we handle it specially to avoid the loop iterator and type + // pointer lookup overhead) + if (bases.size() == 1 && (no_cpp_mi || bases.front()->type == typeinfo->type)) { + this_.load_value_and_holder(reinterpret_cast(src.ptr())->get_value_and_holder()); + loaded_v_h_cpptype = bases.front()->cpptype; + reinterpret_cast_deemed_ok = true; + return true; + } + // Case 2b: the python type inherits from multiple C++ bases. Check the bases to see if + // we can find an exact match (or, for a simple C++ type, an inherited match); if so, we + // can safely reinterpret_cast to the relevant pointer. + if (bases.size() > 1) { + for (auto base : bases) { + if (no_cpp_mi ? PyType_IsSubtype(base->type, typeinfo->type) : base->type == typeinfo->type) { + this_.load_value_and_holder(reinterpret_cast(src.ptr())->get_value_and_holder(base)); + loaded_v_h_cpptype = base->cpptype; + reinterpret_cast_deemed_ok = true; + return true; + } + } + } + + // Case 2c: C++ multiple inheritance is involved and we couldn't find an exact type match + // in the registered bases, above, so try implicit casting (needed for proper C++ casting + // when MI is involved). + if (this_.try_implicit_casts(src, convert)) { + return true; + } + } + + // Perform an implicit conversion + if (convert) { + for (auto &converter : typeinfo->implicit_conversions) { + auto temp = reinterpret_steal(converter(src.ptr(), typeinfo->type)); + if (load_impl(temp, false)) { + loader_life_support::add_patient(temp); + return true; + } + } + if (this_.try_direct_conversions(src)) + return true; + } + + // Failed to match local typeinfo. Try again with global. 
+ if (typeinfo->module_local) { + if (auto gtype = get_global_type_info(*typeinfo->cpptype)) { + typeinfo = gtype; + return load(src, false); + } + } + + // Global typeinfo has precedence over foreign module_local + if (try_load_foreign_module_local(src)) + return true; + + if (src.is_none()) { + // Defer accepting None to other overloads (if we aren't in convert mode): + if (!convert) return false; + loaded_v_h = value_and_holder(); + return true; + } + + return false; + } + + const type_info *typeinfo = nullptr; + const std::type_info *cpptype = nullptr; + void *unowned_void_ptr_from_direct_conversion = nullptr; + const std::type_info *loaded_v_h_cpptype = nullptr; + void *(*implicit_cast)(void *) = nullptr; + value_and_holder loaded_v_h; + bool reinterpret_cast_deemed_ok = false; + // Magic number intentionally hard-coded, to guard against class_ holder mixups. + // Ideally type_caster_generic would have a similar guard, but this requires a change there. + // SMART_HOLDER_WIP: If it is decided that this guard is useful long term, potentially + // set/reset this value in ctor/dtor, mark volatile. + std::size_t local_load_safety_guard = 1887406645; // 32-bit compatible value for portability. +}; +// clang-format on + +struct smart_holder_type_caster_class_hooks : smart_holder_type_caster_base_tag { + static decltype(&modified_type_caster_generic_load_impl::local_load) + get_local_load_function_ptr() { + return &modified_type_caster_generic_load_impl::local_load; + } + + using holder_type = pybindit::memory::smart_holder; + + template + static bool try_initialization_using_shared_from_this(holder_type *, WrappedType *, ...) { + return false; + } + + // Adopting existing approach used by type_caster_base, although it leads to somewhat fuzzy + // ownership semantics: if we detected via shared_from_this that a shared_ptr exists already, + // it is reused, irrespective of the return_value_policy in effect. 
+ // "SomeBaseOfWrappedType" is needed because std::enable_shared_from_this is not necessarily a + // direct base of WrappedType. + template + static bool try_initialization_using_shared_from_this( + holder_type *uninitialized_location, + WrappedType *value_ptr_w_t, + const std::enable_shared_from_this *) { + auto shd_ptr = std::dynamic_pointer_cast( + detail::try_get_shared_from_this(value_ptr_w_t)); + if (!shd_ptr) + return false; + // Note: inst->owned ignored. + new (uninitialized_location) holder_type(holder_type::from_shared_ptr(shd_ptr)); + return true; + } + + template + static void init_instance_for_type(detail::instance *inst, const void *holder_const_void_ptr) { + // Need for const_cast is a consequence of the type_info::init_instance type: + // void (*init_instance)(instance *, const void *); + auto holder_void_ptr = const_cast(holder_const_void_ptr); + + auto v_h = inst->get_value_and_holder(detail::get_type_info(typeid(WrappedType))); + if (!v_h.instance_registered()) { + register_instance(inst, v_h.value_ptr(), v_h.type); + v_h.set_instance_registered(); + } + auto uninitialized_location = std::addressof(v_h.holder()); + auto value_ptr_w_t = v_h.value_ptr(); + bool pointee_depends_on_holder_owner + = dynamic_raw_ptr_cast_if_possible(value_ptr_w_t) != nullptr; + if (holder_void_ptr) { + // Note: inst->owned ignored. 
+ auto holder_ptr = static_cast(holder_void_ptr); + new (uninitialized_location) holder_type(std::move(*holder_ptr)); + } else if (!try_initialization_using_shared_from_this( + uninitialized_location, value_ptr_w_t, value_ptr_w_t)) { + if (inst->owned) { + new (uninitialized_location) holder_type(holder_type::from_raw_ptr_take_ownership( + value_ptr_w_t, /*void_cast_raw_ptr*/ pointee_depends_on_holder_owner)); + } else { + new (uninitialized_location) + holder_type(holder_type::from_raw_ptr_unowned(value_ptr_w_t)); + } + } + v_h.holder().pointee_depends_on_holder_owner + = pointee_depends_on_holder_owner; + v_h.set_holder_constructed(); + } + + template + static smart_holder smart_holder_from_unique_ptr(std::unique_ptr &&unq_ptr, + bool void_cast_raw_ptr) { + return pybindit::memory::smart_holder::from_unique_ptr(std::move(unq_ptr), + void_cast_raw_ptr); + } + + template + static smart_holder smart_holder_from_shared_ptr(std::shared_ptr shd_ptr) { + return pybindit::memory::smart_holder::from_shared_ptr(shd_ptr); + } +}; + +struct shared_ptr_trampoline_self_life_support { + PyObject *self; + explicit shared_ptr_trampoline_self_life_support(instance *inst) + : self{reinterpret_cast(inst)} { + Py_INCREF(self); + } + void operator()(void *) { + gil_scoped_acquire gil; + Py_DECREF(self); + } +}; + +template +struct smart_holder_type_caster_load { + using holder_type = pybindit::memory::smart_holder; + + bool load(handle src, bool convert) { + static_assert(type_uses_smart_holder_type_caster::value, "Internal consistency error."); + load_impl = modified_type_caster_generic_load_impl(typeid(T)); + if (!load_impl.load(src, convert)) + return false; + return true; + } + + T *loaded_as_raw_ptr_unowned() const { + void *void_ptr = load_impl.unowned_void_ptr_from_direct_conversion; + if (void_ptr == nullptr) { + if (have_holder()) { + throw_if_uninitialized_or_disowned_holder(); + void_ptr = holder().template as_raw_ptr_unowned(); + } else if (load_impl.loaded_v_h.vh != 
nullptr) + void_ptr = load_impl.loaded_v_h.value_ptr(); + if (void_ptr == nullptr) + return nullptr; + } + return convert_type(void_ptr); + } + + T &loaded_as_lvalue_ref() const { + T *raw_ptr = loaded_as_raw_ptr_unowned(); + if (raw_ptr == nullptr) + throw reference_cast_error(); + return *raw_ptr; + } + + std::shared_ptr loaded_as_shared_ptr() const { + if (load_impl.unowned_void_ptr_from_direct_conversion != nullptr) + throw cast_error("Unowned pointer from direct conversion cannot be converted to a" + " std::shared_ptr."); + if (!have_holder()) + return nullptr; + throw_if_uninitialized_or_disowned_holder(); + holder_type &hld = holder(); + hld.ensure_is_not_disowned("loaded_as_shared_ptr"); + if (hld.vptr_is_using_noop_deleter) { + throw std::runtime_error("Non-owning holder (loaded_as_shared_ptr)."); + } + auto void_raw_ptr = hld.template as_raw_ptr_unowned(); + auto type_raw_ptr = convert_type(void_raw_ptr); + if (hld.pointee_depends_on_holder_owner) { + auto vptr_gd_ptr = std::get_deleter(hld.vptr); + if (vptr_gd_ptr != nullptr) { + std::shared_ptr released_ptr = vptr_gd_ptr->released_ptr.lock(); + if (released_ptr) + return std::shared_ptr(released_ptr, type_raw_ptr); + std::shared_ptr to_be_released( + type_raw_ptr, + shared_ptr_trampoline_self_life_support(load_impl.loaded_v_h.inst)); + vptr_gd_ptr->released_ptr = to_be_released; + return to_be_released; + } + auto sptsls_ptr = std::get_deleter(hld.vptr); + if (sptsls_ptr != nullptr) { + // This code is reachable only if there are multiple registered_instances for the + // same pointee. 
+ if (reinterpret_cast(load_impl.loaded_v_h.inst) == sptsls_ptr->self) { + pybind11_fail("smart_holder_type_casters loaded_as_shared_ptr failure: " + "load_impl.loaded_v_h.inst == sptsls_ptr->self"); + } + } + if (sptsls_ptr != nullptr + || !pybindit::memory::type_has_shared_from_this(type_raw_ptr)) { + return std::shared_ptr( + type_raw_ptr, + shared_ptr_trampoline_self_life_support(load_impl.loaded_v_h.inst)); + } + if (hld.vptr_is_external_shared_ptr) { + pybind11_fail("smart_holder_type_casters loaded_as_shared_ptr failure: not " + "implemented: trampoline-self-life-support for external shared_ptr " + "to type inheriting from std::enable_shared_from_this."); + } + pybind11_fail("smart_holder_type_casters: loaded_as_shared_ptr failure: internal " + "inconsistency."); + } + std::shared_ptr void_shd_ptr = hld.template as_shared_ptr(); + return std::shared_ptr(void_shd_ptr, type_raw_ptr); + } + + template + std::unique_ptr loaded_as_unique_ptr(const char *context = "loaded_as_unique_ptr") { + if (load_impl.unowned_void_ptr_from_direct_conversion != nullptr) + throw cast_error("Unowned pointer from direct conversion cannot be converted to a" + " std::unique_ptr."); + if (!have_holder()) + return nullptr; + throw_if_uninitialized_or_disowned_holder(); + throw_if_instance_is_currently_owned_by_shared_ptr(); + holder().ensure_is_not_disowned(context); + holder().template ensure_compatible_rtti_uqp_del(context); + holder().ensure_use_count_1(context); + auto raw_void_ptr = holder().template as_raw_ptr_unowned(); + + void *value_void_ptr = load_impl.loaded_v_h.value_ptr(); + if (value_void_ptr != raw_void_ptr) { + pybind11_fail("smart_holder_type_casters: loaded_as_unique_ptr failure:" + " value_void_ptr != raw_void_ptr"); + } + + // SMART_HOLDER_WIP: MISSING: Safety checks for type conversions + // (T must be polymorphic or meet certain other conditions). 
+ T *raw_type_ptr = convert_type(raw_void_ptr); + + auto *self_life_support + = dynamic_raw_ptr_cast_if_possible(raw_type_ptr); + if (self_life_support == nullptr && holder().pointee_depends_on_holder_owner) { + throw value_error("Alias class (also known as trampoline) does not inherit from " + "py::trampoline_self_life_support, therefore the ownership of this " + "instance cannot safely be transferred to C++."); + } + + // Critical transfer-of-ownership section. This must stay together. + if (self_life_support != nullptr) { + holder().disown(); + } else { + holder().release_ownership(); + } + auto result = std::unique_ptr(raw_type_ptr); + if (self_life_support != nullptr) { + self_life_support->activate_life_support(load_impl.loaded_v_h); + } else { + load_impl.loaded_v_h.value_ptr() = nullptr; + deregister_instance( + load_impl.loaded_v_h.inst, value_void_ptr, load_impl.loaded_v_h.type); + } + // Critical section end. + + return result; + } + +private: + modified_type_caster_generic_load_impl load_impl; + + bool have_holder() const { + return load_impl.loaded_v_h.vh != nullptr && load_impl.loaded_v_h.holder_constructed(); + } + + holder_type &holder() const { return load_impl.loaded_v_h.holder(); } + + // have_holder() must be true or this function will fail. + void throw_if_uninitialized_or_disowned_holder() const { + if (!holder().is_populated) { + pybind11_fail("Missing value for wrapped C++ type:" + " Python instance is uninitialized."); + } + if (!holder().has_pointee()) { + throw value_error("Missing value for wrapped C++ type:" + " Python instance was disowned."); + } + } + + // have_holder() must be true or this function will fail. 
+ void throw_if_instance_is_currently_owned_by_shared_ptr() const { + auto vptr_gd_ptr = std::get_deleter(holder().vptr); + if (vptr_gd_ptr != nullptr && !vptr_gd_ptr->released_ptr.expired()) { + throw value_error("Python instance is currently owned by a std::shared_ptr."); + } + } + + T *convert_type(void *void_ptr) const { + if (void_ptr != nullptr && load_impl.loaded_v_h_cpptype != nullptr + && !load_impl.reinterpret_cast_deemed_ok && load_impl.implicit_cast != nullptr) { + void_ptr = load_impl.implicit_cast(void_ptr); + } + return static_cast(void_ptr); + } +}; + +// SMART_HOLDER_WIP: Needs refactoring of existing pybind11 code. +struct make_constructor : private type_caster_base { // Any type, nothing special about int. + using type_caster_base::Constructor; + using type_caster_base::make_copy_constructor; + using type_caster_base::make_move_constructor; +}; + +template +struct smart_holder_type_caster : smart_holder_type_caster_load, + smart_holder_type_caster_class_hooks { + static constexpr auto name = _(); + + // static handle cast(T, ...) + // is redundant (leads to ambiguous overloads). 
+ + static handle cast(T &&src, return_value_policy /*policy*/, handle parent) { + // type_caster_base BEGIN + // clang-format off + return cast(&src, return_value_policy::move, parent); + // clang-format on + // type_caster_base END + } + + static handle cast(T const &src, return_value_policy policy, handle parent) { + // type_caster_base BEGIN + // clang-format off + if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference) + policy = return_value_policy::copy; + return cast(&src, policy, parent); + // clang-format on + // type_caster_base END + } + + static handle cast(T &src, return_value_policy policy, handle parent) { + return cast(const_cast(src), policy, parent); // Mutbl2Const + } + + static handle cast(T const *src, return_value_policy policy, handle parent) { + auto st = type_caster_base::src_and_type(src); + return cast_const_raw_ptr( // Originally type_caster_generic::cast. + st.first, + policy, + parent, + st.second, + make_constructor::make_copy_constructor(src), + make_constructor::make_move_constructor(src)); + } + + static handle cast(T *src, return_value_policy policy, handle parent) { + return cast(const_cast(src), policy, parent); // Mutbl2Const + } + +#if defined(_MSC_VER) && _MSC_VER < 1910 + // Working around MSVC 2015 bug. const-correctness is lost. + // SMART_HOLDER_WIP: IMPROVABLE: make common code work with MSVC 2015. + template + using cast_op_type = detail::cast_op_type; +#else + template + using cast_op_type = conditional_t< + std::is_same, T const *>::value, + T const *, + conditional_t, T *>::value, + T *, + conditional_t::value, T const &, T &>>>; +#endif + + // The const operators here prove that the existing type_caster mechanism already supports + // const-correctness. However, fully implementing const-correctness inside this type_caster + // is still a major project. 
+ // NOLINTNEXTLINE(google-explicit-constructor) + operator T const &() const { + return const_cast(this)->loaded_as_lvalue_ref(); + } + // NOLINTNEXTLINE(google-explicit-constructor) + operator T const *() const { + return const_cast(this)->loaded_as_raw_ptr_unowned(); + } + // NOLINTNEXTLINE(google-explicit-constructor) + operator T &() { return this->loaded_as_lvalue_ref(); } + // NOLINTNEXTLINE(google-explicit-constructor) + operator T *() { return this->loaded_as_raw_ptr_unowned(); } + + // Originally type_caster_generic::cast. + PYBIND11_NOINLINE static handle cast_const_raw_ptr(const void *_src, + return_value_policy policy, + handle parent, + const detail::type_info *tinfo, + void *(*copy_constructor)(const void *), + void *(*move_constructor)(const void *), + const void *existing_holder = nullptr) { + if (!tinfo) // no type info: error will be set already + return handle(); + + void *src = const_cast(_src); + if (src == nullptr) + return none().release(); + + if (handle existing_inst = find_registered_python_instance(src, tinfo)) + return existing_inst; + + auto inst = reinterpret_steal(make_new_instance(tinfo->type)); + auto wrapper = reinterpret_cast(inst.ptr()); + wrapper->owned = false; + void *&valueptr = values_and_holders(wrapper).begin()->value_ptr(); + + switch (policy) { + case return_value_policy::automatic: + case return_value_policy::take_ownership: + valueptr = src; + wrapper->owned = true; + break; + + case return_value_policy::automatic_reference: + case return_value_policy::reference: + valueptr = src; + wrapper->owned = false; + break; + + case return_value_policy::copy: + if (copy_constructor) + valueptr = copy_constructor(src); + else { +#if defined(NDEBUG) + throw cast_error("return_value_policy = copy, but type is " + "non-copyable! 
(compile in debug mode for details)"); +#else + std::string type_name(tinfo->cpptype->name()); + detail::clean_type_id(type_name); + throw cast_error("return_value_policy = copy, but type " + type_name + + " is non-copyable!"); +#endif + } + wrapper->owned = true; + break; + + case return_value_policy::move: + if (move_constructor) + valueptr = move_constructor(src); + else if (copy_constructor) + valueptr = copy_constructor(src); + else { +#if defined(NDEBUG) + throw cast_error("return_value_policy = move, but type is neither " + "movable nor copyable! " + "(compile in debug mode for details)"); +#else + std::string type_name(tinfo->cpptype->name()); + detail::clean_type_id(type_name); + throw cast_error("return_value_policy = move, but type " + type_name + + " is neither movable nor copyable!"); +#endif + } + wrapper->owned = true; + break; + + case return_value_policy::reference_internal: + valueptr = src; + wrapper->owned = false; + keep_alive_impl(inst, parent); + break; + + default: + throw cast_error("unhandled return_value_policy: should not happen!"); + } + + tinfo->init_instance(wrapper, existing_holder); + + return inst.release(); + } +}; + +template +struct smart_holder_type_caster> : smart_holder_type_caster_load, + smart_holder_type_caster_class_hooks { + static constexpr auto name = _>(); + + static handle cast(const std::shared_ptr &src, return_value_policy policy, handle parent) { + switch (policy) { + case return_value_policy::automatic: + case return_value_policy::automatic_reference: + break; + case return_value_policy::take_ownership: + throw cast_error("Invalid return_value_policy for shared_ptr (take_ownership)."); + case return_value_policy::copy: + case return_value_policy::move: + break; + case return_value_policy::reference: + throw cast_error("Invalid return_value_policy for shared_ptr (reference)."); + case return_value_policy::reference_internal: + break; + } + if (!src) + return none().release(); + + auto src_raw_ptr = src.get(); + 
auto st = type_caster_base::src_and_type(src_raw_ptr); + if (st.second == nullptr) + return handle(); // no type info: error will be set already + + void *src_raw_void_ptr = static_cast(src_raw_ptr); + const detail::type_info *tinfo = st.second; + if (handle existing_inst = find_registered_python_instance(src_raw_void_ptr, tinfo)) + // SMART_HOLDER_WIP: MISSING: Enforcement of consistency with existing smart_holder. + // SMART_HOLDER_WIP: MISSING: keep_alive. + return existing_inst; + + auto inst = reinterpret_steal(make_new_instance(tinfo->type)); + auto *inst_raw_ptr = reinterpret_cast(inst.ptr()); + inst_raw_ptr->owned = true; + void *&valueptr = values_and_holders(inst_raw_ptr).begin()->value_ptr(); + valueptr = src_raw_void_ptr; + + auto smhldr = pybindit::memory::smart_holder::from_shared_ptr(src); + tinfo->init_instance(inst_raw_ptr, static_cast(&smhldr)); + + if (policy == return_value_policy::reference_internal) + keep_alive_impl(inst, parent); + + return inst.release(); + } + + template + using cast_op_type = std::shared_ptr; + + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::shared_ptr() { return this->loaded_as_shared_ptr(); } +}; + +template +struct smart_holder_type_caster> : smart_holder_type_caster_load, + smart_holder_type_caster_class_hooks { + static constexpr auto name = _>(); + + static handle + cast(const std::shared_ptr &src, return_value_policy policy, handle parent) { + return smart_holder_type_caster>::cast( + std::const_pointer_cast(src), // Const2Mutbl + policy, + parent); + } + + template + using cast_op_type = std::shared_ptr; + + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::shared_ptr() { return this->loaded_as_shared_ptr(); } // Mutbl2Const +}; + +template +struct smart_holder_type_caster> : smart_holder_type_caster_load, + smart_holder_type_caster_class_hooks { + static constexpr auto name = _>(); + + static handle cast(std::unique_ptr &&src, return_value_policy policy, handle parent) { + if (policy 
!= return_value_policy::automatic + && policy != return_value_policy::reference_internal + && policy != return_value_policy::move) { + // SMART_HOLDER_WIP: IMPROVABLE: Error message. + throw cast_error("Invalid return_value_policy for unique_ptr."); + } + if (!src) + return none().release(); + + auto src_raw_ptr = src.get(); + auto st = type_caster_base::src_and_type(src_raw_ptr); + if (st.second == nullptr) + return handle(); // no type info: error will be set already + + void *src_raw_void_ptr = static_cast(src_raw_ptr); + const detail::type_info *tinfo = st.second; + if (handle existing_inst = find_registered_python_instance(src_raw_void_ptr, tinfo)) { + auto *self_life_support + = dynamic_raw_ptr_cast_if_possible(src_raw_ptr); + if (self_life_support != nullptr) { + value_and_holder &v_h = self_life_support->v_h; + if (v_h.inst != nullptr && v_h.vh != nullptr) { + auto &holder = v_h.holder(); + if (!holder.is_disowned) { + pybind11_fail("smart_holder_type_casters: unexpected " + "smart_holder.is_disowned failure."); + } + // Critical transfer-of-ownership section. This must stay together. + self_life_support->deactivate_life_support(); + holder.reclaim_disowned(); + (void) src.release(); + // Critical section end. 
+ return existing_inst; + } + } + throw cast_error("Invalid unique_ptr: another instance owns this pointer already."); + } + + auto inst = reinterpret_steal(make_new_instance(tinfo->type)); + auto *inst_raw_ptr = reinterpret_cast(inst.ptr()); + inst_raw_ptr->owned = true; + void *&valueptr = values_and_holders(inst_raw_ptr).begin()->value_ptr(); + valueptr = src_raw_void_ptr; + + auto smhldr = pybindit::memory::smart_holder::from_unique_ptr(std::move(src), + /*void_cast_raw_ptr*/ false); + tinfo->init_instance(inst_raw_ptr, static_cast(&smhldr)); + + if (policy == return_value_policy::reference_internal) + keep_alive_impl(inst, parent); + + return inst.release(); + } + static handle + cast(const std::unique_ptr &src, return_value_policy policy, handle parent) { + if (!src) + return none().release(); + if (policy == return_value_policy::automatic) + policy = return_value_policy::reference_internal; + if (policy != return_value_policy::reference_internal) + throw cast_error("Invalid return_value_policy for unique_ptr&"); + return smart_holder_type_caster::cast(src.get(), policy, parent); + } + + template + using cast_op_type = std::unique_ptr; + + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::unique_ptr() { return this->template loaded_as_unique_ptr(); } +}; + +template +struct smart_holder_type_caster> + : smart_holder_type_caster_load, smart_holder_type_caster_class_hooks { + static constexpr auto name = _>(); + + static handle + cast(std::unique_ptr &&src, return_value_policy policy, handle parent) { + return smart_holder_type_caster>::cast( + std::unique_ptr(const_cast(src.release())), // Const2Mutbl + policy, + parent); + } + + template + using cast_op_type = std::unique_ptr; + + // NOLINTNEXTLINE(google-explicit-constructor) + operator std::unique_ptr() { return this->template loaded_as_unique_ptr(); } +}; + +#ifndef PYBIND11_USE_SMART_HOLDER_AS_DEFAULT + +# define PYBIND11_SMART_HOLDER_TYPE_CASTERS(...) 
\ + namespace pybind11 { \ + namespace detail { \ + template <> \ + class type_caster<__VA_ARGS__> : public smart_holder_type_caster<__VA_ARGS__> {}; \ + template <> \ + class type_caster> \ + : public smart_holder_type_caster> {}; \ + template <> \ + class type_caster> \ + : public smart_holder_type_caster> {}; \ + template \ + class type_caster> \ + : public smart_holder_type_caster> {}; \ + template \ + class type_caster> \ + : public smart_holder_type_caster> {}; \ + } \ + } +#else + +# define PYBIND11_SMART_HOLDER_TYPE_CASTERS(...) + +template +class type_caster_for_class_ : public smart_holder_type_caster {}; + +template +class type_caster_for_class_> + : public smart_holder_type_caster> {}; + +template +class type_caster_for_class_> + : public smart_holder_type_caster> {}; + +template +class type_caster_for_class_> + : public smart_holder_type_caster> {}; + +template +class type_caster_for_class_> + : public smart_holder_type_caster> {}; + +#endif + +PYBIND11_NAMESPACE_END(detail) +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/type_caster_base.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/type_caster_base.h similarity index 99% rename from Libraries/pybind11-2.8.0/pybind11/detail/type_caster_base.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/type_caster_base.h index f804d9d1..00ce1a7a 100644 --- a/Libraries/pybind11-2.8.0/pybind11/detail/type_caster_base.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/type_caster_base.h @@ -468,12 +468,19 @@ PYBIND11_NOINLINE std::string error_string() { PyFrameObject *frame = trace->tb_frame; errorString += "\n\nAt:\n"; while (frame) { +#if PY_VERSION_HEX >= 0x03090000 + PyCodeObject *f_code = PyFrame_GetCode(frame); +#else + PyCodeObject *f_code = frame->f_code; + Py_INCREF(f_code); +#endif int lineno = PyFrame_GetLineNumber(frame); errorString += - " " + handle(frame->f_code->co_filename).cast() + + " " + 
handle(f_code->co_filename).cast() + "(" + std::to_string(lineno) + "): " + - handle(frame->f_code->co_name).cast() + "\n"; + handle(f_code->co_name).cast() + "\n"; frame = frame->f_back; + Py_DECREF(f_code); } } #endif diff --git a/Libraries/pybind11-2.8.0/pybind11/detail/typeid.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/typeid.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/detail/typeid.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/typeid.h diff --git a/Libraries/pybind11-2.8.0/pybind11/eigen.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/eigen.h similarity index 97% rename from Libraries/pybind11-2.8.0/pybind11/eigen.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/eigen.h index c0363827..97b1d96b 100644 --- a/Libraries/pybind11-2.8.0/pybind11/eigen.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/eigen.h @@ -17,9 +17,23 @@ #include "numpy.h" +// The C4127 suppression was introduced for Eigen 3.4.0. In theory we could +// make it version specific, or even remove it later, but considering that +// 1. C4127 is generally far more distracting than useful for modern template code, and +// 2. we definitely want to ignore any MSVC warnings originating from Eigen code, +// it is probably best to keep this around indefinitely. +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4127) // C4127: conditional expression is constant +#endif + #include #include +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + // Eigen prior to 3.2.7 doesn't have proper move constructors--but worse, some classes get implicit // move constructors that break things. We could detect this an explicitly copy, but an extra copy // of matrices seems highly undesirable. 
@@ -559,7 +573,9 @@ struct type_caster::value>> { if (!values || !innerIndices || !outerIndices) return false; - value = Eigen::MappedSparseMatrix( + value = Eigen::MappedSparseMatrix( shape[0].cast(), shape[1].cast(), nnz, outerIndices.mutable_data(), innerIndices.mutable_data(), values.mutable_data()); diff --git a/Libraries/pybind11-2.8.0/pybind11/embed.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/embed.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/embed.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/embed.h diff --git a/Libraries/pybind11-2.8.0/pybind11/eval.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/eval.h similarity index 98% rename from Libraries/pybind11-2.8.0/pybind11/eval.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/eval.h index e0f58bcf..6cc672e2 100644 --- a/Libraries/pybind11-2.8.0/pybind11/eval.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/eval.h @@ -1,5 +1,5 @@ /* - pybind11/exec.h: Support for evaluating Python expressions and statements + pybind11/eval.h: Support for evaluating Python expressions and statements from strings and files Copyright (c) 2016 Klemens Morgenstern and diff --git a/Libraries/pybind11-2.8.0/pybind11/functional.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/functional.h similarity index 99% rename from Libraries/pybind11-2.8.0/pybind11/functional.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/functional.h index 24141ce3..ad5608c2 100644 --- a/Libraries/pybind11-2.8.0/pybind11/functional.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/functional.h @@ -69,7 +69,7 @@ struct type_caster> { // ensure GIL is held during functor destruction struct func_handle { function f; -#if !(defined(_MSC_VER) && _MSC_VER == 1916 && defined(PYBIND11_CPP17) && PY_MAJOR_VERSION < 3) +#if !(defined(_MSC_VER) && _MSC_VER == 1916 && defined(PYBIND11_CPP17)) // This triggers a syntax error under very special conditions (very weird indeed). 
explicit #endif diff --git a/Libraries/pybind11-2.8.0/pybind11/gil.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/gil.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/gil.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/gil.h diff --git a/Libraries/pybind11-2.8.0/pybind11/iostream.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/iostream.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/iostream.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/iostream.h diff --git a/Libraries/pybind11-2.8.0/pybind11/numpy.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/numpy.h similarity index 99% rename from Libraries/pybind11-2.8.0/pybind11/numpy.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/numpy.h index b7747fae..b43a7716 100644 --- a/Libraries/pybind11-2.8.0/pybind11/numpy.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/numpy.h @@ -518,7 +518,7 @@ class dtype : public object { } /// Single-character for dtype's type. - /// For example, ``float`` is 'f', ``double`` 'd', ``int`` 'i', and ``long`` 'd'. + /// For example, ``float`` is 'f', ``double`` 'd', ``int`` 'i', and ``long`` 'l'. 
char char_() const { // Note: The signature, `dtype::char_` follows the naming of NumPy's // public Python API (i.e., ``dtype.char``), rather than its internal diff --git a/Libraries/pybind11-2.8.0/pybind11/operators.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/operators.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/operators.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/operators.h diff --git a/Libraries/pybind11-2.8.0/pybind11/options.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/options.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/options.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/options.h diff --git a/Libraries/pybind11-2.8.0/pybind11/pybind11.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/pybind11.h similarity index 92% rename from Libraries/pybind11-2.8.0/pybind11/pybind11.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/pybind11.h index 370e52cf..6f24a360 100644 --- a/Libraries/pybind11-2.8.0/pybind11/pybind11.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/pybind11.h @@ -1,3 +1,4 @@ +// clang-format off /* pybind11/pybind11.h: Main header file of the C++11 python binding generator library @@ -15,6 +16,7 @@ #include "options.h" #include "detail/class.h" #include "detail/init.h" +#include "detail/smart_holder_sfinae_hooks_only.h" #include #include @@ -1124,13 +1126,23 @@ inline dict globals() { return reinterpret_borrow(p ? p : module_::import("__main__").attr("__dict__").ptr()); } +#if PY_VERSION_HEX >= 0x03030000 +template ()>> +PYBIND11_DEPRECATED("make_simple_namespace should be replaced with py::module_::import(\"types\").attr(\"SimpleNamespace\") ") +object make_simple_namespace(Args&&... 
args_) { + return module_::import("types").attr("SimpleNamespace")(std::forward(args_)...); +} +#endif + PYBIND11_NAMESPACE_BEGIN(detail) /// Generic support for creating new Python heap types class generic_type : public object { public: PYBIND11_OBJECT_DEFAULT(generic_type, object, PyType_Check) protected: - void initialize(const type_record &rec) { + void initialize(const type_record &rec, + void *(*type_caster_module_local_load)(PyObject *, const type_info *)) { if (rec.scope && hasattr(rec.scope, "__dict__") && rec.scope.attr("__dict__").contains(rec.name)) pybind11_fail("generic_type: cannot initialize type \"" + std::string(rec.name) + "\": an object with that name is already defined"); @@ -1177,7 +1189,7 @@ class generic_type : public object { if (rec.module_local) { // Stash the local typeinfo and loader so that external modules can access it. - tinfo->module_local_load = &type_caster_generic::local_load; + tinfo->module_local_load = type_caster_module_local_load; setattr(m_ptr, PYBIND11_MODULE_LOCAL_ID, capsule(tinfo)); } } @@ -1291,11 +1303,63 @@ auto method_adaptor(Return (Class::*pmf)(Args...) const) -> Return (Derived::*)( return pmf; } +// clang-format on +template +#ifndef PYBIND11_USE_SMART_HOLDER_AS_DEFAULT + +using default_holder_type = std::unique_ptr; + +# ifndef PYBIND11_SH_AVL +# define PYBIND11_SH_AVL(...) std::shared_ptr<__VA_ARGS__> // "Smart_Holder if AVaiLable" +// -------- std::shared_ptr(...) -- same length by design, to not disturb the indentation +// of existing code. +# endif + +# define PYBIND11_SH_DEF(...) std::shared_ptr<__VA_ARGS__> // "Smart_Holder if DEFault" +// -------- std::shared_ptr(...) -- same length by design, to not disturb the indentation +// of existing code. + +# define PYBIND11_TYPE_CASTER_BASE_HOLDER(T, ...) + +#else + +using default_holder_type = smart_holder; + +# ifndef PYBIND11_SH_AVL +# define PYBIND11_SH_AVL(...) ::pybind11::smart_holder // "Smart_Holder if AVaiLable" +// -------- std::shared_ptr(...) 
-- same length by design, to not disturb the indentation +// of existing code. +# endif + +# define PYBIND11_SH_DEF(...) ::pybind11::smart_holder // "Smart_Holder if DEFault" + +// This define could be hidden away inside detail/smart_holder_type_casters.h, but is kept here +// for clarity. +# define PYBIND11_TYPE_CASTER_BASE_HOLDER(T, ...) \ + namespace pybind11 { \ + namespace detail { \ + template <> \ + class type_caster : public type_caster_base {}; \ + template <> \ + class type_caster<__VA_ARGS__> : public type_caster_holder {}; \ + } \ + } + +#endif +// clang-format off + template class class_ : public detail::generic_type { - template using is_holder = detail::is_holder_type; template using is_subtype = detail::is_strict_base_of; template using is_base = detail::is_strict_base_of; + template + // clang-format on + using is_holder + = detail::any_of, + detail::all_of>, + detail::negation>, + detail::type_uses_smart_holder_type_caster>>; + // clang-format off // struct instead of using here to help MSVC: template struct is_valid_class_option : detail::any_of, is_subtype, is_base> {}; @@ -1304,7 +1368,7 @@ class class_ : public detail::generic_type { using type = type_; using type_alias = detail::exactly_one_t; constexpr static bool has_alias = !std::is_void::value; - using holder_type = detail::exactly_one_t, options...>; + using holder_type = detail::exactly_one_t, options...>; static_assert(detail::all_of...>::value, "Unknown/invalid class_ template parameters provided"); @@ -1326,6 +1390,37 @@ class class_ : public detail::generic_type { none_of...>::value), // no multiple_inheritance attr "Error: multiple inheritance bases must be specified via class_ template options"); + // clang-format on + static constexpr bool holder_is_smart_holder + = detail::is_smart_holder_type::value; + static constexpr bool wrapped_type_uses_smart_holder_type_caster + = detail::type_uses_smart_holder_type_caster::value; + static constexpr bool 
type_caster_type_is_type_caster_base_subtype + = std::is_base_of, detail::type_caster>::value; + // Necessary conditions, but not strict. + static_assert(!(detail::is_instantiation::value + && wrapped_type_uses_smart_holder_type_caster), + "py::class_ holder vs type_caster mismatch:" + " missing PYBIND11_TYPE_CASTER_BASE_HOLDER(T, std::unique_ptr)?"); + static_assert(!(detail::is_instantiation::value + && wrapped_type_uses_smart_holder_type_caster), + "py::class_ holder vs type_caster mismatch:" + " missing PYBIND11_TYPE_CASTER_BASE_HOLDER(T, std::shared_ptr)?"); + static_assert(!(holder_is_smart_holder && type_caster_type_is_type_caster_base_subtype), + "py::class_ holder vs type_caster mismatch:" + " missing PYBIND11_SMART_HOLDER_TYPE_CASTERS(T)?"); +#ifdef PYBIND11_STRICT_ASSERTS_CLASS_HOLDER_VS_TYPE_CASTER_MIX + // Strict conditions cannot be enforced universally at the moment (PR #2836). + static_assert(holder_is_smart_holder == wrapped_type_uses_smart_holder_type_caster, + "py::class_ holder vs type_caster mismatch:" + " missing PYBIND11_SMART_HOLDER_TYPE_CASTERS(T)" + " or collision with custom py::detail::type_caster?"); + static_assert(!holder_is_smart_holder == type_caster_type_is_type_caster_base_subtype, + "py::class_ holder vs type_caster mismatch:" + " missing PYBIND11_TYPE_CASTER_BASE_HOLDER(T, ...)" + " or collision with custom py::detail::type_caster?"); +#endif + // clang-format off type_record record; record.scope = scope; record.name = name; @@ -1335,6 +1430,8 @@ class class_ : public detail::generic_type { record.holder_size = sizeof(holder_type); record.init_instance = init_instance; record.dealloc = dealloc; + + // A more fitting name would be uses_unique_ptr_holder. 
record.default_holder = detail::is_instantiation::value; set_operator_new(&record); @@ -1345,7 +1442,7 @@ class class_ : public detail::generic_type { /* Process optional arguments, if any */ process_attributes::init(extra..., &record); - generic_type::initialize(record); + generic_type_initialize(record); if (has_alias) { auto &instances = record.module_local ? get_local_internals().registered_types_cpp : get_internals().registered_types_cpp; @@ -1555,6 +1652,20 @@ class class_ : public detail::generic_type { } private: + // clang-format on + template ::value, int> = 0> + void generic_type_initialize(const detail::type_record &record) { + generic_type::initialize(record, &detail::type_caster_generic::local_load); + } + + template ::value, int> = 0> + void generic_type_initialize(const detail::type_record &record) { + generic_type::initialize(record, detail::type_caster::get_local_load_function_ptr()); + } + // clang-format off + /// Initialize holder object, variant 1: object derives from enable_shared_from_this template static void init_holder(detail::instance *inst, detail::value_and_holder &v_h, @@ -1599,6 +1710,9 @@ class class_ : public detail::generic_type { /// instance. Should be called as soon as the `type` value_ptr is set for an instance. Takes an /// optional pointer to an existing holder to use; if not specified and the instance is /// `.owned`, a new holder will be constructed to manage the value pointer. 
+ template < + typename T = type, + detail::enable_if_t::value, int> = 0> static void init_instance(detail::instance *inst, const void *holder_ptr) { auto v_h = inst->get_value_and_holder(detail::get_type_info(typeid(type))); if (!v_h.instance_registered()) { @@ -1608,6 +1722,15 @@ class class_ : public detail::generic_type { init_holder(inst, v_h, (const holder_type *) holder_ptr, v_h.value_ptr()); } + // clang-format on + template ::value, int> = 0> + static void init_instance(detail::instance *inst, const void *holder_ptr) { + detail::type_caster::template init_instance_for_type(inst, holder_ptr); + } + // clang-format off + /// Deallocates an instance; via holder, if constructed; otherwise via operator delete. static void dealloc(detail::value_and_holder &v_h) { // We could be deallocating because we are cleaning up after a Python exception. @@ -1967,29 +2090,54 @@ struct iterator_state { }; // Note: these helpers take the iterator by non-const reference because some -// iterators in the wild can't be dereferenced when const. C++ needs the extra parens in decltype -// to enforce an lvalue. The & after Iterator is required for MSVC < 16.9. SFINAE cannot be -// reused for result_type due to bugs in ICC, NVCC, and PGI compilers. See PR #3293. -template ()))> +// iterators in the wild can't be dereferenced when const. The & after Iterator +// is required for MSVC < 16.9. SFINAE cannot be reused for result_type due to +// bugs in ICC, NVCC, and PGI compilers. See PR #3293. 
+template ())> struct iterator_access { - using result_type = decltype((*std::declval())); + using result_type = decltype(*std::declval()); // NOLINTNEXTLINE(readability-const-return-type) // PR #3263 result_type operator()(Iterator &it) const { return *it; } }; -template ()).first)) > -struct iterator_key_access { - using result_type = decltype(((*std::declval()).first)); +template ()).first) > +class iterator_key_access { +private: + using pair_type = decltype(*std::declval()); + +public: + /* If either the pair itself or the element of the pair is a reference, we + * want to return a reference, otherwise a value. When the decltype + * expression is parenthesized it is based on the value category of the + * expression; otherwise it is the declared type of the pair member. + * The use of declval in the second branch rather than directly + * using *std::declval() is a workaround for nvcc + * (it's not used in the first branch because going via decltype and back + * through declval does not perfectly preserve references). + */ + using result_type = conditional_t< + std::is_reference())>::value, + decltype(((*std::declval()).first)), + decltype(std::declval().first) + >; result_type operator()(Iterator &it) const { return (*it).first; } }; -template ()).second))> -struct iterator_value_access { - using result_type = decltype(((*std::declval()).second)); +template ()).second)> +class iterator_value_access { +private: + using pair_type = decltype(*std::declval()); + +public: + using result_type = conditional_t< + std::is_reference())>::value, + decltype(((*std::declval()).second)), + decltype(std::declval().second) + >; result_type operator()(Iterator &it) const { return (*it).second; } @@ -2301,6 +2449,29 @@ inline function get_type_override(const void *this_ptr, const type_info *this_ty /* Don't call dispatch code if invoked from overridden function. Unfortunately this doesn't work on PyPy. 
*/ #if !defined(PYPY_VERSION) + +#if PY_VERSION_HEX >= 0x03090000 + PyFrameObject *frame = PyThreadState_GetFrame(PyThreadState_Get()); + if (frame != nullptr) { + PyCodeObject *f_code = PyFrame_GetCode(frame); + // f_code is guaranteed to not be NULL + if ((std::string) str(f_code->co_name) == name && f_code->co_argcount > 0) { + PyObject* locals = PyEval_GetLocals(); + if (locals != nullptr) { + PyObject *self_caller = dict_getitem( + locals, PyTuple_GET_ITEM(f_code->co_varnames, 0) + ); + if (self_caller == self.ptr()) { + Py_DECREF(f_code); + Py_DECREF(frame); + return function(); + } + } + } + Py_DECREF(f_code); + Py_DECREF(frame); + } +#else PyFrameObject *frame = PyThreadState_Get()->frame; if (frame != nullptr && (std::string) str(frame->f_code->co_name) == name && frame->f_code->co_argcount > 0) { @@ -2310,6 +2481,8 @@ inline function get_type_override(const void *this_ptr, const type_info *this_ty if (self_caller == self.ptr()) return function(); } +#endif + #else /* PyPy currently doesn't provide a detailed cpyext emulation of frame objects, so we have to emulate this using Python. This diff --git a/Libraries/pybind11-2.8.0/pybind11/pytypes.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/pytypes.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/pytypes.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/pytypes.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/smart_holder.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/smart_holder.h new file mode 100644 index 00000000..f852f77e --- /dev/null +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/smart_holder.h @@ -0,0 +1,29 @@ +// Copyright (c) 2021 The Pybind Development Team. +// All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +#pragma once + +#include "detail/common.h" +#include "detail/smart_holder_type_casters.h" +#include "pybind11.h" + +#undef PYBIND11_SH_AVL // Undoing #define in pybind11.h + +#define PYBIND11_SH_AVL(...) ::pybind11::smart_holder // "Smart_Holder if AVaiLable" +// ---- std::shared_ptr(...) -- same length by design, to not disturb the indentation +// of existing code. + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +// Supports easier switching between py::class_ and py::class_: +// users can simply replace the `_` in `class_` with `h` or vice versa. +// Note though that the PYBIND11_SMART_HOLDER_TYPE_CASTERS(T) macro also needs to be +// added (for `classh`) or commented out (when falling back to `class_`). +template +class classh : public class_ { +public: + using class_::class_; +}; + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.0/pybind11/stl.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl.h similarity index 96% rename from Libraries/pybind11-2.8.0/pybind11/stl.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/stl.h index 2c017b4f..3608d298 100644 --- a/Libraries/pybind11-2.8.0/pybind11/stl.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl.h @@ -245,17 +245,17 @@ template , Key, Value> { }; // This type caster is intended to be used for std::optional and std::experimental::optional -template struct optional_caster { - using value_conv = make_caster; +template struct optional_caster { + using value_conv = make_caster; - template - static handle cast(T_ &&src, return_value_policy policy, handle parent) { + template + static handle cast(T &&src, return_value_policy policy, handle parent) { if (!src) return none().inc_ref(); if (!std::is_lvalue_reference::value) { - policy = return_value_policy_override::policy(policy); + policy = return_value_policy_override::policy(policy); } - return value_conv::cast(*std::forward(src), policy, parent); + return value_conv::cast(*std::forward(src), policy, parent); } 
bool load(handle src, bool convert) { @@ -269,11 +269,11 @@ template struct optional_caster { if (!inner_caster.load(src, convert)) return false; - value.emplace(cast_op(std::move(inner_caster))); + value.emplace(cast_op(std::move(inner_caster))); return true; } - PYBIND11_TYPE_CASTER(T, _("Optional[") + value_conv::name + _("]")); + PYBIND11_TYPE_CASTER(Type, _("Optional[") + value_conv::name + _("]")); }; #if defined(PYBIND11_HAS_OPTIONAL) diff --git a/Libraries/pybind11-2.8.0/pybind11/stl/filesystem.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl/filesystem.h similarity index 100% rename from Libraries/pybind11-2.8.0/pybind11/stl/filesystem.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/stl/filesystem.h diff --git a/Libraries/pybind11-2.8.0/pybind11/stl_bind.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl_bind.h similarity index 99% rename from Libraries/pybind11-2.8.0/pybind11/stl_bind.h rename to Libraries/pybind11-2.8.1-smart_holder/pybind11/stl_bind.h index 050be83c..95632486 100644 --- a/Libraries/pybind11-2.8.0/pybind11/stl_bind.h +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl_bind.h @@ -437,7 +437,7 @@ PYBIND11_NAMESPACE_END(detail) // // std::vector // -template , typename... Args> +template , typename... Args> class_ bind_vector(handle scope, std::string const &name, Args&&... args) { using Class_ = class_; @@ -615,7 +615,7 @@ struct items_view PYBIND11_NAMESPACE_END(detail) -template , typename... Args> +template , typename... Args> class_ bind_map(handle scope, const std::string &name, Args&&... 
args) { using KeyType = typename Map::key_type; using MappedType = typename Map::mapped_type; diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/trampoline_self_life_support.h b/Libraries/pybind11-2.8.1-smart_holder/pybind11/trampoline_self_life_support.h new file mode 100644 index 00000000..b7e1f12c --- /dev/null +++ b/Libraries/pybind11-2.8.1-smart_holder/pybind11/trampoline_self_life_support.h @@ -0,0 +1,61 @@ +// Copyright (c) 2021 The Pybind Development Team. +// All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +#pragma once + +#include "detail/common.h" +#include "detail/smart_holder_poc.h" +#include "detail/type_caster_base.h" + +PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +PYBIND11_NAMESPACE_BEGIN(detail) +// SMART_HOLDER_WIP: Needs refactoring of existing pybind11 code. +inline bool deregister_instance(instance *self, void *valptr, const type_info *tinfo); +PYBIND11_NAMESPACE_END(detail) + +// The original core idea for this struct goes back to PyCLIF: +// https://github.com/google/clif/blob/07f95d7e69dca2fcf7022978a55ef3acff506c19/clif/python/runtime.cc#L37 +// URL provided here mainly to give proper credit. To fully explain the `HoldPyObj` feature, more +// context is needed (SMART_HOLDER_WIP). 
+struct trampoline_self_life_support { + detail::value_and_holder v_h; + + trampoline_self_life_support() = default; + + void activate_life_support(const detail::value_and_holder &v_h_) { + Py_INCREF((PyObject *) v_h_.inst); + v_h = v_h_; + } + + void deactivate_life_support() { + Py_DECREF((PyObject *) v_h.inst); + v_h = detail::value_and_holder(); + } + + ~trampoline_self_life_support() { + if (v_h.inst != nullptr && v_h.vh != nullptr) { + void *value_void_ptr = v_h.value_ptr(); + if (value_void_ptr != nullptr) { + PyGILState_STATE threadstate = PyGILState_Ensure(); + v_h.value_ptr() = nullptr; + v_h.holder().release_disowned(); + detail::deregister_instance(v_h.inst, value_void_ptr, v_h.type); + Py_DECREF((PyObject *) v_h.inst); // Must be after deregister. + PyGILState_Release(threadstate); + } + } + } + + // For the next two, the default implementations generate undefined behavior (ASAN failures + // manually verified). The reason is that v_h needs to be kept default-initialized. + trampoline_self_life_support(const trampoline_self_life_support &) {} + trampoline_self_life_support(trampoline_self_life_support &&) noexcept {} + + // These should never be needed (please provide test cases if you think they are). 
+ trampoline_self_life_support &operator=(const trampoline_self_life_support &) = delete; + trampoline_self_life_support &operator=(trampoline_self_life_support &&) = delete; +}; + +PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/premake5.lua b/premake5.lua index ff228e5e..a8e05982 100644 --- a/premake5.lua +++ b/premake5.lua @@ -195,9 +195,9 @@ function linkPython() return false end - defines {"KTT_PYTHON"} - includedirs {"$(PYTHON_PATH)/include", "Libraries/pybind11-2.8.0"} - files {"Libraries/pybind11-2.8.0/**"} + defines {"KTT_PYTHON", "PYBIND11_USE_SMART_HOLDER_AS_DEFAULT"} + includedirs {"$(PYTHON_PATH)/include", "Libraries/pybind11-2.8.1-smart_holder"} + files {"Libraries/pybind11-2.8.1-smart_holder/**"} libdirs {"$(PYTHON_PATH)/libs"} links {"python3"} From b055636dca08eb4233cdf19b7bc4fb74239a224f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 1 Nov 2021 14:31:49 +0100 Subject: [PATCH 18/63] * Enabled Python bindings for certain overloaded methods and methods with unique pointer arguments --- Source/Python/PythonDataHolders.cpp | 5 ++--- Source/Python/PythonModule.cpp | 20 ++++++++------------ Source/Python/PythonSearchers.cpp | 2 +- Source/Python/PythonStopConditions.cpp | 2 +- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/Source/Python/PythonDataHolders.cpp b/Source/Python/PythonDataHolders.cpp index cc92c624..37c2b01c 100644 --- a/Source/Python/PythonDataHolders.cpp +++ b/Source/Python/PythonDataHolders.cpp @@ -154,9 +154,8 @@ void InitializePythonDataHolders(py::module_& module) .def(py::init()) .def("SetDurationData", &ktt::ComputationResult::SetDurationData) .def("SetSizeData", &ktt::ComputationResult::SetSizeData) - // Todo: check pybind11 smart_holder branch for unique_ptr argument passing support - //.def("SetCompilationData", &ktt::ComputationResult::SetCompilationData) - //.def("SetProfilingData", &ktt::ComputationResult::SetProfilingData) + .def("SetCompilationData", 
&ktt::ComputationResult::SetCompilationData) + .def("SetProfilingData", &ktt::ComputationResult::SetProfilingData) .def("GetKernelFunction", &ktt::ComputationResult::GetKernelFunction) .def("GetGlobalSize", &ktt::ComputationResult::GetGlobalSize) .def("GetLocalSize", &ktt::ComputationResult::GetLocalSize) diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index ec3573db..e4095851 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -49,9 +49,8 @@ PYBIND11_MODULE(ktt, module) .def("RunKernelWithProfiling", py::overload_cast(&ktt::ComputeInterface::RunKernelWithProfiling)) .def("RunKernelWithProfiling", py::overload_cast(&ktt::ComputeInterface::RunKernelWithProfiling)) - // Todo: these overloads do not work for some reason - //.def("GetRemainingProfilingRuns", py::overload_cast(&ktt::ComputeInterface::GetRemainingProfilingRuns)) - //.def("GetRemainingProfilingRuns", py::overload_cast<>(&ktt::ComputeInterface::GetRemainingProfilingRuns)) + .def("GetRemainingProfilingRuns", [](ktt::ComputeInterface& ci, const ktt::KernelDefinitionId id) { return ci.GetRemainingProfilingRuns(id); }) + .def("GetRemainingProfilingRuns", [](ktt::ComputeInterface& ci) { return ci.GetRemainingProfilingRuns(); }) .def("GetDefaultQueue", &ktt::ComputeInterface::GetDefaultQueue) .def("GetAllQueues", &ktt::ComputeInterface::GetAllQueues) .def("SynchronizeQueue", &ktt::ComputeInterface::SynchronizeQueue) @@ -221,14 +220,14 @@ PYBIND11_MODULE(ktt, module) const ktt::ArgumentMemoryLocation>(&ktt::Tuner::AddArgumentVector)) .def("AddArgumentVectorDouble", py::overload_cast(&ktt::Tuner::AddArgumentVector)) - //.def("AddArgumentVector", &ktt::Tuner::AddArgumentVector) + // Todo: AddArgumentVector version with user buffer .def("AddArgumentScalarChar", &ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarShort", &ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarInt", &ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarLong", 
&ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarFloat", &ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarDouble", &ktt::Tuner::AddArgumentScalar) - //.def("AddArgumentScalar", py::overload_cast(&ktt::Tuner::AddArgumentScalar)) + .def("AddArgumentScalar", [](ktt::Tuner& tuner, const void* data, const size_t dataSize) { return tuner.AddArgumentScalar(data, dataSize); }) .def("AddArgumentLocalChar", &ktt::Tuner::AddArgumentLocal) .def("AddArgumentLocalShort", &ktt::Tuner::AddArgumentLocal) .def("AddArgumentLocalInt", &ktt::Tuner::AddArgumentLocal) @@ -289,8 +288,7 @@ PYBIND11_MODULE(ktt, module) .def("SetReferenceComputation", &ktt::Tuner::SetReferenceComputation) .def("SetReferenceKernel", &ktt::Tuner::SetReferenceKernel) .def("Tune", py::overload_cast(&ktt::Tuner::Tune)) - // Todo: check pybind11 smart_holder branch for unique_ptr argument passing support - //.def("Tune", py::overload_cast>(&ktt::Tuner::Tune)) + .def("Tune", py::overload_cast>(&ktt::Tuner::Tune)) .def ( "TuneIteration", @@ -307,8 +305,7 @@ PYBIND11_MODULE(ktt, module) py::arg("results"), py::arg("iterations") = 0 ) - // Todo: check pybind11 smart_holder branch for unique_ptr argument passing support - //.def("SetSearcher", &ktt::Tuner::SetSearcher) + .def("SetSearcher", &ktt::Tuner::SetSearcher) .def("ClearData", &ktt::Tuner::ClearData) .def("GetBestConfiguration", &ktt::Tuner::GetBestConfiguration) .def("CreateConfiguration", &ktt::Tuner::CreateConfiguration) @@ -324,9 +321,8 @@ PYBIND11_MODULE(ktt, module) py::arg("format"), py::arg("data") = ktt::UserData{} ) - // Todo: these overloads do not work for some reason - /*.def("LoadResults", py::overload_cast(&ktt::Tuner::LoadResults)) - .def("LoadResults", py::overload_cast(&ktt::Tuner::LoadResults))*/ + .def("LoadResults", [](ktt::Tuner& tuner, const std::string& filePath, const ktt::OutputFormat format) { return tuner.LoadResults(filePath, format); }) + .def("LoadResults", [](ktt::Tuner& tuner, const std::string& filePath, const 
ktt::OutputFormat format, ktt::UserData& data) { return tuner.LoadResults(filePath, format, data); }) .def("AddComputeQueue", &ktt::Tuner::AddComputeQueue) .def("RemoveComputeQueue", &ktt::Tuner::RemoveComputeQueue) .def("Synchronize", &ktt::Tuner::Synchronize) diff --git a/Source/Python/PythonSearchers.cpp b/Source/Python/PythonSearchers.cpp index 76caa450..948c795f 100644 --- a/Source/Python/PythonSearchers.cpp +++ b/Source/Python/PythonSearchers.cpp @@ -7,7 +7,7 @@ namespace py = pybind11; -class PySearcher : public ktt::Searcher +class PySearcher : public ktt::Searcher, public py::trampoline_self_life_support { public: using Searcher::Searcher; diff --git a/Source/Python/PythonStopConditions.cpp b/Source/Python/PythonStopConditions.cpp index 1b7ee24f..8fabedb3 100644 --- a/Source/Python/PythonStopConditions.cpp +++ b/Source/Python/PythonStopConditions.cpp @@ -7,7 +7,7 @@ namespace py = pybind11; -class PyStopCondition : public ktt::StopCondition +class PyStopCondition : public ktt::StopCondition, public py::trampoline_self_life_support { public: using StopCondition::StopCondition; From e164d75bd8ffe326d59d07e3126e63ba5c7bc251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 2 Nov 2021 15:29:35 +0100 Subject: [PATCH 19/63] * Fixed methods with reference return types in Python bindings * Fixed broken kernel launchers in Python --- Source/Python/PythonDataHolders.cpp | 43 ++++++++++--------- Source/Python/PythonModule.cpp | 25 ++++++++--- Source/Python/PythonSearchers.cpp | 2 +- ...omputeApiInfo.py => ComputeApiInfoCuda.py} | 0 4 files changed, 42 insertions(+), 28 deletions(-) rename Tutorials/01ComputeApiInfo/{ComputeApiInfo.py => ComputeApiInfoCuda.py} (100%) diff --git a/Source/Python/PythonDataHolders.cpp b/Source/Python/PythonDataHolders.cpp index 37c2b01c..c41ad5cc 100644 --- a/Source/Python/PythonDataHolders.cpp +++ b/Source/Python/PythonDataHolders.cpp @@ -42,7 +42,7 @@ void InitializePythonDataHolders(py::module_& module) 
.def(py::init()) .def("SetValue", py::overload_cast(&ktt::ParameterPair::SetValue)) .def("SetValue", py::overload_cast(&ktt::ParameterPair::SetValue)) - .def("GetName", &ktt::ParameterPair::GetName) + .def("GetName", &ktt::ParameterPair::GetName, py::return_value_policy::reference) .def("GetString", &ktt::ParameterPair::GetString) .def("GetValueString", &ktt::ParameterPair::GetValueString) .def("GetValue", &ktt::ParameterPair::GetValue) @@ -58,7 +58,7 @@ void InitializePythonDataHolders(py::module_& module) py::class_(module, "KernelConfiguration") .def(py::init<>()) .def(py::init&>()) - .def("GetPairs", &ktt::KernelConfiguration::GetPairs) + .def("GetPairs", &ktt::KernelConfiguration::GetPairs, py::return_value_policy::reference) .def("IsValid", &ktt::KernelConfiguration::IsValid) .def("GeneratePrefix", &ktt::KernelConfiguration::GeneratePrefix) .def("GetString", &ktt::KernelConfiguration::GetString) @@ -71,9 +71,9 @@ void InitializePythonDataHolders(py::module_& module) py::class_(module, "DeviceInfo") .def(py::init()) .def("GetIndex", &ktt::DeviceInfo::GetIndex) - .def("GetName", &ktt::DeviceInfo::GetName) - .def("GetVendor", &ktt::DeviceInfo::GetVendor) - .def("GetExtensions", &ktt::DeviceInfo::GetExtensions) + .def("GetName", &ktt::DeviceInfo::GetName, py::return_value_policy::reference) + .def("GetVendor", &ktt::DeviceInfo::GetVendor, py::return_value_policy::reference) + .def("GetExtensions", &ktt::DeviceInfo::GetExtensions, py::return_value_policy::reference) .def("GetDeviceType", &ktt::DeviceInfo::GetDeviceType) .def("GetDeviceTypeString", &ktt::DeviceInfo::GetDeviceTypeString) .def("GetGlobalMemorySize", &ktt::DeviceInfo::GetGlobalMemorySize) @@ -95,10 +95,10 @@ void InitializePythonDataHolders(py::module_& module) py::class_(module, "PlatformInfo") .def(py::init()) .def("GetIndex", &ktt::PlatformInfo::GetIndex) - .def("GetName", &ktt::PlatformInfo::GetName) - .def("GetVendor", &ktt::PlatformInfo::GetVendor) - .def("GetVersion", 
&ktt::PlatformInfo::GetVersion) - .def("GetExtensions", &ktt::PlatformInfo::GetExtensions) + .def("GetName", &ktt::PlatformInfo::GetName, py::return_value_policy::reference) + .def("GetVendor", &ktt::PlatformInfo::GetVendor, py::return_value_policy::reference) + .def("GetVersion", &ktt::PlatformInfo::GetVersion, py::return_value_policy::reference) + .def("GetExtensions", &ktt::PlatformInfo::GetExtensions, py::return_value_policy::reference) .def("GetString", &ktt::PlatformInfo::GetString) .def("SetVendor", &ktt::PlatformInfo::SetVendor) .def("SetVersion", &ktt::PlatformInfo::SetVersion) @@ -109,7 +109,7 @@ void InitializePythonDataHolders(py::module_& module) .def(py::init()) .def(py::init()) .def("GetArgumentId", &ktt::BufferOutputDescriptor::GetArgumentId) - .def("GetOutputDestination", &ktt::BufferOutputDescriptor::GetOutputDestination, py::return_value_policy::reference) + .def("GetOutputDestination", &ktt::BufferOutputDescriptor::GetOutputDestination) .def("GetOutputSize", &ktt::BufferOutputDescriptor::GetOutputSize); py::class_(module, "KernelCompilationData") @@ -125,7 +125,7 @@ void InitializePythonDataHolders(py::module_& module) .def(py::init()) .def(py::init()) .def(py::init()) - .def("GetName", &ktt::KernelProfilingCounter::GetName) + .def("GetName", &ktt::KernelProfilingCounter::GetName, py::return_value_policy::reference) .def("GetType", &ktt::KernelProfilingCounter::GetType) .def("GetValueInt", &ktt::KernelProfilingCounter::GetValueInt) .def("GetValueUint", &ktt::KernelProfilingCounter::GetValueUint) @@ -140,8 +140,8 @@ void InitializePythonDataHolders(py::module_& module) .def(py::init&>()) .def("IsValid", &ktt::KernelProfilingData::IsValid) .def("HasCounter", &ktt::KernelProfilingData::HasCounter) - .def("GetCounter", &ktt::KernelProfilingData::GetCounter) - .def("GetCounters", &ktt::KernelProfilingData::GetCounters) + .def("GetCounter", &ktt::KernelProfilingData::GetCounter, py::return_value_policy::reference) + .def("GetCounters", 
&ktt::KernelProfilingData::GetCounters, py::return_value_policy::reference) .def("SetCounters", &ktt::KernelProfilingData::SetCounters) .def("AddCounter", &ktt::KernelProfilingData::AddCounter) .def("HasRemainingProfilingRuns", &ktt::KernelProfilingData::HasRemainingProfilingRuns) @@ -156,15 +156,15 @@ void InitializePythonDataHolders(py::module_& module) .def("SetSizeData", &ktt::ComputationResult::SetSizeData) .def("SetCompilationData", &ktt::ComputationResult::SetCompilationData) .def("SetProfilingData", &ktt::ComputationResult::SetProfilingData) - .def("GetKernelFunction", &ktt::ComputationResult::GetKernelFunction) - .def("GetGlobalSize", &ktt::ComputationResult::GetGlobalSize) - .def("GetLocalSize", &ktt::ComputationResult::GetLocalSize) + .def("GetKernelFunction", &ktt::ComputationResult::GetKernelFunction, py::return_value_policy::reference) + .def("GetGlobalSize", &ktt::ComputationResult::GetGlobalSize, py::return_value_policy::reference) + .def("GetLocalSize", &ktt::ComputationResult::GetLocalSize, py::return_value_policy::reference) .def("GetDuration", &ktt::ComputationResult::GetDuration) .def("GetOverhead", &ktt::ComputationResult::GetOverhead) .def("HasCompilationData", &ktt::ComputationResult::HasCompilationData) - .def("GetCompilationData", &ktt::ComputationResult::GetCompilationData) + .def("GetCompilationData", &ktt::ComputationResult::GetCompilationData, py::return_value_policy::reference) .def("HasProfilingData", &ktt::ComputationResult::HasProfilingData) - .def("GetProfilingData", &ktt::ComputationResult::GetProfilingData) + .def("GetProfilingData", &ktt::ComputationResult::GetProfilingData, py::return_value_policy::reference) .def("HasRemainingProfilingRuns", &ktt::ComputationResult::HasRemainingProfilingRuns) .def("assign", &ktt::ComputationResult::operator=); @@ -175,8 +175,9 @@ void InitializePythonDataHolders(py::module_& module) .def("SetStatus", &ktt::KernelResult::SetStatus) .def("SetExtraDuration", &ktt::KernelResult::SetExtraDuration) 
.def("SetExtraOverhead", &ktt::KernelResult::SetExtraOverhead) - .def("GetKernelName", &ktt::KernelResult::GetKernelName) - .def("GetConfiguration", &ktt::KernelResult::GetConfiguration) + .def("GetKernelName", &ktt::KernelResult::GetKernelName, py::return_value_policy::reference) + .def("GetResults", &ktt::KernelResult::GetResults, py::return_value_policy::reference) + .def("GetConfiguration", &ktt::KernelResult::GetConfiguration, py::return_value_policy::reference) .def("GetStatus", &ktt::KernelResult::GetStatus) .def("GetKernelDuration", &ktt::KernelResult::GetKernelDuration) .def("GetKernelOverhead", &ktt::KernelResult::GetKernelOverhead) @@ -190,7 +191,7 @@ void InitializePythonDataHolders(py::module_& module) py::class_(module, "ComputeApiInitializer") .def(py::init&>()) .def("GetContext", &ktt::ComputeApiInitializer::GetContext) - .def("GetQueues", &ktt::ComputeApiInitializer::GetQueues); + .def("GetQueues", &ktt::ComputeApiInitializer::GetQueues, py::return_value_policy::reference); } #endif // KTT_PYTHON diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index e4095851..8acf8f20 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -55,9 +55,9 @@ PYBIND11_MODULE(ktt, module) .def("GetAllQueues", &ktt::ComputeInterface::GetAllQueues) .def("SynchronizeQueue", &ktt::ComputeInterface::SynchronizeQueue) .def("SynchronizeDevice", &ktt::ComputeInterface::SynchronizeDevice) - .def("GetCurrentGlobalSize", &ktt::ComputeInterface::GetCurrentGlobalSize) - .def("GetCurrentLocalSize", &ktt::ComputeInterface::GetCurrentLocalSize) - .def("GetCurrentConfiguration", &ktt::ComputeInterface::GetCurrentConfiguration) + .def("GetCurrentGlobalSize", &ktt::ComputeInterface::GetCurrentGlobalSize, py::return_value_policy::reference) + .def("GetCurrentLocalSize", &ktt::ComputeInterface::GetCurrentLocalSize, py::return_value_policy::reference) + .def("GetCurrentConfiguration", &ktt::ComputeInterface::GetCurrentConfiguration, 
py::return_value_policy::reference) .def("ChangeArguments", &ktt::ComputeInterface::ChangeArguments) .def("SwapArguments", &ktt::ComputeInterface::SwapArguments) .def("UpdateScalarArgument", &ktt::ComputeInterface::UpdateScalarArgument) @@ -159,13 +159,26 @@ PYBIND11_MODULE(ktt, module) .def ( "CreateCompositeKernel", - &ktt::Tuner::CreateCompositeKernel, + [](ktt::Tuner& tuner, const std::string& name, const std::vector& definitionIds, + std::function launcher) + { + ktt::KernelLauncher actualLauncher = [launcher](ktt::ComputeInterface& interface) { launcher(&interface); }; + return tuner.CreateCompositeKernel(name, definitionIds, actualLauncher); + }, py::arg("name"), py::arg("definitionIds"), - py::arg("launcher") = static_cast(nullptr) + py::arg("launcher") = static_cast>(nullptr) ) .def("RemoveKernel", &ktt::Tuner::RemoveKernel) - .def("SetLauncher", &ktt::Tuner::SetLauncher) + .def + ( + "SetLauncher", + [](ktt::Tuner& tuner, const ktt::KernelId id, std::function launcher) + { + ktt::KernelLauncher actualLauncher = [launcher](ktt::ComputeInterface& interface) { launcher(&interface); }; + tuner.SetLauncher(id, actualLauncher); + } + ) .def ( "AddParameter", diff --git a/Source/Python/PythonSearchers.cpp b/Source/Python/PythonSearchers.cpp index 948c795f..7ab480b3 100644 --- a/Source/Python/PythonSearchers.cpp +++ b/Source/Python/PythonSearchers.cpp @@ -52,7 +52,7 @@ void InitializePythonSearchers(py::module_& module) py::arg("maxNeighbours") = 3 ) .def("GetConfigurationsCount", &ktt::Searcher::GetConfigurationsCount) - .def("GetExploredIndices", &ktt::Searcher::GetExploredIndices) + .def("GetExploredIndices", &ktt::Searcher::GetExploredIndices, py::return_value_policy::reference) .def("IsInitialized", &ktt::Searcher::IsInitialized); py::class_(module, "DeterministicSearcher") diff --git a/Tutorials/01ComputeApiInfo/ComputeApiInfo.py b/Tutorials/01ComputeApiInfo/ComputeApiInfoCuda.py similarity index 100% rename from Tutorials/01ComputeApiInfo/ComputeApiInfo.py 
rename to Tutorials/01ComputeApiInfo/ComputeApiInfoCuda.py From a5dbf3384c9decd32f2b10b3284c123b71453a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 9 Nov 2021 11:03:22 +0100 Subject: [PATCH 20/63] * Added more Python tutorials, including tutorial for implementing searchers and stop conditions in Python * Updated readme with information regarding Python usage --- Readme.md | 6 +- .../02KernelRunning/KernelRunningCuda.py | 74 ++++++++++++++ Tutorials/03KernelTuning/KernelTuningCuda.py | 10 +- Tutorials/07PythonInterfaces/CudaKernel.cu | 5 + .../PythonInterfacesCuda.py | 96 +++++++++++++++++++ premake5.lua | 6 ++ 6 files changed, 191 insertions(+), 6 deletions(-) create mode 100644 Tutorials/02KernelRunning/KernelRunningCuda.py create mode 100644 Tutorials/07PythonInterfaces/CudaKernel.cu create mode 100644 Tutorials/07PythonInterfaces/PythonInterfacesCuda.py diff --git a/Readme.md b/Readme.md index 9022d208..0880757b 100644 --- a/Readme.md +++ b/Readme.md @@ -15,6 +15,7 @@ in order to optimize computation for a particular device. * Ability to automatically ensure correctness of tuned computation with reference kernel or C++ function. * Support for multiple compute APIs, switching between CUDA, OpenCL and Vulkan requires only minor changes in C++ code (e.g., changing the kernel source file), no library recompilation is needed. +* Public API available in C++ (native) and Python (bindings). * Large number of customization options, including support for kernel arguments with user-defined data types, ability to change kernel compiler flags and more. @@ -39,6 +40,7 @@ tutorials: * `CustomArgumentTypes`: Usage of kernel arguments with custom data types and validating the output with value comparator. * `ComputeApiInitializer`: Providing tuner with custom compute context, queues and buffers. * `VectorArgumentCustomization`: Showcasing different usage options for vector kernel arguments. 
+* `PythonInterfaces`: Implementing custom searchers and stop conditions in Python which can afterwards be used with tuner. Examples -------- @@ -62,7 +64,8 @@ systems are Linux and Windows. - C++17 compiler, for example Clang 7.0, GCC 9.1, MSVC 14.16 (Visual Studio 2017) or newer - OpenCL, CUDA or Vulkan library, supported SDKs are AMD OCL SDK, Intel SDK for OpenCL, NVIDIA CUDA Toolkit and Vulkan SDK - - [Premake 5](https://premake.github.io/download) + - Command line build tool [Premake 5](https://premake.github.io/download) + - (Optional) Python 3 for Python bindings support * Build under Linux (inside KTT root folder): - ensure that path to vendor SDK is correctly set in the environment variables @@ -81,6 +84,7 @@ systems are Linux and Windows. - `--platform=vendor` specifies SDK used for building KTT, useful when multiple SDKs are installed - `--profiling=library` enables compilation of kernel profiling functionality using specified library - `--vulkan` enables compilation of experimental Vulkan backend + - `--python` enables compilation of Python bindings - `--no-examples` disables compilation of examples - `--no-tutorials` disables compilation of tutorials - `--tests` enables compilation of unit tests diff --git a/Tutorials/02KernelRunning/KernelRunningCuda.py b/Tutorials/02KernelRunning/KernelRunningCuda.py new file mode 100644 index 00000000..7a0fee40 --- /dev/null +++ b/Tutorials/02KernelRunning/KernelRunningCuda.py @@ -0,0 +1,74 @@ +import ctypes +import sys +import ktt + +def main(): + # Initialize device index and path to kernel. + deviceIndex = 0 + kernelFile = "./CudaKernel.cu" + + argc = len(sys.argv) + + if argc >= 2: + deviceIndex = sys.argv[1] + + if argc >= 3: + kernelFile = sys.argv[2] + + # Declare kernel parameters and data variables. + numberOfElements = 1024 * 1024 + # Dimensions of block and grid are specified with DimensionVector. Only single dimension is utilized in this tutorial. 
+ # In general, DimensionVector supports up to three dimensions. + blockDimensions = ktt.DimensionVector(256) + gridSize = int(numberOfElements / blockDimensions.GetSizeX()) + gridDimensions = ktt.DimensionVector(gridSize) + + a = [i * 1.0 for i in range(numberOfElements)] + b = [i * 1.0 for i in range(numberOfElements)] + result = [0.0 for i in range(numberOfElements)] + + # Create new tuner for the specified device, tuner uses CUDA as compute API. Platform index is ignored when using CUDA. + tuner = ktt.Tuner(0, deviceIndex, ktt.ComputeApi.CUDA) + + # Add new kernel definition. Specify kernel function name, path to source file, default grid dimensions and block dimensions. + # KTT returns handle to the newly added definition, which can be used to reference it in other API methods. + definition = tuner.AddKernelDefinitionFromFile("vectorAddition", kernelFile, gridDimensions, blockDimensions) + + # Add new kernel arguments to tuner. Argument data is copied from Python lists. Specify whether the arguments are + # used as input or output. KTT returns handle to the newly added argument, which can be used to reference it in other API + # methods. + aId = tuner.AddArgumentVectorFloat(a, ktt.ArgumentAccessType.ReadOnly) + bId = tuner.AddArgumentVectorFloat(b, ktt.ArgumentAccessType.ReadOnly) + resultId = tuner.AddArgumentVectorFloat(result, ktt.ArgumentAccessType.WriteOnly) + + # Set arguments for the kernel definition. The order of argument ids must match the order of arguments inside corresponding + # CUDA kernel function. + tuner.SetArguments(definition, [aId, bId, resultId]) + + # Create simple kernel from the specified definition. Specify name which will be used during logging and output operations. + # In more complex scenarios, kernels can have multiple definitions. Definitions can be shared between multiple kernels. + kernel = tuner.CreateSimpleKernel("Addition", definition) + + # Set time unit used during printing of kernel duration. 
The default time unit is milliseconds, but since computation in + # this tutorial is very short, microseconds are used instead. + tuner.SetTimeUnit(ktt.TimeUnit.Microseconds) + + # Run the specified kernel. The second argument is related to kernel tuning and will be described in further tutorials. + # In this case, empty object is passed in its place. The third argument is used to retrieve the kernel output. For each kernel + # argument that is retrieved, one BufferOutputDescriptor must be specified. Each of these descriptors contains id of the retrieved + # argument and memory location where the argument data will be stored. Optionally, it can also include number of bytes to be retrieved, + # if only a part of the argument is needed. Note that the memory location size needs to be equal or greater than the retrieved + # argument size. + array = (ctypes.c_float * numberOfElements)() + ctypes.pythonapi.PyCapsule_New.restype = ctypes.py_object + arrayCapsule = ctypes.pythonapi.PyCapsule_New(array) + tuner.Run(kernel, ktt.KernelConfiguration(), [ktt.BufferOutputDescriptor(resultId, arrayCapsule)]) + + # Print first ten elements from the result to check they were computed correctly. 
+ print("Printing the first 10 elements from result: ") + + for i in range(10): + print(array[i]) + +if __name__ == "__main__": + main() diff --git a/Tutorials/03KernelTuning/KernelTuningCuda.py b/Tutorials/03KernelTuning/KernelTuningCuda.py index 9c115d9f..51170f53 100644 --- a/Tutorials/03KernelTuning/KernelTuningCuda.py +++ b/Tutorials/03KernelTuning/KernelTuningCuda.py @@ -11,8 +11,8 @@ def computeReference(a, b, scalar, buffer): floatList[i] = a[i] + b[i] + scalar def main(): - deviceIndex = 0; - kernelFile = "./CudaKernel.cu"; + deviceIndex = 0 + kernelFile = "./CudaKernel.cu" argc = len(sys.argv) @@ -22,10 +22,10 @@ def main(): if argc >= 3: kernelFile = sys.argv[2] - numberOfElements = 1024 * 1024; - gridDimensions = ktt.DimensionVector(numberOfElements); + numberOfElements = 1024 * 1024 + gridDimensions = ktt.DimensionVector(numberOfElements) # Block size is initialized to one in this case, it will be controlled with tuning parameter which is added later. - blockDimensions = ktt.DimensionVector(); + blockDimensions = ktt.DimensionVector() a = [i * 1.0 for i in range(numberOfElements)] b = [i * 1.0 for i in range(numberOfElements)] diff --git a/Tutorials/07PythonInterfaces/CudaKernel.cu b/Tutorials/07PythonInterfaces/CudaKernel.cu new file mode 100644 index 00000000..383f3f50 --- /dev/null +++ b/Tutorials/07PythonInterfaces/CudaKernel.cu @@ -0,0 +1,5 @@ +__global__ void vectorAddition(const float* a, const float* b, float* result, const float scalar) +{ + int index = blockIdx.x * blockDim.x + threadIdx.x; + result[index] = a[index] + b[index] + scalar; +} diff --git a/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py b/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py new file mode 100644 index 00000000..31947d70 --- /dev/null +++ b/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py @@ -0,0 +1,96 @@ +import ctypes +import sys +import ktt + +# Implement custom stop condition in Python. The interface is the same as in C++. 
Note that it is necessary to call +# the parent class constructor from inheriting constructor. +class PyConfigurationFraction(ktt.StopCondition): + def __init__(self, fraction): + ktt.StopCondition.__init__(self) + self.fraction = max(min(fraction, 1.0), 0.0) + + def IsFulfilled(self): + return self.currentCount / self.totalCount >= self.fraction + + def Initialize(self, configurationsCount): + self.currentCount = 0; + self.totalCount = max(1, configurationsCount) + + def Update(self, result): + self.currentCount += 1 + + def GetStatusString(self): + return "Current fraction of explored configurations: " + str(self.currentCount / self.totalCount) + " / " + str(self.fraction) + + fraction = 0.0 + currentCount = 0 + totalCount = 0 + +# Implement custom searcher in Python. The interface is the same as in C++, including helper methods defined in +# the parent class. Note that it is necessary to call the parent class constructor from inheriting constructor. +class PyRandomSearcher(ktt.Searcher): + def __init__(self): + ktt.Searcher.__init__(self) + + def OnInitialize(self): + self.currentConfiguration = self.GetRandomConfiguration() + + def CalculateNextConfiguration(self, previousResult): + self.currentConfiguration = self.GetRandomConfiguration() + return True + + def GetCurrentConfiguration(self): + return self.currentConfiguration + + currentConfiguration = ktt.KernelConfiguration() + +def main(): + deviceIndex = 0 + kernelFile = "./CudaKernel.cu" + + argc = len(sys.argv) + + if argc >= 2: + deviceIndex = sys.argv[1] + + if argc >= 3: + kernelFile = sys.argv[2] + + numberOfElements = 1024 * 1024 + gridDimensions = ktt.DimensionVector(numberOfElements) + blockDimensions = ktt.DimensionVector() + + a = [i * 1.0 for i in range(numberOfElements)] + b = [i * 1.0 for i in range(numberOfElements)] + result = [0.0 for i in range(numberOfElements)] + scalarValue = 3.0 + + tuner = ktt.Tuner(0, deviceIndex, ktt.ComputeApi.CUDA) + + definition = 
tuner.AddKernelDefinitionFromFile("vectorAddition", kernelFile, gridDimensions, blockDimensions) + + aId = tuner.AddArgumentVectorFloat(a, ktt.ArgumentAccessType.ReadOnly) + bId = tuner.AddArgumentVectorFloat(b, ktt.ArgumentAccessType.ReadOnly) + resultId = tuner.AddArgumentVectorFloat(result, ktt.ArgumentAccessType.WriteOnly) + scalarId = tuner.AddArgumentScalarFloat(scalarValue) + tuner.SetArguments(definition, [aId, bId, resultId, scalarId]) + + kernel = tuner.CreateSimpleKernel("Addition", definition) + + tuner.AddParameter(kernel, "multiply_block_size", [32, 64, 128, 256]) + tuner.AddThreadModifier(kernel, [definition], ktt.ModifierType.Local, ktt.ModifierDimension.X, "multiply_block_size", + ktt.ModifierAction.Multiply) + tuner.AddThreadModifier(kernel, [definition], ktt.ModifierType.Global, ktt.ModifierDimension.X, "multiply_block_size", + ktt.ModifierAction.Divide) + + # Make tuner use the searcher implemented in Python. + tuner.SetSearcher(kernel, PyRandomSearcher()) + + tuner.SetTimeUnit(ktt.TimeUnit.Microseconds) + + # Begin tuning utilizing the stop condition implemented in Python. 
+ results = tuner.Tune(kernel, PyConfigurationFraction(0.4)) + tuner.SaveResults(results, "TuningOutput", ktt.OutputFormat.JSON) + +if __name__ == "__main__": + main() diff --git a/premake5.lua b/premake5.lua index a8e05982..ed2ee505 100644 --- a/premake5.lua +++ b/premake5.lua @@ -277,6 +277,12 @@ newoption description = "Specifies output directory for generated project files" } +newoption +{ + trigger = "tests", + description = "Enables compilation of unit tests" +} + newoption { trigger = "no-cuda", From 61d71f3dc89dba2ecbbcbba9ab7b79bc0045a955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 9 Nov 2021 16:00:52 +0100 Subject: [PATCH 21/63] * Fixed issue with interpreter lock interfering with parallel configuration generation when using Python * Added Python version of Coulomb sum example --- Examples/CoulombSum2d/CoulombSum2d.py | 118 ++++++++++++++++++++++++++ Source/Python/PythonModule.cpp | 6 +- ThirdPartyNotices.txt | 34 ++++++++ 3 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 Examples/CoulombSum2d/CoulombSum2d.py diff --git a/Examples/CoulombSum2d/CoulombSum2d.py b/Examples/CoulombSum2d/CoulombSum2d.py new file mode 100644 index 00000000..aff9c8b5 --- /dev/null +++ b/Examples/CoulombSum2d/CoulombSum2d.py @@ -0,0 +1,118 @@ +import ctypes +import random +import sys +import ktt + +def main(): + # Initialize platform index, device index and paths to kernels. + platformIndex = 0 + deviceIndex = 0 + kernelFile = "./CoulombSum2d.cl" + referenceKernelFile = "./CoulombSum2dReference.cl" + argc = len(sys.argv) + + if argc >= 2: + platformIndex = int(sys.argv[1]) # command-line arguments are strings, the indices must be integers + + if argc >= 3: + deviceIndex = int(sys.argv[2]) + + if argc >= 4: + kernelFile = sys.argv[3] + + if argc >= 5: + referenceKernelFile = sys.argv[4] + + # Declare kernel parameters. 
+ ndRangeDimensions = ktt.DimensionVector(512, 512) + workGroupDimensions = ktt.DimensionVector() + referenceWorkGroupDimensions = ktt.DimensionVector(16, 16) + # Total NDRange size matches number of grid points. + numberOfGridPoints = ndRangeDimensions.GetSizeX() * ndRangeDimensions.GetSizeY() + # If higher than 4k, computations with constant memory enabled will be invalid on many devices due to constant memory capacity limit. + numberOfAtoms = 4000 + + # Declare data variables. + gridSpacing = 0.5 + + random.seed(17) + atomInfo = [0.0 for i in range(4 * numberOfAtoms)] + atomInfoX = [random.uniform(0.0, 40.0) for i in range(numberOfAtoms)] + atomInfoY = [random.uniform(0.0, 40.0) for i in range(numberOfAtoms)] + atomInfoZ = [random.uniform(0.0, 40.0) for i in range(numberOfAtoms)] + atomInfoW = [random.uniform(0.0, 1.0) for i in range(numberOfAtoms)] + energyGrid = [0.0 for i in range(numberOfGridPoints)] + + for i in range(numberOfAtoms): + atomInfo[4 * i] = atomInfoX[i] + atomInfo[4 * i + 1] = atomInfoY[i] + atomInfo[4 * i + 2] = atomInfoZ[i] + atomInfo[4 * i + 3] = atomInfoW[i] + + tuner = ktt.Tuner(platformIndex, deviceIndex, ktt.ComputeApi.OpenCL) + tuner.SetCompilerOptions("-cl-fast-relaxed-math") + tuner.SetTimeUnit(ktt.TimeUnit.Microseconds) + + # Add two kernels to tuner, one of the kernels acts as a reference kernel. + definition = tuner.AddKernelDefinitionFromFile("directCoulombSum", kernelFile, ndRangeDimensions, workGroupDimensions) + kernel = tuner.CreateSimpleKernel("CoulombSum", definition) + + referenceDefinition = tuner.AddKernelDefinitionFromFile("directCoulombSumReference", referenceKernelFile, + ndRangeDimensions, referenceWorkGroupDimensions) + referenceKernel = tuner.CreateSimpleKernel("CoulombSumReference", referenceDefinition) + + # Add several parameters to tuned kernel, some of them utilize constraint function and thread modifiers. 
+ tuner.AddParameter(kernel, "INNER_UNROLL_FACTOR", [0, 1, 2, 4, 8, 16, 32]) + tuner.AddParameter(kernel, "USE_CONSTANT_MEMORY", [0, 1]) + tuner.AddParameter(kernel, "VECTOR_TYPE", [1, 2, 4, 8]) + tuner.AddParameter(kernel, "USE_SOA", [0, 1, 2]) + + # Using vectorized SoA only makes sense when vectors are longer than 1. + vectorizedSoA = lambda vector: vector[0] > 1 or vector[1] != 2 + tuner.AddConstraint(kernel, ["VECTOR_TYPE", "USE_SOA"], vectorizedSoA) + + # Divide NDRange in dimension x by OUTER_UNROLL_FACTOR. + tuner.AddParameter(kernel, "OUTER_UNROLL_FACTOR", [1, 2, 4, 8]) + tuner.AddThreadModifier(kernel, [definition], ktt.ModifierType.Global, ktt.ModifierDimension.X, "OUTER_UNROLL_FACTOR", + ktt.ModifierAction.Divide) + + # Multiply work-group size in dimensions x and y by the following parameters (effectively setting work-group size to their values). + tuner.AddParameter(kernel, "WORK_GROUP_SIZE_X", [4, 8, 16, 32]) + tuner.AddThreadModifier(kernel, [definition], ktt.ModifierType.Local, ktt.ModifierDimension.X, "WORK_GROUP_SIZE_X", + ktt.ModifierAction.Multiply) + tuner.AddParameter(kernel, "WORK_GROUP_SIZE_Y", [1, 2, 4, 8, 16, 32]) + tuner.AddThreadModifier(kernel, [definition], ktt.ModifierType.Local, ktt.ModifierDimension.Y, "WORK_GROUP_SIZE_Y", + ktt.ModifierAction.Multiply) + + # Add all kernel arguments. 
+ atomInfoId = tuner.AddArgumentVectorFloat(atomInfo, ktt.ArgumentAccessType.ReadOnly) + atomInfoXId = tuner.AddArgumentVectorFloat(atomInfoX, ktt.ArgumentAccessType.ReadOnly) + atomInfoYId = tuner.AddArgumentVectorFloat(atomInfoY, ktt.ArgumentAccessType.ReadOnly) + atomInfoZId = tuner.AddArgumentVectorFloat(atomInfoZ, ktt.ArgumentAccessType.ReadOnly) + atomInfoWId = tuner.AddArgumentVectorFloat(atomInfoW, ktt.ArgumentAccessType.ReadOnly) + numberOfAtomsId = tuner.AddArgumentScalarInt(numberOfAtoms) + gridSpacingId = tuner.AddArgumentScalarFloat(gridSpacing) + energyGridId = tuner.AddArgumentVectorFloat(energyGrid, ktt.ArgumentAccessType.ReadWrite) + + # Set arguments for both tuned and reference kernel definitions, order of arguments is important. + tuner.SetArguments(definition, [atomInfoId, atomInfoXId, atomInfoYId, atomInfoZId, atomInfoWId, numberOfAtomsId, + gridSpacingId, energyGridId]) + tuner.SetArguments(referenceDefinition, [atomInfoId, numberOfAtomsId, gridSpacingId, energyGridId]) + + # Set searcher to random. + tuner.SetSearcher(kernel, ktt.RandomSearcher()) + + # Specify custom tolerance threshold for validation of floating-point arguments. Default threshold is 1e-4. + tuner.SetValidationMethod(ktt.ValidationMethod.SideBySideComparison, 0.01) + + # Set reference kernel which validates results provided by the tuned kernel. + tuner.SetReferenceKernel(energyGridId, referenceKernel, ktt.KernelConfiguration()) + + # Launch kernel tuning, end after 1 minute. + results = tuner.Tune(kernel, ktt.TuningDuration(60.0)) + + # Save tuning results to JSON file. 
+ tuner.SaveResults(results, "CoulombSum2dOutput", ktt.OutputFormat.JSON) + +if __name__ == "__main__": + main() diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index 8acf8f20..f63f9417 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -300,12 +300,13 @@ PYBIND11_MODULE(ktt, module) .def("SetValueComparator", &ktt::Tuner::SetValueComparator) .def("SetReferenceComputation", &ktt::Tuner::SetReferenceComputation) .def("SetReferenceKernel", &ktt::Tuner::SetReferenceKernel) - .def("Tune", py::overload_cast(&ktt::Tuner::Tune)) - .def("Tune", py::overload_cast>(&ktt::Tuner::Tune)) + .def("Tune", py::overload_cast(&ktt::Tuner::Tune), py::call_guard()) + .def("Tune", py::overload_cast>(&ktt::Tuner::Tune), py::call_guard()) .def ( "TuneIteration", &ktt::Tuner::TuneIteration, + py::call_guard(), py::arg("id"), py::arg("output"), py::arg("recomputeReference") = false @@ -314,6 +315,7 @@ PYBIND11_MODULE(ktt, module) ( "SimulateKernelTuning", &ktt::Tuner::SimulateKernelTuning, + py::call_guard(), py::arg("id"), py::arg("results"), py::arg("iterations") = 0 diff --git a/ThirdPartyNotices.txt b/ThirdPartyNotices.txt index 9dfdd72d..e87169a5 100644 --- a/ThirdPartyNotices.txt +++ b/ThirdPartyNotices.txt @@ -24,6 +24,40 @@ THE SOFTWARE. ----------------------------------------------------------------------------------------------------------------------- +pybind11 (https://github.com/pybind/pybind11) + +Copyright (c) 2016 Wenzel Jakob , All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing of +external contributions to this project including patches, pull requests, etc. 
+ +----------------------------------------------------------------------------------------------------------------------- + JSON for Modern C++ (https://github.com/nlohmann/json) Copyright (c) 2013-2021 Niels Lohmann From 828aa005ae49c470b492e3578d19b7cb78849626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Thu, 11 Nov 2021 13:46:49 +0100 Subject: [PATCH 22/63] * Tweaks to certain Python API methods * Removed methods related to custom user structures from Python API * Added Python version of reduction example --- Examples/CoulombSum2d/CoulombSum2d.py | 1 - Examples/Reduction/Reduction.py | 147 ++++++++++++++++++++++++++ Source/Python/PythonDataHolders.cpp | 5 - Source/Python/PythonModule.cpp | 34 +++--- 4 files changed, 163 insertions(+), 24 deletions(-) create mode 100644 Examples/Reduction/Reduction.py diff --git a/Examples/CoulombSum2d/CoulombSum2d.py b/Examples/CoulombSum2d/CoulombSum2d.py index aff9c8b5..6e486d3f 100644 --- a/Examples/CoulombSum2d/CoulombSum2d.py +++ b/Examples/CoulombSum2d/CoulombSum2d.py @@ -1,4 +1,3 @@ -import ctypes import random import sys import ktt diff --git a/Examples/Reduction/Reduction.py b/Examples/Reduction/Reduction.py new file mode 100644 index 00000000..04cf9c9b --- /dev/null +++ b/Examples/Reduction/Reduction.py @@ -0,0 +1,147 @@ +import ctypes +import random +import sys +import ktt + +def reference(buffer, src): + ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.POINTER(ctypes.c_float) + ctypes.pythonapi.PyCapsule_GetPointer.argtypes = [ctypes.py_object, ctypes.c_void_p] + result = ctypes.pythonapi.PyCapsule_GetPointer(buffer, None) + resSize = len(src) + resD = [0.0 for i in range(resSize)] + + for i in range(resSize): + resD[i] = src[i] + + while resSize > 1: + for i in range(int(resSize / 2)): + resD[i] = resD[i * 2] + resD[i * 2 + 1] + + if resSize % 2 != 0: + resD[int(resSize / 2) - 1] += resD[resSize - 1] + + resSize = int(resSize / 2) + + print("Reference in double: " + str(resD[0])) + 
result[0] = resD[0] + +def launcher(interface, definition, srcId, dstId, nId, outOffsetId, inOffsetId): + globalSize = interface.GetCurrentGlobalSize(definition) + localSize = interface.GetCurrentLocalSize(definition) + pairs = interface.GetCurrentConfiguration().GetPairs() + myGlobalSize = globalSize + + # change global size for constant numbers of work-groups + # this may be done by thread modifier operators as well + if ktt.ParameterPair.GetParameterValue(pairs, "UNBOUNDED_WG") == 0: + myGlobalSize = ktt.DimensionVector(ktt.ParameterPair.GetParameterValue(pairs, "WG_NUM") * localSize.GetSizeX()) + + # execute reduction kernel + interface.RunKernel(definition, myGlobalSize, localSize) + + # execute kernel log n times, when atomics are not used + if ktt.ParameterPair.GetParameterValue(pairs, "USE_ATOMICS") == 0: + n = int(globalSize.GetSizeX() / localSize.GetSizeX()) + inOffset = 0 + outOffset = n + vectorSize = ktt.ParameterPair.GetParameterValue(pairs, "VECTOR_SIZE") + wgSize = localSize.GetSizeX() + iterations = 0 # make sure the end result is in the correct buffer + + while n > 1 or iterations % 2 == 1: + interface.SwapArguments(definition, srcId, dstId) + myGlobalSize.SetSizeX(int((n + vectorSize - 1) / vectorSize)) + myGlobalSize.SetSizeX(int((myGlobalSize.GetSizeX() - 1) / wgSize + 1) * wgSize) + + if myGlobalSize == localSize: + outOffset = 0 # only one WG will be executed + + interface.UpdateScalarArgumentInt(nId, n) + interface.UpdateScalarArgumentInt(outOffsetId, outOffset) + interface.UpdateScalarArgumentInt(inOffsetId, inOffset) + + interface.RunKernel(definition, myGlobalSize, localSize) + n = int((n + wgSize * vectorSize - 1) / (wgSize * vectorSize)) + inOffset = int(outOffset / vectorSize) # input is vectorized, output is scalar + outOffset += n + iterations += 1 + +def main(): + platformIndex = 0 + deviceIndex = 0 + kernelFile = "./Reduction.cu" + argc = len(sys.argv) + + if argc >= 2: + platformIndex = int(sys.argv[1]) # command-line arguments are strings, the platform index must be an integer + + if argc >= 3: + deviceIndex 
= int(sys.argv[2]) # command-line arguments are strings, the device index must be an integer + + if argc >= 4: + kernelFile = sys.argv[3] + + n = 64 * 1024 * 1024 + nAlloc = int((n + 16 - 1) / 16) * 16 # pad to the longest vector size + src = [0.0 for i in range(nAlloc)] + dst = [0.0 for i in range(nAlloc)] + random.seed(17) + + for i in range(n): + src[i] = random.uniform(0.0, 1000.0) + + tuner = ktt.Tuner(platformIndex, deviceIndex, ktt.ComputeApi.CUDA) + tuner.SetGlobalSizeType(ktt.GlobalSizeType.OpenCL) + tuner.SetTimeUnit(ktt.TimeUnit.Microseconds) + + nUp = int((n + 512 - 1) / 512) * 512 # maximum WG size used in tuning parameters + ndRangeDimensions = ktt.DimensionVector(nUp) + workGroupDimensions = ktt.DimensionVector() + definition = tuner.AddKernelDefinitionFromFile("reduce", kernelFile, ndRangeDimensions, workGroupDimensions) + + srcId = tuner.AddArgumentVectorFloat(src, ktt.ArgumentAccessType.ReadWrite) + dstId = tuner.AddArgumentVectorFloat(dst, ktt.ArgumentAccessType.ReadWrite) + nId = tuner.AddArgumentScalarInt(n) + offset = 0 + inOffsetId = tuner.AddArgumentScalarInt(offset) + outOffsetId = tuner.AddArgumentScalarInt(offset) + tuner.SetArguments(definition, [srcId, dstId, nId, inOffsetId, outOffsetId]) + + kernel = tuner.CreateSimpleKernel("Reduction", definition) + + # get number of compute units + di = tuner.GetCurrentDeviceInfo() + print("Number of compute units: " + str(di.GetMaxComputeUnits())) + cus = di.GetMaxComputeUnits() + + tuner.AddParameter(kernel, "WORK_GROUP_SIZE_X", [32, 64, 128, 256, 512]) + tuner.AddThreadModifier(kernel, [definition], ktt.ModifierType.Local, ktt.ModifierDimension.X, "WORK_GROUP_SIZE_X", + ktt.ModifierAction.Multiply) + tuner.AddParameter(kernel, "UNBOUNDED_WG", [0, 1]) + tuner.AddParameter(kernel, "WG_NUM", [0, cus, cus * 2, cus * 4, cus * 8, cus * 16]) + tuner.AddParameter(kernel, "VECTOR_SIZE", [1, 2, 4]) + tuner.AddThreadModifier(kernel, [definition], ktt.ModifierType.Global, ktt.ModifierDimension.X, "VECTOR_SIZE", + ktt.ModifierAction.Divide) + tuner.AddParameter(kernel, "USE_ATOMICS", 
[0, 1]) + + persistConstraint = lambda v: (v[0] != 0 and v[1] == 0) or (v[0] == 0 and v[1] > 0) + tuner.AddConstraint(kernel, ["UNBOUNDED_WG", "WG_NUM"], persistConstraint) + persistentAtomic = lambda v: (v[0] == 1) or (v[0] == 0 and v[1] == 1) + tuner.AddConstraint(kernel, ["UNBOUNDED_WG", "USE_ATOMICS"], persistentAtomic) + unboundedWG = lambda v: v[0] == 0 or v[1] >= 32 + tuner.AddConstraint(kernel, ["UNBOUNDED_WG", "WORK_GROUP_SIZE_X"], unboundedWG) + + referenceComp = lambda buffer: reference(buffer, src) + tuner.SetReferenceComputation(dstId, referenceComp) + + tuner.SetValidationMethod(ktt.ValidationMethod.SideBySideComparison, float(n) * 10000.0 / 10000000.0) + tuner.SetValidationRange(dstId, 1) + + kernelLauncher = lambda interface: launcher(interface, definition, srcId, dstId, nId, outOffsetId, inOffsetId) + tuner.SetLauncher(kernel, kernelLauncher) + + results = tuner.Tune(kernel) + tuner.SaveResults(results, "ReductionOutput", ktt.OutputFormat.JSON) + +if __name__ == "__main__": + main() diff --git a/Source/Python/PythonDataHolders.cpp b/Source/Python/PythonDataHolders.cpp index c41ad5cc..6578354e 100644 --- a/Source/Python/PythonDataHolders.cpp +++ b/Source/Python/PythonDataHolders.cpp @@ -187,11 +187,6 @@ void InitializePythonDataHolders(py::module_& module) .def("GetTotalOverhead", &ktt::KernelResult::GetTotalOverhead) .def("IsValid", &ktt::KernelResult::IsValid) .def("HasRemainingProfilingRuns", &ktt::KernelResult::HasRemainingProfilingRuns); - - py::class_(module, "ComputeApiInitializer") - .def(py::init&>()) - .def("GetContext", &ktt::ComputeApiInitializer::GetContext) - .def("GetQueues", &ktt::ComputeApiInitializer::GetQueues, py::return_value_policy::reference); } #endif // KTT_PYTHON diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index f63f9417..d5a44ef7 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -60,7 +60,12 @@ PYBIND11_MODULE(ktt, module) .def("GetCurrentConfiguration", 
&ktt::ComputeInterface::GetCurrentConfiguration, py::return_value_policy::reference) .def("ChangeArguments", &ktt::ComputeInterface::ChangeArguments) .def("SwapArguments", &ktt::ComputeInterface::SwapArguments) - .def("UpdateScalarArgument", &ktt::ComputeInterface::UpdateScalarArgument) + .def("UpdateScalarArgumentChar", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int8_t data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentShort", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int16_t data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentInt", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int32_t data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentLong", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int64_t data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentFloat", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const float data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentDouble", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const double data) { ci.UpdateScalarArgument(id, &data); }) .def("UpdateLocalArgument", &ktt::ComputeInterface::UpdateLocalArgument) .def("UploadBuffer", &ktt::ComputeInterface::UploadBuffer) .def("UploadBufferAsync", &ktt::ComputeInterface::UploadBufferAsync) @@ -124,8 +129,6 @@ PYBIND11_MODULE(ktt, module) py::class_(module, "Tuner") .def(py::init()) .def(py::init()) - .def(py::init()) - .def(py::init&>()) .def ( "AddKernelDefinition", @@ -221,26 +224,12 @@ PYBIND11_MODULE(ktt, module) const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) .def("AddArgumentVectorDouble", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorChar", py::overload_cast(&ktt::Tuner::AddArgumentVector)) - 
.def("AddArgumentVectorShort", py::overload_cast(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorInt", py::overload_cast(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorLong", py::overload_cast(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorFloat", py::overload_cast(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorDouble", py::overload_cast(&ktt::Tuner::AddArgumentVector)) - // Todo: AddArgumentVector version with user buffer .def("AddArgumentScalarChar", &ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarShort", &ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarInt", &ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarLong", &ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarFloat", &ktt::Tuner::AddArgumentScalar) .def("AddArgumentScalarDouble", &ktt::Tuner::AddArgumentScalar) - .def("AddArgumentScalar", [](ktt::Tuner& tuner, const void* data, const size_t dataSize) { return tuner.AddArgumentScalar(data, dataSize); }) .def("AddArgumentLocalChar", &ktt::Tuner::AddArgumentLocal) .def("AddArgumentLocalShort", &ktt::Tuner::AddArgumentLocal) .def("AddArgumentLocalInt", &ktt::Tuner::AddArgumentLocal) @@ -337,7 +326,16 @@ PYBIND11_MODULE(ktt, module) py::arg("data") = ktt::UserData{} ) .def("LoadResults", [](ktt::Tuner& tuner, const std::string& filePath, const ktt::OutputFormat format) { return tuner.LoadResults(filePath, format); }) - .def("LoadResults", [](ktt::Tuner& tuner, const std::string& filePath, const ktt::OutputFormat format, ktt::UserData& data) { return tuner.LoadResults(filePath, format, data); }) + .def + ( + "LoadResultsWithData", + [](ktt::Tuner& tuner, const std::string& filePath, const ktt::OutputFormat format) + { + ktt::UserData data; + auto results = tuner.LoadResults(filePath, format, data); + return std::make_pair(results, data); + } + ) .def("AddComputeQueue", &ktt::Tuner::AddComputeQueue) .def("RemoveComputeQueue", &ktt::Tuner::RemoveComputeQueue) .def("Synchronize", 
&ktt::Tuner::Synchronize) From a8fc0f7a3c3797df368f7abf4ea2cc033a8aad91 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Tue, 16 Nov 2021 12:49:21 +0100 Subject: [PATCH 23/63] * Initial work on onboarding guide to KTT --- OnboardingGuide.md | 156 +++++++++++++++++++++++++++++++++++++++++++++ Readme.md | 3 +- 2 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 OnboardingGuide.md diff --git a/OnboardingGuide.md b/OnboardingGuide.md new file mode 100644 index 00000000..bde9f7e5 --- /dev/null +++ b/OnboardingGuide.md @@ -0,0 +1,156 @@ +# Introduction + +When optimizing performance of compute kernels, a programmer has to make a lot of decisions, such as which algorithm to +use, how to arrange data structures in memory, how to block data access to optimize caching or which factor to use for +loop unrolling. Such decisions cannot be typically made in isolation - for example, when data layout in memory is changed, +a different algorithm may perform better. Therefore, it is necessary to explore vast amount of combinations of optimization +decisions in order to reach the best performance. Moreover, the best combination of optimization decisions can differ for +various hardware devices or program setup. Therefore, a way of automatic search for the best combination of these decisions, +called autotuning, is valuable. + +Naturally, in the simple use case, a batch script can be sufficient for autotuning. However, in advanced applications, +usage of an autotuning framework can be beneficial, as it can automatically handle memory objects, detect errors in autotuned +kernels or perform autotuning during program runtime. + +Kernel Tuning Toolkit is a framework which allows autotuning of kernels written in CUDA, OpenCL or Vulkan. 
It provides unified +interface for those APIs, handles communication between host (CPU) and accelerator (GPU, Xeon Phi, etc.), checks results and +timing of tuned kernels, allows dynamic tuning during program runtime, profiling of autotuned kernels and more. + +---- + +### Table of contents +* [Basic principles behind KTT](#basic-principles-behind-ktt) +* [Offline tuning of a single kernel](#offline-tuning-of-a-single-kernel) +* [Initialization of KTT](#initialization-of-ktt) +* [Kernel arguments](#kernel-arguments) + * [Subsection](#subsection) + +---- + +### Basic principles behind KTT + +When leveraging autotuning, a programmer needs to think about which parts of their computation can be autotuned. For +example, an algorithm may contain for loop which can be unrolled. There are multiple options for unroll factor value +of this loop, e.g., 1 (no unroll), 2, 4, 8. Picking the optimal value for a given device manually is difficult, therefore +we can define a tuning parameter for the unroll factor with the specified values. Afterwards, we can launch four different +versions of our computation to see which value performs best. + +In practice, the computations are often complex enough to contain multiple parts which can be optimized in this way, leading +to definition of multiple tuning parameters. For example we may have the previously mentioned loop unroll parameter with +values {1, 2, 4, 8} and another parameter controlling data arrangement in memory with values {0, 1}. Combinations of these +parameters now define 8 different versions of computation. One such combination is called tuning configuration. Together, all +tuning configurations define configuration space. The size of the space grows exponentially with addition of more tuning +parameters. KTT framework offers functionality to deal with this problem which will be discussed in the follow-up sections. 
+ +---- + +### Offline tuning of a single kernel + +Offline kernel tuning is the simplest use case of KTT framework. It involves creating a kernel, specifying its arguments (data), +defining tuning parameters and then launching autotuning. During autotuning, tuning parameter values are propagated to kernel source +code in a form of preprocessor definitions. E.g., when configuration which contains parameter with name unroll_factor and value 2 +is launched, the following code is added at the beginning of kernel source code: #define unroll_factor 2. The definitions can be used +to alter kernel functionality based on tuning parameter values. + +```cpp +const size_t numberOfElements = 1024 * 1024; +const ktt::DimensionVector globalDimensions(numberOfElements); +const ktt::DimensionVector localDimensions(64); + +const ktt::KernelDefinitionId definition = tuner.AddKernelDefinitionFromFile("kernelName", "kernelFile.cl", globalDimensions, localDimensions); +const ktt::KernelId kernel = tuner.CreateSimpleKernel("TestKernel", definition); + +/* Initialize kernel input and output */ + +tuner.AddParameter(kernel, "unroll_factor", std::vector<uint64_t>{1, 2, 4, 8}); +tuner.Tune(kernel); +``` + +In the code snippet above, we create a kernel definition by specifying the name of kernel function and path to its source file. We also define +its default global and local dimensions (e.g., size of ND-range and work-group in OpenCL, size of grid and block in CUDA). We use provided +kernel definition handle to create kernel. We can also specify custom name for the kernel which is used e.g., for logging purposes. Afterwards, +we can use the kernel handle to define tuning parameter and launch autotuning. The step of creating kernel definition and kernel separately may +seem redundant at first, but it plays important role during more complex use cases that will be covered later. + +---- + +### Initialization of KTT + +The first step before we can utilize KTT is creation of tuner instance. 
Tuner is one of the major KTT classes and implements large portion of +autotuning logic. Practically all of the KTT structures such as kernels, kernel arguments and tuning parameters are tied to a specific tuner instance. +The simplest tuner constructor requires 3 parameters - index for platform, index for device and type of compute API that will be utilized (e.g., CUDA, +OpenCL). The indices for platforms and devices are assigned by KTT - they can be retrieved through PlatformInfo and DeviceInfo structures. These +structures also contain some other useful information such as list of supported extensions, global memory size, number of available compute units and +more. Note that the assigned indices remain the same when autotuning applications are launched multiple times on the same computer. They change only +when the hardware configuration is changed (e.g., new GPU is added, old GPU is removed, device driver is reinstalled). Also note, that the indices +may not be the same across multiple compute APIs (e.g., index for certain device may differ under OpenCL and CUDA). + +```cpp +ktt::Tuner tuner(0, 0, ktt::ComputeApi::OpenCL); + +std::vector<ktt::PlatformInfo> platforms = tuner.GetPlatformInfo(); + +for (const auto& platform : platforms) +{ + std::cout << platform.GetString() << std::endl; + std::vector<ktt::DeviceInfo> devices = tuner.GetDeviceInfo(platform.GetIndex()); + + for (const auto& device : devices) + { + std::cout << device.GetString() << std::endl; + } +} +``` + +The code above demonstrates how information about all available OpenCL platforms and devices is retrieved from KTT. In this case, the tuner is created +for the first device on the first platform (both platform and device index is 0). 
+ +---- + +### Kernel arguments + +Todo + +#### Subsection + +Todo + +---- + +### Creating and running kernel + +### Tuning the kernel + +### Checking kernel results + +### Using kernel launchers + +#### Motivation example + +#### Launcher implementation + +### Using composite kernels + +#### Motivation example + +#### Composite kernel implementation + +### Stop conditions + +### Searchers + +### Dynamic autotuning + +#### Differences over offline tuning + +#### Handling kernel arguments + +#### Example + +### Advanced topics + +#### Asynchronous execution + +#### Profiling + +#### Interoperability diff --git a/Readme.md b/Readme.md index 0880757b..ffb53f75 100644 --- a/Readme.md +++ b/Readme.md @@ -21,7 +21,8 @@ ability to change kernel compiler flags and more. Getting started --------------- -* Documentation for KTT API can be found [here](https://hipercore.github.io/KTT/). +* KTT introductory guide can be found [here](https://github.com/HiPerCoRe/KTT/blob/development/OnboardingGuide.md). +* Full documentation for KTT API can be found [here](https://hipercore.github.io/KTT/). * KTT FAQ can be found [here](https://hipercore.github.io/KTT/md__docs__resources__faq.html). * The newest release of KTT framework can be found [here](https://github.com/HiPerCoRe/KTT/releases). * Prebuilt binaries are not provided due to many different combinations of compute APIs and build options available. 
From 4d255329e38e9b9da507a49dad11897ba5aa4b46 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Thu, 18 Nov 2021 14:52:28 +0100 Subject: [PATCH 24/63] * Added kernel arguments section to onboarding guide * Improved assert messages when adding arguments with unsupported data types to tuner --- OnboardingGuide.md | 121 +++++++++++++++++++++++++++++++++++++++++---- Source/Tuner.h | 2 +- Source/Tuner.inl | 4 +- 3 files changed, 115 insertions(+), 12 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index bde9f7e5..bb3a5410 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -1,4 +1,4 @@ -# Introduction +# Introduction to KTT When optimizing performance of compute kernels, a programmer has to make a lot of decisions, such as which algorithm to use, how to arrange data structures in memory, how to block data access to optimize caching or which factor to use for @@ -23,7 +23,10 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Offline tuning of a single kernel](#offline-tuning-of-a-single-kernel) * [Initialization of KTT](#initialization-of-ktt) * [Kernel arguments](#kernel-arguments) - * [Subsection](#subsection) + * [Scalar arguments](#scalar-arguments) + * [Vector arguments](#vector-arguments) + * [Local memory arguments](#local-memory-arguments) + * [Symbol arguments](#symbol-arguments) ---- @@ -49,7 +52,7 @@ parameters. KTT framework offers functionality to deal with this problem which w Offline kernel tuning is the simplest use case of KTT framework. It involves creating a kernel, specifying its arguments (data), defining tuning parameters and then launching autotuning. During autotuning, tuning parameter values are propagated to kernel source code in a form of preprocessor definitions. E.g., when configuration which contains parameter with name unroll_factor and value 2 -is launched, the following code is added at the beginning of kernel source code: #define unroll_factor 2. 
The definitions can be used +is launched, the following code is added at the beginning of kernel source code: `#define unroll_factor 2`. The definitions can be used to alter kernel functionality based on tuning parameter values. ```cpp @@ -79,11 +82,11 @@ seem redundant at first, but it plays important role during more complex use cas The first step before we can utilize KTT is creation of tuner instance. Tuner is one of the major KTT classes and implements large portion of autotuning logic. Practically all of the KTT structures such as kernels, kernel arguments and tuning parameters are tied to a specific tuner instance. The simplest tuner constructor requires 3 parameters - index for platform, index for device and type of compute API that will be utilized (e.g., CUDA, -OpenCL). The indices for platforms and devices are assigned by KTT - they can be retrieved through PlatformInfo and DeviceInfo structures. These +OpenCL). The indices for platforms and devices are assigned by KTT - they can be retrieved through `PlatformInfo` and `DeviceInfo` structures. These structures also contain some other useful information such as list of supported extensions, global memory size, number of available compute units and more. Note that the assigned indices remain the same when autotuning applications are launched multiple times on the same computer. They change only -when the hardware configuration is changed (e.g., new GPU is added, old GPU is removed, device driver is reinstalled). Also note, that the indices -may not be the same across multiple compute APIs (e.g., index for certain device may differ under OpenCL and CUDA). +when the hardware configuration is changed (e.g., new device is added, old device is removed, device driver is reinstalled). Also note, that the indices +may not be the same across multiple compute APIs (e.g., index for the same device may be different under OpenCL and CUDA). 
```cpp ktt::Tuner tuner(0, 0, ktt::ComputeApi::OpenCL); @@ -109,11 +112,109 @@ for the first device on the first platform (both platform and device index is 0) ### Kernel arguments -Todo +Kernel arguments define input and output of a kernel. KTT supports multiple forms of kernel arguments such as buffers, scalars and constant memory +arguments. Before argument can be assigned to kernel, its description must be given to the tuner. In case of a buffer argument, this includes the +initial data placed inside buffer before kernel is launched, its access type (read or write) and memory location from which kernel accesses the buffer +(host or device). Once the information is provided, tuner returns a handle to the argument. Through this handle, arguments can be assigned to kernel +definitions as shown in the code below. KTT supports a wide range of data types for kernel arguments, including all built-in integer and floating-point +types as well as custom types. Note however, that custom types must be trivially copyable, so it is possible to transfer the arguments into device memory. -#### Subsection +```cpp +const size_t numberOfElements = 1024 * 1024; +std::vector a(numberOfElements); +std::vector b(numberOfElements); +std::vector result(numberOfElements, 0.0f); + +// Fill buffers with initial data before adding the arguments to the tuner. +for (size_t i = 0; i < numberOfElements; ++i) +{ + a[i] = static_cast(i); + b[i] = static_cast(i + 1); +} + +const ktt::ArgumentId aId = tuner.AddArgumentVector(a, ktt::ArgumentAccessType::ReadOnly); +const ktt::ArgumentId bId = tuner.AddArgumentVector(b, ktt::ArgumentAccessType::ReadOnly); +const ktt::ArgumentId resultId = tuner.AddArgumentVector(result, ktt::ArgumentAccessType::WriteOnly); +const ktt::ArgumentId scalarId = tuner.AddArgumentScalar(3.0f); +tuner.SetArguments(definition, {aId, bId, resultId, scalarId}); +``` + +#### Scalar arguments + +Scalar arguments are straightforward to add. 
We simply need to specify the scalar argument value. The scalar value is copied inside the tuner, +so both lvalues and rvalues are supported. + +```cpp +const float lvalueScalar = 322.0f; +const ktt::ArgumentId lvalueId = tuner.AddArgumentScalar(lvalueScalar); +const ktt::ArgumentId rvalueId = tuner.AddArgumentScalar(34); +``` + +#### Vector arguments + +Vector arguments have more customization options available than scalars. Other than the initial data, it is possible to specify whether argument +is used for reading or writing. For read-only arguments, additional optimization is possible during offline tuning - since its contents do not +change, the buffer needs to be copied into memory only once before the first kernel configuration is launched and then remain the same for subsequent +configurations. Setting correct access type can therefore lead to better tuning performance. + +Next, it is possible to decide memory location from which the buffer is accessed by kernel - the two main options are host memory and device memory. +Users may wish to choose different memory depending on the type of device used for autotuning (e.g., host memory for CPUs, device memory for +dedicated GPUs). For host memory, it is possible to use zero-copy option, which makes kernel access the argument data directly, instead of creating +separate buffer and thus reduce memory usage. + +Management type option specifies whether buffer management is handled automatically by the tuner (e.g., write arguments are automatically reset +to initial state before new kernel configuration is launched, buffers are created and deleted automatically) or by the user. In some advanced cases, +users may wish to manage the buffers manually. Note however, that this requires usage of kernel launchers which will be discussed later. + +The final option for vector arguments is whether the initial data provided by user should be copied inside the tuner or referenced directly. 
By default, +the data is copied which is safer (i.e., temporary arguments work correctly) but less memory efficient. In case the initial data is provided in form of +lvalue argument, direct reference can be used to avoid copying. This requires user to keep the initial data buffer valid during the time argument is +used by the tuner. + +```cpp +std::vector input1; +std::vector input2; +std::vector result; -Todo +/* Initialize data */ + +const ktt::ArgumentId copyInputId = tuner.AddArgumentVector(input1, ktt::ArgumentAccessType::ReadOnly); +const ktt::ArgumentId referenceInputId = tuner.AddArgumentVector(input2, ktt::ArgumentAccessType::ReadOnly, ktt::ArgumentMemoryLocation::Device, ktt::ArgumentManagementType::Framework, true); +const ktt::ArgumentId resultId = tuner.AddArgumentVector(result, ktt::ArgumentAccessType::WriteOnly); + +// Ok - copying temporary buffer. +{ + std::vector temp{0.0f, 1.0f, 2.0f}; + const ktt::ArgumentId okId = tuner.AddArgumentVector(temp, ktt::ArgumentAccessType::WriteOnly, ktt::ArgumentMemoryLocation::Device, ktt::ArgumentManagementType::Framework, false); +} + +// Bad - referencing temporary buffer! +{ + std::vector temp{0.0f, 1.0f, 2.0f}; + const ktt::ArgumentId badId = tuner.AddArgumentVector(temp, ktt::ArgumentAccessType::WriteOnly, ktt::ArgumentMemoryLocation::Device, ktt::ArgumentManagementType::Framework, true); +} +``` + +#### Local memory arguments + +Local (shared in CUDA terminology) memory arguments are used to allocate corresponding amount of cache-like memory which is shared accross all items +(threads) inside a work-group. User has to specify the data type and total size of allocated memory in bytes. + +```cpp +// Allocate local memory for 4 floats and 2 integers. +const ktt::ArgumentId local1Id = tuner.AddArgumentLocal(16); +const ktt::ArgumentId local2Id = tuner.AddArgumentLocal(8); +``` + +#### Symbol arguments + +Symbol arguments were added in order to support CUDA arguments marked as `__constant__` or `__device__`. 
In other APIs, symbol arguments behave in +the same way as scalars since they do not require special handling. In case of CUDA, the name of symbol argument appearing inside CUDA kernel source +code has to be specified during argument addition to tuner. + +```cpp +const ktt::ArgumentId symbolId = tuner.AddArgumentSymbol(42, "magicNumber"); +``` ---- @@ -154,3 +255,5 @@ Todo #### Profiling #### Interoperability + +#### Python API diff --git a/Source/Tuner.h b/Source/Tuner.h index d9bec045..0d6c8692 100644 --- a/Source/Tuner.h +++ b/Source/Tuner.h @@ -390,7 +390,7 @@ class KTT_API Tuner /** @fn void SetReadOnlyArgumentCache(const bool flag) * Toggles caching of read-only kernel arguments which have management type set to framework. This can significantly speed up * tuning, since arguments are uploaded into compute API buffers only once. Caching is enabled by default. Users who wish to - * modify read-only arguments inside kernel launcher may wish to disable this behaviour. + * modify read-only arguments inside kernel launcher may want to disable this behaviour. * @param flag If true, read-only argument caching is enabled. It is disabled otherwise. 
*/ void SetReadOnlyArgumentCache(const bool flag); diff --git a/Source/Tuner.inl b/Source/Tuner.inl index 41d4f47c..5faf807e 100644 --- a/Source/Tuner.inl +++ b/Source/Tuner.inl @@ -72,8 +72,8 @@ ArgumentId Tuner::AddArgumentSymbol(const T& data, const std::string& symbolName template ArgumentDataType Tuner::DeriveArgumentDataType() const { - static_assert(std::is_trivially_copyable_v && !std::is_reference_v && !std::is_pointer_v && !std::is_null_pointer_v, - "Unsupported argument data type"); + static_assert(std::is_trivially_copyable_v, "Argument data type must be trivially copyable"); + static_assert(!std::is_reference_v && !std::is_pointer_v && !std::is_null_pointer_v, "Pointer and reference argument data types are not supported"); static_assert(!std::is_same_v, bool>, "Bool argument data type is not supported"); if constexpr (std::is_same_v, half>) From 43fa86213cae94a1157d4f0216f9d2dd15fb4cb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Filipovi=C4=8D?= Date: Thu, 18 Nov 2021 16:38:31 +0100 Subject: [PATCH 25/63] Update OnboardingGuide.md --- OnboardingGuide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index bb3a5410..ea965ffa 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -32,7 +32,7 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling ### Basic principles behind KTT -When leveraging autotuning, a programmer needs to think about which parts of their computation can be autotuned. For +When leveraging autotuning, a programmer needs to think about which properties of their computation can be autotuned. For example, an algorithm may contain for loop which can be unrolled. There are multiple options for unroll factor value of this loop, e.g., 1 (no unroll), 2, 4, 8. Picking the optimal value for a given device manually is difficult, therefore we can define a tuning parameter for the unroll factor with the specified values. 
Afterwards, we can launch four different From bf148d9eebf57f9116426fe33b81fb05d73dca35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Filipovi=C4=8D?= Date: Thu, 18 Nov 2021 16:43:29 +0100 Subject: [PATCH 26/63] Update OnboardingGuide.md --- OnboardingGuide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index ea965ffa..34b2ed45 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -55,6 +55,8 @@ code in a form of preprocessor definitions. E.g., when configuration which conta is launched, the following code is added at the beginning of kernel source code: `#define unroll_factor 2`. The definitions can be used to alter kernel functionality based on tuning parameter values. +TODO consider adding also code for the kernel implementing tuning parameter + ```cpp const size_t numberOfElements = 1024 * 1024; const ktt::DimensionVector globalDimensions(numberOfElements); From d3097d8a552cc451aa1e70fb0408c83e1b63d850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Filipovi=C4=8D?= Date: Fri, 19 Nov 2021 17:30:38 +0100 Subject: [PATCH 27/63] Update OnboardingGuide.md --- OnboardingGuide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index 34b2ed45..d3488394 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -199,8 +199,8 @@ const ktt::ArgumentId resultId = tuner.AddArgumentVector(result, ktt::ArgumentAc #### Local memory arguments -Local (shared in CUDA terminology) memory arguments are used to allocate corresponding amount of cache-like memory which is shared accross all items -(threads) inside a work-group. User has to specify the data type and total size of allocated memory in bytes. +Local (shared in CUDA terminology) memory arguments are used to allocate corresponding amount of cache-like memory which is shared across all work-items +(threads) inside a work-group (thread block). 
User has to specify the data type and total size of allocated memory in bytes. ```cpp // Allocate local memory for 4 floats and 2 integers. From 4c7eced9353ba068f3bf05f7b717fc5661040491 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Mon, 22 Nov 2021 08:51:56 +0100 Subject: [PATCH 28/63] * Initial work on kernel definition section in onboarding guide --- OnboardingGuide.md | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index d3488394..c5cffdd8 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -210,7 +210,7 @@ const ktt::ArgumentId local2Id = tuner.AddArgumentLocal(8); #### Symbol arguments -Symbol arguments were added in order to support CUDA arguments marked as `__constant__` or `__device__`. In other APIs, symbol arguments behave in +Symbol arguments were introduced in order to support CUDA arguments marked as `__constant__` or `__device__`. In other APIs, symbol arguments behave in the same way as scalars since they do not require special handling. In case of CUDA, the name of symbol argument appearing inside CUDA kernel source code has to be specified during argument addition to tuner. @@ -220,13 +220,23 @@ const ktt::ArgumentId symbolId = tuner.AddArgumentSymbol(42, "magicNumber"); ---- -### Creating and running kernel +### Kernel definitions and kernels -### Tuning the kernel +Before kernel can be launched with KTT, its source must be loaded into tuner. This is achieved by creating kernel definition. During its creation, +we specify kernel function name and kernel source. The source can be added either from string or from file. Next, we specify default global +(NDrange / grid) and local (work-group / block) sizes. The sizes are specified with KTT structure `DimensionVector` which supports up to three +dimensions. When a kernel is launched during tuning, the thread sizes chosen during definition creation will be used. 
There are ways to modify these +sizes which will be covered later. For CUDA API, addition of templated kernels is supported as well. When creating definition, it is possible to specify +types that should be used to instantiate kernel function from template. When we need to instantiate the same kernel template with different types, we do +that by adding multiple kernel definitions with corresponding types which are then handled independently. -### Checking kernel results -### Using kernel launchers + +### Tuning parameters and constraints + +### Kernel output validation + +### Kernel launchers #### Motivation example From c55f0f2df8a20076ddb4bce9e23ac6653f72c356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 22 Nov 2021 11:00:42 +0100 Subject: [PATCH 29/63] * More work on onboarding guide * Finished and moved section about kernels above kernel arguments * Added mention of unified memory arguments * Added kernel example with tuning parameter --- OnboardingGuide.md | 84 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index c5cffdd8..73015818 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -55,14 +55,18 @@ code in a form of preprocessor definitions. E.g., when configuration which conta is launched, the following code is added at the beginning of kernel source code: `#define unroll_factor 2`. The definitions can be used to alter kernel functionality based on tuning parameter values. -TODO consider adding also code for the kernel implementing tuning parameter +In the code snippet below, we create a kernel definition by specifying the name of kernel function and path to its source file. We also define +its default global and local dimensions (e.g., size of ND-range and work-group in OpenCL, size of grid and block in CUDA). We use provided +kernel definition handle to create kernel. 
We can also specify custom name for the kernel which is used e.g., for logging purposes. Afterwards, +we can use the kernel handle to define tuning parameter and launch autotuning. The step of creating kernel definition and kernel separately may +seem redundant at first, but it plays important role during more complex use cases that will be covered later. ```cpp const size_t numberOfElements = 1024 * 1024; const ktt::DimensionVector globalDimensions(numberOfElements); const ktt::DimensionVector localDimensions(64); -const ktt::KernelDefinitionId definition = tuner.AddKernelDefinitionFromFile("kernelName", "kernelFile.cl", globalDimensions, localDimensions); +const ktt::KernelDefinitionId definition = tuner.AddKernelDefinitionFromFile("computeStuff", "kernelFile.cl", globalDimensions, localDimensions); const ktt::KernelId kernel = tuner.CreateSimpleKernel("TestKernel", definition); /* Initialize kernel input and output */ @@ -71,11 +75,24 @@ tuner.AddParameter(kernel, "unroll_factor", std::vector{1, 2, 4, 8}); tuner.Tune(kernel); ``` -In the code snippet above, we create a kernel definition by specifying the name of kernel function and path to its source file. We also define -its default global and local dimensions (e.g., size of ND-range and work-group in OpenCL, size of grid and block in CUDA). We use provided -kernel definition handle to create kernel. We can also specify custom name for the kernel which is used e.g., for logging purposes. Afterwards, -we can use the kernel handle to define tuning parameter and launch autotuning. The step of creating kernel definition and kernel separately may -seem redundant at first, but it plays important role during more complex use cases that will be covered later. +The next snippet demonstrates how our previously defined tuning parameter could be used to alter computation inside kernel. + +```cpp +__kernel void computeStuff(__global float* input, int itemsPerThread, __global float* output) +{ + ... 
+ + #if unroll_factor > 1 + #pragma unroll unroll_factor + #endif + for (int i = 0; i < itemsPerThread; i++) + { + // do some computation + } + + ... +} +``` ---- @@ -90,6 +107,9 @@ more. Note that the assigned indices remain the same when autotuning application when the hardware configuration is changed (e.g., new device is added, old device is removed, device driver is reinstalled). Also note, that the indices may not be the same across multiple compute APIs (e.g., index for the same device may be different under OpenCL and CUDA). +The code below demonstrates how information about all available OpenCL platforms and devices is retrieved from KTT. In this case, the tuner is created +for the first device on the first platform (both platform and device index is 0). + ```cpp ktt::Tuner tuner(0, 0, ktt::ComputeApi::OpenCL); @@ -107,8 +127,39 @@ for (const auto& platform : platforms) } ``` -The code above demonstrates how information about all available OpenCL platforms and devices is retrieved from KTT. In this case, the tuner is created -for the first device on the first platform (both platform and device index is 0). +---- + +### Kernel definitions and kernels + +Before kernel can be launched via KTT, its source must be loaded into tuner. This is achieved by creating kernel definition. During its creation, +we specify kernel function name and kernel source. The source can be added either from string or from file. Next, we specify default global +(NDrange / grid) and local (work-group / block) sizes. The sizes are specified with KTT structure `DimensionVector` which supports up to three +dimensions. When a kernel is launched during tuning, the thread sizes chosen during definition creation will be used. There are ways to launch kernels +with different than default sizes which will be covered later. For CUDA API, addition of templated kernels is supported as well. 
When creating +definition, it is possible to specify types that should be used to instantiate kernel function from template. When we need to instantiate the same +kernel template with different types, we do that by adding multiple kernel definitions with corresponding types which are then handled independently. + +Once we have kernel definitions, we can create kernels from them. It is possible to create a simple kernel which only uses one definition as well as +composite kernel which uses multiple definitions. Usage of composite kernels is useful for computations which require launching of multiple kernel +functions in order to compute the result. In this case it is also necessary to define kernel launcher which is a function that tells tuner in which +order and how many times each kernel function is launched. Kernel launchers are covered in detail in their own section. + +Note that KTT terminology regarding kernel definitions and kernels differs slightly from regular compute APIs. KTT kernel definition roughly +corresponds to a single kernel function (also called kernel in e.g., OpenCL or CUDA). KTT kernel corresponds to a specific computation which uses +one or more kernel functions and for which it is possible to define tuning parameters. KTT framework allows kernel definitions to be shared across +multiple kernels (i.e., the same kernel function can be used in multiple computations). 
+ +```cpp +// Create convolution kernel, utilizes single kernel function +const ktt::KernelDefinitionId definition = tuner.AddKernelDefinitionFromFile("conv", kernelFile, gridSize, blockSize); +const ktt::KernelId kernel = tuner.CreateSimpleKernel("Convolution", definition); + +// Create kernel which performs radix sort, utilizes 3 separate kernel functions +const ktt::KernelDefinitionId definition0 = tuner.AddKernelDefinitionFromFile("reduce", kernelFile, gridSize, blockSize); +const ktt::KernelDefinitionId definition1 = tuner.AddKernelDefinitionFromFile("top_scan", kernelFile, gridSize, blockSize); +const ktt::KernelDefinitionId definition2 = tuner.AddKernelDefinitionFromFile("bottom_scan", kernelFile, gridSize, blockSize); +const ktt::KernelId kernel = tuner.CreateCompositeKernel("Sort", {definition0, definition1, definition2}); +``` ---- @@ -162,7 +213,8 @@ configurations. Setting correct access type can therefore lead to better tuning Next, it is possible to decide memory location from which the buffer is accessed by kernel - the two main options are host memory and device memory. Users may wish to choose different memory depending on the type of device used for autotuning (e.g., host memory for CPUs, device memory for dedicated GPUs). For host memory, it is possible to use zero-copy option, which makes kernel access the argument data directly, instead of creating -separate buffer and thus reduce memory usage. +separate buffer and thus reduce memory usage. For CUDA and OpenCL 2.0, there exists one additional memory location option - unified. Unified memory +buffers can be accessed both from host and kernel side, relying on device driver to take care of migrating the data automatically. Management type option specifies whether buffer management is handled automatically by the tuner (e.g., write arguments are automatically reset to initial state before new kernel configuration is launched, buffers are created and deleted automatically) or by the user. 
In some advanced cases, @@ -220,18 +272,6 @@ const ktt::ArgumentId symbolId = tuner.AddArgumentSymbol(42, "magicNumber"); ---- -### Kernel definitions and kernels - -Before kernel can be launched with KTT, its source must be loaded into tuner. This is achieved by creating kernel definition. During its creation, -we specify kernel function name and kernel source. The source can be added either from string or from file. Next, we specify default global -(NDrange / grid) and local (work-group / block) sizes. The sizes are specified with KTT structure `DimensionVector` which supports up to three -dimensions. When a kernel is launched during tuning, the thread sizes chosen during definition creation will be used. There are ways to modify these -sizes which will be covered later. For CUDA API, addition of templated kernels is supported as well. When creating definition, it is possible to specify -types that should be used to instantiate kernel function from template. When we need to instantiate the same kernel template with different types, we do -that by adding multiple kernel definitions with corresponding types which are then handled independently. 
- - - ### Tuning parameters and constraints ### Kernel output validation From 521adee97b7e644511b5cacc3578caf49c91c1ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 22 Nov 2021 12:22:59 +0100 Subject: [PATCH 30/63] * Added tuning parameters section to guide --- OnboardingGuide.md | 65 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index 73015818..2270bba7 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -27,7 +27,10 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Vector arguments](#vector-arguments) * [Local memory arguments](#local-memory-arguments) * [Symbol arguments](#symbol-arguments) - +* [Tuning parameters](#tuning-parameters) + * [Parameter constraints](#parameter-constraints) + * [Parameter groups](#parameter-groups) + * [Thread modifiers](#thread-modifiers) ---- ### Basic principles behind KTT @@ -272,21 +275,67 @@ const ktt::ArgumentId symbolId = tuner.AddArgumentSymbol(42, "magicNumber"); ---- -### Tuning parameters and constraints +### Tuning parameters -### Kernel output validation +Tuning parameters in KTT can be either unsigned integers or floats. When defining new parameter, we need to specify its name (i.e., the name through +which it can be referenced in kernel source) and values. With addition of more tuning parameters, the size of tuning space grows exponentially as we +need to explore all parameter combinations. KTT provides two features for users to slow down the tuning space growth. 
-### Kernel launchers +```cpp +// We add 4 different parameters, the size of tuning space is 40 (5 * 2 * 4 * 1) +tuner.AddParameter(kernel, "unroll_factor", std::vector<uint64_t>{1, 2, 4, 8, 16}); +tuner.AddParameter(kernel, "use_constant_memory", std::vector<uint64_t>{0, 1}); +tuner.AddParameter(kernel, "vector_type", std::vector<uint64_t>{1, 2, 4, 8}); +tuner.AddParameter(kernel, "float_value", std::vector<double>{1.0}); +``` -#### Motivation example +#### Parameter constraints -#### Launcher implementation +The first option is tuning constraints. Through constraints, it is possible to tell tuner to skip generating configurations for certain combinations +of parameters. The constraint is a function which receives values for the specified parameters on input and decides whether that combination should +be launched. User can choose which parameters are evaluated by specific constraint. Note that currently, it is possible to add constraints only +between integer parameters. + +```cpp +// We add 3 different parameters, the size of tuning space is 40 (5 * 2 * 4) +tuner.AddParameter(kernel, "unroll_factor", std::vector<uint64_t>{1, 2, 4, 8, 16}); +tuner.AddParameter(kernel, "vectorized_soa", std::vector<uint64_t>{0, 1}); +tuner.AddParameter(kernel, "vector_type", std::vector<uint64_t>{1, 2, 4, 8}); + +// We add constraint between 2 parameters, reducing size of tuning space from 40 to 35 (vectorized SoA is used only for vector types, +// constraint disables all configurations where vector_type == 1 and vectorized_soa == 1) +auto vectorizedSoA = [](const std::vector<uint64_t>& values) {return values[0] > 1 || values[1] != 1;}; +tuner.AddConstraint(kernel, {"vector_type", "vectorized_soa"}, vectorizedSoA); +``` + +#### Parameter groups -### Using composite kernels +The second option is tuning parameter groups. This option is mainly useful for composite kernels with certain tuning parameters only affecting one +kernel definition inside the kernel. 
For example, if we have composite kernel with 2 kernel definitions and each definition is affected by 3 parameters +(we have 6 parameters in total), and we know that each parameter only affects one specific definition, we can evaluate the two parameter groups +independently. This can greatly reduce the total number of evaluated configurations (e.g., if each of the parameters has 2 different values, total +number of configurations is 64 -- 2^6; with usage of parameter groups, it is only 16 -- 2^3 + 2^3). It is also possible to combine usage of +constraints and groups, however constraint can only be added between parameters which belong into the same group. + +```cpp +// We add 4 different parameters split into 2 independent groups, reducing size of tuning space from 16 to 8 +tuner.AddParameter(kernel, "a1", std::vector{0, 1}, "group_a"); +tuner.AddParameter(kernel, "a2", std::vector{0, 1}, "group_a"); +tuner.AddParameter(kernel, "b1", std::vector{0, 1}, "group_b"); +tuner.AddParameter(kernel, "b2", std::vector{0, 1}, "group_b"); +``` + +#### Thread modifiers + +Todo + +### Kernel output validation + +### Kernel launchers #### Motivation example -#### Composite kernel implementation +#### Launcher implementation ### Stop conditions From a896b1292d4357d3a4fe62e9df986b4e61ec6b77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 24 Nov 2021 13:04:31 +0100 Subject: [PATCH 31/63] * Added sections about thread modifiers and output validation to guide --- OnboardingGuide.md | 83 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index 2270bba7..6ca1ec08 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -22,6 +22,7 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Basic principles behind KTT](#basic-principles-behind-ktt) * [Offline tuning of a single kernel](#offline-tuning-of-a-single-kernel) * 
[Initialization of KTT](#initialization-of-ktt) +* [Kernel definitions and kernels](#kernel-definitions-and-kernels) * [Kernel arguments](#kernel-arguments) * [Scalar arguments](#scalar-arguments) * [Vector arguments](#vector-arguments) @@ -31,6 +32,10 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Parameter constraints](#parameter-constraints) * [Parameter groups](#parameter-groups) * [Thread modifiers](#thread-modifiers) +* [Output validation](#output-validation) + * [Reference computation](#reference-computation) + * [Reference kernel](#reference-kernel) + * [Validation customization](#validation-customization) ---- ### Basic principles behind KTT @@ -327,9 +332,83 @@ tuner.AddParameter(kernel, "b2", std::vector{0, 1}, "group_b"); #### Thread modifiers -Todo +Some tuning parameters can affect global or local number of threads a kernel is launched with. For example, we may have a parameter which affects +amount of work performed by each thread. The more work each thread does, the fewer (global) threads we need in total to perform computation. In KTT, +we can define such dependency via thread modifiers. The thread modifier is a function which takes a default thread size and changes it based on +values of specified tuning parameters. When adding a new modifier, we specify kernel and its definitions whose thread sizes are affected by the +modifier. Then we choose whether modifier affects global or local size, its dimension and names of tuning parameters tied to modifier. The modifier +function can be specified through enum which supports certain simple functions such as multiplication or addition, but allows only one tuning parameter +to be tied to modifier. Another option is using a custom user function which can be more complex and support multiple tuning parameters. It is +possible to create multiple thread modifiers for the same thread type (global / local) and dimension. 
In that case, the modifiers will be applied +in the order of their addition to tuner. Similar to constraints, it is possible to tie only integer parameters to thread modifiers. -### Kernel output validation +```cpp +tuner.AddParameter(kernel, "block_size", std::vector{32, 64, 128, 256}); + +// block_size parameter decides the number of local threads. +tuner.AddThreadModifier(kernel, {definition}, ktt::ModifierType::Local, ktt::ModifierDimension::X, "block_size", ktt::ModifierAction::Multiply); + +// Larger block size means that the grid size should be smaller, so the total number of threads remains the same. Therefore we divide the grid +// size by block_size parameter. +tuner.AddThreadModifier(kernel, {definition}, ktt::ModifierType::Global, ktt::ModifierDimension::X, {"block_size"}, + [](const uint64_t defaultSize, const std::vector& parameters) +{ + return defaultSize / parameters[0]; +}); +``` + +---- + +### Output validation + +When developing kernels with large number of tuning parameters, it is often necessary to check whether each configuration computes the correct output. +KTT provides a way to automatically compare output from tuned kernel configurations to reference output. That means each time a kernel configuration is +finished, the contents of its output buffer are transferred into host memory and then compared to precomputed reference output. The reference can be +computed in two ways. + +#### Reference computation + +Reference computation is a function which computes the reference output in host code and stores the result in the buffer provided by KTT. The size of +that buffer matches the size of validated kernel output buffer. When defining reference computation, we only need to provide the function and the id +of validated output argument. 
+ +```cpp +tuner.SetReferenceComputation(resultArgument, [&a, &b](void* buffer) +{ + float* result = static_cast<float*>(buffer); + + for (size_t i = 0; i < a.size(); ++i) + { + result[i] = a[i] + b[i]; + } +}); +``` + +#### Reference kernel + +Another option is to compute reference result with a kernel. In this case, we need to provide the id of reference kernel and the id of validated output +argument. It is possible for reference kernel to have tuning parameters as well, so there is an option to choose specific reference configuration. If +reference kernel has no parameters, empty configuration can be provided. The reference kernel id may be the same as tuned kernel. + +```cpp +tuner.SetReferenceKernel(outputId, referenceKernel, ktt::KernelConfiguration()); +``` + +#### Validation customization + +There are certain ways to further customize how validation is performed. By default, the entire output buffer is validated. If validating only +a portion of the buffer is sufficient, setting a custom validation range is possible. In this case, the size of reference buffer provided by KTT +for reference computation validation will be automatically adjusted as well. + +Validation works out-of-the-box for integer and floating-point argument data types. In case of floating-point arguments, it is possible to choose +validation method (e.g., comparing each element separately or summing up all elements and comparing the result) and tolerance threshold since +different kernel configurations may have different accuracy of computing floating-point output. + +If arguments with user-defined types are used, it is necessary to define value comparator. Comparator is a function which receives two elements +with the specified type on input and decides whether they are equal. Value comparator can optionally be also used for integer and floating-point +data types to override the default comparison functionality.
+ +---- ### Kernel launchers From e4a97d92bb4980d5822988b7e25c0d2f24e96d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Thu, 25 Nov 2021 11:43:52 +0100 Subject: [PATCH 32/63] * Added section about kernel launchers to guide --- OnboardingGuide.md | 48 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index 6ca1ec08..2b61b9a1 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -36,6 +36,8 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Reference computation](#reference-computation) * [Reference kernel](#reference-kernel) * [Validation customization](#validation-customization) +* [Kernel launchers](#kernel-launchers) +* [Kernel running and tuning modes](#kernel-running-and-tuning-modes) ---- ### Basic principles behind KTT @@ -335,12 +337,14 @@ tuner.AddParameter(kernel, "b2", std::vector{0, 1}, "group_b"); Some tuning parameters can affect global or local number of threads a kernel is launched with. For example we may have a parameter which affects amount of work performed by each thread. The more work each thread does, the less (global) threads we need in total to perform computation. In KTT, we can define such dependency via thread modifiers. The thread modifier is a function which takes a default thread size and changes it based on -values of specified tuning parameters. When adding a new modifier, we specify kernel and its definitions whose thread sizes are affected by the -modifier. Then we choose whether modifier affects global or local size, its dimension and names of tuning parameters tied to modifier. The modifier -function can be specified through enum which supports certain simple functions such as multiplication or addition, but allows only one tuning parameter -to be tied to modifier. 
Another option is using a custom user function which can be more complex and support multiple tuning parameters. It is -possible to create multiple thread modifiers for the same thread type (global / local) and dimension. In that case, the modifiers will be applied -in the order of their addition to tuner. Similar to constraints, it is possible to tie only integer parameters to thread modifiers. +values of specified tuning parameters. + +When adding a new modifier, we specify kernel and its definitions whose thread sizes are affected by the modifier. Then we choose whether modifier +affects global or local size, its dimension and names of tuning parameters tied to modifier. The modifier function can be specified through enum +which supports certain simple functions such as multiplication or addition, but allows only one tuning parameter to be tied to modifier. Another +option is using a custom user function which can be more complex and support multiple tuning parameters. It is possible to create multiple thread +modifiers for the same thread type (global / local) and dimension. In that case, the modifiers will be applied in the order of their addition to +tuner. Similar to constraints, it is possible to tie only integer parameters to thread modifiers. ```cpp tuner.AddParameter(kernel, "block_size", std::vector{32, 64, 128, 256}); @@ -412,9 +416,33 @@ data types to override the default comparison functionality. ### Kernel launchers -#### Motivation example +Kernel launchers enable users to customize how kernels are run inside KTT. Launcher is a function which defines what happens when kernel under +certain configuration is launched via tuner. For simple kernels, default launcher is provided by KTT. This launcher simply runs the kernel function +tied to kernel definition and waits until its finished. 
If a computation requires launching a kernel function multiple times, running some part in +host code or using multiple kernel functions, then user needs to define their own launcher. In case of composite kernels, defining custom launcher is +mandatory, since KTT does not know the order in which the individual kernel functions should be run. + +Kernel launcher has access to low-level KTT compute interface on input. Through this interface, it is possible to launch kernel functions, modify +buffers and retrieve the current kernel configuration. This makes it possible for tuning parameters to affect computation behaviour in host code in +addition to modifying kernel behavior. The modifications done to kernel arguments and buffers inside a launcher are isolated to the specific kernel +configuration launch. Therefore, it is not necessary to reset arguments to their original values for each kernel launch, it is done automatically by +the tuner. The only exception to this is usage of user-managed vector arguments, those have to be reset manually. + +```cpp +// This launcher is equivalent in functionality to the default simple kernel launcher provided by KTT. +tuner.SetLauncher(kernel, [definition](ktt::ComputeInterface& interface) +{ + interface.RunKernel(definition); +}); +``` + +---- + +### Kernel running and tuning modes -#### Launcher implementation +Todo + +---- ### Stop conditions @@ -428,6 +456,8 @@ data types to override the default comparison functionality. #### Example +### Utility functions + ### Advanced topics #### Asynchronous execution @@ -437,3 +467,5 @@ data types to override the default comparison functionality. 
#### Interoperability #### Python API + +### Feature parity across compute APIs From 5028497a6ef459636acf4a92093d8a6f38109afc Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Fri, 26 Nov 2021 12:14:04 +0100 Subject: [PATCH 33/63] * Initial work on kernel running section in guide --- OnboardingGuide.md | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index 2b61b9a1..a19d328c 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -38,6 +38,8 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Validation customization](#validation-customization) * [Kernel launchers](#kernel-launchers) * [Kernel running and tuning modes](#kernel-running-and-tuning-modes) + * [Offline tuning](#offline-tuning) + * [Online tuning](#online-tuning) ---- ### Basic principles behind KTT @@ -322,7 +324,7 @@ kernel definition inside the kernel. For example, if we have composite kernel wi (we have 6 parameters in total), and we know that each parameter only affects one specific definition, we can evaluate the two parameter groups independently. This can greatly reduce the total number of evaluated configurations (e.g., if each of the parameters has 2 different values, total number of configurations is 64 -- 2^6; with usage of parameter groups, it is only 16 -- 2^3 + 2^3). It is also possible to combine usage of -constraints and groups, however constraint can only be added between parameters which belong into the same group. +constraints and groups, however constraints can only be added between parameters which belong into the same group. ```cpp // We add 4 different parameters split into 2 independent groups, reducing size of tuning space from 16 to 8 @@ -440,32 +442,46 @@ tuner.SetLauncher(kernel, [definition](ktt::ComputeInterface& interface) ### Kernel running and tuning modes -Todo +KTT supports kernel tuning as well as ordinary kernel running. 
Running kernels via tuner is often more convenient compared to directly using certain +compute API, since a lot of boilerplate code such as compute queue management and kernel source compilation is abstracted. It is possible to specify +configuration under which the kernel is run, so the workflow where kernel is first tuned and then launched repeatedly with the best configuration is +supported. It is possible to transfer kernel output into host memory by utilizing `BufferOutputDescriptor` structure. When creating this structure, +we need to specify id of buffer that should be transferred and pointer to memory where the buffer contents should be saved. It is possible to pass +multiple such structures into kernel running method -- each structure corresponds to a single buffer that should be transferred. After kernel run is +finished, `KernelResult` structure is returned. This structure contains detailed information about kernel run such as execution times of individual +kernel functions, status of computation (i.e., if it finished successfully) and more. ----- +```cpp +std::vector output(numberOfElements, 0.0f); -### Stop conditions +// Add kernel and buffers to tuner +... 
-### Searchers +const auto result = tuner.Run(kernel, {}, {ktt::BufferOutputDescriptor(outputId, output.data())}); +``` -### Dynamic autotuning +#### Offline tuning -#### Differences over offline tuning +Todo -#### Handling kernel arguments +#### Online tuning -#### Example +Todo -### Utility functions +---- + +### Stop conditions -### Advanced topics +### Searchers + +### Utility functions -#### Asynchronous execution +### Asynchronous execution -#### Profiling +### Profiling -#### Interoperability +### Interoperability -#### Python API +### Python API ### Feature parity across compute APIs From 43332fd01bd558e27be7f0781643da8a5e54a334 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Mon, 29 Nov 2021 16:11:43 +0100 Subject: [PATCH 34/63] * Completed kernel tuning sections in guide --- OnboardingGuide.md | 50 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index a19d328c..5a18c20a 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -40,6 +40,9 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Kernel running and tuning modes](#kernel-running-and-tuning-modes) * [Offline tuning](#offline-tuning) * [Online tuning](#online-tuning) + * [Accuracy of tuning results](#accuracy-of-tuning-results) +* [Stop conditions](#stop-conditions) +* [Searchers](#searchers) ---- ### Basic principles behind KTT @@ -323,7 +326,7 @@ The second option are tuning parameter groups. This option is mainly useful for kernel definition inside the kernel. For example, if we have composite kernel with 2 kernel definitions and each definition is affected by 3 parameters (we have 6 parameters in total), and we know that each parameter only affects one specific definition, we can evaluate the two parameter groups independently. 
This can greatly reduce the total number of evaluated configurations (e.g., if each of the parameters has 2 different values, total -number of configurations is 64 -- 2^6; with usage of parameter groups, it is only 16 -- 2^3 + 2^3). It is also possible to combine usage of +number of configurations is 64 - 2^6; with usage of parameter groups, it is only 16 - 2^3 + 2^3). It is also possible to combine usage of constraints and groups, however constraints can only be added between parameters which belong into the same group. ```cpp @@ -442,12 +445,12 @@ tuner.SetLauncher(kernel, [definition](ktt::ComputeInterface& interface) ### Kernel running and tuning modes -KTT supports kernel tuning as well as ordinary kernel running. Running kernels via tuner is often more convenient compared to directly using certain +KTT supports kernel tuning as well as ordinary kernel running. Running kernels via tuner is often more convenient compared to directly using specific compute API, since a lot of boilerplate code such as compute queue management and kernel source compilation is abstracted. It is possible to specify configuration under which the kernel is run, so the workflow where kernel is first tuned and then launched repeatedly with the best configuration is supported. It is possible to transfer kernel output into host memory by utilizing `BufferOutputDescriptor` structure. When creating this structure, we need to specify id of buffer that should be transferred and pointer to memory where the buffer contents should be saved. It is possible to pass -multiple such structures into kernel running method -- each structure corresponds to a single buffer that should be transferred. After kernel run is +multiple such structures into kernel running method - each structure corresponds to a single buffer that should be transferred. After kernel run is finished, `KernelResult` structure is returned. 
This structure contains detailed information about kernel run such as execution times of individual kernel functions, status of computation (i.e., if it finished successfully) and more. @@ -462,18 +465,55 @@ const auto result = tuner.Run(kernel, {}, {ktt::BufferOutputDescriptor(outputId, #### Offline tuning -Todo +During offline tuning, tuner runs kernel configurations one after another without user interference. This mode therefore separates finding the best +configuration and subsequent usage of tuned kernel in applications. This enables tuner to implement certain optimizations which would otherwise not be +possible - for example caching of read-only buffers over multiple kernel runs in different configurations. By default, the entire configuration space is +explored during offline tuning. This can be altered by leveraging stop conditions, which are described in detail in the next section. + +Kernel output cannot be retrieved during offline tuning, because all of the configurations are launched within a single API call. The list of `KernelResult` +structures corresponding to all tested configurations is returned after the tuning ends. These results can be saved either in XML or JSON format for +further analysis. + +```cpp +const std::vector results = tuner.Tune(kernel); +tuner.SaveResults(results, "TuningOutput", ktt::OutputFormat::JSON); +``` #### Online tuning -Todo +Online tuning combines kernel tuning with regular running. Similar to kernel running, we can retrieve and use output from each kernel run. However, we +do not specify the configuration under which kernel is run, but tuner launches a different configuration each time a kernel is launched, similar to +offline tuning. This mode does not separate tuning and usage of tuned kernel, but rather enables both to happen simultaneously. 
This can be beneficial +in situations where employment of offline tuning is impractical (e.g., when the size of kernel input is frequently changed which causes the optimal +configuration to change as well). If kernel is launched with online tuning after all configurations were already explored, the best configuration is used. + +```cpp +std::vector output(numberOfElements, 0.0f); + +// Add kernel and buffers to tuner +... + +const auto result = tuner.TuneIteration(kernel, {ktt::BufferOutputDescriptor(outputId, output.data())}); +``` + +#### Accuracy of tuning results + +In order to identify the best configuration accurately, it is necessary to launch all configurations under the same conditions so that metrics such as +kernel function execution times can be objectively compared. This means that tuned kernels should be launched on the target device in isolation. +Launching multiple kernels concurrently while tuning is performed may cause inaccuracies in collected data. Furthermore, if size of kernel input is changed +(e.g., during online tuning), tuning should be restarted from the beginning, since the size of input often affects the best configuration. The restart can +be achieved with `ClearData` API method.
---- ### Stop conditions +Todo + ### Searchers +Todo + ### Utility functions ### Asynchronous execution From 05020d56ae35312feb938bfd298081bd8ca237e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 30 Nov 2021 11:48:30 +0100 Subject: [PATCH 35/63] * Added searchers and stop conditions to guide --- OnboardingGuide.md | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index 5a18c20a..63c2a4e3 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -20,7 +20,7 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling ### Table of contents * [Basic principles behind KTT](#basic-principles-behind-ktt) -* [Offline tuning of a single kernel](#offline-tuning-of-a-single-kernel) +* [Simple tuning example](#simple-tuning-example) * [Initialization of KTT](#initialization-of-ktt) * [Kernel definitions and kernels](#kernel-definitions-and-kernels) * [Kernel arguments](#kernel-arguments) @@ -62,7 +62,7 @@ parameters. KTT framework offers functionality to deal with this problem which w ---- -### Offline tuning of a single kernel +### Simple tuning example Offline kernel tuning is the simplest use case of KTT framework. It involves creating a kernel, specifying its arguments (data), defining tuning parameters and then launching autotuning. During autotuning, tuning parameter values are propagated to kernel source @@ -508,11 +508,30 @@ be achieved with `ClearData` API method. ### Stop conditions -Todo +Stop conditions can be used to stop offline tuning when certain criteria are met. The stop condition is initialized before offline tuning begins and updated +after each tested configuration. Within the update, it has access to `KernelResult` structure whose data it can utilize to check its criteria.
KTT currently +offers the following stop conditions: +* ConfigurationCount - tuning stops after reaching the specified number of tested configurations. +* ConfigurationDuration - tuning stops after a configuration with execution time below the specified threshold was found. +* ConfigurationFraction - tuning stops after exploring the specified fraction of configuration space. +* TuningDuration - tuning stops after the specified duration has passed. + +The stop condition API is public, which means that users can also create their own stop conditions. All of the built-in conditions are implemented in public +API, so it is possible to modify them as well. ### Searchers -Todo +Searchers decide the order in which kernel configurations are selected during offline and online tuning. Having an efficient searcher can significantly reduce the +time it takes to find well-performing configurations. Similar to a stop condition, a searcher is initialized before tuning begins and is updated after each tested +configuration with access to `KernelResult` structure from the previous run. Searchers are assigned to kernels individually, so each kernel can have a different +searcher. The following searchers are available in KTT API: +* DeterministicSearcher - always explores configurations in the same order (provided that tuning parameters, order of their addition or their values were not changed). +* RandomSearcher - explores configurations in random order. +* McmcSearcher - utilizes Markov chain Monte Carlo method to predict well-performing configurations more accurately than random searcher. + +The searcher API is public, so users can implement their own searchers. The API also includes certain common utility methods to make the custom searcher implementation +easier. These include method to get random unexplored configuration or neighbouring configurations (configurations which differ in small amount of parameter values +compared to the specified configuration).
### Utility functions From c5729947797f2b5cdb6ee11acc13afeed4a20cac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 30 Nov 2021 12:15:57 +0100 Subject: [PATCH 36/63] * Added utility functions section to guide --- OnboardingGuide.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index 63c2a4e3..35c10e57 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -43,6 +43,8 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Accuracy of tuning results](#accuracy-of-tuning-results) * [Stop conditions](#stop-conditions) * [Searchers](#searchers) +* [Utility functions](#utility-functions) + ---- ### Basic principles behind KTT @@ -519,6 +521,8 @@ offers the following stop conditions: The stop condition API is public, which means that users can also create their own stop conditions. All of the built-in conditions are implemented in public API, so it is possible to modify them as well. +---- + ### Searchers Searchers decide the order in which kernel configurations are selected during offline and online tuning. Having an efficient searcher can significantly reduce the @@ -533,8 +537,24 @@ The searcher API is public, so users can implement their own searchers. The API easier. These include method to get random unexplored configuration or neighbouring configurations (configurations which differ in small amount of parameter values compared to the specified configuration). +---- + ### Utility functions +KTT provides many utility functions to further customize tuner behavior. The following list contains descriptions of certain functions which users may find handy +to use: +* `SetCompilerOptions` - sets options for kernel source code compiler used by compute API (e.g., NVRTC for CUDA). +* `SetGlobalSizeType` - compute APIs use different ways for specifying global thread size (e.g., grid size or ND-range size).
This method makes it possible to override +the global thread size format to the one used by the specified API. Usage of this method makes it easier to port programs between different compute APIs. +* `SetAutomaticGlobalSizeCorrection` - tuner automatically ensures that global thread size is divisible by local thread size. This is required by certain compute +APIs such as OpenCL. +* `SetKernelCacheCapacity` - changes size of cache for compiled kernels. KTT utilizes the cache to improve performance when the same kernel function with the same +configuration is launched multiple times (e.g., inside kernel launcher). +* `SetLoggingLevel` - controls the amount of logging information printed to output. Higher levels print more detailed information which is useful for debugging. +* `SetTimeUnit` - specifies time unit used for printing execution times. Affects both console output as well as kernel results serialized into XML or JSON. + +---- + ### Asynchronous execution ### Profiling From 59385012c4e9dfb6dcaae1510a9aac2f7b8177a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 1 Dec 2021 10:50:33 +0100 Subject: [PATCH 37/63] * Added profiling section and interoperability introduction to guide --- OnboardingGuide.md | 47 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index 35c10e57..fb8b1772 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -44,6 +44,11 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Stop conditions](#stop-conditions) * [Searchers](#searchers) * [Utility functions](#utility-functions) +* [Collecting profiling metrics](#collecting-profiling-metrics) + * [Interaction with online tuning and kernel running](#interaction-with-online-tuning-and-kernel-running) +* [Interoperability] + * [Custom compute library initialization](#custom-compute-library-initialization) + * [Asynchronous 
execution](#asynchronous-execution) ---- @@ -555,12 +560,50 @@ configuration is launched multiple times (e.g., inside kernel launcher). ---- -### Asynchronous execution +### Collecting profiling metrics -### Profiling +Apart from execution times, KTT can also collect other types of information from kernel runs. This includes low-level profiling metrics from kernel function +executions such as global memory utilization, number of executed instructions and more. These metrics can be utilized e.g., by searchers to find well-performing +configurations faster. The collection of profiling metrics is disabled by default as it changes the default tuning behaviour. In order to collect all profiling +metrics, it is usually necessary to run the same kernel function multiple times (the number increases when more metrics are collected). It furthermore requires +kernels to be run synchronously. Enabling profiling metrics collection thus decreases tuning performance. It is possible to mitigate performance impact by enabling +only certain metrics, which can be done through KTT API. + +Collection of profiling metrics is currently supported for Nvidia devices on CUDA backend and AMD devices on OpenCL backend. Intel devices are currently unsupported +due to lack of profiling library support. Profiling metrics can also be collected for composite kernels. Note however, that for AMD devices and newer Nvidia devices +(Turing and onwards), collection of metrics is restricted to a single kernel definition within a composite kernel due to profiling library limitations. + +#### Interaction with online tuning and kernel running + +When utilizing kernel running and online tuning, it is possible to further decrease performance impact of having to execute the same kernel function multiple times.
+Rather than performing all of the profiling runs at once, it is possible to split the profiling metric collection over multiple online tuning or kernel running API +function invocations and utilize output from each run. The intermediate `KernelResult` structures from such runs will not contain valid profiling counters, but still +have the remaining data accurate. Once the profiling for the current configuration is concluded, the final kernel result will contain valid profiling data. + +---- ### Interoperability +The KTT framework could originally be used only in isolation to create standalone programs which are focused on tuning a specific kernel. In recent versions, the API +was extended to also support tuner integration into larger software suites. There are multiple major features which contribute to this support. They are described +in this section. + +#### Custom compute library initialization + +Todo + +#### Asynchronous execution + +Todo + +---- + ### Python API +Todo + +---- + ### Feature parity across compute APIs + +Todo From 67b1825d67b359c85773307eaf346d61e25459fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 1 Dec 2021 14:43:27 +0100 Subject: [PATCH 38/63] * Finished interoperability section in guide --- OnboardingGuide.md | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index fb8b1772..eac9e91a 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -46,9 +46,13 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Utility functions](#utility-functions) * [Collecting profiling metrics](#collecting-profiling-metrics) * [Interaction with online tuning and kernel running](#interaction-with-online-tuning-and-kernel-running) -* [Interoperability] +* [Interoperability](#interoperability) * [Custom compute library initialization](#custom-compute-library-initialization) * [Asynchronous 
execution](#asynchronous-execution) + * [Lifetime of internal tuner structures](#lifetime-of-internal-tuner-structures) +* [Python API](#python-api) + * [Python limitations](#python-limitations) +* [Feature parity across compute APIs](#feature-parity-across-compute-apis) ---- @@ -590,16 +594,42 @@ in this section. #### Custom compute library initialization -Todo +By default, when tuner is created, it initializes its own internal compute API structures such as context, compute queues and buffers. It is however possible to +also use tuner with custom structures as well. This makes it possible to integrate tuner into libraries which need to perform their own compute API initialization. +During tuner initialization, we can pass `ComputeApiInitializer` structure to it, which contains our own context and compute queues. When adding a vector argument, +it is possible to pass our own compute buffer which will then be utilized by tuner. All of these structures will still remain under our own management, tuner will +simply reference them and use them when needed. Before releasing these structures, the tuner should be destroyed first, so it can perform proper cleanup (note that +tuner will not destroy the referenced structures on its own). #### Asynchronous execution -Todo +When performing tuning, all kernel function runs and buffer data transfers are synchronized. This is necessary to obtain accurate tuning data. Applications which +combine kernel tuning and kernel running have an option to enable asynchronous kernel launches and buffer transfers after tuning is completed. This can be achieved +by utilizing kernel launchers and compute interface. The compute interface API contains methods for asynchronous operations. They enable user to choose a compute +queue for launching an operation and return event id which can be later used to wait for the operation to complete. 
Note however, that kernel results returned from +asynchronous launches will contain inaccurate execution times, since the results may be returned before the asynchronous operation has finished. This feature should +only be utilized for kernel running, not tuning. + +#### Lifetime of internal tuner structures + +Internal KTT structures such as kernels, kernel definitions, arguments and configuration data have their lifetimes tied to tuner. Certain applications which utilize +tuner may prefer to remove certain structures on-the-fly to save memory. Currently, it is possible to remove kernels, kernel definitions, arguments and user-provided +compute queues from the tuner by specifying their ids. When removing a kernel, all of its associated data such as generated configurations, parameters and validation +data are removed as well. Note that it is not possible to remove structures which are referenced by other structures. E.g., when removing a kernel definition, user +has to make sure that all kernels which utilize that definition are removed first. ---- ### Python API +The native KTT API is available in C++. Users who prefer Python have an option to build KTT as Python module which can be then imported into Python. Majority of the +KTT API methods can be afterwards called directly from Python while still benefitting from performance of KTT module built in C++. It is also possible to provide +custom searcher and stop condition implementations directly in Python. Users can therefore take advantage of certain libraries available in Python but not in C++ for +more complex searcher implementations. Majority of functions, enums and classes have the same names and arguments as in C++. A small number of differences are +described in the followup subsection.
+#### Python limitations + Todo ---- From 0260ffdfd69e8b5e20904a14a2dd6950077b180b Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Fri, 3 Dec 2021 10:46:07 +0100 Subject: [PATCH 39/63] * Finished the final sections in guide --- OnboardingGuide.md | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index eac9e91a..e4bb7afb 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -625,15 +625,27 @@ has to make sure that all kernels which utilize that definition are removed firs The native KTT API is available in C++. Users who prefer Python have an option to build KTT as Python module which can be then imported into Python. Majority of the KTT API methods can be afterwards called directly from Python while still benefitting from performance of KTT module built in C++. It is also possible to provide custom searcher and stop condition implementations directly in Python. Users can therefore take advantage of certain libraries available in Python but not in C++ for -more complex searcher implementations. Majority of functions, enums and classes have the same names and arguments as in C++. A small number of differences are -described in the followup subsection. +more complex searcher implementations. Majority of functions, enums and classes have the same names and arguments as in C++. A small number of limitations is +described in the follow-up subsection. #### Python limitations -Todo +Major part of KTT API is available in Python. There are however certain features which are restricted to C++ API due to limitations in Python language and libraries. +They are the following: +* Templated methods - Python does not support templates, so there are separate versions of method for different data types instead (e.g., `AddArgumentVectorFloat`, +`AddArgumentVectorInt`). Addition of kernel arguments with custom types is not supported.
+* Custom library initialization - Custom context, compute queues and buffers cannot be used in Python. +* Methods which use void pointers (void*) in C++ API - Python does not have a direct equivalent to void* type. It is necessary to utilize low-level `ctypes` Python +module to be able to interact with these methods. ---- ### Feature parity across compute APIs -Todo +KTT framework aims to maintain feature parity across all of its supported compute APIs (OpenCL, CUDA and Vulkan). That means if a certain feature is supported in +KTT CUDA backend, it should also be available in OpenCL and Vulkan backends, provided that the feature is natively supported in those APIs. There are certain exceptions +to that: +* Vulkan backend - certain features are currently unsupported in Vulkan due to development time constraints. These include support for profiling metrics, unified +and zero-copy buffers and certain advanced buffer handling methods. The support for these features may still be added at later time. +* Unified memory in OpenCL - usage of unified OpenCL buffers requires support for OpenCL 2.0. Certain devices (e.g., Nvidia GPUs) still have this support unfinished. 
+* Templated kernel functions - templates are currently limited to CUDA kernels due to lack of support in other APIs From 97170c76cd905b7fb09a2f87bf14e08c40aee2f3 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Fri, 3 Dec 2021 12:08:50 +0100 Subject: [PATCH 40/63] * Expanded certain sections and fixed inconsistencies and errors in guide --- OnboardingGuide.md | 278 +++++++++++++++++++++++++-------------------- 1 file changed, 152 insertions(+), 126 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index e4bb7afb..e1b105fc 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -1,6 +1,6 @@ # Introduction to KTT -When optimizing performance of compute kernels, a programmer has to make a lot of decisions, such as which algorithm to +When optimizing performance of compute kernels, a programmer has to make a lot of decisions such as which algorithm to use, how to arrange data structures in memory, how to block data access to optimize caching or which factor to use for loop unrolling. Such decisions cannot be typically made in isolation - for example, when data layout in memory is changed, a different algorithm may perform better. Therefore, it is necessary to explore vast amount of combinations of optimization @@ -12,16 +12,16 @@ Naturally, in the simple use case, a batch script can be sufficient for autotuni usage of an autotuning framework can be beneficial, as it can automatically handle memory objects, detect errors in autotuned kernels or perform autotuning during program runtime. -Kernel Tuning Toolkit is a framework which allows autotuning of kernels written in CUDA, OpenCL or Vulkan. It provides unified -interface for those APIs, handles communication between host (CPU) and accelerator (GPU, Xeon Phi, etc.), checks results and -timing of tuned kernels, allows dynamic tuning during program runtime, profiling of autotuned kernels and more. 
+Kernel Tuning Toolkit is a framework which allows autotuning of compute kernels written in CUDA, OpenCL or Vulkan. It provides +unified interface for those APIs, handles communication between host (CPU) and accelerator (GPU, Xeon Phi, etc.), checks results +and timing of tuned kernels, allows dynamic (online) tuning during program runtime, profiling of autotuned kernels and more. ---- ### Table of contents * [Basic principles behind KTT](#basic-principles-behind-ktt) -* [Simple tuning example](#simple-tuning-example) -* [Initialization of KTT](#initialization-of-ktt) +* [Simple autotuning example](#simple-autotuning-example) +* [KTT initialization](#ktt-initialization) * [Kernel definitions and kernels](#kernel-definitions-and-kernels) * [Kernel arguments](#kernel-arguments) * [Scalar arguments](#scalar-arguments) @@ -44,7 +44,7 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling * [Stop conditions](#stop-conditions) * [Searchers](#searchers) * [Utility functions](#utility-functions) -* [Collecting profiling metrics](#collecting-profiling-metrics) +* [Profiling metrics collection](#profiling-metrics-collection) * [Interaction with online tuning and kernel running](#interaction-with-online-tuning-and-kernel-running) * [Interoperability](#interoperability) * [Custom compute library initialization](#custom-compute-library-initialization) @@ -60,7 +60,7 @@ timing of tuned kernels, allows dynamic tuning during program runtime, profiling When leveraging autotuning, a programmer needs to think about which properties of their computation can be autotuned. For example, an algorithm may contain for loop which can be unrolled. There are multiple options for unroll factor value -of this loop, e.g., 1 (no unroll), 2, 4, 8. Picking the optimal value for a given device manually is difficult, therefore +of this loop, e.g., 1 (no unroll), 2, 4, 8. 
Picking the optimal value for a certain device manually is difficult, therefore we can define a tuning parameter for the unroll factor with the specified values. Afterwards, we can launch four different versions of our computation to see which value performs best. @@ -69,11 +69,11 @@ to definition of multiple tuning parameters. For example we may have the previou values {1, 2, 4, 8} and another parameter controlling data arrangement in memory with values {0, 1}. Combinations of these parameters now define 8 different versions of computation. One such combination is called tuning configuration. Together, all tuning configurations define configuration space. The size of the space grows exponentially with addition of more tuning -parameters. KTT framework offers functionality to deal with this problem which will be discussed in the follow-up sections. +parameters. KTT framework offers functionality to mitigate this problem which will be discussed in the follow-up sections. ---- -### Simple tuning example +### Simple autotuning example Offline kernel tuning is the simplest use case of KTT framework. It involves creating a kernel, specifying its arguments (data), defining tuning parameters and then launching autotuning. During autotuning, tuning parameter values are propagated to kernel source @@ -82,10 +82,10 @@ is launched, the following code is added at the beginning of kernel source code: to alter kernel functionality based on tuning parameter values. In the code snippet below, we create a kernel definition by specifying the name of kernel function and path to its source file. We also define -its default global and local dimensions (e.g., size of ND-range and work-group in OpenCL, size of grid and block in CUDA). We use provided -kernel definition handle to create kernel. We can also specify custom name for the kernel which is used e.g., for logging purposes. Afterwards, -we can use the kernel handle to define tuning parameter and launch autotuning. 
The step of creating kernel definition and kernel separately may -seem redundant at first, but it plays important role during more complex use cases that will be covered later. +its default global and local dimensions (e.g., size of ND-range and work-group in OpenCL, size of grid and block in CUDA). We use the provided +kernel definition id to create kernel. We can also specify custom name for the kernel which is used e.g., for logging purposes. Afterwards, +we can use the kernel id to define a tuning parameter and launch autotuning. The step of creating kernel definition and kernel separately may +seem redundant at first but it plays an important role during more complex use cases that will be covered later. ```cpp const size_t numberOfElements = 1024 * 1024; @@ -122,9 +122,9 @@ __kernel void computeStuff(__global float* input, int itemsPerThread, __global f ---- -### Initialization of KTT +### KTT initialization -The first step before we can utilize KTT is creation of tuner instance. Tuner is one of the major KTT classes and implements large portion of +The first step before we can utilize KTT is creation of a tuner instance. Tuner is one of the major KTT classes and implements large portion of autotuning logic. Practically all of the KTT structures such as kernels, kernel arguments and tuning parameters are tied to a specific tuner instance. The simplest tuner constructor requires 3 parameters - index for platform, index for device and type of compute API that will be utilized (e.g., CUDA, OpenCL). The indices for platforms and devices are assigned by KTT - they can be retrieved through `PlatformInfo` and `DeviceInfo` structures. These @@ -157,17 +157,17 @@ for (const auto& platform : platforms) ### Kernel definitions and kernels -Before kernel can be launched via KTT, its source must be loaded into tuner. This is achieved by creating kernel definition. During its creation, +Before kernel can be launched via KTT, its source must be loaded into tuner. 
This is achieved by creating a kernel definition. During its creation, we specify kernel function name and kernel source. The source can be added either from string or from file. Next, we specify default global (NDrange / grid) and local (work-group / block) sizes. The sizes are specified with KTT structure `DimensionVector` which supports up to three -dimensions. When a kernel is launched during tuning, the thread sizes chosen during definition creation will be used. There are ways to launch kernels -with different than default sizes which will be covered later. For CUDA API, addition of templated kernels is supported as well. When creating -definition, it is possible to specify types that should be used to instantiate kernel function from template. When we need to instantiate the same +dimensions. When a kernel is launched during tuning, the thread sizes chosen during kernel definition creation will be used. There are ways to launch +kernels with different than default sizes which will be covered later. For CUDA API, addition of templated kernels is supported as well. When creating +a definition, it is possible to specify types that should be used to instantiate kernel function from template. When we need to instantiate the same kernel template with different types, we do that by adding multiple kernel definitions with corresponding types which are then handled independently. Once we have kernel definitions, we can create kernels from them. It is possible to create a simple kernel which only uses one definition as well as -composite kernel which uses multiple definitions. Usage of composite kernels is useful for computations which require launching of multiple kernel -functions in order to compute the result. In this case it is also necessary to define kernel launcher which is a function that tells tuner in which +a composite kernel which uses multiple definitions. 
Usage of composite kernels is useful for computations which require launching of multiple kernel +functions in order to compute the result. In this case it is also necessary to define kernel launcher which is a function that tells the tuner in which order and how many times each kernel function is launched. Kernel launchers are covered in detail in their own section. Note that KTT terminology regarding kernel definitions and kernels differs slightly from regular compute APIs. KTT kernel definition roughly @@ -196,7 +196,7 @@ arguments. Before argument can be assigned to kernel, its description must be gi initial data placed inside buffer before kernel is launched, its access type (read or write) and memory location from which kernel accesses the buffer (host or device). Once the information is provided, tuner returns a handle to the argument. Through this handle, arguments can be assigned to kernel definitions as shown in the code below. KTT supports a wide range of data types for kernel arguments, including all built-in integer and floating-point -types as well as custom types. Note however, that custom types must be trivially copyable, so it is possible to transfer the arguments into device memory. +types as well as custom types. Note however, that custom types must be trivially copyable, so it remains possible to transfer the arguments into device memory. ```cpp const size_t numberOfElements = 1024 * 1024; @@ -231,16 +231,16 @@ const ktt::ArgumentId rvalueId = tuner.AddArgumentScalar(34); #### Vector arguments -Vector arguments have more customization options available than scalars. Other than the initial data, it is possible to specify whether argument -is used for reading or writing. For read-only arguments, additional optimization is possible during offline tuning - since its contents do not -change, the buffer needs to be copied into memory only once before the first kernel configuration is launched and then remain the same for subsequent -configurations. 
Setting correct access type can therefore lead to better tuning performance. +Vector arguments have more customization options available than scalars. Other than the initial data, it is possible to specify whether an argument +is used for reading or writing. For read-only arguments, additional optimization is possible during offline tuning. Since their contents do not +change, the buffers need to be copied into memory only once before the first kernel configuration is launched and then remain the same for subsequent +configurations. Setting correct access types to arguments can therefore lead to better tuning performance. -Next, it is possible to decide memory location from which the buffer is accessed by kernel - the two main options are host memory and device memory. -Users may wish to choose different memory depending on the type of device used for autotuning (e.g., host memory for CPUs, device memory for -dedicated GPUs). For host memory, it is possible to use zero-copy option, which makes kernel access the argument data directly, instead of creating -separate buffer and thus reduce memory usage. For CUDA and OpenCL 2.0, there exists one additional memory location option - unified. Unified memory -buffers can be accessed both from host and kernel side, relying on device driver to take care of migrating the data automatically. +Next, it is possible to decide memory location from which the argument buffer is accessed by kernel - the two main options are host memory and device +memory. Users may wish to choose different location depending on the type of device used for autotuning (e.g., host memory for CPUs, device memory for +dedicated GPUs). For host memory, it is additionally possible to use zero-copy optimization. This optimization causes kernels to access the argument data +directly, instead of creating a separate buffer and thus reduces memory usage. For CUDA and OpenCL 2.0, one additional memory location option exists - unified. 
+Unified memory buffers can be accessed from both host and kernel side, relying on device driver to take care of migrating the data automatically. Management type option specifies whether buffer management is handled automatically by the tuner (e.g., write arguments are automatically reset to initial state before new kernel configuration is launched, buffers are created and deleted automatically) or by the user. In some advanced cases, @@ -248,7 +248,7 @@ users may wish to manage the buffers manually. Note however, that this requires The final option for vector arguments is whether the initial data provided by user should be copied inside the tuner or referenced directly. By default, the data is copied which is safer (i.e., temporary arguments work correctly) but less memory efficient. In case the initial data is provided in form of -lvalue argument, direct reference can be used to avoid copying. This requires user to keep the initial data buffer valid during the time argument is +lvalue argument, direct reference can be used to avoid copying. This requires user to keep the initial data buffer valid during the time the argument is used by the tuner. ```cpp @@ -278,7 +278,7 @@ const ktt::ArgumentId resultId = tuner.AddArgumentVector(result, ktt::ArgumentAc #### Local memory arguments Local (shared in CUDA terminology) memory arguments are used to allocate corresponding amount of cache-like memory which is shared across all work-items -(threads) inside a work-group (thread block). User has to specify the data type and total size of allocated memory in bytes. +(threads) inside a work-group (thread block). We just need to specify the data type and total size of allocated memory in bytes. ```cpp // Allocate local memory for 4 floats and 2 integers. @@ -300,7 +300,7 @@ const ktt::ArgumentId symbolId = tuner.AddArgumentSymbol(42, "magicNumber"); ### Tuning parameters -Tuning parameters in KTT can be either unsigned integers or floats.
When defining new parameter, we need to specify its name (i.e., the name through +Tuning parameters in KTT can be either unsigned integers or floats. When defining a new parameter, we need to specify its name (i.e., the name through which it can be referenced in kernel source) and values. With addition of more tuning parameters, the size of tuning space grows exponentially as we need to explore all parameter combinations. KTT provides two features for users to slow down the tuning space growth. @@ -315,9 +315,9 @@ tuner.AddParameter(kernel, "float_value", std::vector{1.0}); #### Parameter constraints The first option are tuning constraints. Through constraints, it is possible to tell tuner to skip generating configurations for certain combinations -of parameters. The constraint is a function which receives values for the specified parameters on input and decides whether that combination should -be launched. User can choose which parameters are evaluated by specific constraint. Note that currently, it is possible to add constraints only -between integer parameters. +of parameters. Constraint is a function which receives values for the specified parameters on input and decides whether that combination is valid. +We can choose which parameters are evaluated by a specific constraint. Note that currently, it is possible to add constraints only between integer +parameters. ```cpp // We add 3 different parameters, the size of tuning space is 40 (5 * 2 * 4) @@ -334,10 +334,10 @@ tuner.AddConstraint(kernel, {"vector_type", "vectorized_soa"}, vectorizedSoA); #### Parameter groups The second option are tuning parameter groups. This option is mainly useful for composite kernels with certain tuning parameters only affecting one -kernel definition inside the kernel. 
For example, if we have composite kernel with 2 kernel definitions and each definition is affected by 3 parameters -(we have 6 parameters in total), and we know that each parameter only affects one specific definition, we can evaluate the two parameter groups -independently. This can greatly reduce the total number of evaluated configurations (e.g., if each of the parameters has 2 different values, total -number of configurations is 64 - 2^6; with usage of parameter groups, it is only 16 - 2^3 + 2^3). It is also possible to combine usage of +kernel definition inside the kernel. For example, if we have a composite kernel with two kernel definitions and each definition is affected by three +parameters (we have six parameters in total), and we know that each parameter only affects one specific definition, we can evaluate the two parameter +groups independently. This can greatly reduce the total number of evaluated configurations (e.g., if each of the parameters has two different values, +the total number of configurations is 64 - 2^6; with usage of parameter groups, it is only 16 - 2^3 + 2^3). It is also possible to combine usage of constraints and groups, however constraints can only be added between parameters which belong into the same group. ```cpp @@ -350,15 +350,15 @@ tuner.AddParameter(kernel, "b2", std::vector{0, 1}, "group_b"); #### Thread modifiers -Some tuning parameters can affect global or local number of threads a kernel is launched with. For example we may have a parameter which affects -amount of work performed by each thread. The more work each thread does, the less (global) threads we need in total to perform computation. In KTT, -we can define such dependency via thread modifiers. The thread modifier is a function which takes a default thread size and changes it based on +Some tuning parameters can affect global or local number of threads a kernel function is launched with. 
For example, we may have a parameter which +affects amount of work performed by each thread. The more work each thread does, the less (global) threads we need in total to perform computation. +In KTT, we can define such dependency via thread modifiers. The thread modifier is a function which takes a default thread size and changes it based on values of specified tuning parameters. When adding a new modifier, we specify kernel and its definitions whose thread sizes are affected by the modifier. Then we choose whether modifier affects global or local size, its dimension and names of tuning parameters tied to modifier. The modifier function can be specified through enum which supports certain simple functions such as multiplication or addition, but allows only one tuning parameter to be tied to modifier. Another -option is using a custom user function which can be more complex and support multiple tuning parameters. It is possible to create multiple thread +option is using a custom function which can be more complex and supports multiple tuning parameters. It is possible to create multiple thread modifiers for the same thread type (global / local) and dimension. In that case, the modifiers will be applied in the order of their addition to tuner. Similar to constraints, it is possible to tie only integer parameters to thread modifiers. @@ -381,7 +381,7 @@ tuner.AddThreadModifier(kernel, {definition}, ktt::ModifierType::Global, ktt::Mo ### Output validation -When developing kernels with large number of tuning parameters, it is often necessary to check whether each configuration computes the correct output. +When developing autotuned kernels with large number of parameters, it is often necessary to check whether each configuration computes the correct output. KTT provides a way to automatically compare output from tuned kernel configurations to reference output. 
That means each time a kernel configuration is finished, the contents of its output buffer are transferred into host memory and then compared to precomputed reference output. The reference can be computed in two ways. @@ -389,7 +389,7 @@ computed in two ways. #### Reference computation Reference computation is a function which computes the reference output in host code and stores the result in the buffer provided by KTT. The size of -that buffer matches the size of validated kernel output buffer. When defining reference computation, we only need to provide the function and the id +that buffer matches the size of validated kernel output buffer. When defining a reference computation, we only need to provide the function and the id of validated output argument. ```cpp @@ -407,8 +407,9 @@ tuner.SetReferenceComputation(resultArgument, [&a, &b](void* buffer) #### Reference kernel Another option is to compute reference result with a kernel. In this case, we need to provide the id of reference kernel and the id of validated output -argument. It is possible for reference kernel to have tuning parameters as well, so there is an option to choose specific reference configuration. If -reference kernel has no parameters, empty configuration can be provided. The reference kernel id may be the same as tuned kernel. +argument. It is possible for reference kernel to have tuning parameters as well, so there is an option to choose a specific reference configuration. If +a reference kernel has no parameters, empty configuration can be provided. The reference kernel may be the same as tuned kernel (e.g. using some default +configuration that is known to work). 
```cpp tuner.SetReferenceKernel(outputId, referenceKernel, ktt::KernelConfiguration()); @@ -424,25 +425,48 @@ Validation works out-of-the-box for integer and floating-point argument data typ validation method (e.g., comparing each element separately or summing up all elements and comparing the result) and tolerance threshold since different kernel configurations may have different accuracy of computing floating-point output. -If arguments with user-defined types are used, it is necessary to define value comparator. Comparator is a function which receives two elements -with the specified type on input and decides whether they are equal. Value comparator can optionally be also used for integer and floating-point -data types to override the default comparison functionality. +If arguments with user-defined types are validated, it is necessary to define a value comparator. Comparator is a function which receives two +elements with the specified type on input and decides whether they are equal. A custom comparator can optionally be used for integer and floating-point +data types as well, in order to override the default comparison functionality. + +```cpp +struct KernelData +{ + float a; + float b; + float result; +}; + +tuner.SetValueComparator(dataId, [](const void* resultPointer, const void* referencePointer) +{ + const auto* result = static_cast<const KernelData*>(resultPointer); + const auto* reference = static_cast<const KernelData*>(referencePointer); + + if (result->result != reference->result) + { + std::cerr << "Result " << result->result << " does not equal reference " << reference->result << std::endl; + return false; + } + + return true; +}); +``` ---- ### Kernel launchers Kernel launchers enable users to customize how kernels are run inside KTT. Launcher is a function which defines what happens when kernel under -certain configuration is launched via tuner. For simple kernels, default launcher is provided by KTT.
This launcher simply runs the kernel function -tied to kernel definition and waits until its finished. If a computation requires launching a kernel function multiple times, running some part in -host code or using multiple kernel functions, then user needs to define their own launcher. In case of composite kernels, defining custom launcher is -mandatory, since KTT does not know the order in which the individual kernel functions should be run. +certain configuration is launched via tuner. For simple kernels, a default launcher is provided by KTT. This launcher simply runs the kernel +function tied to kernel and waits until it has finished. If a computation requires launching a kernel function multiple times, running some +part in host code or using multiple kernel functions, then we need to define our own launcher. In case of composite kernels, defining a custom +launcher is mandatory, since KTT does not know the order in which the individual kernel functions should be run. -Kernel launcher has access to low-level KTT compute interface on input. Through this interface, it is possible to launch kernel functions, modify -buffers and retrieve the current kernel configuration. This makes it possible for tuning parameters to affect computation behaviour in host code in -addition to modifying kernel behavior. The modifications done to kernel arguments and buffers inside a launcher are isolated to the specific kernel -configuration launch. Therefore, it is not necessary to reset arguments to their original values for each kernel launch, it is done automatically by -the tuner. The only exception to this is usage of user-managed vector arguments, those have to be reset manually. +Kernel launcher has access to low-level KTT compute interface on input. Through this interface, it is possible to launch kernel functions, change +their thread sizes, modify buffers and retrieve the current kernel configuration. 
This makes it possible for tuning parameters to affect computation +behaviour in host code in addition to modifying kernel behavior. The modifications done to kernel arguments and buffers inside a launcher are isolated +to the specific kernel configuration launch. Therefore, it is not necessary to reset arguments to their original values for each kernel launch, it is +done automatically by the tuner. The only exception to this is usage of user-managed vector arguments, those have to be reset manually. ```cpp // This launcher is equivalent in functionality to the default simple kernel launcher provided by KTT. @@ -458,11 +482,11 @@ tuner.SetLauncher(kernel, [definition](ktt::ComputeInterface& interface) KTT supports kernel tuning as well as ordinary kernel running. Running kernels via tuner is often more convenient compared to directly using specific compute API, since a lot of boilerplate code such as compute queue management and kernel source compilation is abstracted. It is possible to specify -configuration under which the kernel is run, so the workflow where kernel is first tuned and then launched repeatedly with the best configuration is +configuration under which the kernel is run, so the workflow where kernel is first tuned and then launched repeatedly with the best configuration, is supported. It is possible to transfer kernel output into host memory by utilizing `BufferOutputDescriptor` structure. When creating this structure, -we need to specify id of buffer that should be transferred and pointer to memory where the buffer contents should be saved. It is possible to pass -multiple such structures into kernel running method - each structure corresponds to a single buffer that should be transferred. After kernel run is -finished, `KernelResult` structure is returned. 
This structure contains detailed information about kernel run such as execution times of individual +we need to specify id of a buffer that should be transferred and pointer to memory where the buffer contents should be saved. It is possible to pass +multiple such structures into kernel running method - each structure corresponds to a single buffer that should be transferred. After a kernel run is +finished, `KernelResult` structure is returned. This structure contains detailed information about the run such as execution times of individual kernel functions, status of computation (i.e., if it finished successfully) and more. ```cpp @@ -476,12 +500,12 @@ const auto result = tuner.Run(kernel, {}, {ktt::BufferOutputDescriptor(outputId, #### Offline tuning -During offline tuning, tuner runs kernel configurations one after another without user interference. This mode therefore separates finding the best -configuration and subsequent usage of tuned kernel in applications. This enables tuner to implement certain optimizations which would otherwise not be -possible - for example caching of read-only buffers over multiple kernel runs in different configurations. By default, the entire configuration space is -explored during offline tuning. This can be altered by leveraging stop conditions, which are described in detail in the next section. +During offline tuning, kernel configurations are run one after another without user interference. This mode therefore separates finding the best +configuration and subsequent usage of tuned kernel in an application. This enables tuner to implement certain optimizations which would otherwise not be +possible, for example caching of read-only buffers over multiple kernel runs under different configurations. By default, the entire configuration space is +explored during offline tuning. This can be altered by leveraging stop conditions, which are described in the next section. 
-Kernel output cannot be retrieved during offline tuning, because all of the configurations are launched within a single API call. The list of `KernelResult` +Kernel output cannot be retrieved during offline tuning because all of the configurations are launched within a single API call. The list of `KernelResult` structures corresponding to all tested configurations is returned after the tuning ends. These results can be saved either in XML or JSON format for further analysis. @@ -494,9 +518,9 @@ tuner.SaveResults(results, "TuningOutput", ktt::OutputFormat::JSON); Online tuning combines kernel tuning with regular running. Similar to kernel running, we can retrieve and use output from each kernel run. However, we do not specify the configuration under which kernel is run, but tuner launches a different configuration each time a kernel is launched, similar to -offline tuning. This mode does not separate tuning and usage of tuned kernel, but rather enables both to happen simultaneously. This can be beneficial +offline tuning. This mode does not separate tuning and usage of a tuned kernel, but rather enables both to happen simultaneously. This can be beneficial in situations where employment of offline tuning is impractical (e.g., when the size of kernel input is frequently changed which causes the optimal -configuration to change as well). If kernel is launched with online tuning after all configurations were already explored, the best configuration is used. +configuration to change as well). If a kernel is launched via online tuning after all configurations were already explored, the best configuration is used. 
```cpp std::vector output(numberOfElements, 0.0f); @@ -510,20 +534,20 @@ const auto result = tuner.TuneIteration(kernel, {ktt::BufferOutputDescriptor(out #### Accuracy of tuning results In order to identify the best configuration accurately, it is necessary to launch all configurations under the same conditions so that metrics such as -kernel function execution times can be objectively compared. This means, that tuned kernels should be launched on the target device in isolation. -Launching multiple kernels concurrently while tuning is performed may cause inaccuracies in collected data. Furthemore, if size of kernel input is changed -(e.g., during online tuning), tuning should be restarted from the beginning, since the size of input often affects the best configuration. The restart can -be achieved with `ClearData` API method. +kernel function execution times can be objectively compared. This means that tuned kernels should be launched on the target device in isolation. +Launching multiple kernels concurrently while tuning is performed may cause inaccuracies in collected data. Furthermore, if the size of kernel input is +changed (e.g., during online tuning), the tuning process should be restarted from the beginning, since the size of input often affects the best configuration. +The restart can be achieved by calling `ClearData` API method. ---- ### Stop conditions Stop conditions can be used to stop offline tuning when certain criteria is met. The stop condition is initialized before offline tuning begins and updated -after each tested configuration. Within the update, it has access to `KernelResult` structure whose data it can utilize to check its criteria. KTT currently -offers the following stop conditions: +after each tested configuration. Within the update, it has access to `KernelResult` structure from prior kernel run. It can utilize this data to check or update +its criteria.
KTT currently offers the following stop conditions: * ConfigurationCount - tuning stops after reaching the specified number of tested configurations. -* ConfigurationDuration - tuning stops after a configuration with execution time below the specified threshold was found. +* ConfigurationDuration - tuning stops after a configuration with execution time below the specified threshold is found. * ConfigurationFraction - tuning stops after exploring the specified fraction of configuration space. * TuningDuration - tuning stops after the specified duration has passed. @@ -534,37 +558,38 @@ API, so it possible to modify them as well. ### Searchers -Searchers decide the order in which kernel configurations are selected during offline and online tuning. Having an efficient searcher can significantly reduce the -time it takes to find well-performing configurations. Similar to a stop condition, a searcher is initialized before tuning begins and is updated after each tested -configuration with access to `KernelResult` structure from the previous run. Searchers are assigned to kernels individually, so each kernel can have a different -seacher. The following searchers are available in KTT API: -* DeterministicSearcher - always explores configurations in the same order (provided that tuning parameters, order of their addition or their values were not changed). +Searchers decide the order in which kernel configurations are selected and run during offline and online tuning. Having an efficient searcher can significantly +reduce the time it takes to find well-performing configurations. Similar to stop conditions, a searcher is initialized before tuning begins and is updated after +each tested configuration with access to `KernelResult` structure from the previous run. Searchers are assigned to kernels individually, so each kernel can have +a different searcher.
The following searchers are available in KTT API: +* DeterministicSearcher - always explores configurations in the same order (provided that tuning parameters, order of their addition and their values were not changed). * RandomSearcher - explores configurations in random order. * McmcSearcher - utilizes Markov chain Monte Carlo method to predict well-performing configurations more accurately than random searcher. -The searcher API is public, so users can implement their own searchers. The API also includes certain common utility methods to make the custom searcher implementation -easier. These include method to get random unexplored configuration or neighbouring configurations (configurations which differ in small amount of parameter values -compared to the specified configuration). +The searcher API is public, so users can implement their own searchers. The API also includes certain common utility methods to make the custom searcher +implementation easier. These include a method to get random unexplored configuration or neighbouring configurations (configurations which differ in a small +number of parameter values compared to the specified configuration). ---- ### Utility functions -KTT provides many utility functions to further customize tuner behavior. The following list contains descriptions of certain functions which users may find handy +KTT provides many utility functions to further customize tuner behavior. The following list contains descriptions of certain functions which can be handy to use: * `SetCompilerOptions` - sets options for kernel source code compiler used by compute API (e.g., NVRTC for CUDA). -* `SetGlobalSizeType` - compute APIs use different ways for specifying global thread size (e.g., grid size or ND-range size). This method makes it possible to override -the global thread size format to the one used by the specified API. Usage of this method makes it easier to port programs between different compute APIs. 
+* `SetGlobalSizeType` - compute APIs use different ways for specifying global thread size (e.g., grid size or ND-range size). This method makes it possible +to override the global thread size format to the one used by the specified API. Usage of this method makes it easier to port programs between different compute +APIs. * `SetAutomaticGlobalSizeCorrection` - tuner automatically ensures that global thread size is divisible by local thread size. This is required by certain compute APIs such as OpenCL. -* `SetKernelCacheCapacity` - changes size of cache for compiled kernels. KTT utilizes the cache to improve performance when the same kernel function with the same -configuration is launched multiple times (e.g., inside kernel launcher). +* `SetKernelCacheCapacity` - changes size of a cache for compiled kernels. KTT utilizes the cache to improve performance when the same kernel function with the +same configuration is launched multiple times (e.g., inside kernel launcher or during kernel running). * `SetLoggingLevel` - controls the amount of logging information printed to output. Higher levels print more detailed information which is useful for debugging. -* `SetTimeUnit` - specifies time unit used for printing execution times. Affects both console output as well as kernel results serialized into XML or JSON. +* `SetTimeUnit` - specifies time unit used for printing execution times. Affects console output as well as kernel results saved into a file. ---- -### Collecting profiling metrics +### Profiling metrics collection Apart from execution times, KTT can also collect other types of information from kernel runs. This includes low-level profiling metrics from kernel function executions such as global memory utilization, number of executed instructions and more. These metrics can be utilized e.g., by searchers to find well-performing @@ -579,10 +604,11 @@ due to lack of profiling library support. 
Profiling metrics can also be collecte #### Interaction with online tuning and kernel running -When utilizing kernel running and online tuning, it is possible to further decrease performance impact of having to execute the same kernel function multiple times. -Rather than performing all of the profiling runs at once, it is possible to split the profiling metric collection over multiple online tuning or kernel running API -function invocations and utilize output from each run. The intermediate `KernelResult` structures from such runs will not contain valid profiling counters, but still -have the remaining data accurate. Once the profiling for the current configuration is concluded, the final kernel result will contain valid profiling data. +When utilizing kernel running and online tuning, it is possible to further decrease performance impact of having to execute the same kernel function multiple times +during profiling. Rather than performing all of the profiling runs at once, it is possible to split the profiling metric collection over multiple online tuning or +kernel running API function invocations and utilize output from each run. The intermediate `KernelResult` structures from such runs will not contain valid profiling +metrics, but still have the remaining data accurate. Once the profiling for the current configuration is concluded, the final kernel result will contain valid +profiling data. ---- @@ -595,57 +621,57 @@ in this section. #### Custom compute library initialization By default, when tuner is created, it initializes its own internal compute API structures such as context, compute queues and buffers. It is however possible to -also use tuner with custom structures as well. This makes it possible to integrate tuner into libraries which need to perform their own compute API initialization. -During tuner initialization, we can pass `ComputeApiInitializer` structure to it, which contains our own context and compute queues. 
When adding a vector argument, -it is possible to pass our own compute buffer which will then be utilized by tuner. All of these structures will still remain under our own management, tuner will -simply reference them and use them when needed. Before releasing these structures, the tuner should be destroyed first, so it can perform proper cleanup (note that -tuner will not destroy the referenced structures on its own). +also use the tuner with custom structures as well. This enables tuner integration into libraries which need to perform their own compute API initialization. +During tuner initialization, we can pass `ComputeApiInitializer` structure to it. This structure contains our own context and compute queues. When adding a vector +argument, it is possible to pass our own compute buffer which will then be utilized by tuner. All of these structures still remain under our own management, tuner +will simply reference them and use them when needed. Before releasing these structures, the tuner should be destroyed first, so it can perform proper cleanup. Note +however, that the tuner will never destroy the referenced structures on its own. #### Asynchronous execution When performing tuning, all kernel function runs and buffer data transfers are synchronized. This is necessary to obtain accurate tuning data. Applications which combine kernel tuning and kernel running have an option to enable asynchronous kernel launches and buffer transfers after tuning is completed. This can be achieved -by utilizing kernel launchers and compute interface. The compute interface API contains methods for asynchronous operations. They enable user to choose a compute +by utilizing kernel launchers and compute interface. The compute interface API contains methods for asynchronous operations. They enable us to choose a compute queue for launching an operation and return event id which can be later used to wait for the operation to complete. 
Note however, that kernel results returned from asynchronous launches will contain inaccurate execution times, since the results may be returned before the asynchronous operation has finished. This feature should -only be utilized for kernel running, not tuning. +therefore be utilized only for kernel running, not tuning. #### Lifetime of internal tuner structures Internal KTT structures such as kernels, kernel definitions, arguments and configuration data have their lifetimes tied to tuner. Certain applications which utilize -tuner may prefer to remove certain structures on-the-fly to save memory. Currently, it is possible to remove kernels, kernel definitions, arguments and user-provided -compute queues from the tuner by specifying their ids. When removing a kernel, all of its associated data such as generated configurations, parameters and validation -data are removed as well. Note that it is not possible to remove structures which are referenced by other structures. E.g., when removing a kernel definition, user -has to make sure that all kernels which utilize that definition are removed first. +tuner may prefer to remove some of these structures on-the-fly to save memory. Currently, it is possible to remove kernels, kernel definitions, arguments and +user-provided compute queues from the tuner by specifying their ids. When removing a kernel, all of its associated data such as generated configurations, parameters +and validation data are removed as well. Note that it is not possible to remove structures which are referenced by other structures. E.g., when removing a kernel +definition, we must make sure that all kernels which utilize that definition are removed first. ---- ### Python API -The native KTT API is available in C++. Users who prefer Python have an option to build KTT as Python module which can be then imported into Python. 
Majority of the -KTT API methods can be afterwards called directly from Python while still benefitting from perfomance of KTT module built in C++. It is also possible to provide -custom searcher and stop condition implementations directly in Python. Users can therefore take advantage of certain libraries available in Python but not in C++ for -more complex searcher implementations. Majority of functions, enums and classes have the same names and arguments as in C++. A small number of limitations is -described in the follow-up subsection. +The native KTT API is available in C++. Users who prefer Python have an option to build KTT as Python module which can be then imported into Python. The majority of +KTT API methods can be afterwards called directly from Python while still benefitting from performance of KTT module built in C++. It is also possible to implement +custom searchers and stop conditions directly in Python. Users can therefore take advantage of certain libraries available in Python but not in C++ for more +complex searcher implementations. The majority of functions, enums and classes have the same names and arguments as in C++. A small number of limitations is described +in the follow-up subsection. #### Python limitations -Major part of KTT API is available in Python. There are however certain features which are restricuted to C++ API due to limitations in Python language and libraries. -They are the following: -* Templated methods - Python does not support templates, so there are separate versions of method for different data types instead (e.g., `AddArgumentVectorFloat`, -`AddArgumentVectorInt`). Addition of kernel arguments with custom types is not supported. +Almost the entire KTT API is available in Python. There are however certain features which are restricted to C++ API due to limitations in Python language and +utilized libraries.
They are the following: +* Templated methods - Python does not support templates, so there are separate versions of methods for different data types instead (e.g., `AddArgumentVectorFloat`, +`AddArgumentVectorInt`). Addition of kernel arguments with custom types is also not supported. * Custom library initialization - Custom context, compute queues and buffers cannot be used in Python. -* Methods which use void pointers (void*) in C++ API - Python does not have a direct equivalent to void* type. It is necessary to utilize low-level `ctypes` Python -module to be able to interact with these methods. +* Methods which use void pointers in C++ API - Python does not have a direct equivalent to void* type. It is necessary to utilize low-level `ctypes` Python +module to be able to interact with these methods through `PyCapsule` objects. ---- ### Feature parity across compute APIs KTT framework aims to maintain feature parity across all of its supported compute APIs (OpenCL, CUDA and Vulkan). That means if a certain feature is supported in -KTT CUDA backend, it should also be available in OpenCL and Vulkan backends, provided that the feature is natively supported in those APIs. There are certain exceptions -to that: -* Vulkan backend - certain features are currently unsupported in Vulkan due to development time constraints. These include support for profiling metrics, unified -and zero-copy buffers and certain advanced buffer handling methods. The support for these features may still be added at later time. +KTT CUDA backend, it should also be available in OpenCL and Vulkan backends, provided that the feature is natively supported in those APIs. There are certain +exceptions to that: +* Vulkan backend limitations - certain features are currently unsupported in Vulkan due to development time constraints. These include support for profiling metrics, +unified and zero-copy buffers and certain advanced buffer handling methods. 
The support for these features may still be added at a later time. * Unified memory in OpenCL - usage of unified OpenCL buffers requires support for OpenCL 2.0. Certain devices (e.g., Nvidia GPUs) still have this support unfinished. -* Templated kernel functions - templates are currently limited to CUDA kernels due to lack of support in other APIs +* Templated kernel functions - templates are currently limited to CUDA kernels due to lack of support in other APIs. From c282632410bdb94cf7f8e4190370e379af7dc59a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 6 Dec 2021 14:48:02 +0100 Subject: [PATCH 41/63] * Cleaned up and polished the onboarding guide --- OnboardingGuide.md | 376 ++++++++++++++++++++++----------------------- 1 file changed, 188 insertions(+), 188 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index e1b105fc..b873ff49 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -1,18 +1,18 @@ # Introduction to KTT -When optimizing performance of compute kernels, a programmer has to make a lot of decisions such as which algorithm to +When optimizing the performance of compute kernels, a programmer has to make many decisions such as which algorithm to use, how to arrange data structures in memory, how to block data access to optimize caching or which factor to use for -loop unrolling. Such decisions cannot be typically made in isolation - for example, when data layout in memory is changed, -a different algorithm may perform better. Therefore, it is necessary to explore vast amount of combinations of optimization +loop unrolling. Such decisions cannot be typically made in isolation. For example, when the data layout in memory is changed, +a different algorithm may perform better. Therefore, it is necessary to explore a vast amount of combinations of optimization decisions in order to reach the best performance. 
Moreover, the best combination of optimization decisions can differ for -various hardware devices or program setup. Therefore, a way of automatic search for the best combination of these decisions, +various hardware devices or program setup. Therefore, an automatic search for the best combination of these decisions, called autotuning, is valuable. -Naturally, in the simple use case, a batch script can be sufficient for autotuning. However, in advanced applications, +Naturally, a batch script can be sufficient for autotuning in a simple use case. However, in advanced applications, usage of an autotuning framework can be beneficial, as it can automatically handle memory objects, detect errors in autotuned kernels or perform autotuning during program runtime. -Kernel Tuning Toolkit is a framework which allows autotuning of compute kernels written in CUDA, OpenCL or Vulkan. It provides +Kernel Tuning Toolkit is a framework that allows autotuning of compute kernels written in CUDA, OpenCL or Vulkan. It provides unified interface for those APIs, handles communication between host (CPU) and accelerator (GPU, Xeon Phi, etc.), checks results and timing of tuned kernels, allows dynamic (online) tuning during program runtime, profiling of autotuned kernels and more. @@ -58,34 +58,34 @@ and timing of tuned kernels, allows dynamic (online) tuning during program runti ### Basic principles behind KTT -When leveraging autotuning, a programmer needs to think about which properties of their computation can be autotuned. For +When leveraging autotuning, a programmer needs to consider which properties of their computation can be autotuned. For example, an algorithm may contain for loop which can be unrolled. There are multiple options for unroll factor value -of this loop, e.g., 1 (no unroll), 2, 4, 8. Picking the optimal value for a certain device manually is difficult, therefore -we can define a tuning parameter for the unroll factor with the specified values. 
Afterwards, we can launch four different +of this loop, e.g., 1 (no unroll), 2, 4, 8. Picking the optimal value manually for a particular device is difficult. Therefore +we can define a tuning parameter for the unroll factor with the specified values. Afterward, we can launch four different versions of our computation to see which value performs best. -In practice, the computations are often complex enough to contain multiple parts which can be optimized in this way, leading -to definition of multiple tuning parameters. For example we may have the previously mentioned loop unroll parameter with +In practice, the computations are often complex enough to contain multiple parts that can be optimized, leading +to a definition of many tuning parameters. For example, we may have the previously mentioned loop unroll parameter with values {1, 2, 4, 8} and another parameter controlling data arrangement in memory with values {0, 1}. Combinations of these -parameters now define 8 different versions of computation. One such combination is called tuning configuration. Together, all -tuning configurations define configuration space. The size of the space grows exponentially with addition of more tuning -parameters. KTT framework offers functionality to mitigate this problem which will be discussed in the follow-up sections. +parameters now define eight different versions of computation. One such combination is called tuning configuration. Together, all +tuning configurations define configuration space. The size of the space grows exponentially with the addition of more tuning +parameters. KTT framework offers functionality to mitigate this problem which we will discuss in the follow-up sections. ---- ### Simple autotuning example -Offline kernel tuning is the simplest use case of KTT framework. It involves creating a kernel, specifying its arguments (data), +Offline kernel tuning is the simplest use case of the KTT framework. 
It involves creating a kernel, specifying its arguments (data), defining tuning parameters and then launching autotuning. During autotuning, tuning parameter values are propagated to kernel source -code in a form of preprocessor definitions. E.g., when configuration which contains parameter with name unroll_factor and value 2 +code in the form of preprocessor definitions. E.g., when configuration which contains a parameter with name unroll_factor and value 2 is launched, the following code is added at the beginning of kernel source code: `#define unroll_factor 2`. The definitions can be used to alter kernel functionality based on tuning parameter values. -In the code snippet below, we create a kernel definition by specifying the name of kernel function and path to its source file. We also define -its default global and local dimensions (e.g., size of ND-range and work-group in OpenCL, size of grid and block in CUDA). We use the provided -kernel definition id to create kernel. We can also specify custom name for the kernel which is used e.g., for logging purposes. Afterwards, -we can use the kernel id to define a tuning parameter and launch autotuning. The step of creating kernel definition and kernel separately may -seem redundant at first but it plays an important role during more complex use cases that will be covered later. +In the code snippet below, we create a kernel definition by specifying the name of a kernel function and a path to its source file. We also define +its default global and local dimensions (e.g., ND-range and work-group size in OpenCL; grid and block size in CUDA). We use the provided +kernel definition id to create a kernel. We can also specify a custom name for the kernel that is used, e.g., for logging purposes. Afterward, +we can use the kernel id to define a tuning parameter and launch autotuning. 
The step of separately creating kernel definition and kernel may +seem redundant at first, but it plays a vital role during more complex use cases that we will cover later. ```cpp const size_t numberOfElements = 1024 * 1024; @@ -101,7 +101,7 @@ tuner.AddParameter(kernel, "unroll_factor", std::vector{1, 2, 4, 8}); tuner.Tune(kernel); ``` -The next snippet demonstrates how our previously defined tuning parameter could be used to alter computation inside kernel. +The following snippet demonstrates how we could use our previously defined tuning parameter to alter computation inside the kernel. ```cpp __kernel void computeStuff(__global float* input, int itemsPerThread, __global float* output) @@ -124,14 +124,14 @@ __kernel void computeStuff(__global float* input, int itemsPerThread, __global f ### KTT initialization -The first step before we can utilize KTT is creation of a tuner instance. Tuner is one of the major KTT classes and implements large portion of -autotuning logic. Practically all of the KTT structures such as kernels, kernel arguments and tuning parameters are tied to a specific tuner instance. -The simplest tuner constructor requires 3 parameters - index for platform, index for device and type of compute API that will be utilized (e.g., CUDA, -OpenCL). The indices for platforms and devices are assigned by KTT - they can be retrieved through `PlatformInfo` and `DeviceInfo` structures. These -structures also contain some other useful information such as list of supported extensions, global memory size, number of available compute units and -more. Note that the assigned indices remain the same when autotuning applications are launched multiple times on the same computer. They change only -when the hardware configuration is changed (e.g., new device is added, old device is removed, device driver is reinstalled). 
Also note, that the indices -may not be the same across multiple compute APIs (e.g., index for the same device may be different under OpenCL and CUDA). +The first step before we can utilize KTT is the creation of a tuner instance. The tuner is one of the major KTT classes and implements a large portion of +autotuning logic. The KTT structures such as kernels, kernel arguments and tuning parameters are tied to a specific tuner instance. +The simplest tuner constructor requires three parameters - index for a platform, index for a device and compute API that will be utilized (e.g., CUDA, +OpenCL). The indices for platforms and devices are assigned by KTT. We can retrieve them through `PlatformInfo` and `DeviceInfo` structures. These +structures also contain some other useful information such as a list of supported extensions, global memory size, the number of available compute units and +more. Note that the assigned indices remain the same when autotuning applications are launched multiple times on the same computer. They only change +when the hardware configuration changes (e.g., a new device is added, an old device is removed, a device driver is reinstalled). Also note that the indices +may not be the same across multiple compute APIs (e.g., the index for the same device may be different under OpenCL and CUDA). The code below demonstrates how information about all available OpenCL platforms and devices is retrieved from KTT. In this case, the tuner is created for the first device on the first platform (both platform and device index is 0). @@ -157,23 +157,23 @@ for (const auto& platform : platforms) ### Kernel definitions and kernels -Before kernel can be launched via KTT, its source must be loaded into tuner. This is achieved by creating a kernel definition. During its creation, -we specify kernel function name and kernel source. The source can be added either from string or from file.
Next, we specify default global -(NDrange / grid) and local (work-group / block) sizes. The sizes are specified with KTT structure `DimensionVector` which supports up to three +Before launching a kernel via KTT, we must load its source into the tuner. We can achieve this by creating a kernel definition. During its creation, +we specify kernel function name and kernel source. The source can be added either from a string or from a file. Next, we specify default global +(ND-range / grid) and local (work-group / block) sizes. This is done via KTT structure `DimensionVector`, which supports up to three dimensions. When a kernel is launched during tuning, the thread sizes chosen during kernel definition creation will be used. There are ways to launch -kernels with different than default sizes which will be covered later. For CUDA API, addition of templated kernels is supported as well. When creating -a definition, it is possible to specify types that should be used to instantiate kernel function from template. When we need to instantiate the same -kernel template with different types, we do that by adding multiple kernel definitions with corresponding types which are then handled independently. +kernels with sizes different from the default, which we will cover later. For CUDA API, the addition of templated kernels is supported as well. +When creating a definition, it is possible to specify types used to instantiate kernel function from a template. When we need to instantiate the same +kernel template with different types, we add multiple kernel definitions with corresponding types, which are handled independently. -Once we have kernel definitions, we can create kernels from them. It is possible to create a simple kernel which only uses one definition as well as -a composite kernel which uses multiple definitions. Usage of composite kernels is useful for computations which require launching of multiple kernel -functions in order to compute the result. 
In this case it is also necessary to define kernel launcher which is a function that tells the tuner in which -order and how many times each kernel function is launched. Kernel launchers are covered in detail in their own section. +Once we have kernel definitions, we can create kernels from them. It is possible to create a simple kernel that only uses one definition and +a composite kernel that uses multiple definitions. Usage of composite kernels is useful for computations that launch multiple kernel +functions in order to compute the result. In this case, it is also necessary to define a kernel launcher which is a function that tells the tuner in which +order and how many times each kernel function is launched. Kernel launchers are covered in detail in a separate section. Note that KTT terminology regarding kernel definitions and kernels differs slightly from regular compute APIs. KTT kernel definition roughly -corresponds to a single kernel function (also called kernel in e.g., OpenCL or CUDA). KTT kernel corresponds to a specific computation which uses -one or more kernel functions and for which it is possible to define tuning parameters. KTT framework allows kernel definitions to be shared across -multiple kernels (i.e., the same kernel function can be used in multiple computations). +corresponds to a single kernel function (also called kernel in e.g., OpenCL or CUDA). KTT kernel corresponds to a specific computation that uses +one or more kernel functions and for which it is possible to define tuning parameters. KTT framework allows sharing of kernel definitions across +multiple kernels (i.e., we can use the same kernel function in multiple computations). ```cpp // Create convolution kernel, utilizes single kernel function @@ -191,12 +191,12 @@ const ktt::KernelId kernel = tuner.CreateCompositeKernel("Sort", {definition0, d ### Kernel arguments -Kernel arguments define input and output of a kernel. 
KTT supports multiple forms of kernel arguments such as buffers, scalars and constant memory -arguments. Before argument can be assigned to kernel, its description must be given to the tuner. In case of a buffer argument, this includes the -initial data placed inside buffer before kernel is launched, its access type (read or write) and memory location from which kernel accesses the buffer -(host or device). Once the information is provided, tuner returns a handle to the argument. Through this handle, arguments can be assigned to kernel -definitions as shown in the code below. KTT supports a wide range of data types for kernel arguments, including all built-in integer and floating-point -types as well as custom types. Note however, that custom types must be trivially copyable, so it remains possible to transfer the arguments into device memory. +Kernel arguments define the input and output of a kernel. KTT supports multiple forms of kernel arguments such as buffers, scalars and constant memory +arguments. The tuner must receive an argument's description before it can be assigned to a kernel. In case of a buffer argument, this includes the +initial data placed inside the buffer before a kernel is launched, its access type (read or write) and the memory location from which kernel accesses the buffer +(host or device). Once the information is provided, the tuner returns a handle to the argument. As the code below shows, we can assign arguments to kernel +definitions through this handle. KTT supports a wide range of data types for kernel arguments, including all built-in integer and floating-point +types as well as custom types. Note, however, that custom types must be trivially copyable, so transferring the arguments into device memory remains possible. ```cpp const size_t numberOfElements = 1024 * 1024; @@ -233,23 +233,23 @@ const ktt::ArgumentId rvalueId = tuner.AddArgumentScalar(34); Vector arguments have more customization options available than scalars. 
Other than the initial data, it is possible to specify whether an argument is used for reading or writing. For read-only arguments, additional optimization is possible during offline tuning. Since their contents do not -change, the buffers need to be copied into memory only once before the first kernel configuration is launched and then remain the same for subsequent +change, the buffers must be copied into memory only once before the first kernel configuration is launched and remain the same for subsequent configurations. Setting correct access types to arguments can therefore lead to better tuning performance. -Next, it is possible to decide memory location from which the argument buffer is accessed by kernel - the two main options are host memory and device -memory. Users may wish to choose different location depending on the type of device used for autotuning (e.g., host memory for CPUs, device memory for +Next, it is possible to decide the memory location from which a kernel accesses the argument buffer. The two main options are host memory and device +memory. Users may wish to choose a different location depending on the type of device used for autotuning (e.g., host memory for CPUs, device memory for dedicated GPUs). For host memory, it is additionally possible to use zero-copy optimization. This optimization causes kernels to access the argument data -directly, instead of creating a separate buffer and thus reduces memory usage. For CUDA and OpenCL 2.0, one additional memory location option exists - unified. -Unified memory buffers can be accessed from both host and kernel side, relying on device driver to take care of migrating the data automatically. +directly instead of creating a separate buffer and thus reduces memory usage. For CUDA and OpenCL 2.0, one additional memory location option exists - unified. +Unified memory buffers can be accessed from both host and kernel side, relying on a device driver to migrate the data automatically. 
Management type option specifies whether buffer management is handled automatically by the tuner (e.g., write arguments are automatically reset -to initial state before new kernel configuration is launched, buffers are created and deleted automatically) or by the user. In some advanced cases, -users may wish to manage the buffers manually. Note however, that this requires usage of kernel launchers which will be discussed later. +to initial state before a new kernel configuration is launched, buffers are created and deleted automatically) or by the user. In some advanced cases, +users may wish to manage the buffers manually. Note, however, that this requires the usage of kernel launchers which we will discuss later. -The final option for vector arguments is whether the initial data provided by user should be copied inside the tuner or referenced directly. By default, -the data is copied which is safer (i.e., temporary arguments work correctly) but less memory efficient. In case the initial data is provided in form of -lvalue argument, direct reference can be used to avoid copying. This requires user to keep the initial data buffer valid during time the argument is -used by the tuner. +The final option for vector arguments is whether the initial data provided by the user should be copied inside the tuner or referenced directly. By default, +the data is copied, which is safer (i.e., temporary arguments work correctly) but less memory efficient. If the initial data is provided in the form of +an lvalue argument, the tuner can use a direct reference to avoid copying. This requires the user to keep the initial data buffer valid while the tuner uses +the argument. 
```cpp std::vector input1; @@ -277,7 +277,7 @@ const ktt::ArgumentId resultId = tuner.AddArgumentVector(result, ktt::ArgumentAc #### Local memory arguments -Local (shared in CUDA terminology) memory arguments are used to allocate corresponding amount of cache-like memory which is shared accross all work-items +Local (shared in CUDA terminology) memory arguments are used to allocate a corresponding amount of cache-like memory, which is shared across all work-items (threads) inside a work-group (thread block). We just need to specify the data type and total size of allocated memory in bytes. ```cpp @@ -288,9 +288,9 @@ const ktt::ArgumentId local2Id = tuner.AddArgumentLocal(8); #### Symbol arguments -Symbol arguments were introduced in order to support CUDA arguments marked as `__constant__` or `__device__`. In other APIs, symbol arguments behave in -the same way as scalars since they do not require special handling. In case of CUDA, the name of symbol argument appearing inside CUDA kernel source -code has to be specified during argument addition to tuner. +Symbol arguments were introduced to support CUDA variables marked as `__constant__` or `__device__`. The name of a symbol argument appearing +inside a CUDA kernel source code has to be specified during argument addition to the tuner. Symbol arguments behave the same as scalars in +other APIs since they do not require special handling. In that case, the name of a symbol is ignored. ```cpp const ktt::ArgumentId symbolId = tuner.AddArgumentSymbol(42, "magicNumber"); @@ -301,7 +301,7 @@ const ktt::ArgumentId symbolId = tuner.AddArgumentSymbol(42, "magicNumber"); ### Tuning parameters Tuning parameters in KTT can be either unsigned integers or floats. When defining a new parameter, we need to specify its name (i.e., the name through -which it can be referenced in kernel source) and values. 
With addition of more tuning parameters, the size of tuning space grows exponentially as we +which it can be referenced in kernel source) and values. With the addition of more tuning parameters, the size of tuning space grows exponentially as we need to explore all parameter combinations. KTT provides two features for users to slow down the tuning space growth. ```cpp @@ -314,8 +314,8 @@ tuner.AddParameter(kernel, "float_value", std::vector{1.0}); #### Parameter constraints -The first option are tuning constraints. Through constraints, it is possible to tell tuner to skip generating configurations for certain combinations -of parameters. Constraint is a function which receives values for the specified parameters on input and decides whether that combination is valid. +The first option is tuning constraints. Through constraints, it is possible to tell the tuner to skip generating configurations for certain combinations +of parameters. Parameter constraint is a function that receives values for the specified parameters on input and decides whether that combination is valid. We can choose which parameters are evaluated by a specific constraint. Note that currently, it is possible to add constraints only between integer parameters. @@ -333,12 +333,12 @@ tuner.AddConstraint(kernel, {"vector_type", "vectorized_soa"}, vectorizedSoA); #### Parameter groups -The second option are tuning parameter groups. This option is mainly useful for composite kernels with certain tuning parameters only affecting one +The second option is tuning parameter groups. This option is mainly helpful for composite kernels with some tuning parameters that only affect one kernel definition inside the kernel. For example, if we have a composite kernel with two kernel definitions and each definition is affected by three -parameters (we have six parameters in total), and we know that each parameter only affects one specific definition, we can evaluate the two parameter -groups independently. 
This can greatly reduce the total number of evaluated configurations (e.g., if each of the parameters has two different values, -the total number of configurations is 64 - 2^6; with usage of parameter groups, it is only 16 - 2^3 + 2^3). It is also possible to combine usage of -constraints and groups, however constraints can only be added between parameters which belong into the same group. +parameters (we have six parameters in total), and we know that each parameter only affects one specific definition, we can evaluate the two groups +independently. This can significantly reduce the total number of evaluated configurations (e.g., if each of the parameters has two different values, +the total number of configurations is 64 - 2^6; with the usage of parameter groups, it is only 16 - 2^3 + 2^3). It is also possible to combine the use +of constraints and groups. However, constraints can only be added between parameters that belong to the same group. ```cpp // We add 4 different parameters split into 2 independent groups, reducing size of tuning space from 16 to 8 @@ -350,17 +350,17 @@ tuner.AddParameter(kernel, "b2", std::vector{0, 1}, "group_b"); #### Thread modifiers -Some tuning parameters can affect global or local number of threads a kernel function is launched with. For example, we may have a parameter which -affects amount of work performed by each thread. The more work each thread does, the less (global) threads we need in total to perform computation. -In KTT, we can define such dependency via thread modifiers. The thread modifier is a function which takes a default thread size and changes it based on +Some tuning parameters can affect the global or local number of threads with which a kernel function is launched. For example, we may have a parameter +that affects the amount of work performed by each thread. The more work each thread does, the fewer (global) threads we need in total to perform computation. 
+In KTT, we can define such dependency via thread modifiers. The thread modifier is a function that takes a default thread size and changes it based on values of specified tuning parameters. -When adding a new modifier, we specify kernel and its definitions whose thread sizes are affected by the modifier. Then we choose whether modifier -affects global or local size, its dimension and names of tuning parameters tied to modifier. The modifier function can be specified through enum -which supports certain simple functions such as multiplication or addition, but allows only one tuning parameter to be tied to modifier. Another -option is using a custom function which can be more complex and supports multiple tuning parameters. It is possible to create multiple thread -modifiers for the same thread type (global / local) and dimension. In that case, the modifiers will be applied in the order of their addition to -tuner. Similar to constraints, it is possible to tie only integer parameters to thread modifiers. +When adding a new modifier, we specify a kernel and its definitions whose thread sizes are affected by the modifier. Then we choose whether the modifier +affects the global or local size, its dimension and names of tuning parameters tied to the modifier. The modifier function can be specified through enum, +which supports simple operations such as multiplication or addition, but allows only one tuning parameter to be tied to the modifier. Another +option is using a custom function that can be more complex and supports multiple tuning parameters. Creating multiple thread modifiers for the same thread +type (global/local) and dimension is possible. In that case, the modifiers will be applied in the order of their addition to the tuner. Similar to constraints, +it is possible to tie only integer parameters to thread modifiers. 
```cpp tuner.AddParameter(kernel, "block_size", std::vector{32, 64, 128, 256}); @@ -381,16 +381,16 @@ tuner.AddThreadModifier(kernel, {definition}, ktt::ModifierType::Global, ktt::Mo ### Output validation -When developing autotuned kernels with large number of parameters, it is often necessary to check whether each configuration computes the correct output. -KTT provides a way to automatically compare output from tuned kernel configurations to reference output. That means each time a kernel configuration is -finished, the contents of its output buffer are transferred into host memory and then compared to precomputed reference output. The reference can be -computed in two ways. +When developing autotuned kernels with a large number of parameters, it is often necessary to check whether each configuration computes the correct output. +KTT provides a way to compare output from tuned kernel configurations with reference output automatically. That means each time a kernel configuration is +finished, the contents of its output buffer are transferred into host memory and then compared to precomputed reference output. It is possible to compute +the reference in two ways. #### Reference computation -Reference computation is a function which computes the reference output in host code and stores the result in the buffer provided by KTT. The size of -that buffer matches the size of validated kernel output buffer. When defining a reference computation, we only need to provide the function and the id -of validated output argument. +Reference computation is a function that computes the reference output in host code and stores the result in the buffer provided by KTT. The size of +that buffer matches the size of the validated kernel output buffer. When defining a reference computation, we only need to provide the function and +the validated output argument's id. 
```cpp tuner.SetReferenceComputation(resultArgument, [&a, &b](void* buffer) @@ -406,10 +406,10 @@ tuner.SetReferenceComputation(resultArgument, [&a, &b](void* buffer) #### Reference kernel -Another option is to compute reference result with a kernel. In this case, we need to provide the id of reference kernel and the id of validated output -argument. It is possible for reference kernel to have tuning parameters as well, so there is an option to choose a specific reference configuration. If -a reference kernel has no parameters, empty configuration can be provided. The reference kernel may be the same as tuned kernel (e.g. using some default -configuration that is known to work). +Another option is to compute a reference result with a kernel. We need to provide the reference kernel's id and the validated output argument's id +in this case. A reference kernel can have tuning parameters as well, so there is an option to choose a specific reference configuration. We can +provide an empty configuration if a reference kernel has no parameters. The reference kernel may be the same as the tuned kernel (e.g., using some +default configuration known to work). ```cpp tuner.SetReferenceKernel(outputId, referenceKernel, ktt::KernelConfiguration()); @@ -417,17 +417,17 @@ tuner.SetReferenceKernel(outputId, referenceKernel, ktt::KernelConfiguration()); #### Validation customization -There are certain ways to further customize how validation is performed. By default, the entire output buffer is validated. If validating only -a portion of the buffer is sufficient, setting a custom validation range is possible. In this case, the size of reference buffer provided by KTT +There are certain ways to further customize how the tuner performs validation. By default, the entire output buffer is validated. If validating only +a portion of the buffer is sufficient, setting a custom validation range is possible.
In this case, the size of the reference buffer provided by KTT for reference computation validation will be automatically adjusted as well. -Validation works out-of-the-box for integer and floating-point argument data types. In case of floating-point arguments, it is possible to choose +Validation works out-of-the-box for integer and floating-point argument data types. In the case of floating-point arguments, it is possible to choose validation method (e.g., comparing each element separately or summing up all elements and comparing the result) and tolerance threshold since -different kernel configurations may have different accuracy of computing floating-point output. +different kernel configurations may have varying accuracy of computing floating-point output. -If arguments with user-defined types are validated, it is necessary to define a value comparator. Comparator is a function which receives two +If arguments with user-defined types are validated, it is necessary to define a value comparator. A comparator is a function that receives two elements with the specified type on input and decides whether they are equal. A custom comparator can optionally be used for integer and floating-point -data types as well, in order to override the default comparison functionality. +data types as well, to override the default comparison functionality. ```cpp struct KernelData @@ -456,17 +456,17 @@ tuner.SetValueComparator(dataId, [](const void* resultPointer, const void* refer ### Kernel launchers -Kernel launchers enable users to customize how kernels are run inside KTT. Launcher is a function which defines what happens when kernel under -certain configuration is launched via tuner. For simple kernels, a default launcher is provided by KTT. This launcher simply runs the kernel -function tied to kernel and waits until it has finished. 
If a computation requires launching a kernel function multiple times, running some -part in host code or using multiple kernel functions, then we need to define our own launcher. In case of composite kernels, defining a custom -launcher is mandatory, since KTT does not know the order in which the individual kernel functions should be run. +Kernel launchers enable users to customize how kernels are run inside KTT. Launcher is a function that defines what happens when a kernel under +a particular configuration is launched via the tuner. For simple kernels, a default launcher is provided by KTT. This launcher runs the kernel +function tied to the kernel and waits until it has finished. If a computation requires launching a kernel function multiple times, running some +part in host code or using multiple kernel functions, we need to define our own launcher. In the case of composite kernels, defining a custom +launcher is mandatory since KTT does not know the order in which it should run the individual kernel functions. -Kernel launcher has access to low-level KTT compute interface on input. Through this interface, it is possible to launch kernel functions, change -their thread sizes, modify buffers and retrieve the current kernel configuration. This makes it possible for tuning parameters to affect computation -behaviour in host code in addition to modifying kernel behavior. The modifications done to kernel arguments and buffers inside a launcher are isolated -to the specific kernel configuration launch. Therefore, it is not necessary to reset arguments to their original values for each kernel launch, it is -done automatically by the tuner. The only exception to this is usage of user-managed vector arguments, those have to be reset manually. +Kernel launcher has access to a low-level KTT compute interface on input. This interface makes it possible to launch kernel functions, change +their thread sizes, modify buffers and retrieve the current kernel configuration. 
This enables tuning parameters to affect computation +behavior in host code in addition to modifying kernel behavior. The modifications to kernel arguments and buffers inside a launcher are isolated +to the specific kernel configuration launch. Therefore, it is not necessary to reset arguments to their original values for each kernel launch; it is +done automatically by the tuner. The only exception to this is the usage of user-managed vector arguments; those have to be reset manually. ```cpp // This launcher is equivalent in functionality to the default simple kernel launcher provided by KTT. @@ -480,19 +480,19 @@ tuner.SetLauncher(kernel, [definition](ktt::ComputeInterface& interface) ### Kernel running and tuning modes -KTT supports kernel tuning as well as ordinary kernel running. Running kernels via tuner is often more convenient compared to directly using specific -compute API, since a lot of boilerplate code such as compute queue management and kernel source compilation is abstracted. It is possible to specify -configuration under which the kernel is run, so the workflow where kernel is first tuned and then launched repeatedly with the best configuration, is -supported. It is possible to transfer kernel output into host memory by utilizing `BufferOutputDescriptor` structure. When creating this structure, -we need to specify id of a buffer that should be transferred and pointer to memory where the buffer contents should be saved. It is possible to pass -multiple such structures into kernel running method - each structure corresponds to a single buffer that should be transferred. After a kernel run is -finished, `KernelResult` structure is returned. This structure contains detailed information about the run such as execution times of individual -kernel functions, status of computation (i.e., if it finished successfully) and more. +KTT supports kernel tuning as well as standard kernel running. 
Running kernels via tuner is often more convenient than directly using a specific +compute API since a lot of boilerplate code such as compute queue management and kernel source compilation is abstracted. It is possible to specify +a configuration under which the kernel is run, so the workflow where a kernel is first tuned and then launched repeatedly with the best configuration is +supported. It is possible to transfer kernel output into host memory by utilizing the `BufferOutputDescriptor` structure. When creating this structure, +we need to specify the id of a buffer that should be transferred and a pointer to memory where the buffer contents should be saved. It is possible to pass +multiple such structures into the kernel running method - each descriptor corresponds to a single buffer that should be transferred. After a kernel run is +finished, the `KernelResult` structure is returned. This structure contains detailed information about the run, such as execution times of individual +kernel functions, the status of computation (i.e., if it finished successfully) and more. ```cpp std::vector output(numberOfElements, 0.0f); -// Add kernel and buffers to tuner +// Add kernel and buffers to the tuner ... const auto result = tuner.Run(kernel, {}, {ktt::BufferOutputDescriptor(outputId, output.data())}); @@ -500,13 +500,13 @@ const auto result = tuner.Run(kernel, {}, {ktt::BufferOutputDescriptor(outputId, #### Offline tuning -During offline tuning, kernel configurations are run one after another without user interference. This mode therefore separates finding the best -configuration and subsequent usage of tuned kernel in an application. This enables tuner to implement certain optimizations which would otherwise not be -possible, for example caching of read-only buffers over multiple kernel runs under different configurations. By default, the entire configuration space is -explored during offline tuning. 
This can be altered by leveraging stop conditions, which are described in the next section. +During offline tuning, kernel configurations are run one after another without user interference. Therefore, this mode separates finding the best +configuration and subsequent usage of the tuned kernel in an application. This enables the tuner to implement some optimizations that would otherwise not be +possible, for example, caching of read-only buffers over multiple kernel runs under different configurations. By default, the entire configuration space is +explored during offline tuning. We can alter this by leveraging stop conditions described in the next section. -Kernel output cannot be retrieved during offline tuning because all of the configurations are launched within a single API call. The list of `KernelResult` -structures corresponding to all tested configurations is returned after the tuning ends. These results can be saved either in XML or JSON format for +Kernel output cannot be retrieved during offline tuning because all configurations are launched within a single API call. After the tuning ends, the tuner +returns the list of `KernelResult` structures corresponding to all tested configurations. We can save these results either in XML or JSON format for further analysis. ```cpp @@ -516,16 +516,16 @@ tuner.SaveResults(results, "TuningOutput", ktt::OutputFormat::JSON); #### Online tuning -Online tuning combines kernel tuning with regular running. Similar to kernel running, we can retrieve and use output from each kernel run. However, we -do not specify the configuration under which kernel is run, but tuner launches a different configuration each time a kernel is launched, similar to -offline tuning. This mode does not separate tuning and usage of a tuned kernel, but rather enables both to happen simultaneously. 
This can be beneficial -in situations where employment of offline tuning is impractical (e.g., when the size of kernel input is frequently changed which causes the optimal -configuration to change as well). If a kernel is launched via online tuning after all configurations were already explored, the best configuration is used. +Online tuning combines kernel tuning with regular running. As with kernel running, we can retrieve and use the output from each kernel run. However, +we do not specify the configuration under which the kernel is run, but the tuner launches a different configuration each time a kernel is launched, similar to +offline tuning. This mode does not separate tuning and usage of a tuned kernel but enables both to happen simultaneously. This can be beneficial +in situations where offline tuning is impractical (e.g., when the size of kernel input is frequently changed, which causes the optimal configuration +to change as well). If a kernel is launched via online tuning after all configurations have been explored, the best configuration is used. ```cpp std::vector output(numberOfElements, 0.0f); -// Add kernel and buffers to tuner +// Add kernel and buffers to the tuner ... const auto result = tuner.TuneIteration(kernel, {ktt::BufferOutputDescriptor(outputId, output.data())}); @@ -535,46 +535,46 @@ const auto result = tuner.TuneIteration(kernel, {ktt::BufferOutputDescriptor(out In order to identify the best configuration accurately, it is necessary to launch all configurations under the same conditions so that metrics such as kernel function execution times can be objectively compared. This means that tuned kernels should be launched on the target device in isolation. -Launching multiple kernels concurrently while tuning is performed may cause inaccuracies in collected data.
Furthemore, if the size of kernel input is -changed (e.g., during online tuning), the tuning process should be restarted from the beginning, since the size of input often affects the best configuration. -The restart can be achieved by calling `ClearData` API method. +Launching multiple kernels concurrently while performing tuning may cause inaccuracies in collected data. Furthermore, if the size of kernel input is +changed (e.g., during online tuning), we should restart the tuning process from the beginning since the input size often affects the best configuration. +We can achieve the restart by calling the `ClearData` API method. ---- ### Stop conditions -Stop conditions can be used to stop offline tuning when certain criteria is met. The stop condition is initialized before offline tuning begins and updated -after each tested configuration. Within the update, it has access to `KernelResult` structure from prior kernel run. It can utilize this data to check or update -its criteria. KTT currently offers the following stop conditions: +We can utilize stop conditions to interrupt offline tuning when certain criteria are met. The stop condition is initialized before offline tuning begins +and updated after each tested configuration. Within the update, the condition has access to the `KernelResult` structure from prior kernel run. KTT currently +offers the following stop conditions: * ConfigurationCount - tuning stops after reaching the specified number of tested configurations. * ConfigurationDuration - tuning stops after a configuration with execution time below the specified threshold is found. * ConfigurationFraction - tuning stops after exploring the specified fraction of configuration space. * TuningDuration - tuning stops after the specified duration has passed. -The stop condition API is public, which means that users can also create their own stop conditions. All of the built-in conditions are implemented in public -API, so it possible to modify them as well. 
+The stop condition API is public, allowing users to create their own stop conditions. All of the built-in conditions are implemented in public API, so +it is possible to modify them as well. ---- ### Searchers Searchers decide the order in which kernel configurations are selected and run during offline and online tuning. Having an efficient searcher can significantly -reduce the time it takes to find well-performing configuration. Similar to stop conditions, a searcher is initialized before tuning begins and is updated after -each tested configuration with access to `KernelResult` structure from the previous run. Searchers are assigned to kernels individually, so each kernel can have -a different seacher. The following searchers are available in KTT API: +reduce the time it takes to find a well-performing configuration. Like stop conditions, a searcher is initialized before tuning begins and updated after +each tested configuration with access to the `KernelResult` structure from the previous run. Searchers are assigned to kernels individually so that each kernel +can have a different searcher. The following searchers are available in KTT API: * DeterministicSearcher - always explores configurations in the same order (provided that tuning parameters, order of their addition and their values were not changed). * RandomSearcher - explores configurations in random order. * McmcSearcher - utilizes Markov chain Monte Carlo method to predict well-performing configurations more accurately than random searcher. -The searcher API is public, so users can implement their own searchers. The API also includes certain common utility methods to make the custom searcher -implementation easier. These include a method to get random unexplored configuration or neighbouring configurations (configurations which differ in a small +The searcher API is public so that users can implement their own searchers. 
The API also includes utility methods to simplify custom searcher +implementation. These include a method to get random unexplored configuration or neighboring configurations (configurations that differ in a small number of parameter values compared to the specified configuration). ---- ### Utility functions -KTT provides many utility functions to further customize tuner behavior. The following list contains descriptions of certain functions which can be handy +KTT provides many utility functions to customize tuner behavior further. The following list contains descriptions of certain functions which can be handy to use: * `SetCompilerOptions` - sets options for kernel source code compiler used by compute API (e.g., NVRTC for CUDA). * `SetGlobalSizeType` - compute APIs use different ways for specifying global thread size (e.g., grid size or ND-range size). This method makes it possible @@ -584,8 +584,8 @@ APIs. APIs such as OpenCL. * `SetKernelCacheCapacity` - changes size of a cache for compiled kernels. KTT utilizes the cache to improve performance when the same kernel function with the same configuration is launched multiple times (e.g., inside kernel launcher or during kernel running). -* `SetLoggingLevel` - controls the amount of logging information printed to output. Higher levels print more detailed information which is useful for debugging. -* `SetTimeUnit` - specifies time unit used for printing execution times. Affects console output as well as kernel results saved into a file. +* `SetLoggingLevel` - controls the amount of logging information printed to the output. Higher levels print more detailed information which aids debugging. +* `SetTimeUnit` - specifies time unit used for printing execution times. This affects console output as well as kernel results saved into a file. 
---- @@ -593,85 +593,85 @@ same configuration is launched multiple times (e.g., inside kernel launcher or d Apart from execution times, KTT can also collect other types of information from kernel runs. This includes low-level profiling metrics from kernel function executions such as global memory utilization, number of executed instructions and more. These metrics can be utilized e.g., by searchers to find well-performing -configurations faster. The collection of profiling metrics is disabled by default as it changes the default tuning behaviour. In order to collect all profiling -metrics, it is usually necessary to run the same kernel function multiple times (the number increases when more metrics are collected). It furthemore requires -kernels to be run synchronously. Enabling profiling metrics collection thus decreases tuning performance. It is possible to mitigate performance impact by enabling -only certain metrics, which can be done through KTT API. +configurations faster. The collection of profiling metrics is disabled by default as it changes the default tuning behavior. In order to collect all profiling +metrics, it is usually necessary to run the same kernel function multiple times (the number increases when more metrics are collected). It furthermore requires +kernels to be run synchronously. Enabling profiling metrics collection thus decreases tuning performance. It is possible to mitigate performance impact by enabling +only specific metrics, which can be done through KTT API. -Collection of profiling metrics is currently supported for Nvidia devices on CUDA backend and AMD devices on OpenCL backend. Intel devices are currently unsupported -due to lack of profiling library support. Profiling metrics can also be collected for composite kernels. Note however, that for AMD devices and newer Nvidia devices -(Turing and onwards), collection of metrics is restricted to a single kernel definition within a composite kernel due to profiling library limitations.
+Collection of profiling metrics is currently supported for Nvidia devices on CUDA backend and AMD devices on OpenCL backend. Intel devices are unsupported +at the moment due to a lack of profiling library support. Profiling metrics can also be collected for composite kernels. Note, however, that the metrics +collection is restricted to a single definition within a composite kernel for AMD devices and newer Nvidia devices (Turing and onwards). This is due to profiling +library limitations. #### Interaction with online tuning and kernel running -When utilizing kernel running and online tuning, it is possible to further decrease performance impact of having to execute the same kernel function multiple times +When utilizing kernel running and online tuning, it is possible to further decrease the performance impact of executing the same kernel function multiple times during profiling. Rather than performing all of the profiling runs at once, it is possible to split the profiling metric collection over multiple online tuning or kernel running API function invocations and utilize output from each run. The intermediate `KernelResult` structures from such runs will not contain valid profiling -metrics, but still have the remaining data accurate. Once the profiling for the current configuration is concluded, the final kernel result will contain valid +metrics, but the other data will remain accurate. Once the profiling for the current configuration is concluded, the final kernel result will have valid profiling data. ---- ### Interoperability -The KTT framework could originally be used only in isolation to create standalone programs which are focused on tuning a specific kernel. In recent versions, the API -was extended to also support tuner integration into larger software suites. There are multiple major features which contribute to this support. They are described -in this section.
+The KTT framework could originally be used only in isolation to create standalone programs focused on tuning a specific kernel. In recent versions, the API +was extended to support tuner integration into larger software suites. Multiple major features contribute to this support. They are described in this section. #### Custom compute library initialization -By default, when tuner is created, it initializes its own internal compute API structures such as context, compute queues and buffers. It is however possible to -also use the tuner with custom structures as well. This enables tuner integration into libraries which need to perform their own compute API initialization. -During tuner initialization, we can pass `ComputeApiInitializer` structure to it. This structure contains our own context and compute queues. When adding a vector -argument, it is possible to pass our own compute buffer which will then be utilized by tuner. All of these structures still remain under our own management, tuner -will simply reference them and use them when needed. Before releasing these structures, the tuner should be destroyed first, so it can perform proper cleanup. Note -however, that the tuner will never destroy the referenced structures on its own. +By default, when the tuner is created, it initializes its own internal compute API structures such as context, compute queues and buffers. However, it is also +possible to use the tuner with custom structures. This enables tuner integration into libraries that perform their own compute API initialization. +During tuner initialization, we can pass the `ComputeApiInitializer` structure to it. This structure contains our own context and compute queues. When adding +a vector argument, it is possible to pass our own compute buffer, which the tuner will then utilize. These structures remain under our own management; the tuner +will just reference them and use them when needed. 
Before releasing these structures, the tuner should be destroyed first so that it can perform a proper cleanup. +Note, however, that the tuner will never release the referenced structures on its own. #### Asynchronous execution -When performing tuning, all kernel function runs and buffer data transfers are synchronized. This is necessary to obtain accurate tuning data. Applications which -combine kernel tuning and kernel running have an option to enable asynchronous kernel launches and buffer transfers after tuning is completed. This can be achieved -by utilizing kernel launchers and compute interface. The compute interface API contains methods for asynchronous operations. They enable us to choose a compute -queue for launching an operation and return event id which can be later used to wait for the operation to complete. Note however, that kernel results returned from -asynchronous launches will contain inaccurate execution times, since the results may be returned before the asynchronous operation has finished. This feature should -therefore be utilized only for kernel running, not tuning. +All kernel function runs and buffer data transfers are synchronized when performing tuning. This is necessary to obtain accurate tuning data. Applications that +combine kernel tuning and kernel running can enable asynchronous kernel launches and buffer transfers after tuning is completed. This is achieved by utilizing +kernel launchers and compute interface. The compute interface API contains methods for asynchronous operations. They enable us to choose a compute queue for +launching an operation and return event id, which can be later used to wait for the operation to complete. Note, however, that kernel results returned from +asynchronous launches will contain inaccurate execution timings since the results may be returned before the asynchronous operation has finished. Therefore, +the asynchronous execution should be utilized only for kernel running, not tuning. 
#### Lifetime of internal tuner structures -Internal KTT structures such as kernels, kernel definitions, arguments and configuration data have their lifetimes tied to tuner. Certain applications which utilize -tuner may prefer to remove some of these structures on-the-fly to save memory. Currently, it is possible to remove kernels, kernel definitions, arguments and +Internal KTT structures such as kernels, kernel definitions, arguments and configuration data have their lifetimes tied to the tuner. Some applications which +utilize the tuner may prefer to remove these structures on the fly to save memory. Currently, it is possible to remove kernels, kernel definitions, arguments and user-provided compute queues from the tuner by specifying their ids. When removing a kernel, all of its associated data such as generated configurations, parameters -and validation data are removed as well. Note that it is not possible to remove structures which are referenced by other structures. E.g., when removing a kernel -definition, we must make sure that all kernels which utilize that definition are removed first. +and validation data are also removed. Note that it is not possible to remove structures referenced by other structures. E.g., when removing a kernel definition, we +must first remove all kernels which utilize that definition. ---- ### Python API -The native KTT API is available in C++. Users who prefer Python have an option to build KTT as Python module which can be then imported into Python. The majority of -KTT API methods can be afterwards called directly from Python while still benefitting from perfomance of KTT module built in C++. It is also possible to implement -custom searchers and stop conditions directly in Python. Users can therefore take advantage of certain libraries available in Python but not in C++ for more -complex searcher implementations. Majority of functions, enums and classes have the same names and arguments as in C++. 
A small number of limitations is described -in the follow-up subsection. +The native KTT API is available in C++. Users who prefer Python have an option to build KTT as a Python module which can then be imported into Python. The majority +of KTT API methods can be afterward called directly from Python while still benefitting from the performance of the KTT module built in C++. It is also possible to +implement custom searchers and stop conditions directly in Python. Therefore, users can take advantage of libraries available in Python but not in C++ for more +complex searcher implementations. The majority of functions, enums and classes have the same names and arguments as in C++. A small number of limitations is +described in the follow-up subsection. #### Python limitations -Almost the entire KTT API is available in Python. There are however certain features which are restricuted to C++ API due to limitations in Python language and -utilized libraries. They are the following: +Almost the entire KTT API is available in Python. However, certain features are restricted to C++ API due to limitations in Python language and utilized +libraries. They are the following: * Templated methods - Python does not support templates, so there are separate versions of methods for different data types instead (e.g., `AddArgumentVectorFloat`, -`AddArgumentVectorInt`). Addition of kernel arguments with custom types is also not supported. +`AddArgumentVectorInt`). The addition of kernel arguments with custom types is not supported either. * Custom library initialization - Custom context, compute queues and buffers cannot be used in Python. -* Methods which use void pointers in C++ API - Python does not have a direct equivalent to void* type. It is necessary to utilize low-level `ctypes` Python -module to be able to interact with these methods through `PyCapsule` objects. +* Methods that use void pointers in C++ API - Python does not have a direct equivalent to void* type. 
It is necessary to utilize a low-level `ctypes` Python +module to interact with these methods through `PyCapsule` objects. ---- ### Feature parity across compute APIs -KTT framework aims to maintain feature parity across all of its supported compute APIs (OpenCL, CUDA and Vulkan). That means if a certain feature is supported in -KTT CUDA backend, it should also be available in OpenCL and Vulkan backends, provided that the feature is natively supported in those APIs. There are certain -exceptions to that: +KTT framework aims to maintain feature parity across all of its supported compute APIs (OpenCL, CUDA and Vulkan). That means if a particular feature is supported in +the KTT CUDA backend, it should also be available in OpenCL and Vulkan backends, provided that it is natively supported in those APIs. There are some exceptions +to that: * Vulkan backend limitations - certain features are currently unsupported in Vulkan due to development time constraints. These include support for profiling metrics, -unified and zero-copy buffers and certain advanced buffer handling methods. The support for these features may still be added at a later time. -* Unified memory in OpenCL - usage of unified OpenCL buffers requires support for OpenCL 2.0. Certain devices (e.g., Nvidia GPUs) still have this support unfinished. +unified and zero-copy buffers and a subset of advanced buffer handling methods. The support for these features may still be added at a later time. +* Unified memory in OpenCL - the usage of unified OpenCL buffers requires OpenCL 2.0. Some devices (e.g., Nvidia GPUs) still do not support this OpenCL version. * Templated kernel functions - templates are currently limited to CUDA kernels due to lack of support in other APIs. 
From 8b08a95c25b7c8c8a203f12ff8cc798bfb25df99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 6 Dec 2021 15:14:42 +0100 Subject: [PATCH 42/63] * Updated readme --- Readme.md | 59 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/Readme.md b/Readme.md index ffb53f75..01e450ba 100644 --- a/Readme.md +++ b/Readme.md @@ -2,63 +2,63 @@ KTT - Kernel Tuning Toolkit =========================== -KTT is an auto-tuning framework for OpenCL, CUDA kernels and GLSL compute shaders. Version 2.0 which contains major -API overhaul as well as new features and improvements is now available. +KTT is an autotuning framework for OpenCL, CUDA kernels and GLSL compute shaders. Version 2.0, which contains +a significant API overhaul as well as new features and improvements, is now available. Main features ------------- * Ability to define kernel tuning parameters such as kernel thread sizes, vector data types and loop unroll factors -in order to optimize computation for a particular device. +to optimize computation for a particular device. * Support for iterative kernel launches and composite kernels. * Support for multiple compute queues and asynchronous operations. * Support for online auto-tuning - kernel tuning combined with regular kernel running. -* Ability to automatically ensure correctness of tuned computation with reference kernel or C++ function. +* Ability to automatically ensure the correctness of tuned computation with reference kernel or C++ function. * Support for multiple compute APIs, switching between CUDA, OpenCL and Vulkan requires only minor changes in C++ code (e.g., changing the kernel source file), no library recompilation is needed. * Public API available in C++ (native) and Python (bindings). -* Large number of customization options, including support for kernel arguments with user-defined data types, -ability to change kernel compiler flags and more.
+* Many customization options, including support for kernel arguments with user-defined data types, ability to change +kernel compiler flags and more. Getting started --------------- -* KTT introductory guide can be found [here](https://github.com/HiPerCoRe/KTT/blob/development/OnboardingGuide.md). +* Introductory guide to KTT can be found [here](https://github.com/HiPerCoRe/KTT/blob/development/OnboardingGuide.md). * Full documentation for KTT API can be found [here](https://hipercore.github.io/KTT/). * KTT FAQ can be found [here](https://hipercore.github.io/KTT/md__docs__resources__faq.html). -* The newest release of KTT framework can be found [here](https://github.com/HiPerCoRe/KTT/releases). +* The newest release of the KTT framework can be found [here](https://github.com/HiPerCoRe/KTT/releases). * Prebuilt binaries are not provided due to many different combinations of compute APIs and build options available. -Please check the `Building KTT` section for detailed instructions on how to perform a build. +The `Building KTT` section contains detailed instructions on how to perform a build. Tutorials --------- -Tutorials are short examples which serve as an introduction to KTT framework. Each tutorial covers a specific part of +Tutorials are short examples that serve as an introduction to the KTT framework. Each tutorial covers a specific part of the API. All tutorials are available for both OpenCL and CUDA backends. Most of the tutorials are also available for -Vulkan. Tutorials assume that reader has some knowledge about C++ and GPU programming. List of the currently available +Vulkan. Tutorials assume that the reader has some knowledge about C++ and GPU programming. List of the currently available tutorials: * `Info`: Retrieving information about compute API platforms and devices through KTT API. * `KernelRunning`: Running simple kernel with KTT framework and retrieving output. 
-* `KernelTuning`: Simple kernel tuning using small number of tuning parameters and reference computation to validate output. +* `KernelTuning`: Simple kernel tuning using a small number of tuning parameters and reference computation to validate output. * `CustomArgumentTypes`: Usage of kernel arguments with custom data types and validating the output with value comparator. * `ComputeApiInitializer`: Providing tuner with custom compute context, queues and buffers. * `VectorArgumentCustomization`: Showcasing different usage options for vector kernel arguments. -* `PythonInterfaces`: Implementing custom searchers and stop conditions in Python which can afterwards be used with tuner. +* `PythonInterfaces`: Implementing custom searchers and stop conditions in Python, which can afterward be used with the tuner. Examples -------- -Examples showcase how KTT framework could be utilized in real-world scenarios. They are more complex than tutorials and -assume that reader is familiar with KTT API. List of some of the currently available examples: +Examples showcase how the KTT framework could be utilized in real-world scenarios. They are more complex than tutorials and +assume that the reader is familiar with KTT API. List of some of the currently available examples: * `CoulombSum2d`: Tuning of electrostatic potential map computation, focuses on a single slice. -* `CoulombSum3dIterative`: 3D version of previous example, utilizes kernel from 2D version and launches it iteratively. -* `CoulombSum3d`: Alternative to iterative version, utilizes kernel which computes the entire map in single invocation. +* `CoulombSum3dIterative`: 3D version of the previous example, utilizes kernel from 2D version and launches it iteratively. +* `CoulombSum3d`: Alternative to iterative version, utilizes kernel which computes the entire map in a single invocation. * `Nbody`: Tuning of N-body simulation. * `Reduction`: Tuning of vector reduction, launches a kernel iteratively. 
-* `Sort`: Radix sort example, combines multiple kernels into composite kernel. +* `Sort`: Radix sort example, combines multiple kernels into a composite kernel. * `Bicg`: Biconjugate gradients method example, features reference computation, composite kernels and constraints. Building KTT ------------ -* KTT can be built as a dynamic (shared) library using command line build tool Premake. Currently supported operating +* KTT can be built as a dynamic (shared) library using the command line build tool Premake. Currently supported operating systems are Linux and Windows. * The prerequisites to build KTT are: @@ -71,11 +71,11 @@ systems are Linux and Windows. * Build under Linux (inside KTT root folder): - ensure that path to vendor SDK is correctly set in the environment variables - run `./premake5 gmake` to generate makefile - - run `cd Build` to get inside build directory + - run `cd Build` to get inside the build directory - afterwards run `make config={configuration}_{architecture}` to build the project (e.g., `make config=release_x86_64`) * Build under Windows (inside KTT root folder): - - ensure that path to vendor SDK is correctly set in the environment variables, this should be done automatically + - ensure that path to vendor SDK is correctly set in the environment variables; this should be done automatically during SDK installation - run `premake5.exe vs20xx` (e.g., `premake5.exe vs2019`) to generate Visual Studio project files - open generated solution file and build the project inside Visual Studio @@ -89,12 +89,12 @@ systems are Linux and Windows. 
- `--no-examples` disables compilation of examples - `--no-tutorials` disables compilation of tutorials - `--tests` enables compilation of unit tests - - `--no-cuda` disables inclusion of CUDA API during compilation, only affects Nvidia platform - - `--no-opencl` disables inclusion of OpenCL API during compilation + - `--no-cuda` disables the inclusion of CUDA API during compilation, only affects Nvidia platform + - `--no-opencl` disables the inclusion of OpenCL API during compilation -* KTT and applications utilizing it rely on external dynamic (shared) libraries in order to work correctly. There are - multiple ways to provide access to these libraries, e.g., copying given library inside application folder or adding the - containing folder to library path (example for Linux: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/shared/library). +* KTT and applications that utilize it rely on external dynamic (shared) libraries to work correctly. There are + multiple ways to provide access to these libraries, e.g., copying a given library inside the application folder or adding the + containing folder to the library path (example for Linux: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/shared/library). Libraries which are bundled with device drivers are usually visible by default. The list of libraries currently utilized by KTT: - `OpenCL` distributed with specific device drivers (OpenCL only) @@ -109,11 +109,12 @@ systems are Linux and Windows. Related projects ---------------- -KTT API is based on [CLTune project](https://github.com/CNugteren/CLTune). Certain parts of the API are similar to CLTune, -however internal structure is completely rewritten from scratch. The ClTuneGemm and ClTuneConvolution examples are adopted from CLTune. +KTT API is based on [CLTune project](https://github.com/CNugteren/CLTune). Certain parts of the API are similar to CLTune. However, the internal +structure is completely rewritten from scratch. 
The ClTuneGemm and ClTuneConvolution examples are adopted from CLTune. KTT search space generation and tuning configuration storage techniques are derived from [ATF project](https://dl.acm.org/doi/10.1145/3427093). -Certain modifications were made to the original ATF algorithms due to differences in API and available framework features. The examples stored in AtfSamples folder are adopted from ATF. +Due to differences in API and available framework features, certain modifications were made to the original ATF algorithms. The examples stored +in AtfSamples folder are adopted from ATF. How to cite ----------- From a425d3a2ffcea58516efcc12622ac99d73ae8299 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 6 Dec 2021 15:59:59 +0100 Subject: [PATCH 43/63] * Kernel run mode can now be retrieved through compute interface --- Source/Api/ComputeInterface.h | 7 +++++++ Source/KernelRunner/ComputeLayer.cpp | 9 +++++++-- Source/KernelRunner/ComputeLayer.h | 3 ++- Source/KernelRunner/ComputeLayerData.cpp | 8 +++++++- Source/KernelRunner/ComputeLayerData.h | 5 ++++- Source/KernelRunner/KernelRunner.cpp | 6 +++--- Source/KernelRunner/KernelRunner.h | 4 ++-- 7 files changed, 32 insertions(+), 10 deletions(-) diff --git a/Source/Api/ComputeInterface.h b/Source/Api/ComputeInterface.h index 7a5c1bfd..e9bd2eda 100644 --- a/Source/Api/ComputeInterface.h +++ b/Source/Api/ComputeInterface.h @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -162,6 +163,12 @@ class KTT_API ComputeInterface */ virtual const KernelConfiguration& GetCurrentConfiguration() const = 0; + /** @fn virtual KernelRunMode GetRunMode() const = 0 + * Returns run mode of the currently launched kernel. + * @return Run mode of the currently launched kernel. See ::KernelRunMode for more information. 
+ */ + virtual KernelRunMode GetRunMode() const = 0; + /** @fn virtual void ChangeArguments(const KernelDefinitionId id, const std::vector& arguments) = 0 * Changes kernel arguments for the specified kernel definitions under currently launched kernel. * @param id Id of kernel definition for which the arguments will be changed. The specified definition must be included diff --git a/Source/KernelRunner/ComputeLayer.cpp b/Source/KernelRunner/ComputeLayer.cpp index bf4b7cc8..6c7dc05b 100644 --- a/Source/KernelRunner/ComputeLayer.cpp +++ b/Source/KernelRunner/ComputeLayer.cpp @@ -135,6 +135,11 @@ const KernelConfiguration& ComputeLayer::GetCurrentConfiguration() const return GetData().GetConfiguration(); } +KernelRunMode ComputeLayer::GetRunMode() const +{ + return GetData().GetRunMode(); +} + void ComputeLayer::ChangeArguments(const KernelDefinitionId id, const std::vector& arguments) { if (!ContainsUniqueElements(arguments)) @@ -314,9 +319,9 @@ void ComputeLayer::ClearComputeEngineData() } } -void ComputeLayer::AddData(const Kernel& kernel, const KernelConfiguration& configuration) +void ComputeLayer::AddData(const Kernel& kernel, const KernelConfiguration& configuration, const KernelRunMode mode) { - m_Data[kernel.GetId()] = std::make_unique(kernel, configuration); + m_Data[kernel.GetId()] = std::make_unique(kernel, configuration, mode); } void ComputeLayer::ClearData(const KernelId id) diff --git a/Source/KernelRunner/ComputeLayer.h b/Source/KernelRunner/ComputeLayer.h index 4f86a6a7..d58d8bce 100644 --- a/Source/KernelRunner/ComputeLayer.h +++ b/Source/KernelRunner/ComputeLayer.h @@ -42,6 +42,7 @@ class ComputeLayer : public ComputeInterface const DimensionVector& GetCurrentGlobalSize(const KernelDefinitionId id) const override; const DimensionVector& GetCurrentLocalSize(const KernelDefinitionId id) const override; const KernelConfiguration& GetCurrentConfiguration() const override; + KernelRunMode GetRunMode() const override; void ChangeArguments(const 
KernelDefinitionId id, const std::vector& arguments) override; void SwapArguments(const KernelDefinitionId id, const ArgumentId first, const ArgumentId second) override; @@ -70,7 +71,7 @@ class ComputeLayer : public ComputeInterface void ClearComputeEngineData(const KernelDefinitionId id); void ClearComputeEngineData(); - void AddData(const Kernel& kernel, const KernelConfiguration& configuration); + void AddData(const Kernel& kernel, const KernelConfiguration& configuration, const KernelRunMode mode); void ClearData(const KernelId id); KernelResult GenerateResult(const KernelId id, const Nanoseconds launcherDuration) const; diff --git a/Source/KernelRunner/ComputeLayerData.cpp b/Source/KernelRunner/ComputeLayerData.cpp index 2552d857..3e18cda1 100644 --- a/Source/KernelRunner/ComputeLayerData.cpp +++ b/Source/KernelRunner/ComputeLayerData.cpp @@ -8,9 +8,10 @@ namespace ktt { -ComputeLayerData::ComputeLayerData(const Kernel& kernel, const KernelConfiguration& configuration) : +ComputeLayerData::ComputeLayerData(const Kernel& kernel, const KernelConfiguration& configuration, const KernelRunMode runMode) : m_Kernel(kernel), m_Configuration(configuration), + m_RunMode(runMode), m_Overhead(0) { for (const auto* definition : kernel.GetDefinitions()) @@ -103,6 +104,11 @@ const KernelConfiguration& ComputeLayerData::GetConfiguration() const return m_Configuration; } +KernelRunMode ComputeLayerData::GetRunMode() const +{ + return m_RunMode; +} + const KernelComputeData& ComputeLayerData::GetComputeData(const KernelDefinitionId id) const { if (!ContainsKey(m_ComputeData, id)) diff --git a/Source/KernelRunner/ComputeLayerData.h b/Source/KernelRunner/ComputeLayerData.h index 1876e0b2..b57991f4 100644 --- a/Source/KernelRunner/ComputeLayerData.h +++ b/Source/KernelRunner/ComputeLayerData.h @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace ktt @@ -17,7 +18,7 @@ namespace ktt class ComputeLayerData { public: - explicit ComputeLayerData(const Kernel& kernel, 
const KernelConfiguration& configuration); + explicit ComputeLayerData(const Kernel& kernel, const KernelConfiguration& configuration, const KernelRunMode runMode); void IncreaseOverhead(const Nanoseconds overhead); void AddPartialResult(const ComputationResult& result); @@ -28,6 +29,7 @@ class ComputeLayerData bool IsProfilingEnabled(const KernelDefinitionId id) const; const Kernel& GetKernel() const; const KernelConfiguration& GetConfiguration() const; + KernelRunMode GetRunMode() const; const KernelComputeData& GetComputeData(const KernelDefinitionId id) const; KernelResult GenerateResult(const Nanoseconds launcherDuration) const; @@ -37,6 +39,7 @@ class ComputeLayerData std::vector m_PartialResults; const Kernel& m_Kernel; const KernelConfiguration& m_Configuration; + KernelRunMode m_RunMode; Nanoseconds m_Overhead; Nanoseconds CalculateLauncherOverhead() const; diff --git a/Source/KernelRunner/KernelRunner.cpp b/Source/KernelRunner/KernelRunner.cpp index 5d367ff4..4b23f0cd 100644 --- a/Source/KernelRunner/KernelRunner.cpp +++ b/Source/KernelRunner/KernelRunner.cpp @@ -36,7 +36,7 @@ KernelResult KernelRunner::RunKernel(const Kernel& kernel, const KernelConfigura Logger::LogInfo("Running kernel " + kernel.GetName() + " with configuration: " + configuration.GetString()); auto launcher = GetKernelLauncher(kernel); - KernelResult result = RunKernelInternal(kernel, configuration, launcher, output); + KernelResult result = RunKernelInternal(kernel, configuration, mode, launcher, output); ValidateResult(kernel, result, mode); if (manageBuffers) @@ -209,9 +209,9 @@ KernelLauncher KernelRunner::GetKernelLauncher(const Kernel& kernel) } KernelResult KernelRunner::RunKernelInternal(const Kernel& kernel, const KernelConfiguration& configuration, - KernelLauncher launcher, const std::vector& output) + const KernelRunMode mode, KernelLauncher launcher, const std::vector& output) { - m_ComputeLayer->AddData(kernel, configuration); + m_ComputeLayer->AddData(kernel, 
configuration, mode); const KernelId id = kernel.GetId(); auto activator = std::make_unique(*m_ComputeLayer, id); diff --git a/Source/KernelRunner/KernelRunner.h b/Source/KernelRunner/KernelRunner.h index 22e5678d..8b14ca85 100644 --- a/Source/KernelRunner/KernelRunner.h +++ b/Source/KernelRunner/KernelRunner.h @@ -51,8 +51,8 @@ class KernelRunner bool m_ProfilingFlag; KernelLauncher GetKernelLauncher(const Kernel& kernel); - KernelResult RunKernelInternal(const Kernel& kernel, const KernelConfiguration& configuration, KernelLauncher launcher, - const std::vector& output); + KernelResult RunKernelInternal(const Kernel& kernel, const KernelConfiguration& configuration, const KernelRunMode mode, + KernelLauncher launcher, const std::vector& output); Nanoseconds RunLauncher(KernelLauncher launcher); void PrepareValidationData(const ArgumentId id); From d2e747c7a5a13400c6e07b753960a321990dea1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 8 Dec 2021 10:44:27 +0100 Subject: [PATCH 44/63] * Added proper support for Python bindings under Linux * Renamed Python bindings library to pyktt --- Examples/CoulombSum2d/CoulombSum2d.py | 2 +- Examples/Reduction/Reduction.py | 2 +- Source/KttPlatform.h | 2 ++ Source/Python/PythonModule.cpp | 2 +- Source/Python/PythonSearchers.cpp | 2 +- Source/Python/PythonStopConditions.cpp | 2 +- .../01ComputeApiInfo/ComputeApiInfoCuda.py | 2 +- .../02KernelRunning/KernelRunningCuda.py | 2 +- Tutorials/03KernelTuning/KernelTuningCuda.py | 2 +- .../PythonInterfacesCuda.py | 2 +- premake5.lua | 33 ++++++++++++------- 11 files changed, 32 insertions(+), 21 deletions(-) diff --git a/Examples/CoulombSum2d/CoulombSum2d.py b/Examples/CoulombSum2d/CoulombSum2d.py index 6e486d3f..c5d9d1d0 100644 --- a/Examples/CoulombSum2d/CoulombSum2d.py +++ b/Examples/CoulombSum2d/CoulombSum2d.py @@ -1,6 +1,6 @@ import random import sys -import ktt +import pyktt as ktt def main(): # Initialize platform index, device index and paths to 
kernels. diff --git a/Examples/Reduction/Reduction.py b/Examples/Reduction/Reduction.py index 04cf9c9b..ee8b3df1 100644 --- a/Examples/Reduction/Reduction.py +++ b/Examples/Reduction/Reduction.py @@ -1,7 +1,7 @@ import ctypes import random import sys -import ktt +import pyktt as ktt def reference(buffer, src): ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.POINTER(ctypes.c_float) diff --git a/Source/KttPlatform.h b/Source/KttPlatform.h index 3b3fd0df..f887c384 100644 --- a/Source/KttPlatform.h +++ b/Source/KttPlatform.h @@ -17,9 +17,11 @@ #endif // KTT_LIBRARY #define KTT_VIRTUAL_API virtual + #define KTT_VISIBILITY_HIDDEN #else #define KTT_API #define KTT_VIRTUAL_API + #define KTT_VISIBILITY_HIDDEN __attribute__((visibility("hidden"))) #endif // _MSC_VER #endif // KTT_API diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index d5a44ef7..9406e3d4 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -14,7 +14,7 @@ void InitializePythonDataHolders(py::module_& module); void InitializePythonSearchers(py::module_& module); void InitializePythonStopConditions(py::module_& module); -PYBIND11_MODULE(ktt, module) +PYBIND11_MODULE(pyktt, module) { module.doc() = "Python bindings for KTT auto-tuning framework (https://github.com/HiPerCoRe/KTT)"; diff --git a/Source/Python/PythonSearchers.cpp b/Source/Python/PythonSearchers.cpp index 7ab480b3..2f207699 100644 --- a/Source/Python/PythonSearchers.cpp +++ b/Source/Python/PythonSearchers.cpp @@ -7,7 +7,7 @@ namespace py = pybind11; -class PySearcher : public ktt::Searcher, public py::trampoline_self_life_support +class KTT_VISIBILITY_HIDDEN PySearcher : public ktt::Searcher, public py::trampoline_self_life_support { public: using Searcher::Searcher; diff --git a/Source/Python/PythonStopConditions.cpp b/Source/Python/PythonStopConditions.cpp index 8fabedb3..b68e9ec1 100644 --- a/Source/Python/PythonStopConditions.cpp +++ b/Source/Python/PythonStopConditions.cpp @@ 
-7,7 +7,7 @@ namespace py = pybind11; -class PyStopCondition : public ktt::StopCondition, public py::trampoline_self_life_support +class KTT_VISIBILITY_HIDDEN PyStopCondition : public ktt::StopCondition, public py::trampoline_self_life_support { public: using StopCondition::StopCondition; diff --git a/Tutorials/01ComputeApiInfo/ComputeApiInfoCuda.py b/Tutorials/01ComputeApiInfo/ComputeApiInfoCuda.py index ab796c20..a5b03001 100644 --- a/Tutorials/01ComputeApiInfo/ComputeApiInfoCuda.py +++ b/Tutorials/01ComputeApiInfo/ComputeApiInfoCuda.py @@ -1,4 +1,4 @@ -import ktt +import pyktt as ktt def main(): # Create new tuner which uses CUDA as compute API. diff --git a/Tutorials/02KernelRunning/KernelRunningCuda.py b/Tutorials/02KernelRunning/KernelRunningCuda.py index 7a0fee40..3c6d2140 100644 --- a/Tutorials/02KernelRunning/KernelRunningCuda.py +++ b/Tutorials/02KernelRunning/KernelRunningCuda.py @@ -1,6 +1,6 @@ import ctypes import sys -import ktt +import pyktt as ktt def main(): # Initialize device index and path to kernel. diff --git a/Tutorials/03KernelTuning/KernelTuningCuda.py b/Tutorials/03KernelTuning/KernelTuningCuda.py index 51170f53..1408a132 100644 --- a/Tutorials/03KernelTuning/KernelTuningCuda.py +++ b/Tutorials/03KernelTuning/KernelTuningCuda.py @@ -1,6 +1,6 @@ import ctypes import sys -import ktt +import pyktt as ktt def computeReference(a, b, scalar, buffer): ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.POINTER(ctypes.c_float) diff --git a/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py b/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py index 31947d70..33663c9b 100644 --- a/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py +++ b/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py @@ -1,6 +1,6 @@ import ctypes import sys -import ktt +import pyktt as ktt # Implement custom stop condition in Python. The interface is the same as in C++. Note that it is necessary to call # the parent class constructor from inheriting constructor. 
diff --git a/premake5.lua b/premake5.lua index ed2ee505..3e2c1132 100644 --- a/premake5.lua +++ b/premake5.lua @@ -189,19 +189,27 @@ function linkVulkan() end function linkPython() - local path = os.getenv("PYTHON_PATH") + local pythonHeaders = os.getenv("PYTHON_HEADERS") + local pythonLibrary = os.getenv("PYTHON_LIB") - if not path then + if not pythonHeaders or not pythonLibrary then return false end defines {"KTT_PYTHON", "PYBIND11_USE_SMART_HOLDER_AS_DEFAULT"} - includedirs {"$(PYTHON_PATH)/include", "Libraries/pybind11-2.8.1-smart_holder"} + includedirs {pythonHeaders, "Libraries/pybind11-2.8.1-smart_holder"} files {"Libraries/pybind11-2.8.1-smart_holder/**"} - libdirs {"$(PYTHON_PATH)/libs"} - links {"python3"} + local libraryPath = path.getdirectory(pythonLibrary) + libdirs {libraryPath} + + local libraryName = path.getbasename(pythonLibrary) + + if os.target() == "linux" and string.startswith(libraryName, "lib") then + libraryName = libraryName:sub(4) + end + links {libraryName} return true end @@ -225,7 +233,7 @@ function linkAllLibraries() local pythonFound = linkPython() if not pythonFound then - error("Python installation was not found. Please ensure that path to Python is correctly set in the environment variables under PYTHON_PATH.") + error("Python installation was not found. 
Please ensure that paths to Python headers and Python library (including library name) are correctly set in the environment variables under PYTHON_HEADERS and PYTHON_LIB.") end end end @@ -364,12 +372,13 @@ project "Ktt" "Libraries/pugixml-1.11.4" } - filter "system:windows" - if _OPTIONS["python"] then - postbuildcommands {"{COPYFILE} %{cfg.targetdir}/ktt.dll %{cfg.targetdir}/ktt.pyd"} - end - - filter {} + if _OPTIONS["python"] then + if os.target() == "linux" then + postbuildcommands {"{COPYFILE} %{cfg.targetdir}/libktt.so %{cfg.targetdir}/pyktt.so"} + else + postbuildcommands {"{COPYFILE} %{cfg.targetdir}/ktt.dll %{cfg.targetdir}/pyktt.pyd"} + end + end defines {"KTT_LIBRARY"} targetname("ktt") From 69f6ca0284553e291607b146eb1458da52e65dd9 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Wed, 8 Dec 2021 12:35:55 +0100 Subject: [PATCH 45/63] * Fixed Python library path handling on Windows --- premake5.lua | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/premake5.lua b/premake5.lua index 3e2c1132..553d9d5e 100644 --- a/premake5.lua +++ b/premake5.lua @@ -200,6 +200,10 @@ function linkPython() includedirs {pythonHeaders, "Libraries/pybind11-2.8.1-smart_holder"} files {"Libraries/pybind11-2.8.1-smart_holder/**"} + if os.target() == "windows" then + pythonLibrary = pythonLibrary:gsub("\\", "/") + end + local libraryPath = path.getdirectory(pythonLibrary) libdirs {libraryPath} From ead0ace8ca0c77a84657885aa6d1c3b92f4bde42 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Wed, 8 Dec 2021 12:52:31 +0100 Subject: [PATCH 46/63] * Added Python usage section to readme --- Readme.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Readme.md b/Readme.md index 01e450ba..f2ed57b9 100644 --- a/Readme.md +++ b/Readme.md @@ -107,6 +107,14 @@ systems are Linux and Windows. 
- `vulkan` distributed with specific device drivers (Vulkan only) - `shaderc_shared` bundled with Vulkan SDK (Vulkan only) +Python bindings +--------------- +To be able to use KTT Python API, the KTT module must be built with `--python` option. For the build option to work, access to Python +development headers and library must be provided under environment variables `PYTHON_HEADERS` and `PYTHON_LIB` respectively. Once the +build is finished, in addition to the regular C++ module, a Python module will be created (named `pyktt.pyd` under Windows, `pyktt.so` +under Linux). This module can be imported into Python programs in the same way as regular modules. Note that Python must have access to +all modules which depend on the KTT module (e.g., various profiling libraries), otherwise the loading will fail. + Related projects ---------------- KTT API is based on [CLTune project](https://github.com/CNugteren/CLTune). Certain parts of the API are similar to CLTune. However, the internal From e7084ba881beffe5d055ace4f3a34fce88a38341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Filipovi=C4=8D?= Date: Wed, 8 Dec 2021 14:50:13 +0100 Subject: [PATCH 47/63] Update OnboardingGuide.md --- OnboardingGuide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index b873ff49..a06c93ed 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -330,6 +330,8 @@ tuner.AddParameter(kernel, "vector_type", std::vector{1, 2, 4, 8}); auto vectorizedSoA = [](const std::vector& values) {return values[0] > 1 || values[1] != 1;}; tuner.AddConstraint(kernel, {"vector_type", "vectorized_soa"}, vectorizedSoA); ``` +Note that parameter constraints are typically used in three scenarios. First, constraints can remove points in the tuning space (i.e., combinations of tuning parameters' values), which produces invalid code. Consider an example when two-dimensional blocks (work-groups in OpenCL) are created. 
The constraint can upper-bound thread block size (computed as block's x-dimension multiplied by block's y-dimension), so it does not exceed the highest thread block size executable on GPU. Second, constraints can prune redundant points in tuning space. In the example above, there is no need to tune vector size when the code is not vectorized. Third, constraints can remove points in the tuning space that produce underperforming code. In our example, considering two-dimensional thread blocks, we can constrain tuning space to avoid sub-warp blocks with less than 32 threads. + #### Parameter groups From a1c7e2b29b8ce2a3fff20ca7a4144da4e226b3e1 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Wed, 8 Dec 2021 16:52:30 +0100 Subject: [PATCH 48/63] * Updated MCMC searcher to use new searcher API features and improved its performance --- Source/Api/Searcher/McmcSearcher.cpp | 135 +++++++++------------------ Source/Api/Searcher/McmcSearcher.h | 18 ++-- Source/Python/PythonSearchers.cpp | 2 +- 3 files changed, 57 insertions(+), 98 deletions(-) diff --git a/Source/Api/Searcher/McmcSearcher.cpp b/Source/Api/Searcher/McmcSearcher.cpp index 7c32cae4..dc16bd30 100644 --- a/Source/Api/Searcher/McmcSearcher.cpp +++ b/Source/Api/Searcher/McmcSearcher.cpp @@ -8,7 +8,7 @@ namespace ktt { -McmcSearcher::McmcSearcher(const std::vector& start) : +McmcSearcher::McmcSearcher(const KernelConfiguration& start) : Searcher(), m_Index(0), m_VisitedStatesCount(0), @@ -21,16 +21,18 @@ McmcSearcher::McmcSearcher(const std::vector& start) : m_ProbabilityDistribution(0.0, 1.0) {} +McmcSearcher::McmcSearcher([[maybe_unused]] const std::vector& start) : + McmcSearcher(KernelConfiguration()) +{} + void McmcSearcher::OnInitialize() { - m_IntDistribution = std::uniform_int_distribution(0, GetConfigurationsCount() - 1), - m_ExecutionTimes.resize(GetConfigurationsCount(), std::numeric_limits::max()); - + m_IntDistribution = std::uniform_int_distribution(0, GetConfigurationsCount() - 1); size_t initialState = 0; 
- if (!m_Start.empty()) + if (m_Start.IsValid()) { - initialState = SearchStateIndex(m_Start); + initialState = GetIndex(m_Start); } else { @@ -41,11 +43,6 @@ void McmcSearcher::OnInitialize() m_OriginState = initialState; m_CurrentState = initialState; m_Index = initialState; - - for (size_t i = 0; i < GetConfigurationsCount(); ++i) - { - m_UnexploredIndices.insert(i); - } } void McmcSearcher::OnReset() @@ -56,13 +53,11 @@ void McmcSearcher::OnReset() m_CurrentState = 0; m_BestTime = std::numeric_limits::max(); m_ExecutionTimes.clear(); - m_UnexploredIndices.clear(); } bool McmcSearcher::CalculateNextConfiguration(const KernelResult& previousResult) { ++m_VisitedStatesCount; - m_UnexploredIndices.erase(m_Index); if (previousResult.IsValid()) { @@ -77,6 +72,16 @@ bool McmcSearcher::CalculateNextConfiguration(const KernelResult& previousResult // origin of MCMC to the best state if (m_Boot > 0) { + if (m_ExecutionTimes.find(m_CurrentState) == m_ExecutionTimes.cend()) + { + m_ExecutionTimes[m_CurrentState] = std::numeric_limits::max(); + } + + if (m_ExecutionTimes.find(m_OriginState) == m_ExecutionTimes.cend()) + { + m_ExecutionTimes[m_OriginState] = std::numeric_limits::max(); + } + if (m_ExecutionTimes[m_CurrentState] <= m_ExecutionTimes[m_OriginState]) { m_OriginState = m_CurrentState; @@ -87,7 +92,12 @@ bool McmcSearcher::CalculateNextConfiguration(const KernelResult& previousResult --m_Boot; - while (m_UnexploredIndices.find(m_Index) == m_UnexploredIndices.cend() || m_UnexploredIndices.empty()) + if (GetExploredIndices().size() == GetConfigurationsCount()) + { + return false; + } + + while (GetExploredIndices().find(m_Index) != GetExploredIndices().cend()) { m_Index = m_IntDistribution(m_Generator); } @@ -97,6 +107,16 @@ bool McmcSearcher::CalculateNextConfiguration(const KernelResult& previousResult } // acceptation of a new state + if (m_ExecutionTimes.find(m_CurrentState) == m_ExecutionTimes.cend()) + { + m_ExecutionTimes[m_CurrentState] = 
std::numeric_limits::max(); + } + + if (m_ExecutionTimes.find(m_OriginState) == m_ExecutionTimes.cend()) + { + m_ExecutionTimes[m_OriginState] = std::numeric_limits::max(); + } + if ((m_ExecutionTimes[m_CurrentState] <= m_ExecutionTimes[m_OriginState]) || m_ProbabilityDistribution(m_Generator) < m_EscapeProbability) { @@ -128,19 +148,27 @@ bool McmcSearcher::CalculateNextConfiguration(const KernelResult& previousResult Logger::LogDebug("MCMC step " + std::to_string(m_VisitedStatesCount) + ", continuing searching neighbours"); } - if (m_UnexploredIndices.empty()) + if (GetExploredIndices().size() == GetConfigurationsCount()) { return false; } - std::vector neighbours = GetNeighbours(m_OriginState); + std::vector neighbourConfigurations = GetNeighbourConfigurations(GetConfiguration(m_OriginState), + m_MaximumDifferences, std::numeric_limits::max()); + std::vector neighbours; + + for (const auto& neighbour : neighbourConfigurations) + { + const size_t index = GetIndex(neighbour); + neighbours.push_back(index); + } // reset origin position when there are no neighbours if (neighbours.empty()) { Logger::LogDebug("MCMC step " + std::to_string(m_VisitedStatesCount) + ", no neighbours, resetting position"); - - while (m_UnexploredIndices.find(m_OriginState) == m_UnexploredIndices.cend()) + + while (GetExploredIndices().find(m_OriginState) != GetExploredIndices().cend()) { m_OriginState = m_IntDistribution(m_Generator); } @@ -164,75 +192,4 @@ KernelConfiguration McmcSearcher::GetCurrentConfiguration() const return GetConfiguration(m_Index); } -std::vector McmcSearcher::GetNeighbours(const size_t referenceId) const -{ - std::vector neighbours; - const auto referenceConfiguration = GetConfiguration(referenceId); - const auto& referencePairs = referenceConfiguration.GetPairs(); - - for (const auto i : m_UnexploredIndices) - { - size_t differences = 0; - size_t settingId = 0; - const auto configuration = GetConfiguration(i); - - for (const auto& parameter : 
configuration.GetPairs()) - { - if (!parameter.HasSameValue(referencePairs[settingId])) - { - ++differences; - } - - ++settingId; - } - - if (differences <= m_MaximumDifferences) - { - neighbours.push_back(i); - } - } - - return neighbours; -} - -size_t McmcSearcher::SearchStateIndex(const std::vector& state) const -{ - size_t states = state.size(); - size_t ret = 0; - bool match = true; - - for (uint64_t index = 0; index < GetConfigurationsCount(); ++index) - { - const auto configuration = GetConfiguration(index); - match = true; - - for (size_t i = 0; i < states; ++i) - { - const auto& pair = configuration.GetPairs()[i]; - - if ((pair.HasValueDouble() && pair.GetValueDouble() != state[i]) - || (pair.GetValue() != static_cast(state[i]))) - { - match = false; - break; - } - } - - if (match) - { - break; - } - - ++ret; - } - - if (!match) - { - Logger::LogWarning("MCMC starting point not found."); - ret = 0; - } - - return ret; -} - } // namespace ktt diff --git a/Source/Api/Searcher/McmcSearcher.h b/Source/Api/Searcher/McmcSearcher.h index 5ad34fce..3feb8cc7 100644 --- a/Source/Api/Searcher/McmcSearcher.h +++ b/Source/Api/Searcher/McmcSearcher.h @@ -4,9 +4,9 @@ #pragma once #include +#include #include #include -#include #include #include @@ -20,11 +20,17 @@ namespace ktt class KTT_API McmcSearcher : public Searcher { public: + /** @fn McmcSearcher(const KernelConfiguration& start = {}) + * Initializes MCMC searcher. + * @param start Optional parameter which specifies the starting point for MCMC searcher. + */ + McmcSearcher(const KernelConfiguration& start = {}); + /** @fn McmcSearcher(const std::vector& start) * Initializes MCMC searcher. * @param start Optional parameter which specifies starting point for MCMC searcher. 
*/ - McmcSearcher(const std::vector& start); + [[deprecated("Use constructor which accepts kernel configuration.")]] McmcSearcher(const std::vector& start); void OnInitialize() override; void OnReset() override; @@ -40,9 +46,8 @@ class KTT_API McmcSearcher : public Searcher size_t m_Boot; double m_BestTime; - std::vector m_Start; - std::vector m_ExecutionTimes; - std::set m_UnexploredIndices; + KernelConfiguration m_Start; + std::map m_ExecutionTimes; std::default_random_engine m_Generator; std::uniform_int_distribution m_IntDistribution; @@ -51,9 +56,6 @@ class KTT_API McmcSearcher : public Searcher inline static size_t m_MaximumDifferences = 2; inline static size_t m_BootIterations = 10; inline static double m_EscapeProbability = 0.02; - - std::vector GetNeighbours(const size_t referenceId) const; - size_t SearchStateIndex(const std::vector& state) const; }; } // namespace ktt diff --git a/Source/Python/PythonSearchers.cpp b/Source/Python/PythonSearchers.cpp index 2f207699..3ca40874 100644 --- a/Source/Python/PythonSearchers.cpp +++ b/Source/Python/PythonSearchers.cpp @@ -59,7 +59,7 @@ void InitializePythonSearchers(py::module_& module) .def(py::init<>()); py::class_(module, "McmcSearcher") - .def(py::init&>()); + .def(py::init(), py::arg("start") = ktt::KernelConfiguration()); py::class_(module, "RandomSearcher") .def(py::init<>()); From d3e8f1d3eb8ccba91de1130af3d2a35544ced356 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Thu, 9 Dec 2021 10:46:26 +0100 Subject: [PATCH 49/63] * Added kernel run mode retrieval method to Python API --- Source/Python/PythonEnums.cpp | 6 ++++++ Source/Python/PythonModule.cpp | 1 + 2 files changed, 7 insertions(+) diff --git a/Source/Python/PythonEnums.cpp b/Source/Python/PythonEnums.cpp index 57546ff2..10c6bb03 100644 --- a/Source/Python/PythonEnums.cpp +++ b/Source/Python/PythonEnums.cpp @@ -65,6 +65,12 @@ void InitializePythonEnums(py::module_& module) .value("CUDA", ktt::GlobalSizeType::CUDA) .value("Vulkan", 
ktt::GlobalSizeType::Vulkan); + py::enum_(module, "KernelRunMode") + .value("Running", ktt::KernelRunMode::Running) + .value("OfflineTuning", ktt::KernelRunMode::OfflineTuning) + .value("OnlineTuning", ktt::KernelRunMode::OnlineTuning) + .value("ResultValidation", ktt::KernelRunMode::ResultValidation); + py::enum_(module, "LoggingLevel") .value("Off", ktt::LoggingLevel::Off) .value("Error", ktt::LoggingLevel::Error) diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index 9406e3d4..adcd7abe 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -58,6 +58,7 @@ PYBIND11_MODULE(pyktt, module) .def("GetCurrentGlobalSize", &ktt::ComputeInterface::GetCurrentGlobalSize, py::return_value_policy::reference) .def("GetCurrentLocalSize", &ktt::ComputeInterface::GetCurrentLocalSize, py::return_value_policy::reference) .def("GetCurrentConfiguration", &ktt::ComputeInterface::GetCurrentConfiguration, py::return_value_policy::reference) + .def("GetRunMode", &ktt::ComputeInterface::GetRunMode) .def("ChangeArguments", &ktt::ComputeInterface::ChangeArguments) .def("SwapArguments", &ktt::ComputeInterface::SwapArguments) .def("UpdateScalarArgumentChar", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int8_t data) { ci.UpdateScalarArgument(id, &data); }) From 7630eca24de1949fc705f7dff155517ead6c7fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 13 Dec 2021 13:55:06 +0100 Subject: [PATCH 50/63] * Added more synchronization methods to main tuner API * Added new queue synchronization method to compute interface API * Synchronizing queue or device now clears all of the associated compute and transfer actions --- Source/Api/ComputeInterface.h | 31 ++++++----- Source/ComputeEngine/ComputeEngine.h | 1 + .../Cuda/Actions/CudaComputeAction.cpp | 10 +++- .../Cuda/Actions/CudaComputeAction.h | 6 +- .../Cuda/Actions/CudaTransferAction.cpp | 8 ++- .../Cuda/Actions/CudaTransferAction.h | 4 +- 
.../Cuda/Buffers/CudaDeviceBuffer.cpp | 6 +- .../Cuda/Buffers/CudaHostBuffer.cpp | 6 +- .../Cuda/Buffers/CudaUnifiedBuffer.cpp | 6 +- Source/ComputeEngine/Cuda/CudaContext.cpp | 7 +++ Source/ComputeEngine/Cuda/CudaContext.h | 1 + Source/ComputeEngine/Cuda/CudaEngine.cpp | 24 +++++++- Source/ComputeEngine/Cuda/CudaEngine.h | 2 + Source/ComputeEngine/Cuda/CudaKernel.cpp | 2 +- .../Cuda/CuptiLegacy/CuptiSubscription.cpp | 4 +- .../OpenCl/Actions/OpenClComputeAction.cpp | 8 ++- .../OpenCl/Actions/OpenClComputeAction.h | 6 +- .../OpenCl/Actions/OpenClTransferAction.cpp | 8 ++- .../OpenCl/Actions/OpenClTransferAction.h | 4 +- .../OpenCl/Buffers/OpenClDeviceBuffer.cpp | 6 +- .../OpenCl/Buffers/OpenClHostBuffer.cpp | 6 +- .../OpenCl/Buffers/OpenClUnifiedBuffer.cpp | 6 +- Source/ComputeEngine/OpenCl/OpenClEngine.cpp | 22 +++++++- Source/ComputeEngine/OpenCl/OpenClEngine.h | 2 + Source/ComputeEngine/OpenCl/OpenClKernel.cpp | 2 +- .../Vulkan/Actions/VulkanComputeAction.cpp | 12 +++- .../Vulkan/Actions/VulkanComputeAction.h | 8 ++- .../Vulkan/Actions/VulkanTransferAction.cpp | 10 +++- .../Vulkan/Actions/VulkanTransferAction.h | 7 ++- Source/ComputeEngine/Vulkan/VulkanBuffer.cpp | 8 +-- .../Vulkan/VulkanComputePipeline.cpp | 4 +- Source/ComputeEngine/Vulkan/VulkanEngine.cpp | 24 +++++++- Source/ComputeEngine/Vulkan/VulkanEngine.h | 2 + Source/KernelRunner/ComputeLayer.cpp | 5 ++ Source/KernelRunner/ComputeLayer.h | 1 + Source/Python/PythonModule.cpp | 7 ++- Source/Tuner.cpp | 55 ++++++++++++++++++- Source/Tuner.h | 32 ++++++++++- Source/TunerCore.cpp | 20 +++++++ Source/TunerCore.h | 4 ++ 40 files changed, 318 insertions(+), 69 deletions(-) diff --git a/Source/Api/ComputeInterface.h b/Source/Api/ComputeInterface.h index e9bd2eda..7161e8b7 100644 --- a/Source/Api/ComputeInterface.h +++ b/Source/Api/ComputeInterface.h @@ -52,8 +52,8 @@ class KTT_API ComputeInterface * kernel. * @param queue Id of queue in which the command to run kernel will be submitted. 
* @return Id of asynchronous action corresponding to the issued kernel run command. The action must be waited for with - * WaitForComputeAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded - * kernel durations may occur. + * e.g., WaitForComputeAction(), SynchronizeQueue() methods. Otherwise, problems such as incorrectly recorded kernel durations + * may occur. */ virtual ComputeActionId RunKernelAsync(const KernelDefinitionId id, const QueueId queue) = 0; @@ -67,8 +67,8 @@ class KTT_API ComputeInterface * @param globalSize Dimensions for global size with which the kernel will be run. * @param localSize Dimensions for local size with which the kernel will be run. * @return Id of asynchronous action corresponding to the issued kernel run command. The action must be waited for with - * WaitForComputeAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded - * kernel durations may occur. + * e.g., WaitForComputeAction(), SynchronizeQueue() methods. Otherwise, problems such as incorrectly recorded kernel durations + * may occur. */ virtual ComputeActionId RunKernelAsync(const KernelDefinitionId id, const QueueId queue, const DimensionVector& globalSize, const DimensionVector& localSize) = 0; @@ -136,9 +136,14 @@ class KTT_API ComputeInterface */ virtual void SynchronizeQueue(const QueueId queue) = 0; - /** @fn virtual void SynchronizeDevice() = 0 + /** @fn virtual void SynchronizeQueues() = 0 * Blocks until all commands submitted to all device queues are completed. */ + virtual void SynchronizeQueues() = 0; + + /** @fn virtual void SynchronizeDevice() = 0 + * Blocks until all commands submitted to device are completed. 
+ */ virtual void SynchronizeDevice() = 0; /** @fn virtual const DimensionVector& GetCurrentGlobalSize(const KernelDefinitionId id) const = 0 @@ -214,8 +219,8 @@ class KTT_API ComputeInterface * @param id Id of vector argument which will be uploaded. * @param queue Id of queue in which the command to upload argument will be submitted. * @return Id of asynchronous action corresponding to the issued data transfer command. The action must be waited for with - * WaitForTransferAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded - * kernel durations may occur. + * e.g., WaitForTransferAction(), SynchronizeQueue() methods. Otherwise, problems such as incorrectly recorded transfer + * durations may occur. */ virtual TransferActionId UploadBufferAsync(const ArgumentId id, const QueueId queue) = 0; @@ -240,8 +245,8 @@ class KTT_API ComputeInterface * @param dataSize Size in bytes of buffer portion which will be downloaded to specified destination, starting with the first * byte. If zero, the entire buffer will be downloaded. * @return Id of asynchronous action corresponding to the issued data transfer command. The action must be waited for with - * WaitForTransferAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded - * kernel durations may occur. + * e.g., WaitForTransferAction(), SynchronizeQueue() methods. Otherwise, problems such as incorrectly recorded transfer + * durations may occur. */ virtual TransferActionId DownloadBufferAsync(const ArgumentId id, const QueueId queue, void* destination, const size_t dataSize = 0) = 0; @@ -267,8 +272,8 @@ class KTT_API ComputeInterface * @param dataSize Size in bytes of buffer portion which will be updated, starting with the first byte. If zero, the entire * buffer will be updated. * @return Id of asynchronous action corresponding to the issued data transfer command. 
The action must be waited for with - * WaitForTransferAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded - * kernel durations may occur. + * e.g., WaitForTransferAction(), SynchronizeQueue() methods. Otherwise, problems such as incorrectly recorded transfer + * durations may occur. */ virtual TransferActionId UpdateBufferAsync(const ArgumentId id, const QueueId queue, const void* data, const size_t dataSize = 0) = 0; @@ -292,8 +297,8 @@ class KTT_API ComputeInterface * @param dataSize Size in bytes of buffer portion which will be copied to destination buffer, starting with the first byte. * If zero, the entire buffer will be copied. * @return Id of asynchronous action corresponding to the issued data transfer command. The action must be waited for with - * WaitForTransferAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded - * kernel durations may occur. + * e.g., WaitForTransferAction(), SynchronizeQueue() methods. Otherwise, problems such as incorrectly recorded transfer + * durations may occur. 
*/ virtual TransferActionId CopyBufferAsync(const ArgumentId destination, const ArgumentId source, const QueueId queue, const size_t dataSize = 0) = 0; diff --git a/Source/ComputeEngine/ComputeEngine.h b/Source/ComputeEngine/ComputeEngine.h index 9bb8957a..1a8b31dd 100644 --- a/Source/ComputeEngine/ComputeEngine.h +++ b/Source/ComputeEngine/ComputeEngine.h @@ -57,6 +57,7 @@ class ComputeEngine virtual QueueId GetDefaultQueue() const = 0; virtual std::vector GetAllQueues() const = 0; virtual void SynchronizeQueue(const QueueId queueId) = 0; + virtual void SynchronizeQueues() = 0; virtual void SynchronizeDevice() = 0; // Information retrieval methods diff --git a/Source/ComputeEngine/Cuda/Actions/CudaComputeAction.cpp b/Source/ComputeEngine/Cuda/Actions/CudaComputeAction.cpp index 1c783ee5..321a6b15 100644 --- a/Source/ComputeEngine/Cuda/Actions/CudaComputeAction.cpp +++ b/Source/ComputeEngine/Cuda/Actions/CudaComputeAction.cpp @@ -9,9 +9,10 @@ namespace ktt { -CudaComputeAction::CudaComputeAction(const ComputeActionId id, std::shared_ptr kernel, const DimensionVector& globalSize, - const DimensionVector& localSize) : +CudaComputeAction::CudaComputeAction(const ComputeActionId id, const QueueId queueId, std::shared_ptr kernel, + const DimensionVector& globalSize, const DimensionVector& localSize) : m_Id(id), + m_QueueId(queueId), m_Kernel(kernel), m_Overhead(0), m_GlobalSize(globalSize), @@ -46,6 +47,11 @@ ComputeActionId CudaComputeAction::GetId() const return m_Id; } +QueueId CudaComputeAction::GetQueueId() const +{ + return m_QueueId; +} + CudaKernel& CudaComputeAction::GetKernel() { return *m_Kernel; diff --git a/Source/ComputeEngine/Cuda/Actions/CudaComputeAction.h b/Source/ComputeEngine/Cuda/Actions/CudaComputeAction.h index 025a8230..f0b2eee6 100644 --- a/Source/ComputeEngine/Cuda/Actions/CudaComputeAction.h +++ b/Source/ComputeEngine/Cuda/Actions/CudaComputeAction.h @@ -17,14 +17,15 @@ namespace ktt class CudaComputeAction { public: - CudaComputeAction(const 
ComputeActionId id, std::shared_ptr kernel, const DimensionVector& globalSize, - const DimensionVector& localSize); + CudaComputeAction(const ComputeActionId id, const QueueId queueId, std::shared_ptr kernel, + const DimensionVector& globalSize, const DimensionVector& localSize); void IncreaseOverhead(const Nanoseconds overhead); void SetComputeId(const KernelComputeId& id); void WaitForFinish(); ComputeActionId GetId() const; + QueueId GetQueueId() const; CudaKernel& GetKernel(); CUevent GetStartEvent() const; CUevent GetEndEvent() const; @@ -35,6 +36,7 @@ class CudaComputeAction private: ComputeActionId m_Id; + QueueId m_QueueId; std::shared_ptr m_Kernel; std::unique_ptr m_StartEvent; std::unique_ptr m_EndEvent; diff --git a/Source/ComputeEngine/Cuda/Actions/CudaTransferAction.cpp b/Source/ComputeEngine/Cuda/Actions/CudaTransferAction.cpp index 3a2f3c91..b3a48e82 100644 --- a/Source/ComputeEngine/Cuda/Actions/CudaTransferAction.cpp +++ b/Source/ComputeEngine/Cuda/Actions/CudaTransferAction.cpp @@ -9,8 +9,9 @@ namespace ktt { -CudaTransferAction::CudaTransferAction(const TransferActionId id) : +CudaTransferAction::CudaTransferAction(const TransferActionId id, const QueueId queueId) : m_Id(id), + m_QueueId(queueId), m_Overhead(0) { Logger::LogDebug("Initializing CUDA transfer action with id " + std::to_string(id)); @@ -34,6 +35,11 @@ TransferActionId CudaTransferAction::GetId() const return m_Id; } +QueueId CudaTransferAction::GetQueueId() const +{ + return m_QueueId; +} + CUevent CudaTransferAction::GetStartEvent() const { return m_StartEvent->GetEvent(); diff --git a/Source/ComputeEngine/Cuda/Actions/CudaTransferAction.h b/Source/ComputeEngine/Cuda/Actions/CudaTransferAction.h index b2360b96..0ae93b05 100644 --- a/Source/ComputeEngine/Cuda/Actions/CudaTransferAction.h +++ b/Source/ComputeEngine/Cuda/Actions/CudaTransferAction.h @@ -14,12 +14,13 @@ namespace ktt class CudaTransferAction { public: - CudaTransferAction(const TransferActionId id); + 
CudaTransferAction(const TransferActionId id, const QueueId queueId); void IncreaseOverhead(const Nanoseconds overhead); void WaitForFinish(); TransferActionId GetId() const; + QueueId GetQueueId() const; CUevent GetStartEvent() const; CUevent GetEndEvent() const; Nanoseconds GetDuration() const; @@ -28,6 +29,7 @@ class CudaTransferAction private: TransferActionId m_Id; + QueueId m_QueueId; std::unique_ptr m_StartEvent; std::unique_ptr m_EndEvent; Nanoseconds m_Overhead; diff --git a/Source/ComputeEngine/Cuda/Buffers/CudaDeviceBuffer.cpp b/Source/ComputeEngine/Cuda/Buffers/CudaDeviceBuffer.cpp index ad94026f..502b99bf 100644 --- a/Source/ComputeEngine/Cuda/Buffers/CudaDeviceBuffer.cpp +++ b/Source/ComputeEngine/Cuda/Buffers/CudaDeviceBuffer.cpp @@ -58,7 +58,7 @@ std::unique_ptr CudaDeviceBuffer::UploadData(const CudaStrea } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, stream.GetId()); CheckError(cuEventRecord(action->GetStartEvent(), stream.GetStream()), "cuEventRecord"); CheckError(cuMemcpyHtoDAsync(m_Buffer, source, dataSize, stream.GetStream()), "cuMemcpyHtoDAsync"); @@ -78,7 +78,7 @@ std::unique_ptr CudaDeviceBuffer::DownloadData(const CudaStr } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, stream.GetId()); CheckError(cuEventRecord(action->GetStartEvent(), stream.GetStream()), "cuEventRecord"); CheckError(cuMemcpyDtoHAsync(destination, m_Buffer, dataSize, stream.GetStream()), "cuMemcpyDtoHAsync"); @@ -104,7 +104,7 @@ std::unique_ptr CudaDeviceBuffer::CopyData(const CudaStream& } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, stream.GetId()); CheckError(cuEventRecord(action->GetStartEvent(), stream.GetStream()), "cuEventRecord"); CheckError(cuMemcpyDtoDAsync(m_Buffer, *source.GetBuffer(), dataSize, stream.GetStream()), "cuMemcpyDtoDAsync"); diff --git 
a/Source/ComputeEngine/Cuda/Buffers/CudaHostBuffer.cpp b/Source/ComputeEngine/Cuda/Buffers/CudaHostBuffer.cpp index f27fbf23..424f12b3 100644 --- a/Source/ComputeEngine/Cuda/Buffers/CudaHostBuffer.cpp +++ b/Source/ComputeEngine/Cuda/Buffers/CudaHostBuffer.cpp @@ -80,7 +80,7 @@ std::unique_ptr CudaHostBuffer::UploadData(const CudaStream& } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, stream.GetId()); CheckError(cuEventRecord(action->GetStartEvent(), stream.GetStream()), "cuEventRecord"); CheckError(cuMemcpyHtoDAsync(m_Buffer, source, dataSize, stream.GetStream()), "cuMemcpyHtoDAsync"); @@ -100,7 +100,7 @@ std::unique_ptr CudaHostBuffer::DownloadData(const CudaStrea } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, stream.GetId()); CheckError(cuEventRecord(action->GetStartEvent(), stream.GetStream()), "cuEventRecord"); CheckError(cuMemcpyDtoHAsync(destination, m_Buffer, dataSize, stream.GetStream()), "cuMemcpyDtoHAsync"); @@ -126,7 +126,7 @@ std::unique_ptr CudaHostBuffer::CopyData(const CudaStream& s } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, stream.GetId()); CheckError(cuEventRecord(action->GetStartEvent(), stream.GetStream()), "cuEventRecord"); CheckError(cuMemcpyDtoDAsync(m_Buffer, *source.GetBuffer(), dataSize, stream.GetStream()), "cuMemcpyDtoDAsync"); diff --git a/Source/ComputeEngine/Cuda/Buffers/CudaUnifiedBuffer.cpp b/Source/ComputeEngine/Cuda/Buffers/CudaUnifiedBuffer.cpp index f51cae19..ae1e783a 100644 --- a/Source/ComputeEngine/Cuda/Buffers/CudaUnifiedBuffer.cpp +++ b/Source/ComputeEngine/Cuda/Buffers/CudaUnifiedBuffer.cpp @@ -59,7 +59,7 @@ std::unique_ptr CudaUnifiedBuffer::UploadData(const CudaStre } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, stream.GetId()); 
CheckError(cuEventRecord(action->GetStartEvent(), stream.GetStream()), "cuEventRecord"); CheckError(cuMemcpyAsync(m_Buffer, reinterpret_cast(source), dataSize, stream.GetStream()), "cuMemcpyAsync"); @@ -79,7 +79,7 @@ std::unique_ptr CudaUnifiedBuffer::DownloadData(const CudaSt } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, stream.GetId()); CheckError(cuEventRecord(action->GetStartEvent(), stream.GetStream()), "cuEventRecord"); CheckError(cuMemcpyAsync(reinterpret_cast(destination), m_Buffer, dataSize, stream.GetStream()), "cuMemcpyAsync"); @@ -105,7 +105,7 @@ std::unique_ptr CudaUnifiedBuffer::CopyData(const CudaStream } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, stream.GetId()); CheckError(cuEventRecord(action->GetStartEvent(), stream.GetStream()), "cuEventRecord"); CheckError(cuMemcpyAsync(m_Buffer, *source.GetBuffer(), dataSize, stream.GetStream()), "cuMemcpyAsync"); diff --git a/Source/ComputeEngine/Cuda/CudaContext.cpp b/Source/ComputeEngine/Cuda/CudaContext.cpp index c723a96b..3795711f 100644 --- a/Source/ComputeEngine/Cuda/CudaContext.cpp +++ b/Source/ComputeEngine/Cuda/CudaContext.cpp @@ -28,6 +28,7 @@ CudaContext::CudaContext(ComputeContext context) : throw KttException("The provided user CUDA context is not valid"); } + EnsureThreadContext(); CheckError(cuCtxGetDevice(&m_Device), "cuCtxGetDevice"); } @@ -41,6 +42,12 @@ CudaContext::~CudaContext() } } +void CudaContext::Synchronize() const +{ + EnsureThreadContext(); + CheckError(cuCtxSynchronize(), "cuCtxSynchronize"); +} + void CudaContext::EnsureThreadContext() const { CUcontext current; diff --git a/Source/ComputeEngine/Cuda/CudaContext.h b/Source/ComputeEngine/Cuda/CudaContext.h index 861612aa..aa4bf6eb 100644 --- a/Source/ComputeEngine/Cuda/CudaContext.h +++ b/Source/ComputeEngine/Cuda/CudaContext.h @@ -18,6 +18,7 @@ class CudaContext explicit 
CudaContext(ComputeContext context); ~CudaContext(); + void Synchronize() const; void EnsureThreadContext() const; CUcontext GetContext() const; diff --git a/Source/ComputeEngine/Cuda/CudaEngine.cpp b/Source/ComputeEngine/Cuda/CudaEngine.cpp index d2f38c82..7b5121c9 100644 --- a/Source/ComputeEngine/Cuda/CudaEngine.cpp +++ b/Source/ComputeEngine/Cuda/CudaEngine.cpp @@ -614,16 +614,25 @@ void CudaEngine::SynchronizeQueue(const QueueId queueId) } m_Streams[queueId]->Synchronize(); + ClearStreamActions(queueId); } -void CudaEngine::SynchronizeDevice() +void CudaEngine::SynchronizeQueues() { for (auto& stream : m_Streams) { stream.second->Synchronize(); + ClearStreamActions(stream.first); } } +void CudaEngine::SynchronizeDevice() +{ + m_Context->Synchronize(); + m_ComputeActions.clear(); + m_TransferActions.clear(); +} + std::vector CudaEngine::GetPlatformInfo() const { int driverVersion; @@ -858,6 +867,19 @@ std::string CudaEngine::GetDefaultCompilerOptions() const return result; } +void CudaEngine::ClearStreamActions(const QueueId id) +{ + EraseIf(m_ComputeActions, [id](const auto& pair) + { + return pair.second->GetQueueId() == id; + }); + + EraseIf(m_TransferActions, [id](const auto& pair) + { + return pair.second->GetQueueId() == id; + }); +} + #if defined(KTT_PROFILING_CUPTI) void CudaEngine::InitializeCupti() diff --git a/Source/ComputeEngine/Cuda/CudaEngine.h b/Source/ComputeEngine/Cuda/CudaEngine.h index 6c845de4..f4cf52ec 100644 --- a/Source/ComputeEngine/Cuda/CudaEngine.h +++ b/Source/ComputeEngine/Cuda/CudaEngine.h @@ -71,6 +71,7 @@ class CudaEngine : public ComputeEngine QueueId GetDefaultQueue() const override; std::vector GetAllQueues() const override; void SynchronizeQueue(const QueueId queueId) override; + void SynchronizeQueues() override; void SynchronizeDevice() override; // Information retrieval methods @@ -118,6 +119,7 @@ class CudaEngine : public ComputeEngine std::unique_ptr CreateBuffer(KernelArgument& argument); std::unique_ptr 
CreateUserBuffer(KernelArgument& argument, ComputeBuffer buffer); std::string GetDefaultCompilerOptions() const; + void ClearStreamActions(const QueueId id); #if defined(KTT_PROFILING_CUPTI) void InitializeCupti(); diff --git a/Source/ComputeEngine/Cuda/CudaKernel.cpp b/Source/ComputeEngine/Cuda/CudaKernel.cpp index 89e4e05c..deaf5525 100644 --- a/Source/ComputeEngine/Cuda/CudaKernel.cpp +++ b/Source/ComputeEngine/Cuda/CudaKernel.cpp @@ -52,7 +52,7 @@ std::unique_ptr CudaKernel::Launch(const CudaStream& stream, const DimensionVector adjustedSize = AdjustGlobalSize(globalSize, localSize); const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, shared_from_this(), adjustedSize, localSize); + auto action = std::make_unique(id, stream.GetId(), shared_from_this(), adjustedSize, localSize); Logger::LogDebug("Launching kernel " + m_Name + " with compute action id " + std::to_string(id) + ", global thread size: " + adjustedSize.GetString() + ", local thread size: " + localSize.GetString()); diff --git a/Source/ComputeEngine/Cuda/CuptiLegacy/CuptiSubscription.cpp b/Source/ComputeEngine/Cuda/CuptiLegacy/CuptiSubscription.cpp index 92a3371c..2e75f080 100644 --- a/Source/ComputeEngine/Cuda/CuptiLegacy/CuptiSubscription.cpp +++ b/Source/ComputeEngine/Cuda/CuptiLegacy/CuptiSubscription.cpp @@ -51,7 +51,7 @@ void CuptiSubscription::MetricCallback(void* data, [[maybe_unused]] CUpti_Callb void CuptiSubscription::BeginCollection(CuptiInstance& instance, const CUpti_CallbackData& info) { - CheckError(cuCtxSynchronize(), "cuCtxSynchronize"); + instance.GetContext().Synchronize(); CheckError(cuptiSetEventCollectionMode(info.context, CUPTI_EVENT_COLLECTION_MODE_KERNEL), "cuptiSetEventCollectionMode"); auto& sets = instance.GetEventSets(); @@ -68,7 +68,7 @@ void CuptiSubscription::BeginCollection(CuptiInstance& instance, const CUpti_Cal void CuptiSubscription::EndCollection(CuptiInstance& instance) { - CheckError(cuCtxSynchronize(), "cuCtxSynchronize"); + 
instance.GetContext().Synchronize(); auto& sets = instance.GetEventSets(); const uint32_t index = instance.GetCurrentIndex(); diff --git a/Source/ComputeEngine/OpenCl/Actions/OpenClComputeAction.cpp b/Source/ComputeEngine/OpenCl/Actions/OpenClComputeAction.cpp index 72352364..58517c26 100644 --- a/Source/ComputeEngine/OpenCl/Actions/OpenClComputeAction.cpp +++ b/Source/ComputeEngine/OpenCl/Actions/OpenClComputeAction.cpp @@ -9,9 +9,10 @@ namespace ktt { -OpenClComputeAction::OpenClComputeAction(const ComputeActionId id, std::shared_ptr kernel, +OpenClComputeAction::OpenClComputeAction(const ComputeActionId id, const QueueId queueId, std::shared_ptr kernel, const DimensionVector& globalSize, const DimensionVector& localSize) : m_Id(id), + m_QueueId(queueId), m_Kernel(kernel), m_Overhead(0), m_GlobalSize(globalSize), @@ -50,6 +51,11 @@ ComputeActionId OpenClComputeAction::GetId() const return m_Id; } +QueueId OpenClComputeAction::GetQueueId() const +{ + return m_QueueId; +} + OpenClKernel& OpenClComputeAction::GetKernel() { return *m_Kernel; diff --git a/Source/ComputeEngine/OpenCl/Actions/OpenClComputeAction.h b/Source/ComputeEngine/OpenCl/Actions/OpenClComputeAction.h index 13b4f784..40deb56f 100644 --- a/Source/ComputeEngine/OpenCl/Actions/OpenClComputeAction.h +++ b/Source/ComputeEngine/OpenCl/Actions/OpenClComputeAction.h @@ -16,8 +16,8 @@ namespace ktt class OpenClComputeAction { public: - OpenClComputeAction(const ComputeActionId id, std::shared_ptr kernel, const DimensionVector& globalSize, - const DimensionVector& localSize); + OpenClComputeAction(const ComputeActionId id, const QueueId queueId, std::shared_ptr kernel, + const DimensionVector& globalSize, const DimensionVector& localSize); void IncreaseOverhead(const Nanoseconds overhead); void SetComputeId(const KernelComputeId& id); @@ -25,6 +25,7 @@ class OpenClComputeAction void WaitForFinish(); ComputeActionId GetId() const; + QueueId GetQueueId() const; OpenClKernel& GetKernel(); cl_event* GetEvent(); 
Nanoseconds GetDuration() const; @@ -34,6 +35,7 @@ class OpenClComputeAction private: ComputeActionId m_Id; + QueueId m_QueueId; std::shared_ptr m_Kernel; std::unique_ptr m_Event; Nanoseconds m_Overhead; diff --git a/Source/ComputeEngine/OpenCl/Actions/OpenClTransferAction.cpp b/Source/ComputeEngine/OpenCl/Actions/OpenClTransferAction.cpp index e470e77d..28480161 100644 --- a/Source/ComputeEngine/OpenCl/Actions/OpenClTransferAction.cpp +++ b/Source/ComputeEngine/OpenCl/Actions/OpenClTransferAction.cpp @@ -9,8 +9,9 @@ namespace ktt { -OpenClTransferAction::OpenClTransferAction(const TransferActionId id, const bool isAsync) : +OpenClTransferAction::OpenClTransferAction(const TransferActionId id, const QueueId queueId, const bool isAsync) : m_Id(id), + m_QueueId(queueId), m_Duration(InvalidDuration), m_Overhead(0) { @@ -54,6 +55,11 @@ TransferActionId OpenClTransferAction::GetId() const return m_Id; } +QueueId OpenClTransferAction::GetQueueId() const +{ + return m_QueueId; +} + cl_event* OpenClTransferAction::GetEvent() { KttAssert(IsAsync(), "Only async actions contain valid event"); diff --git a/Source/ComputeEngine/OpenCl/Actions/OpenClTransferAction.h b/Source/ComputeEngine/OpenCl/Actions/OpenClTransferAction.h index f9170680..869e4d25 100644 --- a/Source/ComputeEngine/OpenCl/Actions/OpenClTransferAction.h +++ b/Source/ComputeEngine/OpenCl/Actions/OpenClTransferAction.h @@ -14,7 +14,7 @@ namespace ktt class OpenClTransferAction { public: - OpenClTransferAction(const TransferActionId id, const bool isAsync); + OpenClTransferAction(const TransferActionId id, const QueueId queueId, const bool isAsync); void SetDuration(const Nanoseconds duration); void IncreaseOverhead(const Nanoseconds overhead); @@ -22,6 +22,7 @@ class OpenClTransferAction void WaitForFinish(); TransferActionId GetId() const; + QueueId GetQueueId() const; cl_event* GetEvent(); Nanoseconds GetDuration() const; Nanoseconds GetOverhead() const; @@ -30,6 +31,7 @@ class OpenClTransferAction private: 
TransferActionId m_Id; + QueueId m_QueueId; std::unique_ptr m_Event; Nanoseconds m_Duration; Nanoseconds m_Overhead; diff --git a/Source/ComputeEngine/OpenCl/Buffers/OpenClDeviceBuffer.cpp b/Source/ComputeEngine/OpenCl/Buffers/OpenClDeviceBuffer.cpp index 002ead77..591aa425 100644 --- a/Source/ComputeEngine/OpenCl/Buffers/OpenClDeviceBuffer.cpp +++ b/Source/ComputeEngine/OpenCl/Buffers/OpenClDeviceBuffer.cpp @@ -66,7 +66,7 @@ std::unique_ptr OpenClDeviceBuffer::UploadData(const OpenC } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, true); + auto action = std::make_unique(id, queue.GetId(), true); cl_int result = clEnqueueWriteBuffer(queue.GetQueue(), m_Buffer, CL_FALSE, 0, dataSize, source, 0, nullptr, action->GetEvent()); @@ -87,7 +87,7 @@ std::unique_ptr OpenClDeviceBuffer::DownloadData(const Ope } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, true); + auto action = std::make_unique(id, queue.GetId(), true); cl_int result = clEnqueueReadBuffer(queue.GetQueue(), m_Buffer, CL_FALSE, 0, dataSize, destination, 0, nullptr, action->GetEvent()); @@ -114,7 +114,7 @@ std::unique_ptr OpenClDeviceBuffer::CopyData(const OpenClC } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, true); + auto action = std::make_unique(id, queue.GetId(), true); cl_int result = clEnqueueCopyBuffer(queue.GetQueue(), source.GetBuffer(), m_Buffer, 0, 0, dataSize, 0, nullptr, action->GetEvent()); diff --git a/Source/ComputeEngine/OpenCl/Buffers/OpenClHostBuffer.cpp b/Source/ComputeEngine/OpenCl/Buffers/OpenClHostBuffer.cpp index f68c12a8..5b6347d9 100644 --- a/Source/ComputeEngine/OpenCl/Buffers/OpenClHostBuffer.cpp +++ b/Source/ComputeEngine/OpenCl/Buffers/OpenClHostBuffer.cpp @@ -92,7 +92,7 @@ std::unique_ptr OpenClHostBuffer::UploadData(const OpenClC } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, false); + auto action = std::make_unique(id, queue.GetId(), false); Timer 
timer; timer.Start(); @@ -127,7 +127,7 @@ std::unique_ptr OpenClHostBuffer::DownloadData(const OpenC } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, false); + auto action = std::make_unique(id, queue.GetId(), false); Timer timer; timer.Start(); @@ -168,7 +168,7 @@ std::unique_ptr OpenClHostBuffer::CopyData(const OpenClCom } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, true); + auto action = std::make_unique(id, queue.GetId(), true); cl_int result = clEnqueueCopyBuffer(queue.GetQueue(), source.GetBuffer(), m_Buffer, 0, 0, dataSize, 0, nullptr, action->GetEvent()); diff --git a/Source/ComputeEngine/OpenCl/Buffers/OpenClUnifiedBuffer.cpp b/Source/ComputeEngine/OpenCl/Buffers/OpenClUnifiedBuffer.cpp index f042f5d2..cb3ca833 100644 --- a/Source/ComputeEngine/OpenCl/Buffers/OpenClUnifiedBuffer.cpp +++ b/Source/ComputeEngine/OpenCl/Buffers/OpenClUnifiedBuffer.cpp @@ -77,7 +77,7 @@ std::unique_ptr OpenClUnifiedBuffer::UploadData([[maybe_un } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, false); + auto action = std::make_unique(id, queue.GetId(), false); Timer timer; timer.Start(); @@ -100,7 +100,7 @@ std::unique_ptr OpenClUnifiedBuffer::DownloadData([[maybe_ } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, false); + auto action = std::make_unique(id, queue.GetId(), false); Timer timer; timer.Start(); @@ -129,7 +129,7 @@ std::unique_ptr OpenClUnifiedBuffer::CopyData(const OpenCl } const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, false); + auto action = std::make_unique(id, queue.GetId(), false); Timer timer; timer.Start(); diff --git a/Source/ComputeEngine/OpenCl/OpenClEngine.cpp b/Source/ComputeEngine/OpenCl/OpenClEngine.cpp index b625056a..fa1cb33c 100644 --- a/Source/ComputeEngine/OpenCl/OpenClEngine.cpp +++ b/Source/ComputeEngine/OpenCl/OpenClEngine.cpp @@ -552,16 +552,23 @@ void 
OpenClEngine::SynchronizeQueue(const QueueId queueId) } m_Queues[queueId]->Synchronize(); + ClearQueueActions(queueId); } -void OpenClEngine::SynchronizeDevice() +void OpenClEngine::SynchronizeQueues() { for (auto& queue : m_Queues) { queue.second->Synchronize(); + ClearQueueActions(queue.first); } } +void OpenClEngine::SynchronizeDevice() +{ + SynchronizeQueues(); +} + std::vector OpenClEngine::GetPlatformInfo() const { const auto platforms = OpenClPlatform::GetAllPlatforms(); @@ -756,6 +763,19 @@ std::unique_ptr OpenClEngine::CreateUserBuffer(KernelArgument& arg return userBuffer; } +void OpenClEngine::ClearQueueActions(const QueueId id) +{ + EraseIf(m_ComputeActions, [id](const auto& pair) + { + return pair.second->GetQueueId() == id; + }); + + EraseIf(m_TransferActions, [id](const auto& pair) + { + return pair.second->GetQueueId() == id; + }); +} + #if defined(KTT_PROFILING_GPA) || defined(KTT_PROFILING_GPA_LEGACY) void OpenClEngine::InitializeGpa() diff --git a/Source/ComputeEngine/OpenCl/OpenClEngine.h b/Source/ComputeEngine/OpenCl/OpenClEngine.h index 33ae00ef..0e5724de 100644 --- a/Source/ComputeEngine/OpenCl/OpenClEngine.h +++ b/Source/ComputeEngine/OpenCl/OpenClEngine.h @@ -69,6 +69,7 @@ class OpenClEngine : public ComputeEngine QueueId GetDefaultQueue() const override; std::vector GetAllQueues() const override; void SynchronizeQueue(const QueueId queueId) override; + void SynchronizeQueues() override; void SynchronizeDevice() override; // Information retrieval methods @@ -113,6 +114,7 @@ class OpenClEngine : public ComputeEngine void SetKernelArgument(OpenClKernel& kernel, const KernelArgument& argument); std::unique_ptr CreateBuffer(KernelArgument& argument); std::unique_ptr CreateUserBuffer(KernelArgument& argument, ComputeBuffer buffer); + void ClearQueueActions(const QueueId id); #if defined(KTT_PROFILING_GPA) || defined(KTT_PROFILING_GPA_LEGACY) void InitializeGpa(); diff --git a/Source/ComputeEngine/OpenCl/OpenClKernel.cpp 
b/Source/ComputeEngine/OpenCl/OpenClKernel.cpp index 3f7c35ae..d918b33d 100644 --- a/Source/ComputeEngine/OpenCl/OpenClKernel.cpp +++ b/Source/ComputeEngine/OpenCl/OpenClKernel.cpp @@ -42,7 +42,7 @@ std::unique_ptr OpenClKernel::Launch(const OpenClCommandQue { const DimensionVector adjustedSize = AdjustGlobalSize(globalSize, localSize); const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, shared_from_this(), adjustedSize, localSize); + auto action = std::make_unique(id, queue.GetId(), shared_from_this(), adjustedSize, localSize); Logger::LogDebug("Launching kernel " + m_Name + " with compute action id " + std::to_string(id) + ", global thread size: " + adjustedSize.GetString() + ", local thread size: " + localSize.GetString()); diff --git a/Source/ComputeEngine/Vulkan/Actions/VulkanComputeAction.cpp b/Source/ComputeEngine/Vulkan/Actions/VulkanComputeAction.cpp index b53b0f7d..d1e2aa3c 100644 --- a/Source/ComputeEngine/Vulkan/Actions/VulkanComputeAction.cpp +++ b/Source/ComputeEngine/Vulkan/Actions/VulkanComputeAction.cpp @@ -14,10 +14,11 @@ namespace ktt { -VulkanComputeAction::VulkanComputeAction(const ComputeActionId id, const VulkanDevice& device, const VulkanCommandPool& commandPool, - VulkanQueryPool& queryPool, std::shared_ptr pipeline, const DimensionVector& globalSize, - const DimensionVector& localSize) : +VulkanComputeAction::VulkanComputeAction(const ComputeActionId id, const QueueId queueId, const VulkanDevice& device, + const VulkanCommandPool& commandPool, VulkanQueryPool& queryPool, std::shared_ptr pipeline, + const DimensionVector& globalSize, const DimensionVector& localSize) : m_Id(id), + m_QueueId(queueId), m_Pipeline(pipeline), m_QueryPool(queryPool), m_Overhead(0), @@ -57,6 +58,11 @@ ComputeActionId VulkanComputeAction::GetId() const return m_Id; } +QueueId VulkanComputeAction::GetQueueId() const +{ + return m_QueueId; +} + VulkanComputePipeline& VulkanComputeAction::GetPipeline() { return *m_Pipeline; diff --git 
a/Source/ComputeEngine/Vulkan/Actions/VulkanComputeAction.h b/Source/ComputeEngine/Vulkan/Actions/VulkanComputeAction.h index 33c89b9d..30318d70 100644 --- a/Source/ComputeEngine/Vulkan/Actions/VulkanComputeAction.h +++ b/Source/ComputeEngine/Vulkan/Actions/VulkanComputeAction.h @@ -20,15 +20,16 @@ class VulkanQueryPool; class VulkanComputeAction { public: - VulkanComputeAction(const ComputeActionId id, const VulkanDevice& device, const VulkanCommandPool& commandPool, - VulkanQueryPool& queryPool, std::shared_ptr pipeline, const DimensionVector& globalSize, - const DimensionVector& localSize); + VulkanComputeAction(const ComputeActionId id, const QueueId queueId, const VulkanDevice& device, + const VulkanCommandPool& commandPool, VulkanQueryPool& queryPool, std::shared_ptr pipeline, + const DimensionVector& globalSize, const DimensionVector& localSize); void IncreaseOverhead(const Nanoseconds overhead); void SetComputeId(const KernelComputeId& id); void WaitForFinish(); ComputeActionId GetId() const; + QueueId GetQueueId() const; VulkanComputePipeline& GetPipeline(); VkFence GetFence() const; VkCommandBuffer GetCommandBuffer() const; @@ -41,6 +42,7 @@ class VulkanComputeAction private: ComputeActionId m_Id; + QueueId m_QueueId; std::shared_ptr m_Pipeline; std::unique_ptr m_Fence; std::unique_ptr m_CommandBuffers; diff --git a/Source/ComputeEngine/Vulkan/Actions/VulkanTransferAction.cpp b/Source/ComputeEngine/Vulkan/Actions/VulkanTransferAction.cpp index 186003cc..4a64bf42 100644 --- a/Source/ComputeEngine/Vulkan/Actions/VulkanTransferAction.cpp +++ b/Source/ComputeEngine/Vulkan/Actions/VulkanTransferAction.cpp @@ -13,9 +13,10 @@ namespace ktt { -VulkanTransferAction::VulkanTransferAction(const TransferActionId id, const VulkanDevice* device, const VulkanCommandPool* commandPool, - VulkanQueryPool* queryPool, std::unique_ptr stagingBuffer) : +VulkanTransferAction::VulkanTransferAction(const TransferActionId id, const QueueId queueId, const VulkanDevice* device, + 
const VulkanCommandPool* commandPool, VulkanQueryPool* queryPool, std::unique_ptr stagingBuffer) : m_Id(id), + m_QueueId(queueId), m_StagingBuffer(std::move(stagingBuffer)), m_QueryPool(queryPool), m_Duration(InvalidDuration), @@ -64,6 +65,11 @@ TransferActionId VulkanTransferAction::GetId() const return m_Id; } +QueueId VulkanTransferAction::GetQueueId() const +{ + return m_QueueId; +} + VkFence VulkanTransferAction::GetFence() const { KttAssert(IsAsync(), "Only async actions contain valid fence"); diff --git a/Source/ComputeEngine/Vulkan/Actions/VulkanTransferAction.h b/Source/ComputeEngine/Vulkan/Actions/VulkanTransferAction.h index 34803586..bd940814 100644 --- a/Source/ComputeEngine/Vulkan/Actions/VulkanTransferAction.h +++ b/Source/ComputeEngine/Vulkan/Actions/VulkanTransferAction.h @@ -20,14 +20,16 @@ class VulkanQueryPool; class VulkanTransferAction { public: - VulkanTransferAction(const TransferActionId id, const VulkanDevice* device = nullptr, const VulkanCommandPool* commandPool = nullptr, - VulkanQueryPool* queryPool = nullptr, std::unique_ptr stagingBuffer = nullptr); + VulkanTransferAction(const TransferActionId id, const QueueId queueId, const VulkanDevice* device = nullptr, + const VulkanCommandPool* commandPool = nullptr, VulkanQueryPool* queryPool = nullptr, + std::unique_ptr stagingBuffer = nullptr); void SetDuration(const Nanoseconds duration); void IncreaseOverhead(const Nanoseconds overhead); void WaitForFinish(); TransferActionId GetId() const; + QueueId GetQueueId() const; VkFence GetFence() const; VkCommandBuffer GetCommandBuffer() const; uint32_t GetFirstQueryId() const; @@ -39,6 +41,7 @@ class VulkanTransferAction private: TransferActionId m_Id; + QueueId m_QueueId; std::unique_ptr m_Fence; std::unique_ptr m_CommandBuffers; std::unique_ptr m_StagingBuffer; diff --git a/Source/ComputeEngine/Vulkan/VulkanBuffer.cpp b/Source/ComputeEngine/Vulkan/VulkanBuffer.cpp index aa4144cb..07f2aac9 100644 --- 
a/Source/ComputeEngine/Vulkan/VulkanBuffer.cpp +++ b/Source/ComputeEngine/Vulkan/VulkanBuffer.cpp @@ -70,7 +70,7 @@ std::unique_ptr VulkanBuffer::UploadData(const void* sourc timer.Stop(); const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, InvalidQueueId); action->SetDuration(timer.GetElapsedTime()); return action; } @@ -95,7 +95,7 @@ std::unique_ptr VulkanBuffer::DownloadData(void* target, c timer.Stop(); const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id); + auto action = std::make_unique(id, InvalidQueueId); action->SetDuration(timer.GetElapsedTime()); return action; } @@ -104,7 +104,7 @@ std::unique_ptr VulkanBuffer::CopyData(const VulkanQueue& VulkanQueryPool& queryPool, const VulkanBuffer& source, const VkDeviceSize dataSize) { const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, &m_Device, &commandPool, &queryPool); + auto action = std::make_unique(id, queue.GetId(), &m_Device, &commandPool, &queryPool); return CopyDataInternal(queue, queryPool, source, dataSize, std::move(action)); } @@ -115,7 +115,7 @@ std::unique_ptr VulkanBuffer::CopyData(const VulkanQueue& const auto& source = *stagingSource; const auto id = m_Generator.GenerateId(); - auto action = std::make_unique(id, &m_Device, &commandPool, &queryPool, std::move(stagingSource)); + auto action = std::make_unique(id, queue.GetId(), &m_Device, &commandPool, &queryPool, std::move(stagingSource)); return CopyDataInternal(queue, queryPool, source, dataSize, std::move(action)); } diff --git a/Source/ComputeEngine/Vulkan/VulkanComputePipeline.cpp b/Source/ComputeEngine/Vulkan/VulkanComputePipeline.cpp index 2c7f9ab8..eaa1b936 100644 --- a/Source/ComputeEngine/Vulkan/VulkanComputePipeline.cpp +++ b/Source/ComputeEngine/Vulkan/VulkanComputePipeline.cpp @@ -129,8 +129,8 @@ std::unique_ptr VulkanComputePipeline::DispatchShader(const }; const auto id = m_Generator.GenerateId(); - auto action = 
std::make_unique(id, m_Device, commandPool, queryPool, shared_from_this(), globalSize, - m_LocalSize); + auto action = std::make_unique(id, queue.GetId(), m_Device, commandPool, queryPool, shared_from_this(), + globalSize, m_LocalSize); std::vector sets = m_DescriptorSets->GetSets(); VulkanPushConstant pushConstant(scalarArguments); VkCommandBuffer commandBuffer = action->GetCommandBuffer(); diff --git a/Source/ComputeEngine/Vulkan/VulkanEngine.cpp b/Source/ComputeEngine/Vulkan/VulkanEngine.cpp index a61651ee..f973c73f 100644 --- a/Source/ComputeEngine/Vulkan/VulkanEngine.cpp +++ b/Source/ComputeEngine/Vulkan/VulkanEngine.cpp @@ -329,16 +329,25 @@ void VulkanEngine::SynchronizeQueue(const QueueId queueId) } m_Queues[static_cast(queueId)]->WaitIdle(); + ClearQueueActions(queueId); } -void VulkanEngine::SynchronizeDevice() +void VulkanEngine::SynchronizeQueues() { for (auto& queue : m_Queues) { queue->WaitIdle(); + ClearQueueActions(queue->GetId()); } } +void VulkanEngine::SynchronizeDevice() +{ + m_Device->WaitIdle(); + m_ComputeActions.clear(); + m_TransferActions.clear(); +} + std::vector VulkanEngine::GetPlatformInfo() const { PlatformInfo info(0, "Vulkan"); @@ -523,6 +532,19 @@ std::unique_ptr VulkanEngine::CreateUserBuffer([[maybe_unused]] Ke throw KttException("Support for custom buffers is not yet available for Vulkan backend"); } +void VulkanEngine::ClearQueueActions(const QueueId id) +{ + EraseIf(m_ComputeActions, [id](const auto& pair) + { + return pair.second->GetQueueId() == id || pair.second->GetQueueId() == InvalidQueueId; + }); + + EraseIf(m_TransferActions, [id](const auto& pair) + { + return pair.second->GetQueueId() == id || pair.second->GetQueueId() == InvalidQueueId; + }); +} + std::vector VulkanEngine::GetScalarArguments(const std::vector& arguments) { std::vector result; diff --git a/Source/ComputeEngine/Vulkan/VulkanEngine.h b/Source/ComputeEngine/Vulkan/VulkanEngine.h index ae5282cb..3376296c 100644 --- 
a/Source/ComputeEngine/Vulkan/VulkanEngine.h +++ b/Source/ComputeEngine/Vulkan/VulkanEngine.h @@ -70,6 +70,7 @@ class VulkanEngine : public ComputeEngine QueueId GetDefaultQueue() const override; std::vector GetAllQueues() const override; void SynchronizeQueue(const QueueId queueId) override; + void SynchronizeQueues() override; void SynchronizeDevice() override; // Information retrieval methods @@ -112,6 +113,7 @@ class VulkanEngine : public ComputeEngine std::vector GetPipelineArguments(const std::vector& arguments); std::unique_ptr CreateBuffer(KernelArgument& argument); std::unique_ptr CreateUserBuffer(KernelArgument& argument, ComputeBuffer buffer); + void ClearQueueActions(const QueueId id); static std::vector GetScalarArguments(const std::vector& arguments); }; diff --git a/Source/KernelRunner/ComputeLayer.cpp b/Source/KernelRunner/ComputeLayer.cpp index 6c7dc05b..eb4a79e3 100644 --- a/Source/KernelRunner/ComputeLayer.cpp +++ b/Source/KernelRunner/ComputeLayer.cpp @@ -115,6 +115,11 @@ void ComputeLayer::SynchronizeQueue(const QueueId queue) m_ComputeEngine.SynchronizeQueue(queue); } +void ComputeLayer::SynchronizeQueues() +{ + m_ComputeEngine.SynchronizeQueues(); +} + void ComputeLayer::SynchronizeDevice() { m_ComputeEngine.SynchronizeDevice(); diff --git a/Source/KernelRunner/ComputeLayer.h b/Source/KernelRunner/ComputeLayer.h index d58d8bce..05d95896 100644 --- a/Source/KernelRunner/ComputeLayer.h +++ b/Source/KernelRunner/ComputeLayer.h @@ -37,6 +37,7 @@ class ComputeLayer : public ComputeInterface QueueId GetDefaultQueue() const override; std::vector GetAllQueues() const override; void SynchronizeQueue(const QueueId queue) override; + void SynchronizeQueues() override; void SynchronizeDevice() override; const DimensionVector& GetCurrentGlobalSize(const KernelDefinitionId id) const override; diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index adcd7abe..58588690 100644 --- a/Source/Python/PythonModule.cpp +++ 
b/Source/Python/PythonModule.cpp @@ -54,6 +54,7 @@ PYBIND11_MODULE(pyktt, module) .def("GetDefaultQueue", &ktt::ComputeInterface::GetDefaultQueue) .def("GetAllQueues", &ktt::ComputeInterface::GetAllQueues) .def("SynchronizeQueue", &ktt::ComputeInterface::SynchronizeQueue) + .def("SynchronizeQueues", &ktt::ComputeInterface::SynchronizeQueues) .def("SynchronizeDevice", &ktt::ComputeInterface::SynchronizeDevice) .def("GetCurrentGlobalSize", &ktt::ComputeInterface::GetCurrentGlobalSize, py::return_value_policy::reference) .def("GetCurrentLocalSize", &ktt::ComputeInterface::GetCurrentLocalSize, py::return_value_policy::reference) @@ -339,7 +340,11 @@ PYBIND11_MODULE(pyktt, module) ) .def("AddComputeQueue", &ktt::Tuner::AddComputeQueue) .def("RemoveComputeQueue", &ktt::Tuner::RemoveComputeQueue) - .def("Synchronize", &ktt::Tuner::Synchronize) + .def("WaitForComputeAction", &ktt::Tuner::WaitForComputeAction) + .def("WaitForTransferAction", &ktt::Tuner::WaitForTransferAction) + .def("SynchronizeQueue", &ktt::Tuner::SynchronizeQueue) + .def("SynchronizeQueues", &ktt::Tuner::SynchronizeQueues) + .def("SynchronizeDevice", &ktt::Tuner::SynchronizeDevice) .def("SetCompilerOptions", &ktt::Tuner::SetCompilerOptions) .def("SetGlobalSizeType", &ktt::Tuner::SetGlobalSizeType) .def("SetAutomaticGlobalSizeCorrection", &ktt::Tuner::SetAutomaticGlobalSizeCorrection) diff --git a/Source/Tuner.cpp b/Source/Tuner.cpp index 58640353..aaeb756e 100644 --- a/Source/Tuner.cpp +++ b/Source/Tuner.cpp @@ -583,7 +583,55 @@ void Tuner::RemoveComputeQueue(const QueueId id) } } -void Tuner::Synchronize() +void Tuner::WaitForComputeAction(const ComputeActionId id) +{ + try + { + m_Tuner->WaitForComputeAction(id); + } + catch (const KttException& exception) + { + TunerCore::Log(LoggingLevel::Error, exception.what()); + } +} + +void Tuner::WaitForTransferAction(const TransferActionId id) +{ + try + { + m_Tuner->WaitForTransferAction(id); + } + catch (const KttException& exception) + { + 
TunerCore::Log(LoggingLevel::Error, exception.what()); + } +} + +void Tuner::SynchronizeQueue(const QueueId id) +{ + try + { + m_Tuner->SynchronizeQueue(id); + } + catch (const KttException& exception) + { + TunerCore::Log(LoggingLevel::Error, exception.what()); + } +} + +void Tuner::SynchronizeQueues() +{ + try + { + m_Tuner->SynchronizeQueues(); + } + catch (const KttException& exception) + { + TunerCore::Log(LoggingLevel::Error, exception.what()); + } +} + +void Tuner::SynchronizeDevice() { try { @@ -595,6 +643,11 @@ void Tuner::Synchronize() } } +void Tuner::Synchronize() +{ + SynchronizeDevice(); +} + void Tuner::SetProfilingCounters(const std::vector& counters) { try diff --git a/Source/Tuner.h b/Source/Tuner.h index 0d6c8692..6d8ac084 100644 --- a/Source/Tuner.h +++ b/Source/Tuner.h @@ -626,10 +626,38 @@ class KTT_API Tuner */ void RemoveComputeQueue(const QueueId id); - /** @fn void Synchronize() + /** @fn void WaitForComputeAction(const ComputeActionId id) + * Blocks until the specified compute action is finished. + * @param id Id of compute action to wait for. + */ + void WaitForComputeAction(const ComputeActionId id); + + /** @fn void WaitForTransferAction(const TransferActionId id) + * Blocks until the specified buffer transfer action is finished. + * @param id Id of transfer action to wait for. + */ + void WaitForTransferAction(const TransferActionId id); + + /** @fn void SynchronizeQueue(const QueueId id) + * Blocks until all commands submitted to the specified KTT device queue are completed. + * @param id Id of queue which will be synchronized. + */ + void SynchronizeQueue(const QueueId id); + + /** @fn void SynchronizeQueues() * Blocks until all commands submitted to all KTT device queues are completed. */ - void Synchronize(); + void SynchronizeQueues(); + + /** @fn void SynchronizeDevice() + * Blocks until all commands submitted to KTT device are completed. 
+ */ + void SynchronizeDevice(); + + /** @fn void Synchronize() + * Blocks until all commands submitted to KTT device are completed. + */ + [[deprecated("Use SynchronizeDevice() or SynchronizeQueues() method instead.")]] void Synchronize(); /** @fn void SetProfilingCounters(const std::vector& counters) * Specifies profiling counters that will be collected during kernel profiling. Note that not all profiling counters are diff --git a/Source/TunerCore.cpp b/Source/TunerCore.cpp index 9766a9bd..206d3fc5 100644 --- a/Source/TunerCore.cpp +++ b/Source/TunerCore.cpp @@ -327,6 +327,26 @@ void TunerCore::RemoveComputeQueue(const QueueId id) m_ComputeEngine->RemoveComputeQueue(id); } +void TunerCore::WaitForComputeAction(const ComputeActionId id) +{ + m_ComputeEngine->WaitForComputeAction(id); +} + +void TunerCore::WaitForTransferAction(const TransferActionId id) +{ + m_ComputeEngine->WaitForTransferAction(id); +} + +void TunerCore::SynchronizeQueue(const QueueId queueId) +{ + m_ComputeEngine->SynchronizeQueue(queueId); +} + +void TunerCore::SynchronizeQueues() +{ + m_ComputeEngine->SynchronizeQueues(); +} + void TunerCore::SynchronizeDevice() { m_ComputeEngine->SynchronizeDevice(); diff --git a/Source/TunerCore.h b/Source/TunerCore.h index 91616077..6a23c290 100644 --- a/Source/TunerCore.h +++ b/Source/TunerCore.h @@ -87,6 +87,10 @@ class TunerCore // Compute engine QueueId AddComputeQueue(ComputeQueue queue); void RemoveComputeQueue(const QueueId id); + void WaitForComputeAction(const ComputeActionId id); + void WaitForTransferAction(const TransferActionId id); + void SynchronizeQueue(const QueueId queueId); + void SynchronizeQueues(); void SynchronizeDevice(); void SetProfilingCounters(const std::vector& counters); void SetCompilerOptions(const std::string& options); From 655051f41ca67bf3665c9d7b41c5c784342a4fa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 14 Dec 2021 12:25:25 +0100 Subject: [PATCH 51/63] * Split tuner and compute interface 
Python APIs into separate files --- Source/Python/PythonComputeInterface.cpp | 105 ++++++++ Source/Python/PythonModule.cpp | 324 +---------------------- Source/Python/PythonTuner.cpp | 244 +++++++++++++++++ 3 files changed, 353 insertions(+), 320 deletions(-) create mode 100644 Source/Python/PythonComputeInterface.cpp create mode 100644 Source/Python/PythonTuner.cpp diff --git a/Source/Python/PythonComputeInterface.cpp b/Source/Python/PythonComputeInterface.cpp new file mode 100644 index 00000000..b1f21230 --- /dev/null +++ b/Source/Python/PythonComputeInterface.cpp @@ -0,0 +1,105 @@ +#ifdef KTT_PYTHON + +#include +#include +#include +#include + +#include + +namespace py = pybind11; + +void InitializePythonComputeInterface(py::module_& module) +{ + py::class_(module, "ComputeInterface") + .def("RunKernel", py::overload_cast(&ktt::ComputeInterface::RunKernel)) + .def("RunKernel", py::overload_cast(&ktt::ComputeInterface::RunKernel)) + .def("RunKernelAsync", py::overload_cast(&ktt::ComputeInterface::RunKernelAsync)) + .def("RunKernelAsync", py::overload_cast(&ktt::ComputeInterface::RunKernelAsync)) + .def("WaitForComputeAction", &ktt::ComputeInterface::WaitForComputeAction) + .def("RunKernelWithProfiling", py::overload_cast(&ktt::ComputeInterface::RunKernelWithProfiling)) + .def("RunKernelWithProfiling", py::overload_cast(&ktt::ComputeInterface::RunKernelWithProfiling)) + .def("GetRemainingProfilingRuns", [](ktt::ComputeInterface& ci, const ktt::KernelDefinitionId id) { return ci.GetRemainingProfilingRuns(id); }) + .def("GetRemainingProfilingRuns", [](ktt::ComputeInterface& ci) { return ci.GetRemainingProfilingRuns(); }) + .def("GetDefaultQueue", &ktt::ComputeInterface::GetDefaultQueue) + .def("GetAllQueues", &ktt::ComputeInterface::GetAllQueues) + .def("SynchronizeQueue", &ktt::ComputeInterface::SynchronizeQueue) + .def("SynchronizeQueues", &ktt::ComputeInterface::SynchronizeQueues) + .def("SynchronizeDevice", &ktt::ComputeInterface::SynchronizeDevice) + 
.def("GetCurrentGlobalSize", &ktt::ComputeInterface::GetCurrentGlobalSize, py::return_value_policy::reference) + .def("GetCurrentLocalSize", &ktt::ComputeInterface::GetCurrentLocalSize, py::return_value_policy::reference) + .def("GetCurrentConfiguration", &ktt::ComputeInterface::GetCurrentConfiguration, py::return_value_policy::reference) + .def("GetRunMode", &ktt::ComputeInterface::GetRunMode) + .def("ChangeArguments", &ktt::ComputeInterface::ChangeArguments) + .def("SwapArguments", &ktt::ComputeInterface::SwapArguments) + .def("UpdateScalarArgumentChar", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int8_t data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentShort", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int16_t data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentInt", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int32_t data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentLong", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int64_t data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentFloat", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const float data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateScalarArgumentDouble", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const double data) { ci.UpdateScalarArgument(id, &data); }) + .def("UpdateLocalArgument", &ktt::ComputeInterface::UpdateLocalArgument) + .def("UploadBuffer", &ktt::ComputeInterface::UploadBuffer) + .def("UploadBufferAsync", &ktt::ComputeInterface::UploadBufferAsync) + .def + ( + "DownloadBuffer", + &ktt::ComputeInterface::DownloadBuffer, + py::arg("id"), + py::arg("destination"), + py::arg("dataSize") = 0 + ) + .def + ( + "DownloadBufferAsync", + &ktt::ComputeInterface::DownloadBufferAsync, + py::arg("id"), + py::arg("queue"), + py::arg("destination"), + py::arg("dataSize") = 0 + ) + .def + ( + "UpdateBuffer", + 
&ktt::ComputeInterface::UpdateBuffer, + py::arg("id"), + py::arg("data"), + py::arg("dataSize") = 0 + ) + .def + ( + "UpdateBufferAsync", + &ktt::ComputeInterface::UpdateBufferAsync, + py::arg("id"), + py::arg("queue"), + py::arg("data"), + py::arg("dataSize") = 0 + ) + .def + ( + "CopyBuffer", + &ktt::ComputeInterface::CopyBuffer, + py::arg("destination"), + py::arg("source"), + py::arg("dataSize") = 0 + ) + .def + ( + "CopyBufferAsync", + &ktt::ComputeInterface::CopyBufferAsync, + py::arg("destination"), + py::arg("source"), + py::arg("queue"), + py::arg("dataSize") = 0 + ) + .def("WaitForTransferAction", &ktt::ComputeInterface::WaitForTransferAction) + .def("ResizeBuffer", &ktt::ComputeInterface::ResizeBuffer) + .def("ClearBuffer", &ktt::ComputeInterface::ClearBuffer) + .def("HasBuffer", &ktt::ComputeInterface::HasBuffer) + .def("GetUnifiedMemoryBufferHandle", &ktt::ComputeInterface::GetUnifiedMemoryBufferHandle); +} + +#endif // KTT_PYTHON diff --git a/Source/Python/PythonModule.cpp b/Source/Python/PythonModule.cpp index 58588690..27cde8ac 100644 --- a/Source/Python/PythonModule.cpp +++ b/Source/Python/PythonModule.cpp @@ -1,9 +1,6 @@ #ifdef KTT_PYTHON -#include -#include #include -#include #include @@ -13,6 +10,8 @@ void InitializePythonEnums(py::module_& module); void InitializePythonDataHolders(py::module_& module); void InitializePythonSearchers(py::module_& module); void InitializePythonStopConditions(py::module_& module); +void InitializePythonComputeInterface(py::module_& module); +void InitializePythonTuner(py::module_& module); PYBIND11_MODULE(pyktt, module) { @@ -38,323 +37,8 @@ PYBIND11_MODULE(pyktt, module) py::register_exception(module, "KttException", PyExc_Exception); - py::class_(module, "ComputeInterface") - .def("RunKernel", py::overload_cast(&ktt::ComputeInterface::RunKernel)) - .def("RunKernel", py::overload_cast(&ktt::ComputeInterface::RunKernel)) - .def("RunKernelAsync", py::overload_cast(&ktt::ComputeInterface::RunKernelAsync)) - 
.def("RunKernelAsync", py::overload_cast(&ktt::ComputeInterface::RunKernelAsync)) - .def("WaitForComputeAction", &ktt::ComputeInterface::WaitForComputeAction) - .def("RunKernelWithProfiling", py::overload_cast(&ktt::ComputeInterface::RunKernelWithProfiling)) - .def("RunKernelWithProfiling", py::overload_cast(&ktt::ComputeInterface::RunKernelWithProfiling)) - .def("GetRemainingProfilingRuns", [](ktt::ComputeInterface& ci, const ktt::KernelDefinitionId id) { return ci.GetRemainingProfilingRuns(id); }) - .def("GetRemainingProfilingRuns", [](ktt::ComputeInterface& ci) { return ci.GetRemainingProfilingRuns(); }) - .def("GetDefaultQueue", &ktt::ComputeInterface::GetDefaultQueue) - .def("GetAllQueues", &ktt::ComputeInterface::GetAllQueues) - .def("SynchronizeQueue", &ktt::ComputeInterface::SynchronizeQueue) - .def("SynchronizeQueues", &ktt::ComputeInterface::SynchronizeQueues) - .def("SynchronizeDevice", &ktt::ComputeInterface::SynchronizeDevice) - .def("GetCurrentGlobalSize", &ktt::ComputeInterface::GetCurrentGlobalSize, py::return_value_policy::reference) - .def("GetCurrentLocalSize", &ktt::ComputeInterface::GetCurrentLocalSize, py::return_value_policy::reference) - .def("GetCurrentConfiguration", &ktt::ComputeInterface::GetCurrentConfiguration, py::return_value_policy::reference) - .def("GetRunMode", &ktt::ComputeInterface::GetRunMode) - .def("ChangeArguments", &ktt::ComputeInterface::ChangeArguments) - .def("SwapArguments", &ktt::ComputeInterface::SwapArguments) - .def("UpdateScalarArgumentChar", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int8_t data) { ci.UpdateScalarArgument(id, &data); }) - .def("UpdateScalarArgumentShort", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int16_t data) { ci.UpdateScalarArgument(id, &data); }) - .def("UpdateScalarArgumentInt", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const int32_t data) { ci.UpdateScalarArgument(id, &data); }) - .def("UpdateScalarArgumentLong", [](ktt::ComputeInterface& 
ci, const ktt::ArgumentId id, const int64_t data) { ci.UpdateScalarArgument(id, &data); }) - .def("UpdateScalarArgumentFloat", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const float data) { ci.UpdateScalarArgument(id, &data); }) - .def("UpdateScalarArgumentDouble", [](ktt::ComputeInterface& ci, const ktt::ArgumentId id, const double data) { ci.UpdateScalarArgument(id, &data); }) - .def("UpdateLocalArgument", &ktt::ComputeInterface::UpdateLocalArgument) - .def("UploadBuffer", &ktt::ComputeInterface::UploadBuffer) - .def("UploadBufferAsync", &ktt::ComputeInterface::UploadBufferAsync) - .def - ( - "DownloadBuffer", - &ktt::ComputeInterface::DownloadBuffer, - py::arg("id"), - py::arg("destination"), - py::arg("dataSize") = 0 - ) - .def - ( - "DownloadBufferAsync", - &ktt::ComputeInterface::DownloadBufferAsync, - py::arg("id"), - py::arg("queue"), - py::arg("destination"), - py::arg("dataSize") = 0 - ) - .def - ( - "UpdateBuffer", - &ktt::ComputeInterface::UpdateBuffer, - py::arg("id"), - py::arg("data"), - py::arg("dataSize") = 0 - ) - .def - ( - "UpdateBufferAsync", - &ktt::ComputeInterface::UpdateBufferAsync, - py::arg("id"), - py::arg("queue"), - py::arg("data"), - py::arg("dataSize") = 0 - ) - .def - ( - "CopyBuffer", - &ktt::ComputeInterface::CopyBuffer, - py::arg("destination"), - py::arg("source"), - py::arg("dataSize") = 0 - ) - .def - ( - "CopyBufferAsync", - &ktt::ComputeInterface::CopyBufferAsync, - py::arg("destination"), - py::arg("source"), - py::arg("queue"), - py::arg("dataSize") = 0 - ) - .def("WaitForTransferAction", &ktt::ComputeInterface::WaitForTransferAction) - .def("ResizeBuffer", &ktt::ComputeInterface::ResizeBuffer) - .def("ClearBuffer", &ktt::ComputeInterface::ClearBuffer) - .def("HasBuffer", &ktt::ComputeInterface::HasBuffer) - .def("GetUnifiedMemoryBufferHandle", &ktt::ComputeInterface::GetUnifiedMemoryBufferHandle); - - py::class_(module, "Tuner") - .def(py::init()) - .def(py::init()) - .def - ( - "AddKernelDefinition", - 
&ktt::Tuner::AddKernelDefinition, - py::arg("name"), - py::arg("source"), - py::arg("globalSize"), - py::arg("localSize"), - py::arg("typeNames") = std::vector{} - ) - .def - ( - "AddKernelDefinitionFromFile", - &ktt::Tuner::AddKernelDefinitionFromFile, - py::arg("name"), - py::arg("filePath"), - py::arg("globalSize"), - py::arg("localSize"), - py::arg("typeNames") = std::vector{} - ) - .def - ( - "GetKernelDefinitionId", - &ktt::Tuner::GetKernelDefinitionId, - py::arg("name"), - py::arg("typeNames") = std::vector{} - ) - .def("RemoveKernelDefinition", &ktt::Tuner::RemoveKernelDefinition) - .def("SetArguments", &ktt::Tuner::SetArguments) - .def("CreateSimpleKernel", &ktt::Tuner::CreateSimpleKernel) - .def - ( - "CreateCompositeKernel", - [](ktt::Tuner& tuner, const std::string& name, const std::vector& definitionIds, - std::function launcher) - { - ktt::KernelLauncher actualLauncher = [launcher](ktt::ComputeInterface& interface) { launcher(&interface); }; - return tuner.CreateCompositeKernel(name, definitionIds, actualLauncher); - }, - py::arg("name"), - py::arg("definitionIds"), - py::arg("launcher") = static_cast>(nullptr) - ) - .def("RemoveKernel", &ktt::Tuner::RemoveKernel) - .def - ( - "SetLauncher", - [](ktt::Tuner& tuner, const ktt::KernelId id, std::function launcher) - { - ktt::KernelLauncher actualLauncher = [launcher](ktt::ComputeInterface& interface) { launcher(&interface); }; - tuner.SetLauncher(id, actualLauncher); - } - ) - .def - ( - "AddParameter", - py::overload_cast&, const std::string&>(&ktt::Tuner::AddParameter), - py::arg("id"), - py::arg("name"), - py::arg("values"), - py::arg("group") = std::string() - ) - .def - ( - "AddParameter", - py::overload_cast&, const std::string&>(&ktt::Tuner::AddParameter), - py::arg("id"), - py::arg("name"), - py::arg("values"), - py::arg("group") = std::string() - ) - .def("AddThreadModifier", py::overload_cast&, const ktt::ModifierType, - const ktt::ModifierDimension, const std::vector&, 
ktt::ModifierFunction>(&ktt::Tuner::AddThreadModifier)) - .def("AddThreadModifier", py::overload_cast&, const ktt::ModifierType, - const ktt::ModifierDimension, const std::string&, const ktt::ModifierAction>(&ktt::Tuner::AddThreadModifier)) - .def("AddConstraint", &ktt::Tuner::AddConstraint) - .def("SetProfiledDefinitions", &ktt::Tuner::SetProfiledDefinitions) - .def("AddArgumentVectorChar", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorShort", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorInt", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorLong", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorFloat", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorDouble", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorChar", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, - const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorShort", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, - const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorInt", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, - const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorLong", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, - const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorFloat", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, - const 
ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentVectorDouble", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, - const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) - .def("AddArgumentScalarChar", &ktt::Tuner::AddArgumentScalar) - .def("AddArgumentScalarShort", &ktt::Tuner::AddArgumentScalar) - .def("AddArgumentScalarInt", &ktt::Tuner::AddArgumentScalar) - .def("AddArgumentScalarLong", &ktt::Tuner::AddArgumentScalar) - .def("AddArgumentScalarFloat", &ktt::Tuner::AddArgumentScalar) - .def("AddArgumentScalarDouble", &ktt::Tuner::AddArgumentScalar) - .def("AddArgumentLocalChar", &ktt::Tuner::AddArgumentLocal) - .def("AddArgumentLocalShort", &ktt::Tuner::AddArgumentLocal) - .def("AddArgumentLocalInt", &ktt::Tuner::AddArgumentLocal) - .def("AddArgumentLocalLong", &ktt::Tuner::AddArgumentLocal) - .def("AddArgumentLocalFloat", &ktt::Tuner::AddArgumentLocal) - .def("AddArgumentLocalDouble", &ktt::Tuner::AddArgumentLocal) - .def - ( - "AddArgumentSymbolChar", - &ktt::Tuner::AddArgumentSymbol, - py::arg("data"), - py::arg("symbolName") = std::string() - ) - .def - ( - "AddArgumentSymbolShort", - &ktt::Tuner::AddArgumentSymbol, - py::arg("data"), - py::arg("symbolName") = std::string() - ) - .def - ( - "AddArgumentSymbolInt", - &ktt::Tuner::AddArgumentSymbol, - py::arg("data"), - py::arg("symbolName") = std::string() - ) - .def - ( - "AddArgumentSymbolLong", - &ktt::Tuner::AddArgumentSymbol, - py::arg("data"), - py::arg("symbolName") = std::string() - ) - .def - ( - "AddArgumentSymbolFloat", - &ktt::Tuner::AddArgumentSymbol, - py::arg("data"), - py::arg("symbolName") = std::string() - ) - .def - ( - "AddArgumentSymbolDouble", - &ktt::Tuner::AddArgumentSymbol, - py::arg("data"), - py::arg("symbolName") = std::string() - ) - .def("RemoveArgument", &ktt::Tuner::RemoveArgument) - .def("SetReadOnlyArgumentCache", &ktt::Tuner::SetReadOnlyArgumentCache) - .def("Run", 
&ktt::Tuner::Run) - .def("SetProfiling", &ktt::Tuner::SetProfiling) - .def("SetProfilingCounters", &ktt::Tuner::SetProfilingCounters) - .def("SetValidationMethod", &ktt::Tuner::SetValidationMethod) - .def("SetValidationMode", &ktt::Tuner::SetValidationMode) - .def("SetValidationRange", &ktt::Tuner::SetValidationRange) - .def("SetValueComparator", &ktt::Tuner::SetValueComparator) - .def("SetReferenceComputation", &ktt::Tuner::SetReferenceComputation) - .def("SetReferenceKernel", &ktt::Tuner::SetReferenceKernel) - .def("Tune", py::overload_cast(&ktt::Tuner::Tune), py::call_guard()) - .def("Tune", py::overload_cast>(&ktt::Tuner::Tune), py::call_guard()) - .def - ( - "TuneIteration", - &ktt::Tuner::TuneIteration, - py::call_guard(), - py::arg("id"), - py::arg("output"), - py::arg("recomputeReference") = false - ) - .def - ( - "SimulateKernelTuning", - &ktt::Tuner::SimulateKernelTuning, - py::call_guard(), - py::arg("id"), - py::arg("results"), - py::arg("iterations") = 0 - ) - .def("SetSearcher", &ktt::Tuner::SetSearcher) - .def("ClearData", &ktt::Tuner::ClearData) - .def("GetBestConfiguration", &ktt::Tuner::GetBestConfiguration) - .def("CreateConfiguration", &ktt::Tuner::CreateConfiguration) - .def("GetKernelSource", &ktt::Tuner::GetKernelSource) - .def("GetKernelDefinitionSource", &ktt::Tuner::GetKernelDefinitionSource) - .def_static("SetTimeUnit", &ktt::Tuner::SetTimeUnit) - .def - ( - "SaveResults", - &ktt::Tuner::SaveResults, - py::arg("results"), - py::arg("filePath"), - py::arg("format"), - py::arg("data") = ktt::UserData{} - ) - .def("LoadResults", [](ktt::Tuner& tuner, const std::string& filePath, const ktt::OutputFormat format) { return tuner.LoadResults(filePath, format); }) - .def - ( - "LoadResultsWithData", - [](ktt::Tuner& tuner, const std::string& filePath, const ktt::OutputFormat format) - { - ktt::UserData data; - auto results = tuner.LoadResults(filePath, format, data); - return std::make_pair(results, data); - } - ) - .def("AddComputeQueue", 
&ktt::Tuner::AddComputeQueue) - .def("RemoveComputeQueue", &ktt::Tuner::RemoveComputeQueue) - .def("WaitForComputeAction", &ktt::Tuner::WaitForComputeAction) - .def("WaitForTransferAction", &ktt::Tuner::WaitForTransferAction) - .def("SynchronizeQueue", &ktt::Tuner::SynchronizeQueue) - .def("SynchronizeQueues", &ktt::Tuner::SynchronizeQueues) - .def("SynchronizeDevice", &ktt::Tuner::SynchronizeDevice) - .def("SetCompilerOptions", &ktt::Tuner::SetCompilerOptions) - .def("SetGlobalSizeType", &ktt::Tuner::SetGlobalSizeType) - .def("SetAutomaticGlobalSizeCorrection", &ktt::Tuner::SetAutomaticGlobalSizeCorrection) - .def("SetKernelCacheCapacity", &ktt::Tuner::SetKernelCacheCapacity) - .def("GetPlatformInfo", &ktt::Tuner::GetPlatformInfo) - .def("GetDeviceInfo", &ktt::Tuner::GetDeviceInfo) - .def("GetCurrentDeviceInfo", &ktt::Tuner::GetCurrentDeviceInfo) - .def_static("SetLoggingLevel", &ktt::Tuner::SetLoggingLevel) - .def_static("SetLoggingTarget", py::overload_cast(&ktt::Tuner::SetLoggingTarget)) - .def_static("SetLoggingTarget", py::overload_cast(&ktt::Tuner::SetLoggingTarget)); + InitializePythonComputeInterface(module); + InitializePythonTuner(module); } #endif // KTT_PYTHON diff --git a/Source/Python/PythonTuner.cpp b/Source/Python/PythonTuner.cpp new file mode 100644 index 00000000..4f30f09f --- /dev/null +++ b/Source/Python/PythonTuner.cpp @@ -0,0 +1,244 @@ +#ifdef KTT_PYTHON + +#include +#include +#include +#include +#include + +#include + +namespace py = pybind11; + +void InitializePythonTuner(py::module_& module) +{ + py::class_(module, "Tuner") + .def(py::init()) + .def(py::init()) + .def + ( + "AddKernelDefinition", + &ktt::Tuner::AddKernelDefinition, + py::arg("name"), + py::arg("source"), + py::arg("globalSize"), + py::arg("localSize"), + py::arg("typeNames") = std::vector{} + ) + .def + ( + "AddKernelDefinitionFromFile", + &ktt::Tuner::AddKernelDefinitionFromFile, + py::arg("name"), + py::arg("filePath"), + py::arg("globalSize"), + py::arg("localSize"), + 
py::arg("typeNames") = std::vector{} + ) + .def + ( + "GetKernelDefinitionId", + &ktt::Tuner::GetKernelDefinitionId, + py::arg("name"), + py::arg("typeNames") = std::vector{} + ) + .def("RemoveKernelDefinition", &ktt::Tuner::RemoveKernelDefinition) + .def("SetArguments", &ktt::Tuner::SetArguments) + .def("CreateSimpleKernel", &ktt::Tuner::CreateSimpleKernel) + .def + ( + "CreateCompositeKernel", + [](ktt::Tuner& tuner, const std::string& name, const std::vector& definitionIds, + std::function launcher) + { + ktt::KernelLauncher actualLauncher = [launcher](ktt::ComputeInterface& interface) { launcher(&interface); }; + return tuner.CreateCompositeKernel(name, definitionIds, actualLauncher); + }, + py::arg("name"), + py::arg("definitionIds"), + py::arg("launcher") = static_cast>(nullptr) + ) + .def("RemoveKernel", &ktt::Tuner::RemoveKernel) + .def + ( + "SetLauncher", + [](ktt::Tuner& tuner, const ktt::KernelId id, std::function launcher) + { + ktt::KernelLauncher actualLauncher = [launcher](ktt::ComputeInterface& interface) { launcher(&interface); }; + tuner.SetLauncher(id, actualLauncher); + } + ) + .def + ( + "AddParameter", + py::overload_cast&, const std::string&>(&ktt::Tuner::AddParameter), + py::arg("id"), + py::arg("name"), + py::arg("values"), + py::arg("group") = std::string() + ) + .def + ( + "AddParameter", + py::overload_cast&, const std::string&>(&ktt::Tuner::AddParameter), + py::arg("id"), + py::arg("name"), + py::arg("values"), + py::arg("group") = std::string() + ) + .def("AddThreadModifier", py::overload_cast&, const ktt::ModifierType, + const ktt::ModifierDimension, const std::vector&, ktt::ModifierFunction>(&ktt::Tuner::AddThreadModifier)) + .def("AddThreadModifier", py::overload_cast&, const ktt::ModifierType, + const ktt::ModifierDimension, const std::string&, const ktt::ModifierAction>(&ktt::Tuner::AddThreadModifier)) + .def("AddConstraint", &ktt::Tuner::AddConstraint) + .def("SetProfiledDefinitions", &ktt::Tuner::SetProfiledDefinitions) + 
.def("AddArgumentVectorChar", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorShort", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorInt", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorLong", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorFloat", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorDouble", py::overload_cast&, const ktt::ArgumentAccessType>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorChar", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorShort", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorInt", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorLong", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorFloat", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentVectorDouble", py::overload_cast&, const ktt::ArgumentAccessType, const ktt::ArgumentMemoryLocation, + const ktt::ArgumentManagementType, const bool>(&ktt::Tuner::AddArgumentVector)) + .def("AddArgumentScalarChar", &ktt::Tuner::AddArgumentScalar) + .def("AddArgumentScalarShort", 
&ktt::Tuner::AddArgumentScalar) + .def("AddArgumentScalarInt", &ktt::Tuner::AddArgumentScalar) + .def("AddArgumentScalarLong", &ktt::Tuner::AddArgumentScalar) + .def("AddArgumentScalarFloat", &ktt::Tuner::AddArgumentScalar) + .def("AddArgumentScalarDouble", &ktt::Tuner::AddArgumentScalar) + .def("AddArgumentLocalChar", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalShort", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalInt", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalLong", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalFloat", &ktt::Tuner::AddArgumentLocal) + .def("AddArgumentLocalDouble", &ktt::Tuner::AddArgumentLocal) + .def + ( + "AddArgumentSymbolChar", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolShort", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolInt", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolLong", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolFloat", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def + ( + "AddArgumentSymbolDouble", + &ktt::Tuner::AddArgumentSymbol, + py::arg("data"), + py::arg("symbolName") = std::string() + ) + .def("RemoveArgument", &ktt::Tuner::RemoveArgument) + .def("SetReadOnlyArgumentCache", &ktt::Tuner::SetReadOnlyArgumentCache) + .def("Run", &ktt::Tuner::Run) + .def("SetProfiling", &ktt::Tuner::SetProfiling) + .def("SetProfilingCounters", &ktt::Tuner::SetProfilingCounters) + .def("SetValidationMethod", &ktt::Tuner::SetValidationMethod) + .def("SetValidationMode", &ktt::Tuner::SetValidationMode) + .def("SetValidationRange", &ktt::Tuner::SetValidationRange) + .def("SetValueComparator", 
&ktt::Tuner::SetValueComparator) + .def("SetReferenceComputation", &ktt::Tuner::SetReferenceComputation) + .def("SetReferenceKernel", &ktt::Tuner::SetReferenceKernel) + .def("Tune", py::overload_cast(&ktt::Tuner::Tune), py::call_guard()) + .def("Tune", py::overload_cast>(&ktt::Tuner::Tune), py::call_guard()) + .def + ( + "TuneIteration", + &ktt::Tuner::TuneIteration, + py::call_guard(), + py::arg("id"), + py::arg("output"), + py::arg("recomputeReference") = false + ) + .def + ( + "SimulateKernelTuning", + &ktt::Tuner::SimulateKernelTuning, + py::call_guard(), + py::arg("id"), + py::arg("results"), + py::arg("iterations") = 0 + ) + .def("SetSearcher", &ktt::Tuner::SetSearcher) + .def("ClearData", &ktt::Tuner::ClearData) + .def("GetBestConfiguration", &ktt::Tuner::GetBestConfiguration) + .def("CreateConfiguration", &ktt::Tuner::CreateConfiguration) + .def("GetKernelSource", &ktt::Tuner::GetKernelSource) + .def("GetKernelDefinitionSource", &ktt::Tuner::GetKernelDefinitionSource) + .def_static("SetTimeUnit", &ktt::Tuner::SetTimeUnit) + .def + ( + "SaveResults", + &ktt::Tuner::SaveResults, + py::arg("results"), + py::arg("filePath"), + py::arg("format"), + py::arg("data") = ktt::UserData{} + ) + .def("LoadResults", [](ktt::Tuner& tuner, const std::string& filePath, const ktt::OutputFormat format) { return tuner.LoadResults(filePath, format); }) + .def + ( + "LoadResultsWithData", + [](ktt::Tuner& tuner, const std::string& filePath, const ktt::OutputFormat format) + { + ktt::UserData data; + auto results = tuner.LoadResults(filePath, format, data); + return std::make_pair(results, data); + } + ) + .def("AddComputeQueue", &ktt::Tuner::AddComputeQueue) + .def("RemoveComputeQueue", &ktt::Tuner::RemoveComputeQueue) + .def("WaitForComputeAction", &ktt::Tuner::WaitForComputeAction) + .def("WaitForTransferAction", &ktt::Tuner::WaitForTransferAction) + .def("SynchronizeQueue", &ktt::Tuner::SynchronizeQueue) + .def("SynchronizeQueues", &ktt::Tuner::SynchronizeQueues) + 
.def("SynchronizeDevice", &ktt::Tuner::SynchronizeDevice) + .def("SetCompilerOptions", &ktt::Tuner::SetCompilerOptions) + .def("SetGlobalSizeType", &ktt::Tuner::SetGlobalSizeType) + .def("SetAutomaticGlobalSizeCorrection", &ktt::Tuner::SetAutomaticGlobalSizeCorrection) + .def("SetKernelCacheCapacity", &ktt::Tuner::SetKernelCacheCapacity) + .def("GetPlatformInfo", &ktt::Tuner::GetPlatformInfo) + .def("GetDeviceInfo", &ktt::Tuner::GetDeviceInfo) + .def("GetCurrentDeviceInfo", &ktt::Tuner::GetCurrentDeviceInfo) + .def_static("SetLoggingLevel", &ktt::Tuner::SetLoggingLevel) + .def_static("SetLoggingTarget", py::overload_cast(&ktt::Tuner::SetLoggingTarget)) + .def_static("SetLoggingTarget", py::overload_cast(&ktt::Tuner::SetLoggingTarget)); +} + +#endif // KTT_PYTHON From 858a37ba609049c9b8d5cb9f27efbc8e3f49f77b Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Thu, 16 Dec 2021 15:38:19 +0100 Subject: [PATCH 52/63] * Buffer output descriptors in Python now work with NumPy arrays --- Readme.md | 2 +- Source/Python/PythonDataHolders.cpp | 18 ++++++++++++++++-- Tutorials/02KernelRunning/KernelRunningCuda.py | 16 +++++++--------- .../07PythonInterfaces/PythonInterfacesCuda.py | 10 +++++----- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/Readme.md b/Readme.md index f2ed57b9..82d32323 100644 --- a/Readme.md +++ b/Readme.md @@ -66,7 +66,7 @@ systems are Linux and Windows. 
- OpenCL, CUDA or Vulkan library, supported SDKs are AMD OCL SDK, Intel SDK for OpenCL, NVIDIA CUDA Toolkit and Vulkan SDK - Command line build tool [Premake 5](https://premake.github.io/download) - - (Optional) Python 3 for Python bindings support + - (Optional) Python 3 with NumPy for Python bindings support * Build under Linux (inside KTT root folder): - ensure that path to vendor SDK is correctly set in the environment variables diff --git a/Source/Python/PythonDataHolders.cpp b/Source/Python/PythonDataHolders.cpp index 6578354e..da6a20f2 100644 --- a/Source/Python/PythonDataHolders.cpp +++ b/Source/Python/PythonDataHolders.cpp @@ -106,8 +106,22 @@ void InitializePythonDataHolders(py::module_& module) .def("__repr__", &ktt::PlatformInfo::GetString); py::class_(module, "BufferOutputDescriptor") - .def(py::init()) - .def(py::init()) + .def + ( + py::init([](const ktt::ArgumentId id, py::buffer buffer) + { + void* outputDestination = buffer.request(true).ptr; + return ktt::BufferOutputDescriptor(id, outputDestination); + }) + ) + .def + ( + py::init([](const ktt::ArgumentId id, py::buffer buffer, const size_t size) + { + void* outputDestination = buffer.request(true).ptr; + return ktt::BufferOutputDescriptor(id, outputDestination, size); + }) + ) .def("GetArgumentId", &ktt::BufferOutputDescriptor::GetArgumentId) .def("GetOutputDestination", &ktt::BufferOutputDescriptor::GetOutputDestination) .def("GetOutputSize", &ktt::BufferOutputDescriptor::GetOutputSize); diff --git a/Tutorials/02KernelRunning/KernelRunningCuda.py b/Tutorials/02KernelRunning/KernelRunningCuda.py index 3c6d2140..fd84fcff 100644 --- a/Tutorials/02KernelRunning/KernelRunningCuda.py +++ b/Tutorials/02KernelRunning/KernelRunningCuda.py @@ -1,5 +1,5 @@ -import ctypes import sys +import numpy as np import pyktt as ktt def main(): @@ -23,9 +23,10 @@ def main(): gridSize = int(numberOfElements / blockDimensions.GetSizeX()) gridDimensions = ktt.DimensionVector(gridSize) - a = [i * 1.0 for i in 
range(numberOfElements)] - b = [i * 1.0 for i in range(numberOfElements)] - result = [0.0 for i in range(numberOfElements)] + # Use NumPy arrays to store data + a = np.arange(1.0, numberOfElements, dtype=np.single) + b = np.arange(1.0, numberOfElements, dtype=np.single) + result = np.zeros(numberOfElements, dtype=np.single) # Create new tuner for the specified device, tuner uses CUDA as compute API. Platform index is ignored when using CUDA. tuner = ktt.Tuner(0, deviceIndex, ktt.ComputeApi.CUDA) @@ -59,16 +60,13 @@ def main(): # argument and memory location where the argument data will be stored. Optionally, it can also include number of bytes to be retrieved, # if only a part of the argument is needed. Note that the memory location size needs to be equal or greater than the retrieved # argument size. - array = (ctypes.c_float * numberOfElements)() - ctypes.pythonapi.PyCapsule_New.restype = ctypes.py_object - arrayCapsule = ctypes.pythonapi.PyCapsule_New(array) - tuner.Run(kernel, ktt.KernelConfiguration(), [ktt.BufferOutputDescriptor(resultId, arrayCapsule)]) + tuner.Run(kernel, ktt.KernelConfiguration(), [ktt.BufferOutputDescriptor(resultId, result.data)]) # Print first ten elements from the result to check they were computed correctly. print("Printing the first 10 elements from result: ") for i in range(10): - print(array[i]) + print(result[i]) if __name__ == "__main__": main() diff --git a/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py b/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py index 33663c9b..2d9bb122 100644 --- a/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py +++ b/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py @@ -1,5 +1,5 @@ -import ctypes import sys +import numpy as np import pyktt as ktt # Implement custom stop condition in Python. The interface is the same as in C++. 
Note that it is necessary to call @@ -60,10 +60,10 @@ def main(): gridDimensions = ktt.DimensionVector(numberOfElements) blockDimensions = ktt.DimensionVector() - a = [i * 1.0 for i in range(numberOfElements)] - b = [i * 1.0 for i in range(numberOfElements)] - result = [0.0 for i in range(numberOfElements)] - scalarValue = 3.0 + a = np.arange(1.0, numberOfElements, dtype=np.single) + b = np.arange(1.0, numberOfElements, dtype=np.single) + result = np.zeros(numberOfElements, dtype=np.single) + scalarValue = np.single(3.0) tuner = ktt.Tuner(0, deviceIndex, ktt.ComputeApi.CUDA) From 25bae91ad22487a7bce39170b33fffa0362547b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 11 Jan 2022 15:53:31 +0100 Subject: [PATCH 53/63] * Added Numpy usage to all Python tutorials and examples --- Examples/CoulombSum2d/CoulombSum2d.py | 16 ++++++++-------- Examples/Reduction/Reduction.py | 11 ++++------- Tutorials/02KernelRunning/KernelRunningCuda.py | 6 +++--- Tutorials/03KernelTuning/KernelTuningCuda.py | 9 +++++---- .../07PythonInterfaces/PythonInterfacesCuda.py | 6 +++--- 5 files changed, 23 insertions(+), 25 deletions(-) diff --git a/Examples/CoulombSum2d/CoulombSum2d.py b/Examples/CoulombSum2d/CoulombSum2d.py index c5d9d1d0..4b292805 100644 --- a/Examples/CoulombSum2d/CoulombSum2d.py +++ b/Examples/CoulombSum2d/CoulombSum2d.py @@ -1,5 +1,5 @@ -import random import sys +import numpy as np import pyktt as ktt def main(): @@ -34,13 +34,13 @@ def main(): # Declare data variables. 
gridSpacing = 0.5 - random.seed(17) - atomInfo = [0.0 for i in range(4 * numberOfAtoms)] - atomInfoX = [random.uniform(0.0, 40.0) for i in range(numberOfAtoms)] - atomInfoY = [random.uniform(0.0, 40.0) for i in range(numberOfAtoms)] - atomInfoZ = [random.uniform(0.0, 40.0) for i in range(numberOfAtoms)] - atomInfoW = [random.uniform(0.0, 1.0) for i in range(numberOfAtoms)] - energyGrid = [0.0 for i in range(numberOfGridPoints)] + rng = np.random.default_rng() + atomInfo = np.zeros(4 * numberOfAtoms, dtype = np.single) + atomInfoX = 40.0 * rng.random(numberOfAtoms, dtype = np.single) + atomInfoY = 40.0 * rng.random(numberOfAtoms, dtype = np.single) + atomInfoZ = 40.0 * rng.random(numberOfAtoms, dtype = np.single) + atomInfoW = rng.random(numberOfAtoms, dtype = np.single) + energyGrid = np.zeros(numberOfGridPoints, dtype = np.single) for i in range(numberOfAtoms): atomInfo[4 * i] = atomInfoX[i] diff --git a/Examples/Reduction/Reduction.py b/Examples/Reduction/Reduction.py index ee8b3df1..5991c29c 100644 --- a/Examples/Reduction/Reduction.py +++ b/Examples/Reduction/Reduction.py @@ -1,6 +1,6 @@ import ctypes -import random import sys +import numpy as np import pyktt as ktt def reference(buffer, src): @@ -83,12 +83,9 @@ def main(): n = 64 * 1024 * 1024 nAlloc = int((n + 16 - 1) / 16) * 16 # pad to the longest vector size - src = [0.0 for i in range(nAlloc)] - dst = [0.0 for i in range(nAlloc)] - random.seed(17) - - for i in range(n): - src[i] = random.uniform(0.0, 1000.0) + rng = np.random.default_rng() + src = 1000.0 * rng.random(nAlloc, dtype = np.single) + dst = np.zeros(nAlloc, dtype = np.single) tuner = ktt.Tuner(platformIndex, deviceIndex, ktt.ComputeApi.CUDA) tuner.SetGlobalSizeType(ktt.GlobalSizeType.OpenCL) diff --git a/Tutorials/02KernelRunning/KernelRunningCuda.py b/Tutorials/02KernelRunning/KernelRunningCuda.py index fd84fcff..80d3e70c 100644 --- a/Tutorials/02KernelRunning/KernelRunningCuda.py +++ b/Tutorials/02KernelRunning/KernelRunningCuda.py @@ -24,9 
+24,9 @@ def main(): gridDimensions = ktt.DimensionVector(gridSize) # Use NumPy arrays to store data - a = np.arange(1.0, numberOfElements, dtype=np.single) - b = np.arange(1.0, numberOfElements, dtype=np.single) - result = np.zeros(numberOfElements, dtype=np.single) + a = np.arange(1.0, numberOfElements + 1, dtype = np.single) + b = np.arange(1.0, numberOfElements + 1, dtype = np.single) + result = np.zeros(numberOfElements, dtype = np.single) # Create new tuner for the specified device, tuner uses CUDA as compute API. Platform index is ignored when using CUDA. tuner = ktt.Tuner(0, deviceIndex, ktt.ComputeApi.CUDA) diff --git a/Tutorials/03KernelTuning/KernelTuningCuda.py b/Tutorials/03KernelTuning/KernelTuningCuda.py index 1408a132..f93f2431 100644 --- a/Tutorials/03KernelTuning/KernelTuningCuda.py +++ b/Tutorials/03KernelTuning/KernelTuningCuda.py @@ -1,5 +1,6 @@ import ctypes import sys +import numpy as np import pyktt as ktt def computeReference(a, b, scalar, buffer): @@ -27,9 +28,9 @@ def main(): # Block size is initialized to one in this case, it will be controlled with tuning parameter which is added later. blockDimensions = ktt.DimensionVector() - a = [i * 1.0 for i in range(numberOfElements)] - b = [i * 1.0 for i in range(numberOfElements)] - result = [0.0 for i in range(numberOfElements)] + a = np.arange(1.0, numberOfElements + 1, dtype = np.single) + b = np.arange(1.0, numberOfElements + 1, dtype = np.single) + result = np.zeros(numberOfElements, dtype = np.single) scalarValue = 3.0 tuner = ktt.Tuner(0, deviceIndex, ktt.ComputeApi.CUDA) @@ -43,7 +44,7 @@ def main(): tuner.SetArguments(definition, [aId, bId, resultId, scalarId]) kernel = tuner.CreateSimpleKernel("Addition", definition) - + # Set reference computation for the result argument which will be used by the tuner to automatically validate kernel output. # The computation function receives buffer on input, where the reference result should be saved. 
The size of buffer corresponds # to the validated argument size. diff --git a/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py b/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py index 2d9bb122..6bc9e32f 100644 --- a/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py +++ b/Tutorials/07PythonInterfaces/PythonInterfacesCuda.py @@ -60,9 +60,9 @@ def main(): gridDimensions = ktt.DimensionVector(numberOfElements) blockDimensions = ktt.DimensionVector() - a = np.arange(1.0, numberOfElements, dtype=np.single) - b = np.arange(1.0, numberOfElements, dtype=np.single) - result = np.zeros(numberOfElements, dtype=np.single) + a = np.arange(1.0, numberOfElements + 1, dtype = np.single) + b = np.arange(1.0, numberOfElements + 1, dtype = np.single) + result = np.zeros(numberOfElements, dtype = np.single) scalarValue = np.single(3.0) tuner = ktt.Tuner(0, deviceIndex, ktt.ComputeApi.CUDA) From 8d275a9c225fcbd43204c0f0a5644c4b33ef4261 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Wed, 12 Jan 2022 13:03:32 +0100 Subject: [PATCH 54/63] * Reference computation function in Python now works with memory view instead of capsule --- Examples/Reduction/Reduction.py | 9 +++------ OnboardingGuide.md | 4 ++-- Source/Python/PythonTuner.cpp | 16 +++++++++++++++- Tutorials/03KernelTuning/KernelTuningCuda.py | 7 ++----- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/Examples/Reduction/Reduction.py b/Examples/Reduction/Reduction.py index 5991c29c..a0b6ae67 100644 --- a/Examples/Reduction/Reduction.py +++ b/Examples/Reduction/Reduction.py @@ -1,14 +1,11 @@ -import ctypes import sys import numpy as np import pyktt as ktt def reference(buffer, src): - ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.POINTER(ctypes.c_float) - ctypes.pythonapi.PyCapsule_GetPointer.argtypes = [ctypes.py_object, ctypes.c_void_p] - result = ctypes.pythonapi.PyCapsule_GetPointer(buffer, None) + result = buffer.cast('f') resSize = len(src) - resD = [0.0 for i in 
range(resSize)] + resD = np.zeros(resSize, dtype = np.single) for i in range(resSize): resD[i] = src[i] @@ -129,7 +126,7 @@ def main(): tuner.AddConstraint(kernel, ["UNBOUNDED_WG", "WORK_GROUP_SIZE_X"], unboundedWG) referenceComp = lambda buffer: reference(buffer, src) - tuner.SetReferenceComputation(dstId, referenceComp) + tuner.SetReferenceComputation(dstId, 4, referenceComp) tuner.SetValidationMethod(ktt.ValidationMethod.SideBySideComparison, float(n) * 10000.0 / 10000000.0) tuner.SetValidationRange(dstId, 1) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index a06c93ed..f5adeec2 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -663,8 +663,8 @@ libraries. They are the following: * Templated methods - Python does not support templates, so there are separate versions of methods for different data types instead (e.g., `AddArgumentVectorFloat`, `AddArgumentVectorInt`). The addition of kernel arguments with custom types is not supported either. * Custom library initialization - Custom context, compute queues and buffers cannot be used in Python. -* Methods that use void pointers in C++ API - Python does not have a direct equivalent to void* type. It is necessary to utilize a low-level `ctypes` Python -module to interact with these methods through `PyCapsule` objects. +* Methods that use void pointers in C++ API - Python does not have a direct equivalent to void* type. It is necessary to utilize NumPy arrays to interact +with these methods. 
---- diff --git a/Source/Python/PythonTuner.cpp b/Source/Python/PythonTuner.cpp index 4f30f09f..03d5caa5 100644 --- a/Source/Python/PythonTuner.cpp +++ b/Source/Python/PythonTuner.cpp @@ -173,7 +173,21 @@ void InitializePythonTuner(py::module_& module) .def("SetValidationMode", &ktt::Tuner::SetValidationMode) .def("SetValidationRange", &ktt::Tuner::SetValidationRange) .def("SetValueComparator", &ktt::Tuner::SetValueComparator) - .def("SetReferenceComputation", &ktt::Tuner::SetReferenceComputation) + .def + ( + "SetReferenceComputation", + [](ktt::Tuner& tuner, const ktt::ArgumentId id, const size_t referenceSize, std::function reference) + { + ktt::ReferenceComputation actualReference = [reference, referenceSize](void* buffer) + { + py::gil_scoped_acquire acquire; + auto view = py::memoryview::from_memory(buffer, referenceSize); + reference(view); + }; + + tuner.SetReferenceComputation(id, actualReference); + } + ) .def("SetReferenceKernel", &ktt::Tuner::SetReferenceKernel) .def("Tune", py::overload_cast(&ktt::Tuner::Tune), py::call_guard()) .def("Tune", py::overload_cast>(&ktt::Tuner::Tune), py::call_guard()) diff --git a/Tutorials/03KernelTuning/KernelTuningCuda.py b/Tutorials/03KernelTuning/KernelTuningCuda.py index f93f2431..733dcca0 100644 --- a/Tutorials/03KernelTuning/KernelTuningCuda.py +++ b/Tutorials/03KernelTuning/KernelTuningCuda.py @@ -1,12 +1,9 @@ -import ctypes import sys import numpy as np import pyktt as ktt def computeReference(a, b, scalar, buffer): - ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.POINTER(ctypes.c_float) - ctypes.pythonapi.PyCapsule_GetPointer.argtypes = [ctypes.py_object, ctypes.c_void_p] - floatList = ctypes.pythonapi.PyCapsule_GetPointer(buffer, None) + floatList = buffer.cast('f') for i in range(len(a)): floatList[i] = a[i] + b[i] + scalar @@ -49,7 +46,7 @@ def main(): # The computation function receives buffer on input, where the reference result should be saved. 
The size of buffer corresponds # to the validated argument size. reference = lambda buffer : computeReference(a, b, scalarValue, buffer) - tuner.SetReferenceComputation(resultId, reference) + tuner.SetReferenceComputation(resultId, 4 * numberOfElements, reference) # Add new kernel parameter. Specify parameter name and possible values. When kernel is tuned, the parameter value is added # to the beginning of kernel source as preprocessor definition. E.g., for value of this parameter equal to 32, it is added From 88b33381145b4db45264944eabf0e4d8faad24cc Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Thu, 13 Jan 2022 11:38:35 +0100 Subject: [PATCH 55/63] * Updated pybind11 library in order to support VS2022 --- .../pybind11/attr.h | 40 ++++--- .../pybind11/buffer_info.h | 0 .../pybind11/cast.h | 70 +++++------- .../pybind11/chrono.h | 8 +- .../pybind11/common.h | 0 .../pybind11/complex.h | 2 +- .../pybind11/detail/class.h | 4 +- .../pybind11/detail/common.h | 29 ++++- .../pybind11/detail/descr.h | 53 ++++++--- .../detail/dynamic_raw_ptr_cast_if_possible.h | 0 .../pybind11/detail/init.h | 2 +- .../pybind11/detail/internals.h | 0 .../pybind11/detail/smart_holder_poc.h | 0 .../detail/smart_holder_sfinae_hooks_only.h | 0 .../detail/smart_holder_type_casters.h | 10 +- .../pybind11/detail/type_caster_base.h | 2 +- .../pybind11/detail/typeid.h | 0 .../pybind11/eigen.h | 44 ++++---- .../pybind11/embed.h | 18 +++- .../pybind11/eval.h | 4 +- .../pybind11/functional.h | 4 +- .../pybind11/gil.h | 0 .../pybind11/iostream.h | 0 .../pybind11/numpy.h | 28 ++--- .../pybind11/operators.h | 0 .../pybind11/options.h | 0 .../pybind11/pybind11.h | 101 ++++++++++++------ .../pybind11/pytypes.h | 55 +++++++++- .../pybind11/smart_holder.h | 0 .../pybind11/stl.h | 12 +-- .../pybind11/stl/filesystem.h | 2 +- .../pybind11/stl_bind.h | 0 .../pybind11/trampoline_self_life_support.h | 0 premake5.lua | 4 +- 34 files changed, 318 insertions(+), 174 deletions(-) rename 
Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/attr.h (93%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/buffer_info.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/cast.h (97%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/chrono.h (97%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/common.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/complex.h (96%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/class.h (99%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/common.h (98%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/descr.h (60%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/init.h (99%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/internals.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/smart_holder_poc.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/smart_holder_sfinae_hooks_only.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/smart_holder_type_casters.h (99%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/type_caster_base.h (99%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/detail/typeid.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/eigen.h 
(94%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/embed.h (96%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/eval.h (97%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/functional.h (97%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/gil.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/iostream.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/numpy.h (98%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/operators.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/options.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/pybind11.h (96%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/pytypes.h (97%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/smart_holder.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/stl.h (94%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/stl/filesystem.h (98%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/stl_bind.h (100%) rename Libraries/{pybind11-2.8.1-smart_holder => pybind11-2.9.0-smart_holder}/pybind11/trampoline_self_life_support.h (100%) diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/attr.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/attr.h similarity index 93% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/attr.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/attr.h index 0dedbc08..f1b66fb8 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/attr.h +++ 
b/Libraries/pybind11-2.9.0-smart_holder/pybind11/attr.h @@ -174,7 +174,7 @@ struct function_record { function_record() : is_constructor(false), is_new_style_constructor(false), is_stateless(false), is_operator(false), is_method(false), has_args(false), - has_kwargs(false), has_kw_only_args(false), prepend(false) { } + has_kwargs(false), prepend(false) { } /// Function name char *name = nullptr; /* why no C++ strings? They generate heavier code.. */ @@ -221,17 +221,15 @@ struct function_record { /// True if the function has a '**kwargs' argument bool has_kwargs : 1; - /// True once a 'py::kw_only' is encountered (any following args are keyword-only) - bool has_kw_only_args : 1; - /// True if this function is to be inserted at the beginning of the overload resolution chain bool prepend : 1; /// Number of arguments (including py::args and/or py::kwargs, if present) std::uint16_t nargs; - /// Number of trailing arguments (counted in `nargs`) that are keyword-only - std::uint16_t nargs_kw_only = 0; + /// Number of leading positional arguments, which are terminated by a py::args or py::kwargs + /// argument or by a py::kw_only annotation. 
+ std::uint16_t nargs_pos = 0; /// Number of leading arguments (counted in `nargs`) that are positional-only std::uint16_t nargs_pos_only = 0; @@ -411,20 +409,23 @@ template <> struct process_attribute : process_attribu static void init(const is_new_style_constructor &, function_record *r) { r->is_new_style_constructor = true; } }; -inline void process_kw_only_arg(const arg &a, function_record *r) { - if (!a.name || a.name[0] == '\0') - pybind11_fail("arg(): cannot specify an unnamed argument after an kw_only() annotation"); - ++r->nargs_kw_only; +inline void check_kw_only_arg(const arg &a, function_record *r) { + if (r->args.size() > r->nargs_pos && (!a.name || a.name[0] == '\0')) + pybind11_fail("arg(): cannot specify an unnamed argument after a kw_only() annotation or args() argument"); +} + +inline void append_self_arg_if_needed(function_record *r) { + if (r->is_method && r->args.empty()) + r->args.emplace_back("self", nullptr, handle(), /*convert=*/ true, /*none=*/ false); } /// Process a keyword argument attribute (*without* a default value) template <> struct process_attribute : process_attribute_default { static void init(const arg &a, function_record *r) { - if (r->is_method && r->args.empty()) - r->args.emplace_back("self", nullptr, handle(), true /*convert*/, false /*none not allowed*/); + append_self_arg_if_needed(r); r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, a.flag_none); - if (r->has_kw_only_args) process_kw_only_arg(a, r); + check_kw_only_arg(a, r); } }; @@ -432,7 +433,7 @@ template <> struct process_attribute : process_attribute_default { template <> struct process_attribute : process_attribute_default { static void init(const arg_v &a, function_record *r) { if (r->is_method && r->args.empty()) - r->args.emplace_back("self", nullptr /*descr*/, handle() /*parent*/, true /*convert*/, false /*none not allowed*/); + r->args.emplace_back("self", /*descr=*/ nullptr, /*parent=*/ handle(), /*convert=*/ true, /*none=*/ false); if 
(!a.value) { #if !defined(NDEBUG) @@ -457,21 +458,28 @@ template <> struct process_attribute : process_attribute_default { } r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none); - if (r->has_kw_only_args) process_kw_only_arg(a, r); + check_kw_only_arg(a, r); } }; /// Process a keyword-only-arguments-follow pseudo argument template <> struct process_attribute : process_attribute_default { static void init(const kw_only &, function_record *r) { - r->has_kw_only_args = true; + append_self_arg_if_needed(r); + if (r->has_args && r->nargs_pos != static_cast(r->args.size())) + pybind11_fail("Mismatched args() and kw_only(): they must occur at the same relative argument location (or omit kw_only() entirely)"); + r->nargs_pos = static_cast(r->args.size()); } }; /// Process a positional-only-argument maker template <> struct process_attribute : process_attribute_default { static void init(const pos_only &, function_record *r) { + append_self_arg_if_needed(r); r->nargs_pos_only = static_cast(r->args.size()); + if (r->nargs_pos_only > r->nargs_pos) + pybind11_fail("pos_only(): cannot follow a py::args() argument"); + // It also can't follow a kw_only, but a static_assert in pybind11.h checks that } }; diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/buffer_info.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/buffer_info.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/buffer_info.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/buffer_info.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/cast.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/cast.h similarity index 97% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/cast.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/cast.h index dd19a74e..48325ba1 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/cast.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/cast.h @@ -33,23 +33,6 @@ 
#include "detail/smart_holder_type_casters.h" #endif -#if defined(PYBIND11_CPP17) -# if defined(__has_include) -# if __has_include() -# define PYBIND11_HAS_STRING_VIEW -# endif -# elif defined(_MSC_VER) -# define PYBIND11_HAS_STRING_VIEW -# endif -#endif -#ifdef PYBIND11_HAS_STRING_VIEW -#include -#endif - -#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L -# define PYBIND11_HAS_U8STRING -#endif - PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) PYBIND11_NAMESPACE_BEGIN(detail) @@ -172,7 +155,8 @@ struct type_caster::value && !is_std_char_t return false; } else { handle src_or_index = src; -#if PY_VERSION_HEX < 0x03080000 + // PyPy: 7.3.7's 3.8 does not implement PyLong_*'s __index__ calls. +#if PY_VERSION_HEX < 0x03080000 || defined(PYPY_VERSION) object index; if (!PYBIND11_LONG_CHECK(src.ptr())) { // So: index_check(src.ptr()) index = reinterpret_steal(PyNumber_Index(src.ptr())); @@ -246,7 +230,7 @@ struct type_caster::value && !is_std_char_t return PyLong_FromUnsignedLongLong((unsigned long long) src); } - PYBIND11_TYPE_CASTER(T, _::value>("int", "float")); + PYBIND11_TYPE_CASTER(T, const_name::value>("int", "float")); }; template struct void_caster { @@ -259,7 +243,7 @@ template struct void_caster { static handle cast(T, return_value_policy /* policy */, handle /* parent */) { return none().inc_ref(); } - PYBIND11_TYPE_CASTER(T, _("None")); + PYBIND11_TYPE_CASTER(T, const_name("None")); }; template <> class type_caster : public void_caster {}; @@ -302,7 +286,7 @@ template <> class type_caster : public type_caster { template using cast_op_type = void*&; explicit operator void *&() { return value; } - static constexpr auto name = _("capsule"); + static constexpr auto name = const_name("capsule"); private: void *value = nullptr; }; @@ -353,7 +337,7 @@ template <> class type_caster { static handle cast(bool src, return_value_policy /* policy */, handle /* parent */) { return handle(src ? 
Py_True : Py_False).inc_ref(); } - PYBIND11_TYPE_CASTER(bool, _("bool")); + PYBIND11_TYPE_CASTER(bool, const_name("bool")); }; // Helper class for UTF-{8,16,32} C++ stl strings: @@ -443,7 +427,7 @@ template struct string_caster { return s; } - PYBIND11_TYPE_CASTER(StringType, _(PYBIND11_STRING_NAME)); + PYBIND11_TYPE_CASTER(StringType, const_name(PYBIND11_STRING_NAME)); private: static handle decode_utfN(const char *buffer, ssize_t nbytes) { @@ -580,7 +564,7 @@ template struct type_caster using cast_op_type = pybind11::detail::cast_op_type<_T>; }; @@ -617,7 +601,7 @@ template class Tuple, typename... Ts> class tuple_caster return cast(*src, policy, parent); } - static constexpr auto name = _("Tuple[") + concat(make_caster::name...) + _("]"); + static constexpr auto name = const_name("Tuple[") + concat(make_caster::name...) + const_name("]"); template using cast_op_type = type; @@ -806,14 +790,14 @@ template struct is_holder_type : template struct is_holder_type> : std::true_type {}; -template struct handle_type_name { static constexpr auto name = _(); }; -template <> struct handle_type_name { static constexpr auto name = _(PYBIND11_BYTES_NAME); }; -template <> struct handle_type_name { static constexpr auto name = _("int"); }; -template <> struct handle_type_name { static constexpr auto name = _("Iterable"); }; -template <> struct handle_type_name { static constexpr auto name = _("Iterator"); }; -template <> struct handle_type_name { static constexpr auto name = _("None"); }; -template <> struct handle_type_name { static constexpr auto name = _("*args"); }; -template <> struct handle_type_name { static constexpr auto name = _("**kwargs"); }; +template struct handle_type_name { static constexpr auto name = const_name(); }; +template <> struct handle_type_name { static constexpr auto name = const_name(PYBIND11_BYTES_NAME); }; +template <> struct handle_type_name { static constexpr auto name = const_name("int"); }; +template <> struct handle_type_name { static 
constexpr auto name = const_name("Iterable"); }; +template <> struct handle_type_name { static constexpr auto name = const_name("Iterator"); }; +template <> struct handle_type_name { static constexpr auto name = const_name("None"); }; +template <> struct handle_type_name { static constexpr auto name = const_name("*args"); }; +template <> struct handle_type_name { static constexpr auto name = const_name("**kwargs"); }; template struct pyobject_caster { @@ -1159,6 +1143,9 @@ constexpr arg operator"" _a(const char *name, size_t) { return arg(name); } PYBIND11_NAMESPACE_BEGIN(detail) +template using is_kw_only = std::is_same, kw_only>; +template using is_pos_only = std::is_same, pos_only>; + // forward declaration (definition in attr.h) struct function_record; @@ -1194,17 +1181,18 @@ class argument_loader { template using argument_is_args = std::is_same, args>; template using argument_is_kwargs = std::is_same, kwargs>; - // Get args/kwargs argument positions relative to the end of the argument list: - static constexpr auto args_pos = constexpr_first() - (int) sizeof...(Args), - kwargs_pos = constexpr_first() - (int) sizeof...(Args); - - static constexpr bool args_kwargs_are_last = kwargs_pos >= - 1 && args_pos >= kwargs_pos - 1; + // Get kwargs argument position, or -1 if not present: + static constexpr auto kwargs_pos = constexpr_last(); - static_assert(args_kwargs_are_last, "py::args/py::kwargs are only permitted as the last argument(s) of a function"); + static_assert(kwargs_pos == -1 || kwargs_pos == (int) sizeof...(Args) - 1, "py::kwargs is only permitted as the last argument of a function"); public: - static constexpr bool has_kwargs = kwargs_pos < 0; - static constexpr bool has_args = args_pos < 0; + static constexpr bool has_kwargs = kwargs_pos != -1; + + // py::args argument position; -1 if not present. 
+ static constexpr int args_pos = constexpr_last(); + + static_assert(args_pos == -1 || args_pos == constexpr_first(), "py::args cannot be specified more than once"); static constexpr auto arg_names = concat(type_descr(make_caster::name)...); diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/chrono.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/chrono.h similarity index 97% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/chrono.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/chrono.h index 61bbcbc5..460a28fa 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/chrono.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/chrono.h @@ -17,8 +17,6 @@ #include #include -#include - #include // Backport the PyDateTime_DELTA functions from Python3.3 if required @@ -97,7 +95,7 @@ template class duration_caster { return PyDelta_FromDSU(dd.count(), ss.count(), us.count()); } - PYBIND11_TYPE_CASTER(type, _("datetime.timedelta")); + PYBIND11_TYPE_CASTER(type, const_name("datetime.timedelta")); }; inline std::tm *localtime_thread_safe(const std::time_t *time, std::tm *buf) { @@ -108,7 +106,7 @@ inline std::tm *localtime_thread_safe(const std::time_t *time, std::tm *buf) { #else static std::mutex mtx; std::lock_guard lock(mtx); - std::tm *tm_ptr = localtime(time); + std::tm *tm_ptr = std::localtime(time); if (tm_ptr != nullptr) { *buf = *tm_ptr; } @@ -195,7 +193,7 @@ template class type_caster class type_caster> { return PyComplex_FromDoubles((double) src.real(), (double) src.imag()); } - PYBIND11_TYPE_CASTER(std::complex, _("complex")); + PYBIND11_TYPE_CASTER(std::complex, const_name("complex")); }; PYBIND11_NAMESPACE_END(detail) PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/class.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/class.h similarity index 99% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/class.h rename to 
Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/class.h index 1cc1e578..91750e3f 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/class.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/class.h @@ -626,9 +626,9 @@ inline PyObject* make_new_python_type(const type_record &rec) { if (rec.doc && options::show_user_defined_docstrings()) { /* Allocate memory for docstring (using PyObject_MALLOC, since Python will free this later on) */ - size_t size = strlen(rec.doc) + 1; + size_t size = std::strlen(rec.doc) + 1; tp_doc = (char *) PyObject_MALLOC(size); - memcpy((void *) tp_doc, rec.doc, size); + std::memcpy((void *) tp_doc, rec.doc, size); } auto &internals = get_internals(); diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/common.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/common.h similarity index 98% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/common.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/common.h index 713de94b..b08bbc55 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/common.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/common.h @@ -10,12 +10,12 @@ #pragma once #define PYBIND11_VERSION_MAJOR 2 -#define PYBIND11_VERSION_MINOR 8 -#define PYBIND11_VERSION_PATCH 1 +#define PYBIND11_VERSION_MINOR 9 +#define PYBIND11_VERSION_PATCH 0 // Similar to Python's convention: https://docs.python.org/3/c-api/apiabiversion.html // Additional convention: 0xD = dev -#define PYBIND11_VERSION_HEX 0x02080100 +#define PYBIND11_VERSION_HEX 0x02090000 #define PYBIND11_NAMESPACE_BEGIN(name) namespace name { #define PYBIND11_NAMESPACE_END(name) } @@ -154,6 +154,14 @@ // C4505: 'PySlice_GetIndicesEx': unreferenced local function has been removed (PyPy only) # pragma warning(disable: 4505) # if defined(_DEBUG) && !defined(Py_DEBUG) +// Workaround for a VS 2022 issue. 
+// NOTE: This workaround knowingly violates the Python.h include order requirement: +// https://docs.python.org/3/c-api/intro.html#include-files +// See https://github.com/pybind/pybind11/pull/3497 for full context. +# include +# if _MSVC_STL_VERSION >= 143 +# include +# endif # define PYBIND11_DEBUG_MARKER # undef _DEBUG # endif @@ -183,6 +191,21 @@ # define PYBIND11_HAS_VARIANT 1 #endif +#if defined(PYBIND11_CPP17) +# if defined(__has_include) +# if __has_include() +# define PYBIND11_HAS_STRING_VIEW +# endif +# elif defined(_MSC_VER) +# define PYBIND11_HAS_STRING_VIEW +# endif +#endif + +#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L +# define PYBIND11_HAS_U8STRING +#endif + + #include #include #include diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/descr.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/descr.h similarity index 60% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/descr.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/descr.h index c62e541b..0f93e06b 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/descr.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/descr.h @@ -54,35 +54,64 @@ constexpr descr operator+(const descr &a, c } template -constexpr descr _(char const(&text)[N]) { return descr(text); } -constexpr descr<0> _(char const(&)[1]) { return {}; } +constexpr descr const_name(char const(&text)[N]) { return descr(text); } +constexpr descr<0> const_name(char const(&)[1]) { return {}; } template struct int_to_str : int_to_str { }; template struct int_to_str<0, Digits...> { + // WARNING: This only works with C++17 or higher. 
static constexpr auto digits = descr(('0' + Digits)...); }; // Ternary description (like std::conditional) template -constexpr enable_if_t> _(char const(&text1)[N1], char const(&)[N2]) { - return _(text1); +constexpr enable_if_t> const_name(char const(&text1)[N1], char const(&)[N2]) { + return const_name(text1); } template -constexpr enable_if_t> _(char const(&)[N1], char const(&text2)[N2]) { - return _(text2); +constexpr enable_if_t> const_name(char const(&)[N1], char const(&text2)[N2]) { + return const_name(text2); } template -constexpr enable_if_t _(const T1 &d, const T2 &) { return d; } +constexpr enable_if_t const_name(const T1 &d, const T2 &) { return d; } template -constexpr enable_if_t _(const T1 &, const T2 &d) { return d; } +constexpr enable_if_t const_name(const T1 &, const T2 &d) { return d; } template -auto constexpr _() -> remove_cv_t::digits)> { +auto constexpr const_name() -> remove_cv_t::digits)> { return int_to_str::digits; } -template constexpr descr<1, Type> _() { return {'%'}; } +template constexpr descr<1, Type> const_name() { return {'%'}; } + +// If "_" is defined as a macro, py::detail::_ cannot be provided. +// It is therefore best to use py::detail::const_name universally. +// This block is for backward compatibility only. +// (The const_name code is repeated to avoid introducing a "_" #define ourselves.) 
+#ifndef _ +#define PYBIND11_DETAIL_UNDERSCORE_BACKWARD_COMPATIBILITY +template +constexpr descr _(char const(&text)[N]) { return const_name(text); } +template +constexpr enable_if_t> _(char const(&text1)[N1], char const(&text2)[N2]) { + return const_name(text1, text2); +} +template +constexpr enable_if_t> _(char const(&text1)[N1], char const(&text2)[N2]) { + return const_name(text1, text2); +} +template +constexpr enable_if_t _(const T1 &d1, const T2 &d2) { return const_name(d1, d2); } +template +constexpr enable_if_t _(const T1 &d1, const T2 &d2) { return const_name(d1, d2); } + +template +auto constexpr _() -> remove_cv_t::digits)> { + return const_name(); +} +template constexpr descr<1, Type> _() { return const_name(); } +#endif // #ifndef _ constexpr descr<0> concat() { return {}; } @@ -92,12 +121,12 @@ constexpr descr concat(const descr &descr) { return descr; } template constexpr auto concat(const descr &d, const Args &...args) -> decltype(std::declval>() + concat(args...)) { - return d + _(", ") + concat(args...); + return d + const_name(", ") + concat(args...); } template constexpr descr type_descr(const descr &descr) { - return _("{") + descr + _("}"); + return const_name("{") + descr + const_name("}"); } PYBIND11_NAMESPACE_END(detail) diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/init.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/init.h similarity index 99% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/init.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/init.h index 
a7bda462..c9076f4c 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/init.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/init.h @@ -26,7 +26,7 @@ class type_caster { template using cast_op_type = value_and_holder &; explicit operator value_and_holder &() { return *value; } - static constexpr auto name = _(); + static constexpr auto name = const_name(); private: value_and_holder *value = nullptr; diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/internals.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/internals.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/internals.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/internals.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_poc.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/smart_holder_poc.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_poc.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/smart_holder_poc.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_sfinae_hooks_only.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/smart_holder_sfinae_hooks_only.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_sfinae_hooks_only.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/smart_holder_sfinae_hooks_only.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_type_casters.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/smart_holder_type_casters.h similarity index 99% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_type_casters.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/smart_holder_type_casters.h index 98e322cd..6eb5ef27 100644 --- 
a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/smart_holder_type_casters.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/smart_holder_type_casters.h @@ -538,7 +538,7 @@ struct make_constructor : private type_caster_base { // Any type, nothing s template struct smart_holder_type_caster : smart_holder_type_caster_load, smart_holder_type_caster_class_hooks { - static constexpr auto name = _(); + static constexpr auto name = const_name(); // static handle cast(T, ...) // is redundant (leads to ambiguous overloads). @@ -703,7 +703,7 @@ struct smart_holder_type_caster : smart_holder_type_caster_load, template struct smart_holder_type_caster> : smart_holder_type_caster_load, smart_holder_type_caster_class_hooks { - static constexpr auto name = _>(); + static constexpr auto name = const_name(); static handle cast(const std::shared_ptr &src, return_value_policy policy, handle parent) { switch (policy) { @@ -760,7 +760,7 @@ struct smart_holder_type_caster> : smart_holder_type_caster_l template struct smart_holder_type_caster> : smart_holder_type_caster_load, smart_holder_type_caster_class_hooks { - static constexpr auto name = _>(); + static constexpr auto name = const_name(); static handle cast(const std::shared_ptr &src, return_value_policy policy, handle parent) { @@ -780,7 +780,7 @@ struct smart_holder_type_caster> : smart_holder_type_ca template struct smart_holder_type_caster> : smart_holder_type_caster_load, smart_holder_type_caster_class_hooks { - static constexpr auto name = _>(); + static constexpr auto name = const_name(); static handle cast(std::unique_ptr &&src, return_value_policy policy, handle parent) { if (policy != return_value_policy::automatic @@ -857,7 +857,7 @@ struct smart_holder_type_caster> : smart_holder_type_caste template struct smart_holder_type_caster> : smart_holder_type_caster_load, smart_holder_type_caster_class_hooks { - static constexpr auto name = _>(); + static constexpr auto name = const_name(); static handle 
cast(std::unique_ptr &&src, return_value_policy policy, handle parent) { diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/type_caster_base.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/type_caster_base.h similarity index 99% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/type_caster_base.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/type_caster_base.h index 00ce1a7a..48e218b2 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/type_caster_base.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/type_caster_base.h @@ -897,7 +897,7 @@ template class type_caster_base : public type_caster_generic { using itype = intrinsic_t; public: - static constexpr auto name = _(); + static constexpr auto name = const_name(); type_caster_base() : type_caster_base(typeid(type)) { } explicit type_caster_base(const std::type_info &info) : type_caster_generic(info) { } diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/typeid.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/typeid.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/detail/typeid.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/detail/typeid.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/eigen.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/eigen.h similarity index 94% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/eigen.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/eigen.h index 97b1d96b..696099fa 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/eigen.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/eigen.h @@ -50,8 +50,12 @@ PYBIND11_NAMESPACE_BEGIN(detail) #if EIGEN_VERSION_AT_LEAST(3,3,0) using EigenIndex = Eigen::Index; +template +using EigenMapSparseMatrix = Eigen::Map>; #else using EigenIndex = EIGEN_DEFAULT_DENSE_INDEX_TYPE; +template +using EigenMapSparseMatrix = Eigen::MappedSparseMatrix; #endif 
// Matches Eigen::Map, Eigen::Ref, blocks, etc: @@ -80,14 +84,12 @@ template struct EigenConformable { // Matrix type: EigenConformable(EigenIndex r, EigenIndex c, EigenIndex rstride, EigenIndex cstride) : - conformable{true}, rows{r}, cols{c} { - // TODO: when Eigen bug #747 is fixed, remove the tests for non-negativity. http://eigen.tuxfamily.org/bz/show_bug.cgi?id=747 - if (rstride < 0 || cstride < 0) { - negativestrides = true; - } else { - stride = {EigenRowMajor ? rstride : cstride /* outer stride */, - EigenRowMajor ? cstride : rstride /* inner stride */ }; - } + conformable{true}, rows{r}, cols{c}, + //TODO: when Eigen bug #747 is fixed, remove the tests for non-negativity. http://eigen.tuxfamily.org/bz/show_bug.cgi?id=747 + stride{EigenRowMajor ? (rstride > 0 ? rstride : 0) : (cstride > 0 ? cstride : 0) /* outer stride */, + EigenRowMajor ? (cstride > 0 ? cstride : 0) : (rstride > 0 ? rstride : 0) /* inner stride */ }, + negativestrides{rstride < 0 || cstride < 0} { + } // Vector type: EigenConformable(EigenIndex r, EigenIndex c, EigenIndex stride) @@ -190,20 +192,20 @@ template struct EigenProps { static constexpr bool show_f_contiguous = !show_c_contiguous && show_order && requires_col_major; static constexpr auto descriptor = - _("numpy.ndarray[") + npy_format_descriptor::name + - _("[") + _(_<(size_t) rows>(), _("m")) + - _(", ") + _(_<(size_t) cols>(), _("n")) + - _("]") + + const_name("numpy.ndarray[") + npy_format_descriptor::name + + const_name("[") + const_name(const_name<(size_t) rows>(), const_name("m")) + + const_name(", ") + const_name(const_name<(size_t) cols>(), const_name("n")) + + const_name("]") + // For a reference type (e.g. Ref) we have other constraints that might need to be // satisfied: writeable=True (for a mutable reference), and, depending on the map's stride // options, possibly f_contiguous or c_contiguous. 
We include them in the descriptor output // to provide some hint as to why a TypeError is occurring (otherwise it can be confusing to // see that a function accepts a 'numpy.ndarray[float64[3,2]]' and an error message that you // *gave* a numpy.ndarray of the right type and dimensions. - _(", flags.writeable", "") + - _(", flags.c_contiguous", "") + - _(", flags.f_contiguous", "") + - _("]"); + const_name(", flags.writeable", "") + + const_name(", flags.c_contiguous", "") + + const_name(", flags.f_contiguous", "") + + const_name("]"); }; // Casts an Eigen type to numpy array. If given a base, the numpy array references the src data, @@ -573,9 +575,9 @@ struct type_caster::value>> { if (!values || !innerIndices || !outerIndices) return false; - value = Eigen::MappedSparseMatrix( + value = EigenMapSparseMatrix( shape[0].cast(), shape[1].cast(), nnz, outerIndices.mutable_data(), innerIndices.mutable_data(), values.mutable_data()); @@ -598,8 +600,8 @@ struct type_caster::value>> { ).release(); } - PYBIND11_TYPE_CASTER(Type, _<(Type::IsRowMajor) != 0>("scipy.sparse.csr_matrix[", "scipy.sparse.csc_matrix[") - + npy_format_descriptor::name + _("]")); + PYBIND11_TYPE_CASTER(Type, const_name<(Type::IsRowMajor) != 0>("scipy.sparse.csr_matrix[", "scipy.sparse.csc_matrix[") + + npy_format_descriptor::name + const_name("]")); }; PYBIND11_NAMESPACE_END(detail) diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/embed.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/embed.h similarity index 96% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/embed.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/embed.h index 9843f0f9..9ab1ce9c 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/embed.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/embed.h @@ -102,15 +102,27 @@ inline wchar_t *widen_chars(const char *safe_arg) { wchar_t *widened_arg = Py_DecodeLocale(safe_arg, nullptr); #else wchar_t *widened_arg = nullptr; + +// warning C4996: 
'mbstowcs': This function or variable may be unsafe. +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable:4996) +#endif + # if defined(HAVE_BROKEN_MBSTOWCS) && HAVE_BROKEN_MBSTOWCS - size_t count = strlen(safe_arg); + size_t count = std::strlen(safe_arg); # else - size_t count = mbstowcs(nullptr, safe_arg, 0); + size_t count = std::mbstowcs(nullptr, safe_arg, 0); # endif if (count != static_cast(-1)) { widened_arg = new wchar_t[count + 1]; - mbstowcs(widened_arg, safe_arg, count + 1); + std::mbstowcs(widened_arg, safe_arg, count + 1); } + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + #endif return widened_arg; } diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/eval.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/eval.h similarity index 97% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/eval.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/eval.h index 6cc672e2..4248551e 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/eval.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/eval.h @@ -19,11 +19,11 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) PYBIND11_NAMESPACE_BEGIN(detail) inline void ensure_builtins_in_globals(object &global) { - #if PY_VERSION_HEX < 0x03080000 + #if defined(PYPY_VERSION) || PY_VERSION_HEX < 0x03080000 // Running exec and eval on Python 2 and 3 adds `builtins` module under // `__builtins__` key to globals if not yet present. // Python 3.8 made PyRun_String behave similarly. Let's also do that for - // older versions, for consistency. + // older versions, for consistency. This was missing from PyPy3.8 7.3.7. 
if (!global.contains("__builtins__")) global["__builtins__"] = module_::import(PYBIND11_BUILTINS_MODULE); #else diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/functional.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/functional.h similarity index 97% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/functional.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/functional.h index ad5608c2..7912aef1 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/functional.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/functional.h @@ -113,8 +113,8 @@ struct type_caster> { return cpp_function(std::forward(f_), policy).release(); } - PYBIND11_TYPE_CASTER(type, _("Callable[[") + concat(make_caster::name...) + _("], ") - + make_caster::name + _("]")); + PYBIND11_TYPE_CASTER(type, const_name("Callable[[") + concat(make_caster::name...) + const_name("], ") + + make_caster::name + const_name("]")); }; PYBIND11_NAMESPACE_END(detail) diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/gil.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/gil.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/gil.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/gil.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/iostream.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/iostream.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/iostream.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/iostream.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/numpy.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/numpy.h similarity index 98% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/numpy.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/numpy.h index b43a7716..8e83b506 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/numpy.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/numpy.h @@ -39,7 +39,7 @@ class 
array; // Forward declaration PYBIND11_NAMESPACE_BEGIN(detail) -template <> struct handle_type_name { static constexpr auto name = _("numpy.ndarray"); }; +template <> struct handle_type_name { static constexpr auto name = const_name("numpy.ndarray"); }; template struct npy_format_descriptor; @@ -290,7 +290,7 @@ template struct array_info_scalar { using type = T; static constexpr bool is_array = false; static constexpr bool is_empty = false; - static constexpr auto extents = _(""); + static constexpr auto extents = const_name(""); static void append_extents(list& /* shape */) { } }; // Computes underlying type and a comma-separated list of extents for array @@ -309,8 +309,8 @@ template struct array_info> { array_info::append_extents(shape); } - static constexpr auto extents = _::is_array>( - concat(_(), array_info::extents), _() + static constexpr auto extents = const_name::is_array>( + concat(const_name(), array_info::extents), const_name() ); }; // For numpy we have special handling for arrays of characters, so we don't include @@ -1021,7 +1021,7 @@ template struct format_descriptor::is_array>> { static std::string format() { using namespace detail; - static constexpr auto extents = _("(") + array_info::extents + _(")"); + static constexpr auto extents = const_name("(") + array_info::extents + const_name(")"); return extents.text + format_descriptor>::format(); } }; @@ -1056,28 +1056,28 @@ struct npy_format_descriptor_name; template struct npy_format_descriptor_name::value>> { - static constexpr auto name = _::value>( - _("bool"), _::value>("numpy.int", "numpy.uint") + _() + static constexpr auto name = const_name::value>( + const_name("bool"), const_name::value>("numpy.int", "numpy.uint") + const_name() ); }; template struct npy_format_descriptor_name::value>> { - static constexpr auto name = _::value + static constexpr auto name = const_name::value || std::is_same::value || std::is_same::value || std::is_same::value>( - _("numpy.float") + _(), 
_("numpy.longdouble") + const_name("numpy.float") + const_name(), const_name("numpy.longdouble") ); }; template struct npy_format_descriptor_name::value>> { - static constexpr auto name = _::value + static constexpr auto name = const_name::value || std::is_same::value || std::is_same::value || std::is_same::value>( - _("numpy.complex") + _(), _("numpy.longcomplex") + const_name("numpy.complex") + const_name(), const_name("numpy.longcomplex") ); }; @@ -1105,7 +1105,7 @@ struct npy_format_descriptor(); \ + static constexpr auto name = const_name("S") + const_name(); \ static pybind11::dtype dtype() { return pybind11::dtype(std::string("S") + std::to_string(N)); } template struct npy_format_descriptor { PYBIND11_DECL_CHAR_FMT }; template struct npy_format_descriptor> { PYBIND11_DECL_CHAR_FMT }; @@ -1117,7 +1117,7 @@ template struct npy_format_descriptor:: public: static_assert(!array_info::is_empty, "Zero-sized arrays are not supported"); - static constexpr auto name = _("(") + array_info::extents + _(")") + base_descr::name; + static constexpr auto name = const_name("(") + array_info::extents + const_name(")") + base_descr::name; static pybind11::dtype dtype() { list shape; array_info::append_extents(shape); @@ -1705,7 +1705,7 @@ vectorize_extractor(const Func &f, Return (*) (Args ...)) { } template struct handle_type_name> { - static constexpr auto name = _("numpy.ndarray[") + npy_format_descriptor::name + _("]"); + static constexpr auto name = const_name("numpy.ndarray[") + npy_format_descriptor::name + const_name("]"); }; PYBIND11_NAMESPACE_END(detail) diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/operators.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/operators.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/operators.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/operators.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/options.h 
b/Libraries/pybind11-2.9.0-smart_holder/pybind11/options.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/options.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/options.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/pybind11.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/pybind11.h similarity index 96% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/pybind11.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/pybind11.h index 6f24a360..9b5a7e5d 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/pybind11.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/pybind11.h @@ -25,7 +25,7 @@ #include #include -#include +#include #if defined(__cpp_lib_launder) && !(defined(_MSC_VER) && (_MSC_VER < 1914)) # define PYBIND11_STD_LAUNDER std::launder @@ -117,7 +117,7 @@ class cpp_function : public function { template // NOLINTNEXTLINE(google-explicit-constructor) cpp_function(Return (Class::*f)(Arg...)&, const Extra&... extra) { - initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(args...); }, + initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(std::forward(args)...); }, (Return (*) (Class *, Arg...)) nullptr, extra...); } @@ -135,7 +135,7 @@ class cpp_function : public function { template // NOLINTNEXTLINE(google-explicit-constructor) cpp_function(Return (Class::*f)(Arg...) const&, const Extra&... extra) { - initialize([f](const Class *c, Arg... args) -> Return { return (c->*f)(args...); }, + initialize([f](const Class *c, Arg... 
args) -> Return { return (c->*f)(std::forward(args)...); }, (Return (*)(const Class *, Arg ...)) nullptr, extra...); } @@ -205,7 +205,7 @@ class cpp_function : public function { conditional_t::value, void_type, Return> >; - static_assert(expected_num_args(sizeof...(Args), cast_in::has_args, cast_in::has_kwargs), + static_assert(expected_num_args(sizeof...(Args), cast_in::args_pos >= 0, cast_in::has_kwargs), "The number of argument annotations does not match the number of function arguments"); /* Dispatch code which converts function arguments and performs the actual function call */ @@ -240,30 +240,37 @@ class cpp_function : public function { return result; }; + rec->nargs_pos = cast_in::args_pos >= 0 + ? static_cast(cast_in::args_pos) + : sizeof...(Args) - cast_in::has_kwargs; // Will get reduced more if we have a kw_only + rec->has_args = cast_in::args_pos >= 0; + rec->has_kwargs = cast_in::has_kwargs; + /* Process any user-provided function attributes */ process_attributes::init(extra..., rec); { constexpr bool has_kw_only_args = any_of...>::value, has_pos_only_args = any_of...>::value, - has_args = any_of...>::value, has_arg_annotations = any_of...>::value; static_assert(has_arg_annotations || !has_kw_only_args, "py::kw_only requires the use of argument annotations"); static_assert(has_arg_annotations || !has_pos_only_args, "py::pos_only requires the use of argument annotations (for docstrings and aligning the annotations to the argument)"); - static_assert(!(has_args && has_kw_only_args), "py::kw_only cannot be combined with a py::args argument"); + + static_assert(constexpr_sum(is_kw_only::value...) <= 1, "py::kw_only may be specified only once"); + static_assert(constexpr_sum(is_pos_only::value...) 
<= 1, "py::pos_only may be specified only once"); + constexpr auto kw_only_pos = constexpr_first(); + constexpr auto pos_only_pos = constexpr_first(); + static_assert(!(has_kw_only_args && has_pos_only_args) || pos_only_pos < kw_only_pos, "py::pos_only must come before py::kw_only"); } /* Generate a readable signature describing the function's arguments and return value types */ - static constexpr auto signature = _("(") + cast_in::arg_names + _(") -> ") + cast_out::name; + static constexpr auto signature = const_name("(") + cast_in::arg_names + const_name(") -> ") + cast_out::name; PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types(); /* Register the function with Python from generic (non-templated) code */ // Pass on the ownership over the `unique_rec` to `initialize_generic`. `rec` stays valid. initialize_generic(std::move(unique_rec), signature.text, types.data(), sizeof...(Args)); - if (cast_in::has_args) rec->has_args = true; - if (cast_in::has_kwargs) rec->has_kwargs = true; - /* Stash some additional information used by an important optimization in 'functional.h' */ using FunctionType = Return (*)(Args...); constexpr bool is_function_ptr = @@ -322,8 +329,8 @@ class cpp_function : public function { a.descr = guarded_strdup(repr(a.value).cast().c_str()); } - rec->is_constructor - = (strcmp(rec->name, "__init__") == 0) || (strcmp(rec->name, "__setstate__") == 0); + rec->is_constructor = (std::strcmp(rec->name, "__init__") == 0) + || (std::strcmp(rec->name, "__setstate__") == 0); #if !defined(NDEBUG) && !defined(PYBIND11_DISABLE_NEW_STYLE_INIT_WARNING) if (rec->is_constructor && !rec->is_new_style_constructor) { @@ -342,16 +349,18 @@ class cpp_function : public function { /* Generate a proper function signature */ std::string signature; size_t type_index = 0, arg_index = 0; + bool is_starred = false; for (auto *pc = text; *pc != '\0'; ++pc) { const auto c = *pc; if (c == '{') { // Write arg name for everything except *args and **kwargs. 
- if (*(pc + 1) == '*') + is_starred = *(pc + 1) == '*'; + if (is_starred) continue; // Separator for keyword-only arguments, placed before the kw - // arguments start - if (rec->nargs_kw_only > 0 && arg_index + rec->nargs_kw_only == args) + // arguments start (unless we are already putting an *args) + if (!rec->has_args && arg_index == rec->nargs_pos) signature += "*, "; if (arg_index < rec->args.size() && rec->args[arg_index].name) { signature += rec->args[arg_index].name; @@ -363,7 +372,7 @@ class cpp_function : public function { signature += ": "; } else if (c == '}') { // Write default value if available. - if (arg_index < rec->args.size() && rec->args[arg_index].descr) { + if (!is_starred && arg_index < rec->args.size() && rec->args[arg_index].descr) { signature += " = "; signature += rec->args[arg_index].descr; } @@ -371,7 +380,8 @@ class cpp_function : public function { // argument, rather than before like * if (rec->nargs_pos_only > 0 && (arg_index + 1) == rec->nargs_pos_only) signature += ", /"; - arg_index++; + if (!is_starred) + arg_index++; } else if (c == '%') { const std::type_info *t = types[type_index++]; if (!t) @@ -397,14 +407,14 @@ class cpp_function : public function { } } - if (arg_index != args || types[type_index] != nullptr) + if (arg_index != args - rec->has_args - rec->has_kwargs || types[type_index] != nullptr) pybind11_fail("Internal error while parsing type signature (2)"); #if PY_MAJOR_VERSION < 3 - if (strcmp(rec->name, "__next__") == 0) { + if (std::strcmp(rec->name, "__next__") == 0) { std::free(rec->name); rec->name = guarded_strdup("next"); - } else if (strcmp(rec->name, "__bool__") == 0) { + } else if (std::strcmp(rec->name, "__bool__") == 0) { std::free(rec->name); rec->name = guarded_strdup("__nonzero__"); } @@ -633,7 +643,7 @@ class cpp_function : public function { named positional arguments weren't *also* specified via kwarg. 2. 
If we weren't given enough, try to make up the omitted ones by checking whether they were provided by a kwarg matching the `py::arg("name")` name. If - so, use it (and remove it from kwargs; if not, see if the function binding + so, use it (and remove it from kwargs); if not, see if the function binding provided a default that we can use. 3. Ensure that either all keyword arguments were "consumed", or that the function takes a kwargs argument to accept unconsumed kwargs. @@ -651,7 +661,7 @@ class cpp_function : public function { size_t num_args = func.nargs; // Number of positional arguments that we need if (func.has_args) --num_args; // (but don't count py::args if (func.has_kwargs) --num_args; // or py::kwargs) - size_t pos_args = num_args - func.nargs_kw_only; + size_t pos_args = func.nargs_pos; if (!func.has_args && n_args_in > pos_args) continue; // Too many positional arguments for this overload @@ -697,6 +707,10 @@ class cpp_function : public function { if (bad_arg) continue; // Maybe it was meant for another overload (issue #688) + // Keep track of how many position args we copied out in case we need to come back + // to copy the rest into a py::args argument. 
+ size_t positional_args_copied = args_copied; + // We'll need to copy this if we steal some kwargs for defaults dict kwargs = reinterpret_borrow(kwargs_in); @@ -749,6 +763,10 @@ class cpp_function : public function { } if (value) { + // If we're at the py::args index then first insert a stub for it to be replaced later + if (func.has_args && call.args.size() == func.nargs_pos) + call.args.push_back(none()); + call.args.push_back(value); call.args_convert.push_back(arg_rec.convert); } @@ -771,16 +789,19 @@ class cpp_function : public function { // We didn't copy out any position arguments from the args_in tuple, so we // can reuse it directly without copying: extra_args = reinterpret_borrow(args_in); - } else if (args_copied >= n_args_in) { + } else if (positional_args_copied >= n_args_in) { extra_args = tuple(0); } else { - size_t args_size = n_args_in - args_copied; + size_t args_size = n_args_in - positional_args_copied; extra_args = tuple(args_size); for (size_t i = 0; i < args_size; ++i) { - extra_args[i] = PyTuple_GET_ITEM(args_in, args_copied + i); + extra_args[i] = PyTuple_GET_ITEM(args_in, positional_args_copied + i); } } - call.args.push_back(extra_args); + if (call.args.size() <= func.nargs_pos) + call.args.push_back(extra_args); + else + call.args[func.nargs_pos] = extra_args; call.args_convert.push_back(false); call.args_ref = std::move(extra_args); } @@ -977,6 +998,13 @@ class cpp_function : public function { "Python type! 
The signature was\n\t"; msg += it->signature; append_note_if_missing_header_is_suspected(msg); +#if PY_VERSION_HEX >= 0x03030000 + // Attach additional error info to the exception if supported + if (PyErr_Occurred()) { + raise_from(PyExc_TypeError, msg.c_str()); + return nullptr; + } +#endif PyErr_SetString(PyExc_TypeError, msg.c_str()); return nullptr; } @@ -1277,8 +1305,8 @@ inline void call_operator_delete(void *p, size_t s, size_t a) { inline void add_class_method(object& cls, const char *name_, const cpp_function &cf) { cls.attr(cf.name()) = cf; - if (strcmp(name_, "__eq__") == 0 && !cls.attr("__dict__").contains("__hash__")) { - cls.attr("__hash__") = none(); + if (std::strcmp(name_, "__eq__") == 0 && !cls.attr("__dict__").contains("__hash__")) { + cls.attr("__hash__") = none(); } } @@ -2072,6 +2100,16 @@ inline std::pair all_t // gets destroyed: weakref((PyObject *) type, cpp_function([type](handle wr) { get_internals().registered_types_py.erase(type); + + // TODO consolidate the erasure code in pybind11_meta_dealloc() in class.h + auto &cache = get_internals().inactive_override_cache; + for (auto it = cache.begin(), last = cache.end(); it != last; ) { + if (it->first == reinterpret_cast(type)) + it = cache.erase(it); + else + ++it; + } + wr.dec_ref(); })).release(); } @@ -2448,8 +2486,9 @@ inline function get_type_override(const void *this_ptr, const type_info *this_ty /* Don't call dispatch code if invoked from overridden function. Unfortunately this doesn't work on PyPy. */ -#if !defined(PYPY_VERSION) - +#if !defined(PYPY_VERSION) && PY_VERSION_HEX < 0x030B0000 + // TODO: Remove PyPy workaround for Python 3.11. + // Current API fails on 3.11 since co_varnames can be null. 
#if PY_VERSION_HEX >= 0x03090000 PyFrameObject *frame = PyThreadState_GetFrame(PyThreadState_Get()); if (frame != nullptr) { @@ -2457,7 +2496,7 @@ inline function get_type_override(const void *this_ptr, const type_info *this_ty // f_code is guaranteed to not be NULL if ((std::string) str(f_code->co_name) == name && f_code->co_argcount > 0) { PyObject* locals = PyEval_GetLocals(); - if (locals != nullptr) { + if (locals != nullptr && f_code->co_varnames != nullptr) { PyObject *self_caller = dict_getitem( locals, PyTuple_GET_ITEM(f_code->co_varnames, 0) ); diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/pytypes.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/pytypes.h similarity index 97% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/pytypes.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/pytypes.h index f54d5fad..902fb1f0 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/pytypes.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/pytypes.h @@ -18,6 +18,10 @@ # include #endif +#ifdef PYBIND11_HAS_STRING_VIEW +# include +#endif + PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) /* A few forward declarations */ @@ -287,10 +291,10 @@ class object : public handle { struct borrowed_t { }; struct stolen_t { }; -#ifndef DOXYGEN_SHOULD_SKIP_THIS // Issue in breathe 4.26.1 + /// @cond BROKEN template friend T reinterpret_borrow(handle); template friend T reinterpret_steal(handle); -#endif + /// @endcond public: // Only accessible from derived classes and the reinterpret_* functions @@ -431,7 +435,7 @@ inline void raise_from(error_already_set& err, PyObject *type, const char *messa #endif -/** \defgroup python_builtins _ +/** \defgroup python_builtins const_name Unless stated otherwise, the following C++ functions behave the same as their Python counterparts. 
*/ @@ -1085,6 +1089,20 @@ class str : public object { // NOLINTNEXTLINE(google-explicit-constructor) str(const std::string &s) : str(s.data(), s.size()) { } +#ifdef PYBIND11_HAS_STRING_VIEW + // enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521). + template ::value, int> = 0> + // NOLINTNEXTLINE(google-explicit-constructor) + str(T s) : str(s.data(), s.size()) { } + +# ifdef PYBIND11_HAS_U8STRING + // reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char) + // NOLINTNEXTLINE(google-explicit-constructor) + str(std::u8string_view s) : str(reinterpret_cast(s.data()), s.size()) { } +# endif + +#endif + explicit str(const bytes &b); /** \rst @@ -1167,6 +1185,26 @@ class bytes : public object { pybind11_fail("Unable to extract bytes contents!"); return std::string(buffer, (size_t) length); } + +#ifdef PYBIND11_HAS_STRING_VIEW + // enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521). + template ::value, int> = 0> + // NOLINTNEXTLINE(google-explicit-constructor) + bytes(T s) : bytes(s.data(), s.size()) { } + + // Obtain a string view that views the current `bytes` buffer value. Note that this is only + // valid so long as the `bytes` instance remains alive and so generally should not outlive the + // lifetime of the `bytes` instance. 
+ // NOLINTNEXTLINE(google-explicit-constructor) + operator std::string_view() const { + char *buffer = nullptr; + ssize_t length = 0; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length)) + pybind11_fail("Unable to extract bytes contents!"); + return {buffer, static_cast(length)}; + } +#endif + }; // Note: breathe >= 4.17.0 will fail to build docs if the below two constructors // are included in the doxygen group; close here and reopen after as a workaround @@ -1714,10 +1752,17 @@ class memoryview : public object { static memoryview from_memory(const void *mem, ssize_t size) { return memoryview::from_memory(const_cast(mem), size, true); } + +#ifdef PYBIND11_HAS_STRING_VIEW + static memoryview from_memory(std::string_view mem) { + return from_memory(const_cast(mem.data()), static_cast(mem.size()), true); + } +#endif + #endif }; -#ifndef DOXYGEN_SHOULD_SKIP_THIS +/// @cond DUPLICATE inline memoryview memoryview::from_buffer( void *ptr, ssize_t itemsize, const char* format, detail::any_container shape, @@ -1745,7 +1790,7 @@ inline memoryview memoryview::from_buffer( throw error_already_set(); return memoryview(object(obj, stolen_t{})); } -#endif // DOXYGEN_SHOULD_SKIP_THIS +/// @endcond /// @} pytypes /// \addtogroup python_builtins diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/smart_holder.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/smart_holder.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/smart_holder.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/smart_holder.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/stl.h similarity index 94% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/stl.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/stl.h index 3608d298..43034948 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/stl.h @@ -78,7 
+78,7 @@ template struct set_caster { return s.release(); } - PYBIND11_TYPE_CASTER(type, _("Set[") + key_conv::name + _("]")); + PYBIND11_TYPE_CASTER(type, const_name("Set[") + key_conv::name + const_name("]")); }; template struct map_caster { @@ -120,7 +120,7 @@ template struct map_caster { return d.release(); } - PYBIND11_TYPE_CASTER(Type, _("Dict[") + key_conv::name + _(", ") + value_conv::name + _("]")); + PYBIND11_TYPE_CASTER(Type, const_name("Dict[") + key_conv::name + const_name(", ") + value_conv::name + const_name("]")); }; template struct list_caster { @@ -166,7 +166,7 @@ template struct list_caster { return l.release(); } - PYBIND11_TYPE_CASTER(Type, _("List[") + value_conv::name + _("]")); + PYBIND11_TYPE_CASTER(Type, const_name("List[") + value_conv::name + const_name("]")); }; template struct type_caster> @@ -223,7 +223,7 @@ template s return l.release(); } - PYBIND11_TYPE_CASTER(ArrayType, _("List[") + value_conv::name + _(_(""), _("[") + _() + _("]")) + _("]")); + PYBIND11_TYPE_CASTER(ArrayType, const_name("List[") + value_conv::name + const_name(const_name(""), const_name("[") + const_name() + const_name("]")) + const_name("]")); }; template struct type_caster> @@ -273,7 +273,7 @@ template struct optio return true; } - PYBIND11_TYPE_CASTER(Type, _("Optional[") + value_conv::name + _("]")); + PYBIND11_TYPE_CASTER(Type, const_name("Optional[") + value_conv::name + const_name("]")); }; #if defined(PYBIND11_HAS_OPTIONAL) @@ -353,7 +353,7 @@ struct variant_caster> { } using Type = V; - PYBIND11_TYPE_CASTER(Type, _("Union[") + detail::concat(make_caster::name...) + _("]")); + PYBIND11_TYPE_CASTER(Type, const_name("Union[") + detail::concat(make_caster::name...) 
+ const_name("]")); }; #if defined(PYBIND11_HAS_VARIANT) diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl/filesystem.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/stl/filesystem.h similarity index 98% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/stl/filesystem.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/stl/filesystem.h index 431b94b4..a9a6c851 100644 --- a/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl/filesystem.h +++ b/Libraries/pybind11-2.9.0-smart_holder/pybind11/stl/filesystem.h @@ -92,7 +92,7 @@ template struct path_caster { return true; } - PYBIND11_TYPE_CASTER(T, _("os.PathLike")); + PYBIND11_TYPE_CASTER(T, const_name("os.PathLike")); }; template<> struct type_caster diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/stl_bind.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/stl_bind.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/stl_bind.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/stl_bind.h diff --git a/Libraries/pybind11-2.8.1-smart_holder/pybind11/trampoline_self_life_support.h b/Libraries/pybind11-2.9.0-smart_holder/pybind11/trampoline_self_life_support.h similarity index 100% rename from Libraries/pybind11-2.8.1-smart_holder/pybind11/trampoline_self_life_support.h rename to Libraries/pybind11-2.9.0-smart_holder/pybind11/trampoline_self_life_support.h diff --git a/premake5.lua b/premake5.lua index 553d9d5e..b47ccabc 100644 --- a/premake5.lua +++ b/premake5.lua @@ -197,8 +197,8 @@ function linkPython() end defines {"KTT_PYTHON", "PYBIND11_USE_SMART_HOLDER_AS_DEFAULT"} - includedirs {pythonHeaders, "Libraries/pybind11-2.8.1-smart_holder"} - files {"Libraries/pybind11-2.8.1-smart_holder/**"} + includedirs {pythonHeaders, "Libraries/pybind11-2.9.0-smart_holder"} + files {"Libraries/pybind11-2.9.0-smart_holder/**"} if os.target() == "windows" then pythonLibrary = pythonLibrary:gsub("\\", "/") From aa52d17def3e44544741fd781c74c4c97156679a Mon 
Sep 17 00:00:00 2001 From: Fillo7 Date: Tue, 18 Jan 2022 12:14:18 +0100 Subject: [PATCH 56/63] * Updated new CUPTI implementation to utilize newer API functions --- Source/ComputeEngine/Cuda/CudaEngine.cpp | 2 +- .../Cuda/Cupti/CuptiMetricInterface.cpp | 399 +++++++++++++----- .../Cuda/Cupti/CuptiMetricInterface.h | 13 +- 3 files changed, 300 insertions(+), 114 deletions(-) diff --git a/Source/ComputeEngine/Cuda/CudaEngine.cpp b/Source/ComputeEngine/Cuda/CudaEngine.cpp index 7b5121c9..32d62703 100644 --- a/Source/ComputeEngine/Cuda/CudaEngine.cpp +++ b/Source/ComputeEngine/Cuda/CudaEngine.cpp @@ -885,7 +885,7 @@ void CudaEngine::ClearStreamActions(const QueueId id) void CudaEngine::InitializeCupti() { m_Profiler = std::make_unique(); - m_MetricInterface = std::make_unique(m_DeviceIndex); + m_MetricInterface = std::make_unique(m_DeviceIndex, *m_Context); } void CudaEngine::InitializeProfiling(const KernelComputeId& id) diff --git a/Source/ComputeEngine/Cuda/Cupti/CuptiMetricInterface.cpp b/Source/ComputeEngine/Cuda/Cupti/CuptiMetricInterface.cpp index 9f9f6000..bac30c2a 100644 --- a/Source/ComputeEngine/Cuda/Cupti/CuptiMetricInterface.cpp +++ b/Source/ComputeEngine/Cuda/Cupti/CuptiMetricInterface.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -16,13 +17,14 @@ namespace ktt { -CuptiMetricInterface::CuptiMetricInterface(const DeviceIndex index) : +CuptiMetricInterface::CuptiMetricInterface(const DeviceIndex index, const CudaContext& context) : m_DeviceName(GetDeviceName(index)), - m_Context(nullptr), + m_Evaluator(nullptr), m_MaxProfiledRanges(2), m_MaxRangeNameLength(64) { Logger::LogDebug("Initializing CUPTI metric interface"); + InitializeCounterAvailabilityImage(context); NVPW_InitializeHost_Params hostParams = { @@ -32,16 +34,33 @@ CuptiMetricInterface::CuptiMetricInterface(const DeviceIndex index) : CheckError(NVPW_InitializeHost(&hostParams), "NVPW_InitializeHost"); - NVPW_CUDA_MetricsContext_Create_Params params = + 
NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params scratchBufferSizeParams = { - NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE, + NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE, nullptr, - m_DeviceName.data(), + m_DeviceName.c_str(), + m_CounterAvailabilityImage.data(), + 0 + }; + + CheckError(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(&scratchBufferSizeParams), "NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize"); + m_ScratchBuffer.resize(scratchBufferSizeParams.scratchBufferSize); + + NVPW_CUDA_MetricsEvaluator_Initialize_Params evaluatorInitializeParams = + { + NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE, + nullptr, + m_ScratchBuffer.data(), + m_ScratchBuffer.size(), + m_DeviceName.c_str(), + m_CounterAvailabilityImage.data(), + nullptr, + 0, nullptr }; - CheckError(NVPW_CUDA_MetricsContext_Create(¶ms), "NVPW_CUDA_MetricsContext_Create"); - m_Context = params.pMetricsContext; + CheckError(NVPW_CUDA_MetricsEvaluator_Initialize(&evaluatorInitializeParams), "NVPW_CUDA_MetricsEvaluator_Initialize"); + m_Evaluator = evaluatorInitializeParams.pMetricsEvaluator; SetMetrics(GetDefaultMetrics()); } @@ -49,14 +68,14 @@ CuptiMetricInterface::~CuptiMetricInterface() { Logger::LogDebug("Releasing CUPTI metric interface"); - NVPW_MetricsContext_Destroy_Params params = + NVPW_MetricsEvaluator_Destroy_Params params = { - NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE, + NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE, nullptr, - m_Context + m_Evaluator }; - CheckError(NVPW_MetricsContext_Destroy(¶ms), "NVPW_MetricsContext_Destroy"); + CheckError(NVPW_MetricsEvaluator_Destroy(¶ms), "NVPW_MetricsEvaluator_Destroy"); } void CuptiMetricInterface::SetMetrics(const std::vector& metrics) @@ -116,60 +135,72 @@ std::unique_ptr CuptiMetricInterface::GenerateProfilingData }; CheckError(NVPW_CounterData_GetNumRanges(¶ms), "NVPW_CounterData_GetNumRanges"); - const auto& metricNames = configuration.m_MetricNames; - 
std::vector parsedNames(metricNames.size()); - std::vector metricNamePtrs; bool isolated = true; bool keepInstances = true; + std::vector counters; for (size_t metricIndex = 0; metricIndex < metricNames.size(); ++metricIndex) { - [[maybe_unused]] const bool success = ParseMetricNameString(metricNames[metricIndex], parsedNames[metricIndex], isolated, - keepInstances); + std::string parsedName; + [[maybe_unused]] const bool success = ParseMetricNameString(metricNames[metricIndex], parsedName, isolated, keepInstances); KttAssert(success, "Unable to parse metric name " + metricNames[metricIndex]); - metricNamePtrs.push_back(parsedNames[metricIndex].c_str()); - } - - std::vector counters; + NVPW_MetricEvalRequest evalRequest; - for (size_t rangeIndex = 0; rangeIndex < params.numRanges; ++rangeIndex) - { - NVPW_MetricsContext_SetCounterData_Params dataParams = + NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params evalRequestParams = { - NVPW_MetricsContext_SetCounterData_Params_STRUCT_SIZE, + NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE, nullptr, - m_Context, - counterDataImage.data(), - rangeIndex, - isolated - }; - - CheckError(NVPW_MetricsContext_SetCounterData(&dataParams), "NVPW_MetricsContext_SetCounterData"); - std::vector gpuValues(metricNames.size()); - - NVPW_MetricsContext_EvaluateToGpuValues_Params evalParams = - { - NVPW_MetricsContext_EvaluateToGpuValues_Params_STRUCT_SIZE, - nullptr, - m_Context, - metricNamePtrs.size(), - metricNamePtrs.data(), - gpuValues.data() + m_Evaluator, + parsedName.c_str(), + &evalRequest, + NVPW_MetricEvalRequest_STRUCT_SIZE }; - CheckError(NVPW_MetricsContext_EvaluateToGpuValues(&evalParams), "NVPW_MetricsContext_EvaluateToGpuValues"); + CheckError(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(&evalRequestParams), + "NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest"); + double counterValue = 0.0; - if (rangeIndex > 0) + for (size_t rangeIndex = 0; rangeIndex 
< params.numRanges; ++rangeIndex) { - // Only values from the first range are currently utilized for counters - continue; + NVPW_MetricsEvaluator_SetDeviceAttributes_Params attributesParams = + { + NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE, + nullptr, + m_Evaluator, + counterDataImage.data(), + counterDataImage.size() + }; + + CheckError(NVPW_MetricsEvaluator_SetDeviceAttributes(&attributesParams), "NVPW_MetricsEvaluator_SetDeviceAttributes"); + double metricValue = 0.0; + + NVPW_MetricsEvaluator_EvaluateToGpuValues_Params evaluateParams = + { + NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE, + nullptr, + m_Evaluator, + &evalRequest, + 1, + NVPW_MetricEvalRequest_STRUCT_SIZE, + sizeof(NVPW_MetricEvalRequest), + counterDataImage.data(), + counterDataImage.size(), + rangeIndex, + isolated, + &metricValue + }; + + CheckError(NVPW_MetricsEvaluator_EvaluateToGpuValues(&evaluateParams), "NVPW_MetricsEvaluator_EvaluateToGpuValues"); + + if (rangeIndex == 0) + { + // Only values from the first range are currently utilized for counters + counterValue = metricValue; + } } - for (size_t metricIndex = 0; metricIndex < metricNames.size(); ++metricIndex) - { - counters.emplace_back(metricNames[metricIndex], ProfilingCounterType::Double, gpuValues[metricIndex]); - } + counters.emplace_back(metricNames[metricIndex], ProfilingCounterType::Double, counterValue); } return std::make_unique(counters); @@ -204,35 +235,67 @@ void CuptiMetricInterface::ListSupportedChips() std::set CuptiMetricInterface::GetSupportedMetrics(const bool listSubMetrics) const { - NVPW_MetricsContext_GetMetricNames_Begin_Params params = - { - NVPW_MetricsContext_GetMetricNames_Begin_Params_STRUCT_SIZE, - nullptr, - m_Context, - 0, - nullptr, - !listSubMetrics, - !listSubMetrics, - !listSubMetrics, - !listSubMetrics, - }; - - CheckError(NVPW_MetricsContext_GetMetricNames_Begin(¶ms), "NVPW_MetricsContext_GetMetricNames_Begin"); std::set result; - for (size_t i = 0; i < 
params.numMetrics; ++i) + for (int i = 0; i < static_cast(NVPW_MetricType::NVPW_METRIC_TYPE__COUNT); ++i) { - result.insert(params.ppMetricNames[i]); - } + const auto metricType = static_cast(i); - NVPW_MetricsContext_GetMetricNames_End_Params endParams = - { - NVPW_MetricsContext_GetMetricNames_End_Params_STRUCT_SIZE, - nullptr, - m_Context - }; + NVPW_MetricsEvaluator_GetMetricNames_Params params = + { + NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE, + nullptr, + m_Evaluator, + static_cast(metricType), + nullptr, + nullptr, + 0 + }; + + CheckError(NVPW_MetricsEvaluator_GetMetricNames(¶ms), "NVPW_MetricsEvaluator_GetMetricNames"); + + for (size_t metricIndex = 0; metricIndex < params.numMetrics; ++metricIndex) + { + size_t metricNameIndex = params.pMetricNameBeginIndices[metricIndex]; + + for (int rollupOp = 0; rollupOp < static_cast(NVPW_RollupOp::NVPW_ROLLUP_OP__COUNT); ++rollupOp) + { + std::string metricName = ¶ms.pMetricNames[metricNameIndex]; + + if (metricType != NVPW_MetricType::NVPW_METRIC_TYPE_RATIO) + { + metricName += GetMetricRollupOpString(static_cast(rollupOp)); + } + + if (!listSubMetrics) + { + result.insert(metricName); + continue; + } + + NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params submetricsParmas = + { + NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params_STRUCT_SIZE, + nullptr, + m_Evaluator, + static_cast(metricType), + nullptr, + 0 + }; + + CheckError(NVPW_MetricsEvaluator_GetSupportedSubmetrics(&submetricsParmas), + "NVPW_MetricsEvaluator_GetSupportedSubmetrics"); + + for (size_t submetricIndex = 0; submetricIndex < submetricsParmas.numSupportedSubmetrics; ++submetricIndex) + { + const auto submetric = static_cast(submetricsParmas.pSupportedSubmetrics[submetricIndex]); + std::string submetricName = metricName + GetSubmetricString(submetric); + result.insert(submetricName); + } + } + } + } - CheckError(NVPW_MetricsContext_GetMetricNames_End(&endParams), "NVPW_MetricsContext_GetMetricNames_End"); return result; } @@ -244,19 
+307,34 @@ std::set CuptiMetricInterface::GetSupportedMetrics(const bool listS std::vector CuptiMetricInterface::GetConfigImage(const std::vector& metrics) const { std::vector rawMetricRequests; - std::vector temp; - GetRawMetricRequests(metrics, rawMetricRequests, temp); + GetRawMetricRequests(metrics, rawMetricRequests); - NVPA_RawMetricsConfigOptions configOptions = + NVPW_CUDA_RawMetricsConfig_Create_V2_Params configCreateParams = { - NVPA_RAW_METRICS_CONFIG_OPTIONS_STRUCT_SIZE, + NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE, nullptr, NVPA_ACTIVITY_KIND_PROFILER, - m_DeviceName.c_str() + m_DeviceName.c_str(), + m_CounterAvailabilityImage.data(), + nullptr }; - NVPA_RawMetricsConfig* rawMetricsConfig; - CheckError(NVPA_RawMetricsConfig_Create(&configOptions, &rawMetricsConfig), "NVPA_RawMetricsConfig_Create"); + CheckError(NVPW_CUDA_RawMetricsConfig_Create_V2(&configCreateParams), "NVPW_CUDA_RawMetricsConfig_Create_V2"); + NVPA_RawMetricsConfig* rawMetricsConfig = configCreateParams.pRawMetricsConfig; + + if (!m_CounterAvailabilityImage.empty()) + { + NVPW_RawMetricsConfig_SetCounterAvailability_Params counterAvailabilityParams = + { + NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE, + nullptr, + rawMetricsConfig, + m_CounterAvailabilityImage.data() + }; + + CheckError(NVPW_RawMetricsConfig_SetCounterAvailability(&counterAvailabilityParams), + "NVPW_RawMetricsConfig_SetCounterAvailability"); + } NVPW_RawMetricsConfig_BeginPassGroup_Params beginParams = { @@ -332,18 +410,18 @@ std::vector CuptiMetricInterface::GetConfigImage(const std::vector CuptiMetricInterface::GetCounterDataImagePrefix(const std::vector& metrics) const { std::vector rawMetricRequests; - std::vector temp; - GetRawMetricRequests(metrics, rawMetricRequests, temp); + GetRawMetricRequests(metrics, rawMetricRequests); - NVPW_CounterDataBuilder_Create_Params createParams = + NVPW_CUDA_CounterDataBuilder_Create_Params createParams = { - 
NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE, + NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE, nullptr, - nullptr, - m_DeviceName.c_str() + m_DeviceName.c_str(), + m_CounterAvailabilityImage.data(), + nullptr }; - CheckError(NVPW_CounterDataBuilder_Create(&createParams), "NVPW_CounterDataBuilder_Create"); + CheckError(NVPW_CUDA_CounterDataBuilder_Create(&createParams), "NVPW_CUDA_CounterDataBuilder_Create"); NVPW_CounterDataBuilder_AddMetrics_Params addParams = { @@ -451,10 +529,11 @@ void CuptiMetricInterface::CreateCounterDataImage(const std::vector& co } void CuptiMetricInterface::GetRawMetricRequests(const std::vector& metrics, - std::vector& rawMetricRequests, std::vector& temp) const + std::vector& rawMetricRequests) const { bool isolated = true; bool keepInstances = true; + std::vector rawMetricNames; for (const auto& metricName : metrics) { @@ -463,44 +542,57 @@ void CuptiMetricInterface::GetRawMetricRequests(const std::vector& KttAssert(success, "Unable to parse metric name " + metricName); keepInstances = true; // Bug in collection with collection of metrics without instances, keep it to true + NVPW_MetricEvalRequest evalRequest; - NVPW_MetricsContext_GetMetricProperties_Begin_Params params = + NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params metricToEvalRequest = { - NVPW_MetricsContext_GetMetricProperties_Begin_Params_STRUCT_SIZE, + NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE, nullptr, - m_Context, + m_Evaluator, parsedName.c_str(), + &evalRequest, + NVPW_MetricEvalRequest_STRUCT_SIZE + }; + + CheckError(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(&metricToEvalRequest), + "NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest"); + std::vector rawDependencies; + + NVPW_MetricsEvaluator_GetMetricRawDependencies_Params rawDependenciesParams = + { + NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE, nullptr, + m_Evaluator, + &evalRequest, + 1, + 
NVPW_MetricEvalRequest_STRUCT_SIZE, + sizeof(NVPW_MetricEvalRequest), nullptr, + 0, nullptr, - 0.0, - 0.0 + 0 }; - CheckError(NVPW_MetricsContext_GetMetricProperties_Begin(¶ms), "NVPW_MetricsContext_GetMetricProperties_Begin"); + CheckError(NVPW_MetricsEvaluator_GetMetricRawDependencies(&rawDependenciesParams), + "NVPW_MetricsEvaluator_GetMetricRawDependencies"); + rawDependencies.resize(rawDependenciesParams.numRawDependencies); + rawDependenciesParams.ppRawDependencies = rawDependencies.data(); + CheckError(NVPW_MetricsEvaluator_GetMetricRawDependencies(&rawDependenciesParams), + "NVPW_MetricsEvaluator_GetMetricRawDependencies"); - for (const char** metricDependencies = params.ppRawMetricDependencies; *metricDependencies != nullptr; ++metricDependencies) + for (size_t i = 0; i < rawDependencies.size(); ++i) { - temp.push_back(*metricDependencies); + rawMetricNames.push_back(rawDependencies[i]); } - - NVPW_MetricsContext_GetMetricProperties_End_Params endParams = - { - NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE, - nullptr, - m_Context - }; - - CheckError(NVPW_MetricsContext_GetMetricProperties_End(&endParams), "NVPW_MetricsContext_GetMetricProperties_End"); } - for (const auto& rawMetricName : temp) + for (const auto* rawMetricName : rawMetricNames) { NVPA_RawMetricRequest request = { NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE, nullptr, - rawMetricName.c_str(), + rawMetricName, isolated, keepInstances }; @@ -509,6 +601,23 @@ void CuptiMetricInterface::GetRawMetricRequests(const std::vector& } } +void CuptiMetricInterface::InitializeCounterAvailabilityImage(const CudaContext& context) +{ + CUpti_Profiler_GetCounterAvailability_Params counterAvailabilityParams = + { + CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE, + nullptr, + context.GetContext(), + 0, + nullptr + }; + + CheckError(cuptiProfilerGetCounterAvailability(&counterAvailabilityParams), "cuptiProfilerGetCounterAvailability"); + 
m_CounterAvailabilityImage.resize(counterAvailabilityParams.counterAvailabilityImageSize); + counterAvailabilityParams.pCounterAvailabilityImage = m_CounterAvailabilityImage.data(); + CheckError(cuptiProfilerGetCounterAvailability(&counterAvailabilityParams), "cuptiProfilerGetCounterAvailability"); +} + const std::vector& CuptiMetricInterface::GetDefaultMetrics() { static const std::vector result @@ -575,8 +684,6 @@ bool CuptiMetricInterface::ParseMetricNameString(const std::string& metric, std: } outputName = metric; - keepInstances = false; - isolated = true; // boost program_options sometimes inserts a \n between the metric name and a '&' at the end size_t pos = outputName.find('\n'); @@ -597,6 +704,8 @@ bool CuptiMetricInterface::ParseMetricNameString(const std::string& metric, std: } } + keepInstances = false; + if (outputName.back() == '+') { keepInstances = true; @@ -608,6 +717,8 @@ bool CuptiMetricInterface::ParseMetricNameString(const std::string& metric, std: } } + isolated = true; + if (outputName.back() == '$') { outputName.pop_back(); @@ -631,6 +742,76 @@ bool CuptiMetricInterface::ParseMetricNameString(const std::string& metric, std: return true; } +std::string CuptiMetricInterface::GetMetricRollupOpString(const NVPW_RollupOp rollupOp) +{ + switch (rollupOp) + { + case NVPW_ROLLUP_OP_AVG: + return ".avg"; + case NVPW_ROLLUP_OP_MAX: + return ".max"; + case NVPW_ROLLUP_OP_MIN: + return ".min"; + case NVPW_ROLLUP_OP_SUM: + return ".sum"; + default: + return ""; + } +} + +std::string CuptiMetricInterface::GetSubmetricString(const NVPW_Submetric submetric) +{ + switch (submetric) + { + case NVPW_SUBMETRIC_NONE: + return ""; + case NVPW_SUBMETRIC_PEAK_SUSTAINED: + return ".peak_sustained"; + case NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE: + return ".peak_sustained_active"; + case NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE_PER_SECOND: + return ".peak_sustained_active.per_second"; + case NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED: + return ".peak_sustained_elapsed"; + case 
NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED_PER_SECOND: + return ".peak_sustained_elapsed.per_second"; + case NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME: + return ".peak_sustained_frame"; + case NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME_PER_SECOND: + return ".peak_sustained_frame.per_second"; + case NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION: + return ".peak_sustained_region"; + case NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION_PER_SECOND: + return ".peak_sustained_region.per_second"; + case NVPW_SUBMETRIC_PER_CYCLE_ACTIVE: + return ".per_cycle_active"; + case NVPW_SUBMETRIC_PER_CYCLE_ELAPSED: + return ".per_cycle_elapsed"; + case NVPW_SUBMETRIC_PER_CYCLE_IN_FRAME: + return ".per_cycle_in_frame"; + case NVPW_SUBMETRIC_PER_CYCLE_IN_REGION: + return ".per_cycle_in_region"; + case NVPW_SUBMETRIC_PER_SECOND: + return ".per_second"; + case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ACTIVE: + return ".pct_of_peak_sustained_active"; + case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ELAPSED: + return ".pct_of_peak_sustained_elapsed"; + case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_FRAME: + return ".pct_of_peak_sustained_frame"; + case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_REGION: + return ".pct_of_peak_sustained_region"; + case NVPW_SUBMETRIC_MAX_RATE: + return ".max_rate"; + case NVPW_SUBMETRIC_PCT: + return ".pct"; + case NVPW_SUBMETRIC_RATIO: + return ".ratio"; + default: + return ""; + } +} + } // namespace ktt #endif // KTT_PROFILING_CUPTI diff --git a/Source/ComputeEngine/Cuda/Cupti/CuptiMetricInterface.h b/Source/ComputeEngine/Cuda/Cupti/CuptiMetricInterface.h index ce47720b..005d15ff 100644 --- a/Source/ComputeEngine/Cuda/Cupti/CuptiMetricInterface.h +++ b/Source/ComputeEngine/Cuda/Cupti/CuptiMetricInterface.h @@ -15,12 +15,13 @@ namespace ktt { +class CudaContext; class KernelProfilingData; class CuptiMetricInterface { public: - CuptiMetricInterface(const DeviceIndex index); + CuptiMetricInterface(const DeviceIndex index, const CudaContext& context); ~CuptiMetricInterface(); void SetMetrics(const std::vector& 
metrics); @@ -32,8 +33,10 @@ class CuptiMetricInterface private: std::vector m_Metrics; + std::vector m_CounterAvailabilityImage; + std::vector m_ScratchBuffer; std::string m_DeviceName; - NVPA_MetricsContext* m_Context; + NVPW_MetricsEvaluator* m_Evaluator; uint32_t m_MaxProfiledRanges; uint32_t m_MaxRangeNameLength; @@ -42,12 +45,14 @@ class CuptiMetricInterface std::vector GetCounterDataImagePrefix(const std::vector& metrics) const; void CreateCounterDataImage(const std::vector& counterDataImagePrefix, std::vector& counterDataImage, std::vector& counterDataScratchBuffer) const; - void GetRawMetricRequests(const std::vector& metrics, std::vector& rawMetricRequests, - std::vector& temp) const; + void GetRawMetricRequests(const std::vector& metrics, std::vector& rawMetricRequests) const; + void InitializeCounterAvailabilityImage(const CudaContext& context); static const std::vector& GetDefaultMetrics(); static std::string GetDeviceName(const DeviceIndex index); static bool ParseMetricNameString(const std::string& metric, std::string& outputName, bool& isolated, bool& keepInstances); + static std::string GetMetricRollupOpString(const NVPW_RollupOp rollupOp); + static std::string GetSubmetricString(const NVPW_Submetric submetric); }; } // namespace ktt From 16df09c73a2dc65fc13efffc977e42306a6276f2 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Tue, 18 Jan 2022 12:31:50 +0100 Subject: [PATCH 57/63] * Tweaked constraints section in onboarding guide --- OnboardingGuide.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index f5adeec2..98ffd975 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -330,8 +330,11 @@ tuner.AddParameter(kernel, "vector_type", std::vector{1, 2, 4, 8}); auto vectorizedSoA = [](const std::vector& values) {return values[0] > 1 || values[1] != 1;}; tuner.AddConstraint(kernel, {"vector_type", "vectorized_soa"}, vectorizedSoA); ``` -Note that parameter constraints are typically 
used in three scenarios. First, constraints can remove points in the tuning space (i.e., combinations of tuning parameters' values), which produces invalid code. Consider an example when two-dimensional blocks (work-groups in OpenCL) are created. The constraint can upper-bound thread block size (computed as block's x-dimension multiplied by block's y-dimension), so it does not exceed the highest thread block size executable on GPU. Second, constraints can prune redundant points in tuning space. In the example above, there is no need to tune vector size when the code is not vectorized. Third, constraints can remove points in the tuning space that produce underperforming code. In our example, considering two-dimensional thread blocks, we can constrain tuning space to avoid sub-warp blocks with less than 32 threads. - +The parameter constraints are typically used in three scenarios. First, constraints can remove configurations that produce invalid code. Consider an example +where two-dimensional blocks (work-groups in OpenCL) are created. The constraint can upper-bound thread block size (computed as block's x-dimension multiplied +by block's y-dimension), so it does not exceed the highest thread block size executable on a GPU. Second, constraints can prune redundant configurations. In +the example above, there is no need to tune vector size when the code is not vectorized. Third, constraints can remove configurations that are known to produce +underperforming code. In our example, considering two-dimensional thread blocks, we can constrain the tuning space to avoid sub-warp blocks with less than 32 threads. #### Parameter groups @@ -339,7 +342,7 @@ The second option is tuning parameter groups. This option is mainly helpful for kernel definition inside the kernel. 
For example, if we have a composite kernel with two kernel definitions and each definition is affected by three parameters (we have six parameters in total), and we know that each parameter only affects one specific definition, we can evaluate the two groups independently. This can significantly reduce the total number of evaluated configurations (e.g., if each of the parameters has two different values, -the total number of configurations is 64 - 2^6; with the usage of parameter groups, it is only 16 - 2^3 + 2^3). It is also possible to combine the use +the total number of configurations is 64 = 2^6; with the usage of parameter groups, it is only 16 = 2^3 + 2^3). It is also possible to combine the use of constraints and groups. However, constraints can only be added between parameters that belong to the same group. ```cpp From b629d542c28cd3ca7990ce6648e2a7e7da500aa9 Mon Sep 17 00:00:00 2001 From: Fillo7 Date: Tue, 18 Jan 2022 13:24:30 +0100 Subject: [PATCH 58/63] * Removed caps from logging header messages * Updated copyright year in license --- License.txt | 2 +- Source/Utility/Logger/Logger.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/License.txt b/License.txt index ecd8e7b4..2a048019 100644 --- a/License.txt +++ b/License.txt @@ -1,4 +1,4 @@ -Copyright (c) 2017-2021 Filip Petrovič +Copyright (c) 2017-2022 Filip Petrovič Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Source/Utility/Logger/Logger.cpp b/Source/Utility/Logger/Logger.cpp index fcef53ea..a1e98a63 100644 --- a/Source/Utility/Logger/Logger.cpp +++ b/Source/Utility/Logger/Logger.cpp @@ -86,15 +86,15 @@ std::string Logger::GetLoggingLevelString(const LoggingLevel level) switch (level) { case LoggingLevel::Off: - return "[OFF]"; + return "[Off]"; case LoggingLevel::Error: - return "[ERROR]"; + return "[Error]"; case LoggingLevel::Warning: - return 
"[WARNING]"; + return "[Warning]"; case LoggingLevel::Info: - return "[INFO]"; + return "[Info]"; case LoggingLevel::Debug: - return "[DEBUG]"; + return "[Debug]"; default: KttError("Unhandled logging level value"); return ""; From 717348f76fa61e07aac51382a0a06321d4430086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Filipovi=C4=8D?= Date: Thu, 20 Jan 2022 13:53:18 +0100 Subject: [PATCH 59/63] Update OnboardingGuide.md Small modifications of the onboarding guide up to 'Output validation' --- OnboardingGuide.md | 48 ++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index 98ffd975..bd1a271c 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -66,7 +66,7 @@ versions of our computation to see which value performs best. In practice, the computations are often complex enough to contain multiple parts that can be optimized, leading to a definition of many tuning parameters. For example, we may have the previously mentioned loop unroll parameter with -values {1, 2, 4, 8} and another parameter controlling data arrangement in memory with values {0, 1}. Combinations of these +values {1, 2, 4, 8} and another parameter switching two types of data arrangement in memory with values {0, 1}. Combinations of these parameters now define eight different versions of computation. One such combination is called tuning configuration. Together, all tuning configurations define configuration space. The size of the space grows exponentially with the addition of more tuning parameters. KTT framework offers functionality to mitigate this problem which we will discuss in the follow-up sections. @@ -167,8 +167,9 @@ kernel template with different types, we add multiple kernel definitions with co Once we have kernel definitions, we can create kernels from them. 
It is possible to create a simple kernel that only uses one definition and a composite kernel that uses multiple definitions. Usage of composite kernels is useful for computations that launch multiple kernel -functions in order to compute the result. In this case, it is also necessary to define a kernel launcher which is a function that tells the tuner in which -order and how many times each kernel function is launched. Kernel launchers are covered in detail in a separate section. +functions in order to compute the result. Those kernel functions can share some tuning parameters, so it is desirable to tune them together. With +composite kernels, it is also necessary to define a kernel launcher which is a function that tells the tuner in which order and how many times each +kernel function is launched. Kernel launchers are covered in detail in a separate section. Note that KTT terminology regarding kernel definitions and kernels differs slightly from regular compute APIs. KTT kernel definition roughly corresponds to a single kernel function (also called kernel in e.g., OpenCL or CUDA). KTT kernel corresponds to a specific computation that uses @@ -246,10 +247,13 @@ Management type option specifies whether buffer management is handled automatica to initial state before a new kernel configuration is launched, buffers are created and deleted automatically) or by the user. In some advanced cases, users may wish to manage the buffers manually. Note, however, that this requires the usage of kernel launchers which we will discuss later. -The final option for vector arguments is whether the initial data provided by the user should be copied inside the tuner or referenced directly. By default, -the data is copied, which is safer (i.e., temporary arguments work correctly) but less memory efficient. If the initial data is provided in the form of -an lvalue argument, the tuner can use a direct reference to avoid copying. 
This requires the user to keep the initial data buffer valid while the tuner uses -the argument. +The final option for vector arguments is whether the initial data provided by the user should be copied inside the tuner or referenced directly (note that +this is a different option than the memory location of data accessed by the kernel -- KTT can make its private copy of the provided buffer and then copy it to the +host or device to be directly used by the kernel). By default, the data is copied, which is safer (i.e., temporary arguments work correctly) but less memory +efficient. If the initial data is provided in the form of an lvalue argument, the tuner can use a direct reference to avoid copying. This requires the user +to keep the initial data buffer valid while the tuner uses the argument. + +The comprehensive diagram of KTT buffer types is located [here](https://github.com/HiPerCoRe/KTT/tree/master/Docs/Resources/KttBufferTypes.png). ```cpp std::vector input1; @@ -278,7 +282,7 @@ const ktt::ArgumentId resultId = tuner.AddArgumentVector(result, ktt::ArgumentAc #### Local memory arguments Local (shared in CUDA terminology) memory arguments are used to allocate a corresponding amount of cache-like memory, which is shared across all work-items -(threads) inside a work-group (thread block). We just need to specify the data type and total size of allocated memory in bytes. +(threads) inside a work-group (thread block). We just need to specify the data type and total size of allocated memory in bytes. Note that the local (shared) memory of static size can also be allocated inside the kernel code by using the __local (__shared__) declaration specifier. ```cpp // Allocate local memory for 4 floats and 2 integers. @@ -355,26 +359,28 @@ tuner.AddParameter(kernel, "b2", std::vector{0, 1}, "group_b"); #### Thread modifiers -Some tuning parameters can affect the global or local number of threads with which a kernel function is launched.
For example, we may have a parameter -that affects the amount of work performed by each thread. The more work each thread does, the fewer (global) threads we need in total to perform computation. -In KTT, we can define such dependency via thread modifiers. The thread modifier is a function that takes a default thread size and changes it based on -values of specified tuning parameters. +Some tuning parameters can affect the NDRange size (global number of threads), or work-group (thread block) size with which a kernel function is launched. +For example, we may have a parameter that affects the amount of work performed by each work-item (thread). The more work each work-item (thread) does, the +fewer (global) work-items (threads) we need in total to perform computation. In KTT, we can define such dependency via thread modifiers. The thread modifier +is a function that takes a default thread size and changes it based on values of specified tuning parameters. + +When adding a new modifier, we specify a kernel and its definitions whose sizes are affected by the modifier. Then we choose whether the +modifier affects the global size (NDRange or grid size) or local size (work-group or thread block size), its dimension and names of tuning +parameters tied to the modifier. The modifier function can be specified through enum, which supports simple operations such as multiplication or addition, but +allows only one tuning parameter to be tied to the modifier. Another option is using a custom function that can be more complex and supports multiple tuning +parameters. Creating multiple thread modifiers for the same thread size type (global/local) and dimension is possible. In that case, the modifiers will be +applied in the order of their addition to the tuner. Similar to constraints, it is possible to tie only integer parameters to thread modifiers. -When adding a new modifier, we specify a kernel and its definitions whose thread sizes are affected by the modifier. 
Then we choose whether the modifier -affects the global or local size, its dimension and names of tuning parameters tied to the modifier. The modifier function can be specified through enum, -which supports simple operations such as multiplication or addition, but allows only one tuning parameter to be tied to the modifier. Another -option is using a custom function that can be more complex and supports multiple tuning parameters. Creating multiple thread modifiers for the same thread -type (global/local) and dimension is possible. In that case, the modifiers will be applied in the order of their addition to the tuner. Similar to constraints, -it is possible to tie only integer parameters to thread modifiers. +Note that KTT can be configured to use global and local sizes according to the OpenCL standard (global size is the overall number of work-items in NDRange, local size is work-group size) or CUDA (global size is the number of thread blocks and local size is the number of threads per thread block). In the example below, the CUDA standard is used. ```cpp tuner.AddParameter(kernel, "block_size", std::vector{32, 64, 128, 256}); -// block_size parameter decides the number of local threads. +// block_size parameter decides the work-group (thread block) size, i.e. the local size. tuner.AddThreadModifier(kernel, {definition}, ktt::ModifierType::Local, ktt::ModifierDimension::X, "block_size", ktt::ModifierAction::Multiply); -// Larger block size means that the grid size should be smaller, so the total number of threads remains the same. Therefore we divide the grid -// size by block_size parameter. +// Larger block size means that the global number of threads should be smaller, so the total number of threads remains the same. Therefore we divide the grid size +// by block_size parameter.
tuner.AddThreadModifier(kernel, {definition}, ktt::ModifierType::Global, ktt::ModifierDimension::X, {"block_size"}, [](const uint64_t defaultSize, const std::vector& parameters) { From a90265642e3c81537ed4e5233111a17e9ab46de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Thu, 20 Jan 2022 14:00:41 +0100 Subject: [PATCH 60/63] * Preparations for 2.1 release --- Changelog.txt | 14 + Docs/_argument_access_type_8h.html | 36 +- Docs/_argument_access_type_8h_source.html | 66 +- Docs/_argument_data_type_8h.html | 36 +- Docs/_argument_data_type_8h_source.html | 106 ++- Docs/_argument_management_type_8h.html | 36 +- Docs/_argument_management_type_8h_source.html | 58 +- Docs/_argument_memory_location_8h.html | 36 +- Docs/_argument_memory_location_8h_source.html | 78 +- Docs/_argument_memory_type_8h.html | 37 +- Docs/_argument_memory_type_8h.js | 3 +- Docs/_argument_memory_type_8h_source.html | 67 +- Docs/_buffer_output_descriptor_8h.html | 36 +- Docs/_buffer_output_descriptor_8h_source.html | 92 ++- Docs/_computation_result_8h.html | 38 +- Docs/_computation_result_8h_source.html | 164 +++-- Docs/_compute_api_8h.html | 36 +- Docs/_compute_api_8h_source.html | 62 +- Docs/_compute_api_initializer_8h.html | 36 +- Docs/_compute_api_initializer_8h_source.html | 82 +-- Docs/_compute_interface_8h.html | 37 +- Docs/_compute_interface_8h_source.html | 227 +++--- Docs/_configuration_count_8h.html | 36 +- Docs/_configuration_count_8h_source.html | 84 ++- Docs/_configuration_duration_8h.html | 36 +- Docs/_configuration_duration_8h_source.html | 80 +- Docs/_configuration_fraction_8h.html | 36 +- Docs/_configuration_fraction_8h_source.html | 86 ++- Docs/_deterministic_searcher_8h.html | 36 +- Docs/_deterministic_searcher_8h_source.html | 82 +-- Docs/_device_info_8h.html | 36 +- Docs/_device_info_8h_source.html | 174 +++-- Docs/_device_type_8h.html | 36 +- Docs/_device_type_8h_source.html | 62 +- Docs/_dimension_vector_8h.html | 36 +- 
Docs/_dimension_vector_8h_source.html | 178 +++-- Docs/_exception_reason_8h.html | 36 +- Docs/_exception_reason_8h_source.html | 62 +- Docs/_global_size_type_8h.html | 36 +- Docs/_global_size_type_8h_source.html | 68 +- Docs/_kernel_compilation_data_8h.html | 36 +- Docs/_kernel_compilation_data_8h_source.html | 84 ++- Docs/_kernel_configuration_8h.html | 36 +- Docs/_kernel_configuration_8h_source.html | 110 ++- Docs/_kernel_profiling_counter_8h.html | 36 +- Docs/_kernel_profiling_counter_8h_source.html | 124 ++-- Docs/_kernel_profiling_data_8h.html | 36 +- Docs/_kernel_profiling_data_8h_source.html | 118 ++- Docs/_kernel_result_8h_source.html | 158 ++-- Docs/_kernel_run_mode_8h.html | 129 ++++ Docs/_kernel_run_mode_8h.js | 9 + Docs/_kernel_run_mode_8h_source.html | 131 ++++ Docs/_ktt_8h.html | 30 +- Docs/_ktt_8h_source.html | 58 +- Docs/_ktt_exception_8h.html | 36 +- Docs/_ktt_exception_8h_source.html | 84 ++- Docs/_ktt_platform_8h.html | 66 +- Docs/_ktt_platform_8h.js | 1 - Docs/_ktt_platform_8h_source.html | 103 +-- Docs/_ktt_types_8h.html | 42 +- Docs/_ktt_types_8h.js | 3 +- Docs/_ktt_types_8h_source.html | 181 ++--- Docs/_logging_level_8h.html | 36 +- Docs/_logging_level_8h_source.html | 70 +- Docs/_mcmc_searcher_8h.html | 38 +- Docs/_mcmc_searcher_8h_source.html | 129 ++-- Docs/_modifier_action_8h.html | 36 +- Docs/_modifier_action_8h_source.html | 70 +- Docs/_modifier_dimension_8h.html | 36 +- Docs/_modifier_dimension_8h_source.html | 62 +- Docs/_modifier_type_8h.html | 36 +- Docs/_modifier_type_8h_source.html | 58 +- Docs/_output_format_8h.html | 36 +- Docs/_output_format_8h_source.html | 58 +- Docs/_parameter_pair_8h.html | 36 +- Docs/_parameter_pair_8h_source.html | 138 ++-- Docs/_platform_info_8h.html | 36 +- Docs/_platform_info_8h_source.html | 116 ++- Docs/_profiling_counter_type_8h.html | 36 +- Docs/_profiling_counter_type_8h_source.html | 74 +- Docs/_random_searcher_8h.html | 36 +- Docs/_random_searcher_8h_source.html | 78 +- 
Docs/_result_status_8h.html | 36 +- Docs/_result_status_8h_source.html | 72 +- Docs/_searcher_8h.html | 36 +- Docs/_searcher_8h_source.html | 140 ++-- Docs/_stop_condition_8h.html | 36 +- Docs/_stop_condition_8h_source.html | 84 ++- Docs/_time_unit_8h.html | 36 +- Docs/_time_unit_8h_source.html | 68 +- Docs/_tuner_8h.html | 36 +- Docs/_tuner_8h_source.html | 484 +++++++------ Docs/_tuning_duration_8h.html | 36 +- Docs/_tuning_duration_8h_source.html | 86 ++- Docs/_validation_method_8h.html | 36 +- Docs/_validation_method_8h_source.html | 62 +- Docs/_validation_mode_8h.html | 38 +- Docs/_validation_mode_8h.js | 2 +- Docs/_validation_mode_8h_source.html | 92 ++- Docs/annotated.html | 74 +- Docs/annotated_dup.js | 28 +- Docs/classes.html | 48 +- ..._1_1_buffer_output_descriptor-members.html | 32 +- ...classktt_1_1_buffer_output_descriptor.html | 52 +- ...assktt_1_1_computation_result-members.html | 46 +- Docs/classktt_1_1_computation_result.html | 104 ++- ...t_1_1_compute_api_initializer-members.html | 30 +- .../classktt_1_1_compute_api_initializer.html | 44 +- ...lassktt_1_1_compute_interface-members.html | 76 +- Docs/classktt_1_1_compute_interface.html | 236 +++--- Docs/classktt_1_1_compute_interface.js | 2 + ...ssktt_1_1_configuration_count-members.html | 34 +- Docs/classktt_1_1_configuration_count.html | 60 +- ...tt_1_1_configuration_duration-members.html | 34 +- Docs/classktt_1_1_configuration_duration.html | 60 +- ...tt_1_1_configuration_fraction-members.html | 34 +- Docs/classktt_1_1_configuration_fraction.html | 60 +- ...tt_1_1_deterministic_searcher-members.html | 44 +- Docs/classktt_1_1_deterministic_searcher.html | 54 +- Docs/classktt_1_1_device_info-members.html | 48 +- Docs/classktt_1_1_device_info.html | 116 ++- ...classktt_1_1_dimension_vector-members.html | 50 +- Docs/classktt_1_1_dimension_vector.html | 120 ++- ...sktt_1_1_kernel_configuration-members.html | 38 +- Docs/classktt_1_1_kernel_configuration.html | 72 +- 
..._1_1_kernel_profiling_counter-members.html | 40 +- ...classktt_1_1_kernel_profiling_counter.html | 80 +- ...ktt_1_1_kernel_profiling_data-members.html | 40 +- Docs/classktt_1_1_kernel_profiling_data.html | 80 +- Docs/classktt_1_1_kernel_result-members.html | 46 +- Docs/classktt_1_1_kernel_result.html | 104 ++- Docs/classktt_1_1_ktt_exception-members.html | 30 +- Docs/classktt_1_1_ktt_exception.html | 44 +- Docs/classktt_1_1_mcmc_searcher-members.html | 51 +- Docs/classktt_1_1_mcmc_searcher.html | 88 ++- Docs/classktt_1_1_mcmc_searcher.js | 1 + Docs/classktt_1_1_parameter_pair-members.html | 42 +- Docs/classktt_1_1_parameter_pair.html | 90 ++- Docs/classktt_1_1_platform_info-members.html | 38 +- Docs/classktt_1_1_platform_info.html | 72 +- .../classktt_1_1_random_searcher-members.html | 44 +- Docs/classktt_1_1_random_searcher.html | 54 +- Docs/classktt_1_1_searcher-members.html | 42 +- Docs/classktt_1_1_searcher.html | 100 ++- Docs/classktt_1_1_stop_condition-members.html | 32 +- Docs/classktt_1_1_stop_condition.html | 60 +- Docs/classktt_1_1_tuner-members.html | 110 +-- Docs/classktt_1_1_tuner.html | 683 ++++++++++++++---- Docs/classktt_1_1_tuner.js | 14 +- .../classktt_1_1_tuning_duration-members.html | 34 +- Docs/classktt_1_1_tuning_duration.html | 60 +- .../dir_0b41b623c32abd77cf87d94fa0e997df.html | 34 +- .../dir_4027c6dcc37a2421c6b4a72f50c05b6d.html | 46 +- Docs/dir_4027c6dcc37a2421c6b4a72f50c05b6d.js | 10 +- .../dir_44a111874746047cefd3f7a73e059188.html | 36 +- .../dir_49c4ffe5cd0c89340eeffad86272a62d.html | 40 +- Docs/dir_49c4ffe5cd0c89340eeffad86272a62d.js | 10 +- .../dir_548aaaa9c70dca9b5c9a52a3e1ba8d1d.html | 28 +- .../dir_5baf679cf4943df8904dce5d41d4d3dc.html | 28 +- .../dir_5d7cbe695353800ab9a622e4d83d7d91.html | 50 +- Docs/dir_5d7cbe695353800ab9a622e4d83d7d91.js | 6 +- .../dir_6130151139ac5df9513b64611b721a4c.html | 36 +- .../dir_74389ed8173ad57b461b9d623a1f3867.html | 52 +- Docs/dir_74389ed8173ad57b461b9d623a1f3867.js | 2 +- 
.../dir_80eb1528bafb2f92d16ff19227bfa56f.html | 32 +- .../dir_8307084b275792eca5324684a4bab095.html | 40 +- .../dir_9494064b2e4fad632a20bee9cc16985c.html | 36 +- Docs/dir_9494064b2e4fad632a20bee9cc16985c.js | 4 +- .../dir_99be523abdf748dd5dce316df9c29ee3.html | 36 +- Docs/dir_99be523abdf748dd5dce316df9c29ee3.js | 6 +- .../dir_a84262e66930ad9071f68edb7a79274b.html | 32 +- .../dir_b86836361da653cbb02f0dbb253faee5.html | 36 +- Docs/dir_b86836361da653cbb02f0dbb253faee5.js | 1 + .../dir_c6d7e35021e38f2156a4334dff14b70a.html | 38 +- Docs/dir_c6d7e35021e38f2156a4334dff14b70a.js | 8 +- .../dir_f3e3b7636ae42a1b29e0b190d8ce34c2.html | 32 +- Docs/doxygen.css | 58 +- Docs/files.html | 55 +- Docs/functions.html | 65 +- Docs/functions_b.html | 31 +- Docs/functions_c.html | 86 +-- Docs/functions_d.html | 55 +- Docs/functions_func.html | 65 +- Docs/functions_func_b.html | 31 +- Docs/functions_func_c.html | 86 +-- Docs/functions_func_d.html | 55 +- Docs/functions_func_g.html | 343 +++------ Docs/functions_func_h.html | 57 +- Docs/functions_func_i.html | 54 +- Docs/functions_func_k.html | 51 +- Docs/functions_func_l.html | 31 +- Docs/functions_func_m.html | 43 +- Docs/functions_func_o.html | 59 +- Docs/functions_func_p.html | 35 +- Docs/functions_func_r.html | 72 +- Docs/functions_func_s.html | 226 ++---- Docs/functions_func_t.html | 43 +- Docs/functions_func_u.html | 59 +- Docs/functions_func_w.html | 39 +- Docs/functions_func_~.html | 43 +- Docs/functions_g.html | 343 +++------ Docs/functions_h.html | 57 +- Docs/functions_i.html | 54 +- Docs/functions_k.html | 51 +- Docs/functions_l.html | 31 +- Docs/functions_m.html | 67 +- Docs/functions_o.html | 59 +- Docs/functions_p.html | 35 +- Docs/functions_r.html | 72 +- Docs/functions_s.html | 226 ++---- Docs/functions_t.html | 43 +- Docs/functions_u.html | 59 +- Docs/functions_vars.html | 49 +- Docs/functions_w.html | 39 +- Docs/functions_~.html | 43 +- Docs/globals.html | 37 +- Docs/globals_defs.html | 37 +- Docs/hierarchy.html | 28 +- 
Docs/index.html | 85 ++- Docs/jquery.js | 4 +- Docs/md__docs__resources__faq.html | 42 +- Docs/namespacektt.html | 521 +++++++------ Docs/namespacemembers.html | 245 ++----- Docs/namespacemembers_enum.html | 102 +-- Docs/namespacemembers_func.html | 33 +- Docs/namespacemembers_type.html | 109 +-- Docs/namespacemembers_vars.html | 42 +- Docs/navtree.css | 1 + Docs/navtree.js | 9 +- Docs/navtreedata.js | 4 +- Docs/navtreeindex0.js | 98 +-- Docs/navtreeindex1.js | 488 ++++++------- Docs/navtreeindex2.js | 105 +-- Docs/pages.html | 28 +- Docs/resize.js | 20 +- Docs/search/all_0.html | 6 +- Docs/search/all_0.js | 42 +- Docs/search/all_1.html | 6 +- Docs/search/all_1.js | 4 +- Docs/search/all_10.html | 6 +- Docs/search/all_10.js | 2 +- Docs/search/all_11.html | 6 +- Docs/search/all_11.js | 38 +- Docs/search/all_12.html | 6 +- Docs/search/all_12.js | 118 +-- Docs/search/all_13.html | 6 +- Docs/search/all_13.js | 20 +- Docs/search/all_14.html | 6 +- Docs/search/all_14.js | 32 +- Docs/search/all_15.html | 6 +- Docs/search/all_15.js | 16 +- Docs/search/all_16.html | 6 +- Docs/search/all_16.js | 10 +- Docs/search/all_17.html | 6 +- Docs/search/all_17.js | 4 +- Docs/search/all_18.html | 6 +- Docs/search/all_18.js | 2 +- Docs/search/all_19.html | 6 +- Docs/search/all_19.js | 2 +- Docs/search/all_1a.html | 6 +- Docs/search/all_1a.js | 8 +- Docs/search/all_2.html | 6 +- Docs/search/all_2.js | 70 +- Docs/search/all_3.html | 6 +- Docs/search/all_3.js | 36 +- Docs/search/all_4.html | 6 +- Docs/search/all_4.js | 8 +- Docs/search/all_5.html | 6 +- Docs/search/all_5.js | 4 +- Docs/search/all_6.html | 6 +- Docs/search/all_6.js | 162 +++-- Docs/search/all_7.html | 6 +- Docs/search/all_7.js | 20 +- Docs/search/all_8.html | 6 +- Docs/search/all_8.js | 21 +- Docs/search/all_9.html | 6 +- Docs/search/all_9.js | 2 +- Docs/search/all_a.html | 6 +- Docs/search/all_a.js | 50 +- Docs/search/all_b.html | 6 +- Docs/search/all_b.js | 10 +- Docs/search/all_c.html | 6 +- Docs/search/all_c.js | 40 +- 
Docs/search/all_d.html | 6 +- Docs/search/all_d.js | 4 +- Docs/search/all_e.html | 6 +- Docs/search/all_e.js | 26 +- Docs/search/all_f.html | 6 +- Docs/search/all_f.js | 18 +- Docs/search/classes_0.html | 6 +- Docs/search/classes_0.js | 2 +- Docs/search/classes_1.html | 6 +- Docs/search/classes_1.js | 12 +- Docs/search/classes_2.html | 6 +- Docs/search/classes_2.js | 6 +- Docs/search/classes_3.html | 6 +- Docs/search/classes_3.js | 2 +- Docs/search/classes_4.html | 6 +- Docs/search/classes_4.js | 12 +- Docs/search/classes_5.html | 6 +- Docs/search/classes_5.js | 2 +- Docs/search/classes_6.html | 6 +- Docs/search/classes_6.js | 4 +- Docs/search/classes_7.html | 6 +- Docs/search/classes_7.js | 2 +- Docs/search/classes_8.html | 6 +- Docs/search/classes_8.js | 4 +- Docs/search/classes_9.html | 6 +- Docs/search/classes_9.js | 4 +- Docs/search/defines_0.html | 6 +- Docs/search/defines_0.js | 6 +- Docs/search/enums_0.html | 6 +- Docs/search/enums_0.js | 10 +- Docs/search/enums_1.html | 6 +- Docs/search/enums_1.js | 2 +- Docs/search/enums_2.html | 6 +- Docs/search/enums_2.js | 2 +- Docs/search/enums_3.html | 6 +- Docs/search/enums_3.js | 2 +- Docs/search/enums_4.html | 6 +- Docs/search/enums_4.js | 2 +- Docs/search/enums_5.html | 6 +- Docs/search/enums_5.js | 2 +- Docs/search/enums_6.html | 6 +- Docs/search/enums_6.js | 4 +- Docs/search/enums_7.html | 6 +- Docs/search/enums_7.js | 4 +- Docs/search/enums_8.html | 6 +- Docs/search/enums_8.js | 2 +- Docs/search/enums_9.html | 6 +- Docs/search/enums_9.js | 2 +- Docs/search/enums_a.html | 6 +- Docs/search/enums_a.js | 2 +- Docs/search/enums_b.html | 6 +- Docs/search/enums_b.js | 3 +- Docs/search/enums_c.html | 37 + Docs/search/enums_c.js | 5 + Docs/search/enumvalues_0.html | 6 +- Docs/search/enumvalues_0.js | 6 +- Docs/search/enumvalues_1.html | 6 +- Docs/search/enumvalues_1.js | 14 +- Docs/search/enumvalues_10.html | 6 +- Docs/search/enumvalues_10.js | 2 +- Docs/search/enumvalues_11.html | 6 +- Docs/search/enumvalues_11.js | 
14 +- Docs/search/enumvalues_12.html | 6 +- Docs/search/enumvalues_12.js | 6 +- Docs/search/enumvalues_13.html | 6 +- Docs/search/enumvalues_13.js | 4 +- Docs/search/enumvalues_14.html | 6 +- Docs/search/enumvalues_14.js | 4 +- Docs/search/enumvalues_15.html | 6 +- Docs/search/enumvalues_15.js | 2 +- Docs/search/enumvalues_16.html | 6 +- Docs/search/enumvalues_16.js | 2 +- Docs/search/enumvalues_2.html | 6 +- Docs/search/enumvalues_2.js | 12 +- Docs/search/enumvalues_3.html | 6 +- Docs/search/enumvalues_3.js | 2 +- Docs/search/enumvalues_4.html | 6 +- Docs/search/enumvalues_4.js | 4 +- Docs/search/enumvalues_5.html | 6 +- Docs/search/enumvalues_5.js | 6 +- Docs/search/enumvalues_6.html | 6 +- Docs/search/enumvalues_6.js | 6 +- Docs/search/enumvalues_7.html | 6 +- Docs/search/enumvalues_7.js | 4 +- Docs/search/enumvalues_8.html | 6 +- Docs/search/enumvalues_8.js | 2 +- Docs/search/enumvalues_9.html | 6 +- Docs/search/enumvalues_9.js | 4 +- Docs/search/enumvalues_a.html | 6 +- Docs/search/enumvalues_a.js | 6 +- Docs/search/enumvalues_b.html | 6 +- Docs/search/enumvalues_b.js | 4 +- Docs/search/enumvalues_c.html | 6 +- Docs/search/enumvalues_c.js | 10 +- Docs/search/enumvalues_d.html | 6 +- Docs/search/enumvalues_d.js | 2 +- Docs/search/enumvalues_e.html | 6 +- Docs/search/enumvalues_e.js | 7 +- Docs/search/enumvalues_f.html | 6 +- Docs/search/enumvalues_f.js | 13 +- Docs/search/files_0.html | 6 +- Docs/search/files_0.js | 10 +- Docs/search/files_1.html | 6 +- Docs/search/files_1.js | 2 +- Docs/search/files_2.html | 6 +- Docs/search/files_2.js | 14 +- Docs/search/files_3.html | 6 +- Docs/search/files_3.js | 8 +- Docs/search/files_4.html | 6 +- Docs/search/files_4.js | 2 +- Docs/search/files_5.html | 6 +- Docs/search/files_5.js | 2 +- Docs/search/files_6.html | 6 +- Docs/search/files_6.js | 17 +- Docs/search/files_7.html | 6 +- Docs/search/files_7.js | 2 +- Docs/search/files_8.html | 6 +- Docs/search/files_8.js | 8 +- Docs/search/files_9.html | 6 +- 
Docs/search/files_9.js | 2 +- Docs/search/files_a.html | 6 +- Docs/search/files_a.js | 6 +- Docs/search/files_b.html | 6 +- Docs/search/files_b.js | 4 +- Docs/search/files_c.html | 6 +- Docs/search/files_c.js | 4 +- Docs/search/files_d.html | 6 +- Docs/search/files_d.js | 6 +- Docs/search/files_e.html | 6 +- Docs/search/files_e.js | 4 +- Docs/search/functions_0.html | 6 +- Docs/search/functions_0.js | 20 +- Docs/search/functions_1.html | 6 +- Docs/search/functions_1.js | 2 +- Docs/search/functions_10.html | 6 +- Docs/search/functions_10.js | 6 +- Docs/search/functions_11.html | 6 +- Docs/search/functions_11.js | 8 +- Docs/search/functions_2.html | 6 +- Docs/search/functions_2.js | 28 +- Docs/search/functions_3.html | 6 +- Docs/search/functions_3.js | 14 +- Docs/search/functions_4.html | 6 +- Docs/search/functions_4.js | 152 ++-- Docs/search/functions_5.html | 6 +- Docs/search/functions_5.js | 14 +- Docs/search/functions_6.html | 6 +- Docs/search/functions_6.js | 8 +- Docs/search/functions_7.html | 6 +- Docs/search/functions_7.js | 12 +- Docs/search/functions_8.html | 6 +- Docs/search/functions_8.js | 2 +- Docs/search/functions_9.html | 6 +- Docs/search/functions_9.js | 8 +- Docs/search/functions_a.html | 6 +- Docs/search/functions_a.js | 12 +- Docs/search/functions_b.html | 6 +- Docs/search/functions_b.js | 4 +- Docs/search/functions_c.html | 6 +- Docs/search/functions_c.js | 23 +- Docs/search/functions_d.html | 6 +- Docs/search/functions_d.js | 99 +-- Docs/search/functions_e.html | 6 +- Docs/search/functions_e.js | 8 +- Docs/search/functions_f.html | 6 +- Docs/search/functions_f.js | 14 +- Docs/search/namespaces_0.html | 6 +- Docs/search/namespaces_0.js | 2 +- Docs/search/pages_0.html | 6 +- Docs/search/pages_0.js | 4 +- Docs/search/search.css | 13 +- Docs/search/search.js | 68 +- Docs/search/searchdata.js | 2 +- Docs/search/typedefs_0.html | 6 +- Docs/search/typedefs_0.js | 2 +- Docs/search/typedefs_1.html | 6 +- Docs/search/typedefs_1.js | 10 +- 
Docs/search/typedefs_2.html | 6 +- Docs/search/typedefs_2.js | 2 +- Docs/search/typedefs_3.html | 6 +- Docs/search/typedefs_3.js | 8 +- Docs/search/typedefs_4.html | 6 +- Docs/search/typedefs_4.js | 2 +- Docs/search/typedefs_5.html | 6 +- Docs/search/typedefs_5.js | 2 +- Docs/search/typedefs_6.html | 6 +- Docs/search/typedefs_6.js | 4 +- Docs/search/typedefs_7.html | 6 +- Docs/search/typedefs_7.js | 2 +- Docs/search/typedefs_8.html | 6 +- Docs/search/typedefs_8.js | 2 +- Docs/search/typedefs_9.html | 6 +- Docs/search/typedefs_9.js | 2 +- Docs/search/typedefs_a.html | 6 +- Docs/search/typedefs_a.js | 4 +- Docs/search/typedefs_b.html | 6 +- Docs/search/typedefs_b.js | 2 +- Docs/search/variables_0.html | 6 +- Docs/search/variables_0.js | 9 +- Docs/search/variables_1.html | 6 +- Docs/search/variables_1.js | 12 +- ...ors_3_01_validation_mode_01_4-members.html | 28 +- ...d_operators_3_01_validation_mode_01_4.html | 36 +- ...t_1_1_kernel_compilation_data-members.html | 34 +- ...structktt_1_1_kernel_compilation_data.html | 58 +- Docs/tabs.css | 2 +- Doxyfile | 276 ++++--- Readme.md | 6 +- Source/KttPlatform.h | 4 +- Source/Tuner.h | 2 +- 493 files changed, 9352 insertions(+), 9694 deletions(-) create mode 100644 Docs/_kernel_run_mode_8h.html create mode 100644 Docs/_kernel_run_mode_8h.js create mode 100644 Docs/_kernel_run_mode_8h_source.html create mode 100644 Docs/search/enums_c.html create mode 100644 Docs/search/enums_c.js diff --git a/Changelog.txt b/Changelog.txt index 6550302e..5390ae2f 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,3 +1,17 @@ +Version 2.1 (24/1/2022) +* Introduced KTT Python bindings making it possible to utilize KTT API in Python +* Added onboarding guide for KTT which describes core KTT features and their usage +* Added new methods for compute queue management +* Added new methods for synchronization to main tuner API +* Added non-templated versions of methods for scalar and user buffer kernel arguments addition +* Added support for 
constant memory variables in CUDA +* Updated CUPTI implementation to utilize newer API functions introduced in CUDA Toolkit 11.3 +* Updated and optimized MCMC searcher +* Kernel run mode can now be queried through compute interface +* Fixed linking issue under Windows caused by unexported methods +* Improved error messages when attempting to add kernel arguments with unsupported data types +* Added Python version of tutorials and certain examples showcasing the usage of new Python bindings + Version 2.0.1 (21/6/2021) * Added more kernel result status categories to distinguish kernel runs which failed due to compiler error or device limits being exceeded diff --git a/Docs/_argument_access_type_8h.html b/Docs/_argument_access_type_8h.html index 2a6ac664..7c0315e1 100644 --- a/Docs/_argument_access_type_8h.html +++ b/Docs/_argument_access_type_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentAccessType.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ArgumentAccessType.h File Reference
+
ArgumentAccessType.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ArgumentAccessType { ktt::Undefined , ktt::ReadOnly @@ -117,14 +115,14 @@
 

Detailed Description

-

Access type of kernel arguments.

+

Access type of kernel arguments.

diff --git a/Docs/_argument_access_type_8h_source.html b/Docs/_argument_access_type_8h_source.html index 09acbaba..e9f15700 100644 --- a/Docs/_argument_access_type_8h_source.html +++ b/Docs/_argument_access_type_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentAccessType.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,29 +91,28 @@
-
-
ArgumentAccessType.h
+
ArgumentAccessType.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
- -
14 {
-
17  Undefined,
-
18 
-
21  ReadOnly,
-
22 
-
25  WriteOnly,
-
26 
-
29  ReadWrite
-
30 };
-
31 
-
32 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
14{
+ +
18
+ +
22
+ +
26
+ +
30};
+
31
+
32} // namespace ktt
+
Definition: KttPlatform.h:41
ArgumentAccessType
Definition: ArgumentAccessType.h:14
@@ -126,7 +124,7 @@ diff --git a/Docs/_argument_data_type_8h.html b/Docs/_argument_data_type_8h.html index 8fb10ff2..9652a54a 100644 --- a/Docs/_argument_data_type_8h.html +++ b/Docs/_argument_data_type_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentDataType.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ArgumentDataType.h File Reference
+
ArgumentDataType.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ArgumentDataType {
  ktt::Char @@ -129,14 +127,14 @@
 

Detailed Description

-

Data type of kernel arguments.

+

Data type of kernel arguments.

diff --git a/Docs/_argument_data_type_8h_source.html b/Docs/_argument_data_type_8h_source.html index b6c39099..d7e5642c 100644 --- a/Docs/_argument_data_type_8h_source.html +++ b/Docs/_argument_data_type_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentDataType.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,65 +91,64 @@
-
-
ArgumentDataType.h
+
ArgumentDataType.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class ArgumentDataType
-
13 {
-
16  Char,
-
17 
- -
21 
-
24  Short,
-
25 
- -
29 
-
32  Int,
-
33 
- -
37 
-
40  Long,
-
41 
- -
45 
-
48  Half,
-
49 
-
52  Float,
-
53 
-
56  Double,
-
57 
-
61  Custom
-
62 };
-
63 
-
64 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
13{
+
16 Char,
+
17
+ +
21
+
24 Short,
+
25
+ +
29
+
32 Int,
+
33
+ +
37
+
40 Long,
+
41
+ +
45
+
48 Half,
+
49
+
52 Float,
+
53
+
56 Double,
+
57
+
61 Custom
+
62};
+
63
+
64} // namespace ktt
+
Definition: KttPlatform.h:41
+
ArgumentDataType
Definition: ArgumentDataType.h:13
- - - - + + +
diff --git a/Docs/_argument_management_type_8h.html b/Docs/_argument_management_type_8h.html index 6a4f6721..21637984 100644 --- a/Docs/_argument_management_type_8h.html +++ b/Docs/_argument_management_type_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentManagementType.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ArgumentManagementType.h File Reference
+
ArgumentManagementType.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ArgumentManagementType { ktt::Framework , ktt::User @@ -115,14 +113,14 @@
 

Detailed Description

-

Management type of kernel arguments.

+

Management type of kernel arguments.

diff --git a/Docs/_argument_management_type_8h_source.html b/Docs/_argument_management_type_8h_source.html index b90deb98..05ef2a9b 100644 --- a/Docs/_argument_management_type_8h_source.html +++ b/Docs/_argument_management_type_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentManagementType.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,25 +91,24 @@
-
-
ArgumentManagementType.h
+
ArgumentManagementType.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
- -
13 {
-
16  Framework,
-
17 
-
22  User
-
23 };
-
24 
-
25 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
13{
+ +
17
+
22 User
+
23};
+
24
+
25} // namespace ktt
+
Definition: KttPlatform.h:41
ArgumentManagementType
Definition: ArgumentManagementType.h:13
@@ -120,7 +118,7 @@ diff --git a/Docs/_argument_memory_location_8h.html b/Docs/_argument_memory_location_8h.html index 25257dfe..4e7702ab 100644 --- a/Docs/_argument_memory_location_8h.html +++ b/Docs/_argument_memory_location_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentMemoryLocation.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ArgumentMemoryLocation.h File Reference
+
ArgumentMemoryLocation.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ArgumentMemoryLocation {
  ktt::Undefined @@ -121,14 +119,14 @@
 

Detailed Description

-

Memory location of kernel arguments.

+

Memory location of kernel arguments.

diff --git a/Docs/_argument_memory_location_8h_source.html b/Docs/_argument_memory_location_8h_source.html index 51d68bbb..ab594d7e 100644 --- a/Docs/_argument_memory_location_8h_source.html +++ b/Docs/_argument_memory_location_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentMemoryLocation.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,46 +91,45 @@
-
-
ArgumentMemoryLocation.h
+
ArgumentMemoryLocation.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
- -
14 {
-
17  Undefined,
-
18 
-
22  Device,
-
23 
-
27  Host,
-
28 
- -
35 
-
36  /* Argument data will be stored using compute API's unified memory system. It can be directly accessed from both
-
37  * host and device.
-
38  */
-
39  Unified
-
40 };
-
41 
-
42 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
14{
+ +
18
+
22 Device,
+
23
+
27 Host,
+
28
+ +
35
+
36 /* Argument data will be stored using compute API's unified memory system. It can be directly accessed from both
+
37 * host and device.
+
38 */
+
39 Unified
+
40};
+
41
+
42} // namespace ktt
+
Definition: KttPlatform.h:41
+
ArgumentMemoryLocation
Definition: ArgumentMemoryLocation.h:14
-
diff --git a/Docs/_argument_memory_type_8h.html b/Docs/_argument_memory_type_8h.html index 35a52e98..a94162be 100644 --- a/Docs/_argument_memory_type_8h.html +++ b/Docs/_argument_memory_type_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentMemoryType.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,35 +94,35 @@ -
-
ArgumentMemoryType.h File Reference
+
ArgumentMemoryType.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ArgumentMemoryType { ktt::Scalar , ktt::Vector , ktt::Local +, ktt::Symbol }
 

Detailed Description

-

Memory type of kernel arguments.

+

Memory type of kernel arguments.

diff --git a/Docs/_argument_memory_type_8h.js b/Docs/_argument_memory_type_8h.js index 6e948bd8..7528a1fd 100644 --- a/Docs/_argument_memory_type_8h.js +++ b/Docs/_argument_memory_type_8h.js @@ -3,6 +3,7 @@ var _argument_memory_type_8h = [ "ArgumentMemoryType", "_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771", [ [ "Scalar", "_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771af60357a8d17e45793298323f1b372a74", null ], [ "Vector", "_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771a57dea6f5039281b7fee517fc43bf3110", null ], - [ "Local", "_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771a509820290d57f333403f490dde7316f4", null ] + [ "Local", "_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771a509820290d57f333403f490dde7316f4", null ], + [ "Symbol", "_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771a02c86eb2792f3262c21d030a87e19793", null ] ] ] ]; \ No newline at end of file diff --git a/Docs/_argument_memory_type_8h_source.html b/Docs/_argument_memory_type_8h_source.html index 75e4ff07..e3160d2b 100644 --- a/Docs/_argument_memory_type_8h_source.html +++ b/Docs/_argument_memory_type_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument/ArgumentMemoryType.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,38 +91,40 @@
-
-
ArgumentMemoryType.h
+
ArgumentMemoryType.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
- -
14 {
-
17  Scalar,
-
18 
-
22  Vector,
-
23 
-
29  Local
-
30 };
-
31 
-
32 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
14{
+
17 Scalar,
+
18
+
22 Vector,
+
23
+
29 Local,
+
30
+
34 Symbol
+
35};
+
36
+
37} // namespace ktt
+
Definition: KttPlatform.h:41
ArgumentMemoryType
Definition: ArgumentMemoryType.h:14
- + +
diff --git a/Docs/_buffer_output_descriptor_8h.html b/Docs/_buffer_output_descriptor_8h.html index 0deddd7a..c7eb77e1 100644 --- a/Docs/_buffer_output_descriptor_8h.html +++ b/Docs/_buffer_output_descriptor_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/BufferOutputDescriptor.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
BufferOutputDescriptor.h File Reference
+
BufferOutputDescriptor.h File Reference
#include <cstddef>
@@ -105,25 +103,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::BufferOutputDescriptor
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Kernel buffer output retrieval.

+

Kernel buffer output retrieval.

diff --git a/Docs/_buffer_output_descriptor_8h_source.html b/Docs/_buffer_output_descriptor_8h_source.html index 98945b1b..9d75e24f 100644 --- a/Docs/_buffer_output_descriptor_8h_source.html +++ b/Docs/_buffer_output_descriptor_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/BufferOutputDescriptor.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,41 +91,40 @@
-
-
BufferOutputDescriptor.h
+
BufferOutputDescriptor.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstddef>
-
7 
-
8 #include <KttPlatform.h>
-
9 #include <KttTypes.h>
-
10 
-
11 namespace ktt
-
12 {
-
13 
- -
18 {
-
19 public:
-
26  explicit BufferOutputDescriptor(const ArgumentId id, void* outputDestination);
-
27 
-
36  explicit BufferOutputDescriptor(const ArgumentId id, void* outputDestination, const size_t outputSize);
-
37 
- -
43 
-
48  void* GetOutputDestination() const;
-
49 
-
54  size_t GetOutputSize() const;
-
55 
-
56 private:
-
57  ArgumentId m_ArgumentId;
-
58  void* m_OutputDestination;
-
59  size_t m_OutputSize;
-
60 };
-
61 
-
62 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstddef>
+
7
+
8#include <KttPlatform.h>
+
9#include <KttTypes.h>
+
10
+
11namespace ktt
+
12{
+
13
+ +
18{
+
19public:
+
26 explicit BufferOutputDescriptor(const ArgumentId id, void* outputDestination);
+
27
+
36 explicit BufferOutputDescriptor(const ArgumentId id, void* outputDestination, const size_t outputSize);
+
37
+ +
43
+
48 void* GetOutputDestination() const;
+
49
+
54 size_t GetOutputSize() const;
+
55
+
56private:
+
57 ArgumentId m_ArgumentId;
+
58 void* m_OutputDestination;
+
59 size_t m_OutputSize;
+
60};
+
61
+
62} // namespace ktt
Definition: BufferOutputDescriptor.h:18
@@ -135,7 +133,7 @@
void * GetOutputDestination() const
ArgumentId GetArgumentId() const
size_t GetOutputSize() const
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
uint64_t ArgumentId
Definition: KttTypes.h:48
@@ -143,7 +141,7 @@ diff --git a/Docs/_computation_result_8h.html b/Docs/_computation_result_8h.html index 9714864e..d03f18bd 100644 --- a/Docs/_computation_result_8h.html +++ b/Docs/_computation_result_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/ComputationResult.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ComputationResult.h File Reference
+
ComputationResult.h File Reference
#include <cstdint>
@@ -111,26 +109,26 @@

Go to the source code of this file.

-

+

Classes

class  ktt::ComputationResult
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Result data from a single kernel definition run.

-

Aggregate result data from kernel computation.

+

Result data from a single kernel definition run.

+

Aggregate result data from kernel computation.

diff --git a/Docs/_computation_result_8h_source.html b/Docs/_computation_result_8h_source.html index 0b7e25b8..8d53ab13 100644 --- a/Docs/_computation_result_8h_source.html +++ b/Docs/_computation_result_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/ComputationResult.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,77 +91,76 @@
-
-
ComputationResult.h
+
ComputationResult.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 #include <memory>
-
8 #include <string>
-
9 #include <vector>
-
10 
- - - -
14 #include <KttPlatform.h>
-
15 #include <KttTypes.h>
-
16 
-
17 namespace ktt
-
18 {
-
19 
-
23 class KTT_API ComputationResult
-
24 {
-
25 public:
- -
30 
-
35  explicit ComputationResult(const std::string& kernelFunction);
-
36 
- -
42 
-
48  void SetDurationData(const Nanoseconds duration, const Nanoseconds overhead);
-
49 
-
55  void SetSizeData(const DimensionVector& globalSize, const DimensionVector& localSize);
-
56 
-
61  void SetCompilationData(std::unique_ptr<KernelCompilationData> data);
-
62 
-
67  void SetProfilingData(std::unique_ptr<KernelProfilingData> data);
-
68 
-
73  const std::string& GetKernelFunction() const;
-
74 
- -
80 
-
85  const DimensionVector& GetLocalSize() const;
-
86 
- -
92 
- -
98 
-
103  bool HasCompilationData() const;
-
104 
- -
110 
-
115  bool HasProfilingData() const;
-
116 
- -
122 
- -
129 
- -
135 
-
136 private:
-
137  std::string m_KernelFunction;
-
138  DimensionVector m_GlobalSize;
-
139  DimensionVector m_LocalSize;
-
140  Nanoseconds m_Duration;
-
141  Nanoseconds m_Overhead;
-
142  std::unique_ptr<KernelCompilationData> m_CompilationData;
-
143  std::unique_ptr<KernelProfilingData> m_ProfilingData;
-
144 };
-
145 
-
146 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7#include <memory>
+
8#include <string>
+
9#include <vector>
+
10
+ + + +
14#include <KttPlatform.h>
+
15#include <KttTypes.h>
+
16
+
17namespace ktt
+
18{
+
19
+
23class KTT_API ComputationResult
+
24{
+
25public:
+ +
30
+
35 explicit ComputationResult(const std::string& kernelFunction);
+
36
+ +
42
+
48 void SetDurationData(const Nanoseconds duration, const Nanoseconds overhead);
+
49
+
55 void SetSizeData(const DimensionVector& globalSize, const DimensionVector& localSize);
+
56
+
61 void SetCompilationData(std::unique_ptr<KernelCompilationData> data);
+
62
+
67 void SetProfilingData(std::unique_ptr<KernelProfilingData> data);
+
68
+
73 const std::string& GetKernelFunction() const;
+
74
+ +
80
+ +
86
+ +
92
+ +
98
+
103 bool HasCompilationData() const;
+
104
+ +
110
+
115 bool HasProfilingData() const;
+
116
+ +
122
+ +
129
+ +
135
+
136private:
+
137 std::string m_KernelFunction;
+
138 DimensionVector m_GlobalSize;
+
139 DimensionVector m_LocalSize;
+
140 Nanoseconds m_Duration;
+
141 Nanoseconds m_Overhead;
+
142 std::unique_ptr<KernelCompilationData> m_CompilationData;
+
143 std::unique_ptr<KernelProfilingData> m_ProfilingData;
+
144};
+
145
+
146} // namespace ktt
@@ -189,7 +187,7 @@
ComputationResult(const ComputationResult &other)
Definition: DimensionVector.h:21
Definition: KernelProfilingData.h:18
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
uint64_t Nanoseconds
Definition: KttTypes.h:63
Definition: KernelCompilationData.h:17
@@ -198,7 +196,7 @@ diff --git a/Docs/_compute_api_8h.html b/Docs/_compute_api_8h.html index cbaecbc5..ebd9a781 100644 --- a/Docs/_compute_api_8h.html +++ b/Docs/_compute_api_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/ComputeEngine/ComputeApi.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ComputeApi.h File Reference
+
ComputeApi.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ComputeApi { ktt::OpenCL , ktt::CUDA @@ -116,14 +114,14 @@
 

Detailed Description

-

Compute APIs supported by KTT framework.

+

Compute APIs supported by KTT framework.

diff --git a/Docs/_compute_api_8h_source.html b/Docs/_compute_api_8h_source.html index c3a70dd0..b39978ad 100644 --- a/Docs/_compute_api_8h_source.html +++ b/Docs/_compute_api_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/ComputeEngine/ComputeApi.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,27 +91,26 @@
-
-
ComputeApi.h
+
ComputeApi.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class ComputeApi
-
13 {
-
16  OpenCL,
-
17 
-
20  CUDA,
-
21 
-
24  Vulkan
-
25 };
-
26 
-
27 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+
12enum class ComputeApi
+
13{
+
16 OpenCL,
+
17
+
20 CUDA,
+
21
+
24 Vulkan
+
25};
+
26
+
27} // namespace ktt
+
Definition: KttPlatform.h:41
ComputeApi
Definition: ComputeApi.h:13
@@ -123,7 +121,7 @@ diff --git a/Docs/_compute_api_initializer_8h.html b/Docs/_compute_api_initializer_8h.html index 0e82a8c8..0d7ad681 100644 --- a/Docs/_compute_api_initializer_8h.html +++ b/Docs/_compute_api_initializer_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/ComputeApiInitializer.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ComputeApiInitializer.h File Reference
+
ComputeApiInitializer.h File Reference
#include <vector>
@@ -105,25 +103,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::ComputeApiInitializer
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Custom initializer class for compute API context and queues.

+

Custom initializer class for compute API context and queues.

diff --git a/Docs/_compute_api_initializer_8h_source.html b/Docs/_compute_api_initializer_8h_source.html index e124d756..0e2a8900 100644 --- a/Docs/_compute_api_initializer_8h_source.html +++ b/Docs/_compute_api_initializer_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/ComputeApiInitializer.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,43 +91,42 @@
-
-
ComputeApiInitializer.h
+
ComputeApiInitializer.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <vector>
-
7 
-
8 #include <KttPlatform.h>
-
9 #include <KttTypes.h>
-
10 
-
11 namespace ktt
-
12 {
-
13 
-
17 class KTT_API ComputeApiInitializer
-
18 {
-
19 public:
-
26  explicit ComputeApiInitializer(ComputeContext context, const std::vector<ComputeQueue>& queues);
-
27 
- -
33 
-
38  const std::vector<ComputeQueue>& GetQueues() const;
-
39 
-
40 private:
-
41  ComputeContext m_Context;
-
42  std::vector<ComputeQueue> m_Queues;
-
43 };
-
44 
-
45 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <vector>
+
7
+
8#include <KttPlatform.h>
+
9#include <KttTypes.h>
+
10
+
11namespace ktt
+
12{
+
13
+ +
18{
+
19public:
+
26 explicit ComputeApiInitializer(ComputeContext context, const std::vector<ComputeQueue>& queues);
+
27
+ +
33
+
38 const std::vector<ComputeQueue>& GetQueues() const;
+
39
+
40private:
+
41 ComputeContext m_Context;
+
42 std::vector<ComputeQueue> m_Queues;
+
43};
+
44
+
45} // namespace ktt
Definition: ComputeApiInitializer.h:18
ComputeApiInitializer(ComputeContext context, const std::vector< ComputeQueue > &queues)
const std::vector< ComputeQueue > & GetQueues() const
ComputeContext GetContext() const
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
void * ComputeContext
Definition: KttTypes.h:113
@@ -136,7 +134,7 @@ diff --git a/Docs/_compute_interface_8h.html b/Docs/_compute_interface_8h.html index 71aae508..03158588 100644 --- a/Docs/_compute_interface_8h.html +++ b/Docs/_compute_interface_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/ComputeInterface.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ComputeInterface.h File Reference
+
ComputeInterface.h File Reference
#include <cstddef>
@@ -104,30 +102,31 @@ #include <vector>
#include <Api/Configuration/DimensionVector.h>
#include <Api/Configuration/KernelConfiguration.h>
+#include <KernelRunner/KernelRunMode.h>
#include <KttPlatform.h>
#include <KttTypes.h>

Go to the source code of this file.

-

+

Classes

class  ktt::ComputeInterface
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Functionality related to customizing kernel runs inside KTT.

+

Functionality related to customizing kernel runs inside KTT.

diff --git a/Docs/_compute_interface_8h_source.html b/Docs/_compute_interface_8h_source.html index e86557ca..0d22e547 100644 --- a/Docs/_compute_interface_8h_source.html +++ b/Docs/_compute_interface_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/ComputeInterface.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,108 +91,113 @@
-
-
ComputeInterface.h
+
ComputeInterface.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstddef>
-
7 #include <cstdint>
-
8 #include <vector>
-
9 
- - -
12 #include <KttPlatform.h>
-
13 #include <KttTypes.h>
-
14 
-
15 namespace ktt
-
16 {
-
17 
-
23 class KTT_API ComputeInterface
-
24 {
-
25 public:
-
29  virtual ~ComputeInterface() = default;
-
30 
-
36  virtual void RunKernel(const KernelDefinitionId id) = 0;
-
37 
-
45  virtual void RunKernel(const KernelDefinitionId id, const DimensionVector& globalSize, const DimensionVector& localSize) = 0;
-
46 
-
57  virtual ComputeActionId RunKernelAsync(const KernelDefinitionId id, const QueueId queue) = 0;
-
58 
-
72  virtual ComputeActionId RunKernelAsync(const KernelDefinitionId id, const QueueId queue, const DimensionVector& globalSize,
-
73  const DimensionVector& localSize) = 0;
-
74 
-
79  virtual void WaitForComputeAction(const ComputeActionId id) = 0;
-
80 
-
88  virtual void RunKernelWithProfiling(const KernelDefinitionId id) = 0;
-
89 
-
100  virtual void RunKernelWithProfiling(const KernelDefinitionId id, const DimensionVector& globalSize,
-
101  const DimensionVector& localSize) = 0;
-
102 
-
111  virtual uint64_t GetRemainingProfilingRuns(const KernelDefinitionId id) const = 0;
-
112 
-
118  virtual uint64_t GetRemainingProfilingRuns() const = 0;
-
119 
-
124  virtual QueueId GetDefaultQueue() const = 0;
-
125 
-
130  virtual std::vector<QueueId> GetAllQueues() const = 0;
-
131 
-
136  virtual void SynchronizeQueue(const QueueId queue) = 0;
-
137 
-
141  virtual void SynchronizeDevice() = 0;
-
142 
-
149  virtual const DimensionVector& GetCurrentGlobalSize(const KernelDefinitionId id) const = 0;
-
150 
-
157  virtual const DimensionVector& GetCurrentLocalSize(const KernelDefinitionId id) const = 0;
-
158 
-
163  virtual const KernelConfiguration& GetCurrentConfiguration() const = 0;
-
164 
-
172  virtual void ChangeArguments(const KernelDefinitionId id, const std::vector<ArgumentId>& arguments) = 0;
-
173 
-
181  virtual void SwapArguments(const KernelDefinitionId id, const ArgumentId first, const ArgumentId second) = 0;
-
182 
-
188  virtual void UpdateScalarArgument(const ArgumentId id, const void* data) = 0;
-
189 
-
195  virtual void UpdateLocalArgument(const ArgumentId id, const size_t dataSize) = 0;
-
196 
-
202  virtual void UploadBuffer(const ArgumentId id) = 0;
-
203 
-
213  virtual TransferActionId UploadBufferAsync(const ArgumentId id, const QueueId queue) = 0;
-
214 
-
223  virtual void DownloadBuffer(const ArgumentId id, void* destination, const size_t dataSize = 0) = 0;
-
224 
-
239  virtual TransferActionId DownloadBufferAsync(const ArgumentId id, const QueueId queue, void* destination,
-
240  const size_t dataSize = 0) = 0;
-
241 
-
250  virtual void UpdateBuffer(const ArgumentId id, const void* data, const size_t dataSize = 0) = 0;
-
251 
-
266  virtual TransferActionId UpdateBufferAsync(const ArgumentId id, const QueueId queue, const void* data,
-
267  const size_t dataSize = 0) = 0;
-
268 
-
276  virtual void CopyBuffer(const ArgumentId destination, const ArgumentId source, const size_t dataSize = 0) = 0;
-
277 
-
291  virtual TransferActionId CopyBufferAsync(const ArgumentId destination, const ArgumentId source, const QueueId queue,
-
292  const size_t dataSize = 0) = 0;
-
293 
-
298  virtual void WaitForTransferAction(const TransferActionId id) = 0;
-
299 
-
306  virtual void ResizeBuffer(const ArgumentId id, const size_t newDataSize, const bool preserveData) = 0;
-
307 
-
313  virtual void ClearBuffer(const ArgumentId id) = 0;
-
314 
-
320  virtual bool HasBuffer(const ArgumentId id) = 0;
-
321 
-
331  virtual void GetUnifiedMemoryBufferHandle(const ArgumentId id, UnifiedBufferMemory& memoryHandle) = 0;
-
332 };
-
333 
-
334 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstddef>
+
7#include <cstdint>
+
8#include <vector>
+
9
+ + + +
13#include <KttPlatform.h>
+
14#include <KttTypes.h>
+
15
+
16namespace ktt
+
17{
+
18
+
24class KTT_API ComputeInterface
+
25{
+
26public:
+
30 virtual ~ComputeInterface() = default;
+
31
+
37 virtual void RunKernel(const KernelDefinitionId id) = 0;
+
38
+
46 virtual void RunKernel(const KernelDefinitionId id, const DimensionVector& globalSize, const DimensionVector& localSize) = 0;
+
47
+
58 virtual ComputeActionId RunKernelAsync(const KernelDefinitionId id, const QueueId queue) = 0;
+
59
+
73 virtual ComputeActionId RunKernelAsync(const KernelDefinitionId id, const QueueId queue, const DimensionVector& globalSize,
+
74 const DimensionVector& localSize) = 0;
+
75
+
80 virtual void WaitForComputeAction(const ComputeActionId id) = 0;
+
81
+
89 virtual void RunKernelWithProfiling(const KernelDefinitionId id) = 0;
+
90
+
101 virtual void RunKernelWithProfiling(const KernelDefinitionId id, const DimensionVector& globalSize,
+
102 const DimensionVector& localSize) = 0;
+
103
+
112 virtual uint64_t GetRemainingProfilingRuns(const KernelDefinitionId id) const = 0;
+
113
+
119 virtual uint64_t GetRemainingProfilingRuns() const = 0;
+
120
+
125 virtual QueueId GetDefaultQueue() const = 0;
+
126
+
131 virtual std::vector<QueueId> GetAllQueues() const = 0;
+
132
+
137 virtual void SynchronizeQueue(const QueueId queue) = 0;
+
138
+
142 virtual void SynchronizeQueues() = 0;
+
143
+
147 virtual void SynchronizeDevice() = 0;
+
148
+
155 virtual const DimensionVector& GetCurrentGlobalSize(const KernelDefinitionId id) const = 0;
+
156
+
163 virtual const DimensionVector& GetCurrentLocalSize(const KernelDefinitionId id) const = 0;
+
164
+ +
170
+
175 virtual KernelRunMode GetRunMode() const = 0;
+
176
+
184 virtual void ChangeArguments(const KernelDefinitionId id, const std::vector<ArgumentId>& arguments) = 0;
+
185
+
193 virtual void SwapArguments(const KernelDefinitionId id, const ArgumentId first, const ArgumentId second) = 0;
+
194
+
200 virtual void UpdateScalarArgument(const ArgumentId id, const void* data) = 0;
+
201
+
207 virtual void UpdateLocalArgument(const ArgumentId id, const size_t dataSize) = 0;
+
208
+
214 virtual void UploadBuffer(const ArgumentId id) = 0;
+
215
+
225 virtual TransferActionId UploadBufferAsync(const ArgumentId id, const QueueId queue) = 0;
+
226
+
235 virtual void DownloadBuffer(const ArgumentId id, void* destination, const size_t dataSize = 0) = 0;
+
236
+
251 virtual TransferActionId DownloadBufferAsync(const ArgumentId id, const QueueId queue, void* destination,
+
252 const size_t dataSize = 0) = 0;
+
253
+
262 virtual void UpdateBuffer(const ArgumentId id, const void* data, const size_t dataSize = 0) = 0;
+
263
+
278 virtual TransferActionId UpdateBufferAsync(const ArgumentId id, const QueueId queue, const void* data,
+
279 const size_t dataSize = 0) = 0;
+
280
+
288 virtual void CopyBuffer(const ArgumentId destination, const ArgumentId source, const size_t dataSize = 0) = 0;
+
289
+
303 virtual TransferActionId CopyBufferAsync(const ArgumentId destination, const ArgumentId source, const QueueId queue,
+
304 const size_t dataSize = 0) = 0;
+
305
+
310 virtual void WaitForTransferAction(const TransferActionId id) = 0;
+
311
+
318 virtual void ResizeBuffer(const ArgumentId id, const size_t newDataSize, const bool preserveData) = 0;
+
319
+
325 virtual void ClearBuffer(const ArgumentId id) = 0;
+
326
+
332 virtual bool HasBuffer(const ArgumentId id) = 0;
+
333
+
343 virtual void GetUnifiedMemoryBufferHandle(const ArgumentId id, UnifiedBufferMemory& memoryHandle) = 0;
+
344};
+
345
+
346} // namespace ktt
+ -
Definition: ComputeInterface.h:24
+
Definition: ComputeInterface.h:25
virtual void ResizeBuffer(const ArgumentId id, const size_t newDataSize, const bool preserveData)=0
virtual void UploadBuffer(const ArgumentId id)=0
virtual void SynchronizeQueue(const QueueId queue)=0
@@ -211,10 +215,12 @@
virtual TransferActionId DownloadBufferAsync(const ArgumentId id, const QueueId queue, void *destination, const size_t dataSize=0)=0
virtual void RunKernel(const KernelDefinitionId id, const DimensionVector &globalSize, const DimensionVector &localSize)=0
virtual void CopyBuffer(const ArgumentId destination, const ArgumentId source, const size_t dataSize=0)=0
+
virtual void SynchronizeQueues()=0
virtual TransferActionId CopyBufferAsync(const ArgumentId destination, const ArgumentId source, const QueueId queue, const size_t dataSize=0)=0
virtual void SwapArguments(const KernelDefinitionId id, const ArgumentId first, const ArgumentId second)=0
virtual uint64_t GetRemainingProfilingRuns(const KernelDefinitionId id) const =0
virtual void ClearBuffer(const ArgumentId id)=0
+
virtual KernelRunMode GetRunMode() const =0
virtual ~ComputeInterface()=default
virtual const DimensionVector & GetCurrentLocalSize(const KernelDefinitionId id) const =0
virtual void SynchronizeDevice()=0
@@ -230,9 +236,10 @@
virtual TransferActionId UploadBufferAsync(const ArgumentId id, const QueueId queue)=0
Definition: DimensionVector.h:21
Definition: KernelConfiguration.h:19
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
void * UnifiedBufferMemory
Definition: KttTypes.h:108
uint64_t ComputeActionId
Definition: KttTypes.h:73
+
KernelRunMode
Definition: KernelRunMode.h:13
uint32_t QueueId
Definition: KttTypes.h:33
uint64_t TransferActionId
Definition: KttTypes.h:78
uint64_t ArgumentId
Definition: KttTypes.h:48
@@ -243,7 +250,7 @@ diff --git a/Docs/_configuration_count_8h.html b/Docs/_configuration_count_8h.html index 5ba1e8fc..0b4d1e14 100644 --- a/Docs/_configuration_count_8h.html +++ b/Docs/_configuration_count_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/ConfigurationCount.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ConfigurationCount.h File Reference
+
ConfigurationCount.h File Reference
#include <cstdint>
@@ -105,25 +103,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::ConfigurationCount
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Stop condition based on count of explored configurations.

+

Stop condition based on count of explored configurations.

diff --git a/Docs/_configuration_count_8h_source.html b/Docs/_configuration_count_8h_source.html index 03cb80a2..ea6a231b 100644 --- a/Docs/_configuration_count_8h_source.html +++ b/Docs/_configuration_count_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/ConfigurationCount.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,37 +91,36 @@
-
-
ConfigurationCount.h
+
ConfigurationCount.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 
- -
9 #include <KttPlatform.h>
-
10 
-
11 namespace ktt
-
12 {
-
13 
-
17 class KTT_API ConfigurationCount : public StopCondition
-
18 {
-
19 public:
-
24  explicit ConfigurationCount(const uint64_t count);
-
25 
-
26  bool IsFulfilled() const override;
-
27  void Initialize(const uint64_t configurationsCount) override;
-
28  void Update(const KernelResult& result) override;
-
29  std::string GetStatusString() const override;
-
30 
-
31 private:
-
32  uint64_t m_CurrentCount;
-
33  uint64_t m_TargetCount;
-
34 };
-
35 
-
36 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7
+ +
9#include <KttPlatform.h>
+
10
+
11namespace ktt
+
12{
+
13
+
17class KTT_API ConfigurationCount : public StopCondition
+
18{
+
19public:
+
24 explicit ConfigurationCount(const uint64_t count);
+
25
+
26 bool IsFulfilled() const override;
+
27 void Initialize(const uint64_t configurationsCount) override;
+
28 void Update(const KernelResult& result) override;
+
29 std::string GetStatusString() const override;
+
30
+
31private:
+
32 uint64_t m_CurrentCount;
+
33 uint64_t m_TargetCount;
+
34};
+
35
+
36} // namespace ktt
Definition: ConfigurationCount.h:18
@@ -133,14 +131,14 @@
bool IsFulfilled() const override
Definition: KernelResult.h:21
Definition: StopCondition.h:19
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_configuration_duration_8h.html b/Docs/_configuration_duration_8h.html index cb9cd362..93403c73 100644 --- a/Docs/_configuration_duration_8h.html +++ b/Docs/_configuration_duration_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/ConfigurationDuration.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ConfigurationDuration.h File Reference
+
ConfigurationDuration.h File Reference
#include <Api/StopCondition/StopCondition.h>
@@ -104,25 +102,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::ConfigurationDuration
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Stop condition based on computation duration of a configuration.

+

Stop condition based on computation duration of a configuration.

diff --git a/Docs/_configuration_duration_8h_source.html b/Docs/_configuration_duration_8h_source.html index 9bd298db..5c7b2214 100644 --- a/Docs/_configuration_duration_8h_source.html +++ b/Docs/_configuration_duration_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/ConfigurationDuration.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,35 +91,34 @@
-
-
ConfigurationDuration.h
+
ConfigurationDuration.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
- -
7 #include <KttPlatform.h>
-
8 
-
9 namespace ktt
-
10 {
-
11 
-
15 class KTT_API ConfigurationDuration : public StopCondition
-
16 {
-
17 public:
-
23  explicit ConfigurationDuration(const double duration);
-
24 
-
25  bool IsFulfilled() const override;
-
26  void Initialize(const uint64_t configurationsCount) override;
-
27  void Update(const KernelResult& result) override;
-
28  std::string GetStatusString() const override;
-
29 
-
30 private:
-
31  double m_BestDuration;
-
32  double m_TargetDuration;
-
33 };
-
34 
-
35 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+ +
7#include <KttPlatform.h>
+
8
+
9namespace ktt
+
10{
+
11
+ +
16{
+
17public:
+
23 explicit ConfigurationDuration(const double duration);
+
24
+
25 bool IsFulfilled() const override;
+
26 void Initialize(const uint64_t configurationsCount) override;
+
27 void Update(const KernelResult& result) override;
+
28 std::string GetStatusString() const override;
+
29
+
30private:
+
31 double m_BestDuration;
+
32 double m_TargetDuration;
+
33};
+
34
+
35} // namespace ktt
Definition: ConfigurationDuration.h:16
@@ -131,14 +129,14 @@
std::string GetStatusString() const override
Definition: KernelResult.h:21
Definition: StopCondition.h:19
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_configuration_fraction_8h.html b/Docs/_configuration_fraction_8h.html index 1a8fe23a..82d764b5 100644 --- a/Docs/_configuration_fraction_8h.html +++ b/Docs/_configuration_fraction_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/ConfigurationFraction.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ConfigurationFraction.h File Reference
+
ConfigurationFraction.h File Reference
#include <Api/StopCondition/StopCondition.h>
@@ -104,25 +102,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::ConfigurationFraction
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Stop condition based on fraction of explored configurations.

+

Stop condition based on fraction of explored configurations.

diff --git a/Docs/_configuration_fraction_8h_source.html b/Docs/_configuration_fraction_8h_source.html index 9adfa841..6783e4c4 100644 --- a/Docs/_configuration_fraction_8h_source.html +++ b/Docs/_configuration_fraction_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/ConfigurationFraction.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,38 +91,37 @@
-
-
ConfigurationFraction.h
+
ConfigurationFraction.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
- -
7 #include <KttPlatform.h>
-
8 
-
9 namespace ktt
-
10 {
-
11 
-
15 class KTT_API ConfigurationFraction : public StopCondition
-
16 {
-
17 public:
-
23  explicit ConfigurationFraction(const double fraction);
-
24 
-
25  bool IsFulfilled() const override;
-
26  void Initialize(const uint64_t configurationsCount) override;
-
27  void Update(const KernelResult& result) override;
-
28  std::string GetStatusString() const override;
-
29 
-
30 private:
-
31  uint64_t m_CurrentCount;
-
32  uint64_t m_TotalCount;
-
33  double m_TargetFraction;
-
34 
-
35  double GetExploredFraction() const;
-
36 };
-
37 
-
38 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+ +
7#include <KttPlatform.h>
+
8
+
9namespace ktt
+
10{
+
11
+ +
16{
+
17public:
+
23 explicit ConfigurationFraction(const double fraction);
+
24
+
25 bool IsFulfilled() const override;
+
26 void Initialize(const uint64_t configurationsCount) override;
+
27 void Update(const KernelResult& result) override;
+
28 std::string GetStatusString() const override;
+
29
+
30private:
+
31 uint64_t m_CurrentCount;
+
32 uint64_t m_TotalCount;
+
33 double m_TargetFraction;
+
34
+
35 double GetExploredFraction() const;
+
36};
+
37
+
38} // namespace ktt
Definition: ConfigurationFraction.h:16
@@ -134,14 +132,14 @@
void Initialize(const uint64_t configurationsCount) override
Definition: KernelResult.h:21
Definition: StopCondition.h:19
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_deterministic_searcher_8h.html b/Docs/_deterministic_searcher_8h.html index c73d0a45..8538ba25 100644 --- a/Docs/_deterministic_searcher_8h.html +++ b/Docs/_deterministic_searcher_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Searcher/DeterministicSearcher.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
DeterministicSearcher.h File Reference
+
DeterministicSearcher.h File Reference
#include <cstddef>
@@ -105,25 +103,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::DeterministicSearcher
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Searcher which explores configurations in deterministic order.

+

Searcher which explores configurations in deterministic order.

diff --git a/Docs/_deterministic_searcher_8h_source.html b/Docs/_deterministic_searcher_8h_source.html index 633dda63..20051d27 100644 --- a/Docs/_deterministic_searcher_8h_source.html +++ b/Docs/_deterministic_searcher_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Searcher/DeterministicSearcher.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,36 +91,35 @@
-
-
DeterministicSearcher.h
+
DeterministicSearcher.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstddef>
-
7 
- -
9 #include <KttPlatform.h>
-
10 
-
11 namespace ktt
-
12 {
-
13 
-
17 class KTT_API DeterministicSearcher : public Searcher
-
18 {
-
19 public:
- -
24 
-
25  void OnReset() override;
-
26 
-
27  bool CalculateNextConfiguration(const KernelResult& previousResult) override;
- -
29 
-
30 private:
-
31  uint64_t m_Index;
-
32 };
-
33 
-
34 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstddef>
+
7
+ +
9#include <KttPlatform.h>
+
10
+
11namespace ktt
+
12{
+
13
+
17class KTT_API DeterministicSearcher : public Searcher
+
18{
+
19public:
+ +
24
+
25 void OnReset() override;
+
26
+
27 bool CalculateNextConfiguration(const KernelResult& previousResult) override;
+ +
29
+
30private:
+
31 uint64_t m_Index;
+
32};
+
33
+
34} // namespace ktt
Definition: DeterministicSearcher.h:18
@@ -132,14 +130,14 @@
Definition: KernelConfiguration.h:19
Definition: KernelResult.h:21
Definition: Searcher.h:23
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_device_info_8h.html b/Docs/_device_info_8h.html index 8c6d5a0f..55621ec9 100644 --- a/Docs/_device_info_8h.html +++ b/Docs/_device_info_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Info/DeviceInfo.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
DeviceInfo.h File Reference
+
DeviceInfo.h File Reference
#include <cstdint>
@@ -107,25 +105,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::DeviceInfo
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Information about compute API devices.

+

Information about compute API devices.

diff --git a/Docs/_device_info_8h_source.html b/Docs/_device_info_8h_source.html index ecccf417..71672d78 100644 --- a/Docs/_device_info_8h_source.html +++ b/Docs/_device_info_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Info/DeviceInfo.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,82 +91,81 @@
-
-
DeviceInfo.h
+
DeviceInfo.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 #include <string>
-
8 
-
9 #include <Api/Info/DeviceType.h>
-
10 #include <KttPlatform.h>
-
11 #include <KttTypes.h>
-
12 
-
13 namespace ktt
-
14 {
-
15 
-
19 class KTT_API DeviceInfo
-
20 {
-
21 public:
-
27  explicit DeviceInfo(const DeviceIndex index, const std::string& name);
-
28 
- -
34 
-
39  const std::string& GetName() const;
-
40 
-
45  const std::string& GetVendor() const;
-
46 
-
51  const std::string& GetExtensions() const;
-
52 
- -
58 
-
63  std::string GetDeviceTypeString() const;
-
64 
-
69  uint64_t GetGlobalMemorySize() const;
-
70 
-
75  uint64_t GetLocalMemorySize() const;
-
76 
-
81  uint64_t GetMaxConstantBufferSize() const;
-
82 
-
87  uint64_t GetMaxWorkGroupSize() const;
-
88 
-
93  uint32_t GetMaxComputeUnits() const;
-
94 
-
99  std::string GetString() const;
-
100 
-
105  void SetVendor(const std::string& vendor);
-
106 
-
111  void SetExtensions(const std::string& extensions);
-
112 
-
117  void SetDeviceType(const DeviceType deviceType);
-
118 
-
123  void SetGlobalMemorySize(const uint64_t globalMemorySize);
-
124 
-
129  void SetLocalMemorySize(const uint64_t localMemorySize);
-
130 
-
135  void SetMaxConstantBufferSize(const uint64_t maxConstantBufferSize);
-
136 
-
141  void SetMaxWorkGroupSize(const uint64_t maxWorkGroupSize);
-
142 
-
147  void SetMaxComputeUnits(const uint32_t maxComputeUnits);
-
148 
-
149 private:
-
150  DeviceIndex m_Index;
-
151  std::string m_Name;
-
152  std::string m_Vendor;
-
153  std::string m_Extensions;
-
154  DeviceType m_DeviceType;
-
155  uint64_t m_GlobalMemorySize;
-
156  uint64_t m_LocalMemorySize;
-
157  uint64_t m_MaxConstantBufferSize;
-
158  uint64_t m_MaxWorkGroupSize;
-
159  uint32_t m_MaxComputeUnits;
-
160 };
-
161 
-
162 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7#include <string>
+
8
+ +
10#include <KttPlatform.h>
+
11#include <KttTypes.h>
+
12
+
13namespace ktt
+
14{
+
15
+
19class KTT_API DeviceInfo
+
20{
+
21public:
+
27 explicit DeviceInfo(const DeviceIndex index, const std::string& name);
+
28
+ +
34
+
39 const std::string& GetName() const;
+
40
+
45 const std::string& GetVendor() const;
+
46
+
51 const std::string& GetExtensions() const;
+
52
+ +
58
+
63 std::string GetDeviceTypeString() const;
+
64
+
69 uint64_t GetGlobalMemorySize() const;
+
70
+
75 uint64_t GetLocalMemorySize() const;
+
76
+
81 uint64_t GetMaxConstantBufferSize() const;
+
82
+
87 uint64_t GetMaxWorkGroupSize() const;
+
88
+
93 uint32_t GetMaxComputeUnits() const;
+
94
+
99 std::string GetString() const;
+
100
+
105 void SetVendor(const std::string& vendor);
+
106
+
111 void SetExtensions(const std::string& extensions);
+
112
+
117 void SetDeviceType(const DeviceType deviceType);
+
118
+
123 void SetGlobalMemorySize(const uint64_t globalMemorySize);
+
124
+
129 void SetLocalMemorySize(const uint64_t localMemorySize);
+
130
+
135 void SetMaxConstantBufferSize(const uint64_t maxConstantBufferSize);
+
136
+
141 void SetMaxWorkGroupSize(const uint64_t maxWorkGroupSize);
+
142
+
147 void SetMaxComputeUnits(const uint32_t maxComputeUnits);
+
148
+
149private:
+
150 DeviceIndex m_Index;
+
151 std::string m_Name;
+
152 std::string m_Vendor;
+
153 std::string m_Extensions;
+
154 DeviceType m_DeviceType;
+
155 uint64_t m_GlobalMemorySize;
+
156 uint64_t m_LocalMemorySize;
+
157 uint64_t m_MaxConstantBufferSize;
+
158 uint64_t m_MaxWorkGroupSize;
+
159 uint32_t m_MaxComputeUnits;
+
160};
+
161
+
162} // namespace ktt
@@ -193,7 +191,7 @@
void SetLocalMemorySize(const uint64_t localMemorySize)
void SetMaxComputeUnits(const uint32_t maxComputeUnits)
uint64_t GetLocalMemorySize() const
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
uint32_t DeviceIndex
Definition: KttTypes.h:28
DeviceType
Definition: DeviceType.h:13
@@ -202,7 +200,7 @@ diff --git a/Docs/_device_type_8h.html b/Docs/_device_type_8h.html index be095e04..22b1cea9 100644 --- a/Docs/_device_type_8h.html +++ b/Docs/_device_type_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Info/DeviceType.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
DeviceType.h File Reference
+
DeviceType.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::DeviceType { ktt::CPU , ktt::GPU @@ -116,14 +114,14 @@
 

Detailed Description

-

Type of compute device.

+

Type of compute device.

diff --git a/Docs/_device_type_8h_source.html b/Docs/_device_type_8h_source.html index d06b9bd5..e5978c06 100644 --- a/Docs/_device_type_8h_source.html +++ b/Docs/_device_type_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Info/DeviceType.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,27 +91,26 @@
-
-
DeviceType.h
+
DeviceType.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class DeviceType
-
13 {
-
16  CPU,
-
17 
-
20  GPU,
-
21 
-
24  Custom
-
25 };
-
26 
-
27 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+
12enum class DeviceType
+
13{
+
16 CPU,
+
17
+
20 GPU,
+
21
+
24 Custom
+
25};
+
26
+
27} // namespace ktt
+
Definition: KttPlatform.h:41
DeviceType
Definition: DeviceType.h:13
@@ -123,7 +121,7 @@ diff --git a/Docs/_dimension_vector_8h.html b/Docs/_dimension_vector_8h.html index cce43ee6..5192d911 100644 --- a/Docs/_dimension_vector_8h.html +++ b/Docs/_dimension_vector_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Configuration/DimensionVector.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
DimensionVector.h File Reference
+
DimensionVector.h File Reference
#include <cstddef>
@@ -108,25 +106,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::DimensionVector
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Functionality related to specifying thread sizes of a kernel.

+

Functionality related to specifying thread sizes of a kernel.

diff --git a/Docs/_dimension_vector_8h_source.html b/Docs/_dimension_vector_8h_source.html index 84c89bdf..d561393e 100644 --- a/Docs/_dimension_vector_8h_source.html +++ b/Docs/_dimension_vector_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Configuration/DimensionVector.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,84 +91,83 @@
-
-
DimensionVector.h
+
DimensionVector.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstddef>
-
7 #include <string>
-
8 #include <vector>
-
9 
-
10 #include <Kernel/ModifierAction.h>
- -
12 #include <KttPlatform.h>
-
13 
-
14 namespace ktt
-
15 {
-
16 
-
20 class KTT_API DimensionVector
-
21 {
-
22 public:
- -
27 
-
32  explicit DimensionVector(const size_t sizeX);
-
33 
-
39  explicit DimensionVector(const size_t sizeX, const size_t sizeY);
-
40 
-
47  explicit DimensionVector(const size_t sizeX, const size_t sizeY, const size_t sizeZ);
-
48 
-
54  explicit DimensionVector(const std::vector<size_t>& vector);
-
55 
-
60  void SetSizeX(const size_t sizeX);
-
61 
-
66  void SetSizeY(const size_t sizeY);
-
67 
-
72  void SetSizeZ(const size_t sizeZ);
-
73 
-
79  void SetSize(const ModifierDimension modifierDimension, const size_t size);
-
80 
-
85  void Multiply(const DimensionVector& factor);
-
86 
-
91  void Divide(const DimensionVector& divisor);
-
92 
-
97  void RoundUp(const DimensionVector& multiple);
-
98 
-
105  void ModifyByValue(const size_t value, const ModifierAction modifierAction, const ModifierDimension modifierDimension);
-
106 
-
111  size_t GetSizeX() const;
-
112 
-
117  size_t GetSizeY() const;
-
118 
-
123  size_t GetSizeZ() const;
-
124 
-
130  size_t GetSize(const ModifierDimension modifierDimension) const;
-
131 
-
136  size_t GetTotalSize() const;
-
137 
-
142  std::vector<size_t> GetVector() const;
-
143 
-
148  std::string GetString() const;
-
149 
-
154  bool operator==(const DimensionVector& other) const;
-
155 
-
160  bool operator!=(const DimensionVector& other) const;
-
161 
-
162 private:
-
163  size_t m_SizeX;
-
164  size_t m_SizeY;
-
165  size_t m_SizeZ;
-
166 
-
167  void AddValue(const size_t value, const ModifierDimension modifierDimension);
-
168  void SubtractValue(const size_t value, const ModifierDimension modifierDimension);
-
169  void MultiplyByValue(const size_t value, const ModifierDimension modifierDimension);
-
170  void DivideByValue(const size_t value, const ModifierDimension modifierDimension);
-
171  void DivideCeilByValue(const size_t value, const ModifierDimension modifierDimension);
-
172 };
-
173 
-
174 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstddef>
+
7#include <string>
+
8#include <vector>
+
9
+ + +
12#include <KttPlatform.h>
+
13
+
14namespace ktt
+
15{
+
16
+
20class KTT_API DimensionVector
+
21{
+
22public:
+ +
27
+
32 explicit DimensionVector(const size_t sizeX);
+
33
+
39 explicit DimensionVector(const size_t sizeX, const size_t sizeY);
+
40
+
47 explicit DimensionVector(const size_t sizeX, const size_t sizeY, const size_t sizeZ);
+
48
+
54 explicit DimensionVector(const std::vector<size_t>& vector);
+
55
+
60 void SetSizeX(const size_t sizeX);
+
61
+
66 void SetSizeY(const size_t sizeY);
+
67
+
72 void SetSizeZ(const size_t sizeZ);
+
73
+
79 void SetSize(const ModifierDimension modifierDimension, const size_t size);
+
80
+
85 void Multiply(const DimensionVector& factor);
+
86
+
91 void Divide(const DimensionVector& divisor);
+
92
+
97 void RoundUp(const DimensionVector& multiple);
+
98
+
105 void ModifyByValue(const size_t value, const ModifierAction modifierAction, const ModifierDimension modifierDimension);
+
106
+
111 size_t GetSizeX() const;
+
112
+
117 size_t GetSizeY() const;
+
118
+
123 size_t GetSizeZ() const;
+
124
+
130 size_t GetSize(const ModifierDimension modifierDimension) const;
+
131
+
136 size_t GetTotalSize() const;
+
137
+
142 std::vector<size_t> GetVector() const;
+
143
+
148 std::string GetString() const;
+
149
+
154 bool operator==(const DimensionVector& other) const;
+
155
+
160 bool operator!=(const DimensionVector& other) const;
+
161
+
162private:
+
163 size_t m_SizeX;
+
164 size_t m_SizeY;
+
165 size_t m_SizeZ;
+
166
+
167 void AddValue(const size_t value, const ModifierDimension modifierDimension);
+
168 void SubtractValue(const size_t value, const ModifierDimension modifierDimension);
+
169 void MultiplyByValue(const size_t value, const ModifierDimension modifierDimension);
+
170 void DivideByValue(const size_t value, const ModifierDimension modifierDimension);
+
171 void DivideCeilByValue(const size_t value, const ModifierDimension modifierDimension);
+
172};
+
173
+
174} // namespace ktt
@@ -196,7 +194,7 @@
DimensionVector(const size_t sizeX, const size_t sizeY)
std::string GetString() const
bool operator==(const DimensionVector &other) const
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
ModifierDimension
Definition: ModifierDimension.h:14
ModifierAction
Definition: ModifierAction.h:13
@@ -205,7 +203,7 @@ diff --git a/Docs/_exception_reason_8h.html b/Docs/_exception_reason_8h.html index 4fd92454..7edfefe3 100644 --- a/Docs/_exception_reason_8h.html +++ b/Docs/_exception_reason_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/ExceptionReason.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ExceptionReason.h File Reference
+
ExceptionReason.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ExceptionReason { ktt::General , ktt::CompilerError @@ -116,14 +114,14 @@
 

Detailed Description

-

Reason why KTT exception was thrown.

+

Reason why KTT exception was thrown.

diff --git a/Docs/_exception_reason_8h_source.html b/Docs/_exception_reason_8h_source.html index 82f56947..fd6600ed 100644 --- a/Docs/_exception_reason_8h_source.html +++ b/Docs/_exception_reason_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/ExceptionReason.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,27 +91,26 @@
-
-
ExceptionReason.h
+
ExceptionReason.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class ExceptionReason
-
13 {
-
16  General,
-
17 
- -
21 
- -
25 };
-
26 
-
27 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
13{
+
16 General,
+
17
+ +
21
+ +
25};
+
26
+
27} // namespace ktt
+
Definition: KttPlatform.h:41
ExceptionReason
Definition: ExceptionReason.h:13
@@ -123,7 +121,7 @@ diff --git a/Docs/_global_size_type_8h.html b/Docs/_global_size_type_8h.html index 0a36a054..e7d5b1b9 100644 --- a/Docs/_global_size_type_8h.html +++ b/Docs/_global_size_type_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/ComputeEngine/GlobalSizeType.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
GlobalSizeType.h File Reference
+
GlobalSizeType.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::GlobalSizeType { ktt::OpenCL , ktt::CUDA @@ -116,14 +114,14 @@
 

Detailed Description

-

Format of global thread size.

+

Format of global thread size.

diff --git a/Docs/_global_size_type_8h_source.html b/Docs/_global_size_type_8h_source.html index 23a5f116..921c709d 100644 --- a/Docs/_global_size_type_8h_source.html +++ b/Docs/_global_size_type_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/ComputeEngine/GlobalSizeType.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,38 +91,37 @@
-
-
GlobalSizeType.h
+
GlobalSizeType.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class GlobalSizeType
-
13 {
-
16  OpenCL,
-
17 
-
20  CUDA,
-
21 
-
24  Vulkan
-
25 };
-
26 
-
27 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
13{
+
16 OpenCL,
+
17
+
20 CUDA,
+
21
+
24 Vulkan
+
25};
+
26
+
27} // namespace ktt
+
Definition: KttPlatform.h:41
GlobalSizeType
Definition: GlobalSizeType.h:13
- - - + + +
diff --git a/Docs/_kernel_compilation_data_8h.html b/Docs/_kernel_compilation_data_8h.html index f6e25f66..f29ebfad 100644 --- a/Docs/_kernel_compilation_data_8h.html +++ b/Docs/_kernel_compilation_data_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/KernelCompilationData.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
KernelCompilationData.h File Reference
+
KernelCompilationData.h File Reference
#include <cstdint>
@@ -104,25 +102,25 @@

Go to the source code of this file.

-

+

Classes

struct  ktt::KernelCompilationData
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Compilation information about specific kernel configuration.

+

Compilation information about specific kernel configuration.

diff --git a/Docs/_kernel_compilation_data_8h_source.html b/Docs/_kernel_compilation_data_8h_source.html index 36f76dc2..7f77be5c 100644 --- a/Docs/_kernel_compilation_data_8h_source.html +++ b/Docs/_kernel_compilation_data_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/KernelCompilationData.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,39 +91,38 @@
-
-
KernelCompilationData.h
+
KernelCompilationData.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 
-
8 #include <KttPlatform.h>
-
9 
-
10 namespace ktt
-
11 {
-
12 
-
16 struct KTT_API KernelCompilationData
-
17 {
-
18 public:
- -
23 
- -
28 
- -
32 
- -
37 
- -
42 
-
46  uint64_t m_RegistersCount;
-
47 };
-
48 
-
49 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7
+
8#include <KttPlatform.h>
+
9
+
10namespace ktt
+
11{
+
12
+ +
17{
+
18public:
+ +
23
+ +
28
+ +
32
+ +
37
+ +
42
+ +
47};
+
48
+
49} // namespace ktt
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
Definition: KernelCompilationData.h:17
uint64_t m_ConstantMemorySize
Definition: KernelCompilationData.h:41
@@ -138,7 +136,7 @@ diff --git a/Docs/_kernel_configuration_8h.html b/Docs/_kernel_configuration_8h.html index 8ebde4e0..8bb1ab5e 100644 --- a/Docs/_kernel_configuration_8h.html +++ b/Docs/_kernel_configuration_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Configuration/KernelConfiguration.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
KernelConfiguration.h File Reference
+
KernelConfiguration.h File Reference
#include <string>
@@ -106,25 +104,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::KernelConfiguration
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Definition of kernel tuning configuration.

+

Definition of kernel tuning configuration.

diff --git a/Docs/_kernel_configuration_8h_source.html b/Docs/_kernel_configuration_8h_source.html index d1801181..404d705e 100644 --- a/Docs/_kernel_configuration_8h_source.html +++ b/Docs/_kernel_configuration_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Configuration/KernelConfiguration.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,50 +91,49 @@
-
-
KernelConfiguration.h
+
KernelConfiguration.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <string>
-
7 #include <vector>
-
8 
- -
10 #include <KttPlatform.h>
-
11 
-
12 namespace ktt
-
13 {
-
14 
-
18 class KTT_API KernelConfiguration
-
19 {
-
20 public:
- -
25 
-
30  explicit KernelConfiguration(const std::vector<ParameterPair>& pairs);
-
31 
-
36  const std::vector<ParameterPair>& GetPairs() const;
-
37 
-
42  bool IsValid() const;
-
43 
-
48  std::string GeneratePrefix() const;
-
49 
-
54  std::string GetString() const;
-
55 
-
61  void Merge(const KernelConfiguration& other);
-
62 
-
69  std::vector<KernelConfiguration> GenerateNeighbours(const std::string& parameter, const std::vector<ParameterPair>& pairs) const;
-
70 
-
76  bool operator==(const KernelConfiguration& other) const;
-
77 
-
84  bool operator!=(const KernelConfiguration& other) const;
-
85 
-
86 private:
-
87  std::vector<ParameterPair> m_Pairs;
-
88 };
-
89 
-
90 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <string>
+
7#include <vector>
+
8
+ +
10#include <KttPlatform.h>
+
11
+
12namespace ktt
+
13{
+
14
+ +
19{
+
20public:
+ +
25
+
30 explicit KernelConfiguration(const std::vector<ParameterPair>& pairs);
+
31
+
36 const std::vector<ParameterPair>& GetPairs() const;
+
37
+
42 bool IsValid() const;
+
43
+
48 std::string GeneratePrefix() const;
+
49
+
54 std::string GetString() const;
+
55
+
61 void Merge(const KernelConfiguration& other);
+
62
+
69 std::vector<KernelConfiguration> GenerateNeighbours(const std::string& parameter, const std::vector<ParameterPair>& pairs) const;
+
70
+
76 bool operator==(const KernelConfiguration& other) const;
+
77
+
84 bool operator!=(const KernelConfiguration& other) const;
+
85
+
86private:
+
87 std::vector<ParameterPair> m_Pairs;
+
88};
+
89
+
90} // namespace ktt
Definition: KernelConfiguration.h:19
@@ -149,14 +147,14 @@
bool operator!=(const KernelConfiguration &other) const
KernelConfiguration(const std::vector< ParameterPair > &pairs)
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_kernel_profiling_counter_8h.html b/Docs/_kernel_profiling_counter_8h.html index bb21be22..76874e31 100644 --- a/Docs/_kernel_profiling_counter_8h.html +++ b/Docs/_kernel_profiling_counter_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/KernelProfilingCounter.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
KernelProfilingCounter.h File Reference
+
KernelProfilingCounter.h File Reference
#include <cstdint>
@@ -107,25 +105,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::KernelProfilingCounter
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Information about a single profiling counter.

+

Information about a single profiling counter.

diff --git a/Docs/_kernel_profiling_counter_8h_source.html b/Docs/_kernel_profiling_counter_8h_source.html index b81670d1..268c4c26 100644 --- a/Docs/_kernel_profiling_counter_8h_source.html +++ b/Docs/_kernel_profiling_counter_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/KernelProfilingCounter.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,57 +91,56 @@
-
-
KernelProfilingCounter.h
+
KernelProfilingCounter.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 #include <string>
-
8 #include <variant>
-
9 
- -
11 #include <KttPlatform.h>
-
12 
-
13 namespace ktt
-
14 {
-
15 
- -
20 {
-
21 public:
- -
26 
-
33  explicit KernelProfilingCounter(const std::string& name, const ProfilingCounterType type, const int64_t value);
-
34 
-
41  explicit KernelProfilingCounter(const std::string& name, const ProfilingCounterType type, const uint64_t value);
-
42 
-
49  explicit KernelProfilingCounter(const std::string& name, const ProfilingCounterType type, const double value);
-
50 
-
55  const std::string& GetName() const;
-
56 
- -
63 
-
68  int64_t GetValueInt() const;
-
69 
-
75  uint64_t GetValueUint() const;
-
76 
-
81  double GetValueDouble() const;
-
82 
-
88  bool operator==(const KernelProfilingCounter& other) const;
-
89 
-
95  bool operator!=(const KernelProfilingCounter& other) const;
-
96 
-
102  bool operator<(const KernelProfilingCounter& other) const;
-
103 
-
104 private:
-
105  std::string m_Name;
-
106  ProfilingCounterType m_Type;
-
107  std::variant<int64_t, uint64_t, double> m_Value;
-
108 };
-
109 
-
110 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7#include <string>
+
8#include <variant>
+
9
+ +
11#include <KttPlatform.h>
+
12
+
13namespace ktt
+
14{
+
15
+ +
20{
+
21public:
+ +
26
+
33 explicit KernelProfilingCounter(const std::string& name, const ProfilingCounterType type, const int64_t value);
+
34
+
41 explicit KernelProfilingCounter(const std::string& name, const ProfilingCounterType type, const uint64_t value);
+
42
+
49 explicit KernelProfilingCounter(const std::string& name, const ProfilingCounterType type, const double value);
+
50
+
55 const std::string& GetName() const;
+
56
+ +
63
+
68 int64_t GetValueInt() const;
+
69
+
75 uint64_t GetValueUint() const;
+
76
+
81 double GetValueDouble() const;
+
82
+
88 bool operator==(const KernelProfilingCounter& other) const;
+
89
+
95 bool operator!=(const KernelProfilingCounter& other) const;
+
96
+
102 bool operator<(const KernelProfilingCounter& other) const;
+
103
+
104private:
+
105 std::string m_Name;
+ +
107 std::variant<int64_t, uint64_t, double> m_Value;
+
108};
+
109
+
110} // namespace ktt
Definition: KernelProfilingCounter.h:20
@@ -158,7 +156,7 @@
KernelProfilingCounter(const std::string &name, const ProfilingCounterType type, const double value)
KernelProfilingCounter(const std::string &name, const ProfilingCounterType type, const int64_t value)
bool operator==(const KernelProfilingCounter &other) const
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
ProfilingCounterType
Definition: ProfilingCounterType.h:13
@@ -166,7 +164,7 @@ diff --git a/Docs/_kernel_profiling_data_8h.html b/Docs/_kernel_profiling_data_8h.html index 35cc5fbc..50a78ac0 100644 --- a/Docs/_kernel_profiling_data_8h.html +++ b/Docs/_kernel_profiling_data_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/KernelProfilingData.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
KernelProfilingData.h File Reference
+
KernelProfilingData.h File Reference
#include <vector>
@@ -105,25 +103,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::KernelProfilingData
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Profiling information about a kernel run under specific configuration.

+

Profiling information about a kernel run under specific configuration.

diff --git a/Docs/_kernel_profiling_data_8h_source.html b/Docs/_kernel_profiling_data_8h_source.html index 9a4bbe3a..2c61e70c 100644 --- a/Docs/_kernel_profiling_data_8h_source.html +++ b/Docs/_kernel_profiling_data_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/KernelProfilingData.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,54 +91,53 @@
-
-
KernelProfilingData.h
+
KernelProfilingData.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <vector>
-
7 
- -
9 #include <KttPlatform.h>
-
10 
-
11 namespace ktt
-
12 {
-
13 
-
17 class KTT_API KernelProfilingData
-
18 {
-
19 public:
-
23  KernelProfilingData() = default;
-
24 
-
30  explicit KernelProfilingData(const uint64_t remainingRuns);
-
31 
-
36  explicit KernelProfilingData(const std::vector<KernelProfilingCounter>& counters);
-
37 
-
42  bool IsValid() const;
-
43 
-
48  bool HasCounter(const std::string& name) const;
-
49 
-
55  const KernelProfilingCounter& GetCounter(const std::string& name) const;
-
56 
-
61  const std::vector<KernelProfilingCounter>& GetCounters() const;
-
62 
-
67  void SetCounters(const std::vector<KernelProfilingCounter>& counters);
-
68 
-
73  void AddCounter(const KernelProfilingCounter& counter);
-
74 
- -
80 
-
86  uint64_t GetRemainingProfilingRuns() const;
-
87 
- -
93 
-
94 private:
-
95  std::vector<KernelProfilingCounter> m_Counters;
-
96  uint64_t m_RemainingRuns;
-
97 };
-
98 
-
99 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <vector>
+
7
+ +
9#include <KttPlatform.h>
+
10
+
11namespace ktt
+
12{
+
13
+ +
18{
+
19public:
+ +
24
+
30 explicit KernelProfilingData(const uint64_t remainingRuns);
+
31
+
36 explicit KernelProfilingData(const std::vector<KernelProfilingCounter>& counters);
+
37
+
42 bool IsValid() const;
+
43
+
48 bool HasCounter(const std::string& name) const;
+
49
+
55 const KernelProfilingCounter& GetCounter(const std::string& name) const;
+
56
+
61 const std::vector<KernelProfilingCounter>& GetCounters() const;
+
62
+
67 void SetCounters(const std::vector<KernelProfilingCounter>& counters);
+
68
+
73 void AddCounter(const KernelProfilingCounter& counter);
+
74
+ +
80
+
86 uint64_t GetRemainingProfilingRuns() const;
+
87
+ +
93
+
94private:
+
95 std::vector<KernelProfilingCounter> m_Counters;
+
96 uint64_t m_RemainingRuns;
+
97};
+
98
+
99} // namespace ktt
Definition: KernelProfilingCounter.h:20
@@ -156,14 +154,14 @@
void AddCounter(const KernelProfilingCounter &counter)
void SetCounters(const std::vector< KernelProfilingCounter > &counters)
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_kernel_result_8h_source.html b/Docs/_kernel_result_8h_source.html index d17d03e3..95a3902c 100644 --- a/Docs/_kernel_result_8h_source.html +++ b/Docs/_kernel_result_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/KernelResult.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,74 +91,73 @@
-
-
KernelResult.h
+
KernelResult.h
-
1 
-
4 #pragma once
-
5 
-
6 #include <vector>
-
7 
- - - -
11 #include <KttPlatform.h>
-
12 #include <KttTypes.h>
-
13 
-
14 namespace ktt
-
15 {
-
16 
-
20 class KTT_API KernelResult
-
21 {
-
22 public:
- -
27 
-
33  explicit KernelResult(const std::string& kernelName, const KernelConfiguration& configuration);
-
34 
-
42  explicit KernelResult(const std::string& kernelName, const KernelConfiguration& configuration,
-
43  const std::vector<ComputationResult>& results);
-
44 
-
49  void SetStatus(const ResultStatus status);
-
50 
-
55  void SetExtraDuration(const Nanoseconds duration);
-
56 
-
61  void SetExtraOverhead(const Nanoseconds overhead);
-
62 
-
67  const std::string& GetKernelName() const;
-
68 
-
73  const std::vector<ComputationResult>& GetResults() const;
-
74 
- -
80 
- -
86 
- -
92 
- -
98 
- -
105 
- -
112 
- -
118 
- -
124 
-
129  bool IsValid() const;
-
130 
- -
137 
-
138 private:
-
139  KernelConfiguration m_Configuration;
-
140  std::vector<ComputationResult> m_Results;
-
141  std::string m_KernelName;
-
142  Nanoseconds m_ExtraDuration;
-
143  Nanoseconds m_ExtraOverhead;
-
144  ResultStatus m_Status;
-
145 };
-
146 
-
147 } // namespace ktt
+
1
+
4#pragma once
+
5
+
6#include <vector>
+
7
+ + + +
11#include <KttPlatform.h>
+
12#include <KttTypes.h>
+
13
+
14namespace ktt
+
15{
+
16
+
20class KTT_API KernelResult
+
21{
+
22public:
+ +
27
+
33 explicit KernelResult(const std::string& kernelName, const KernelConfiguration& configuration);
+
34
+
42 explicit KernelResult(const std::string& kernelName, const KernelConfiguration& configuration,
+
43 const std::vector<ComputationResult>& results);
+
44
+
49 void SetStatus(const ResultStatus status);
+
50
+
55 void SetExtraDuration(const Nanoseconds duration);
+
56
+
61 void SetExtraOverhead(const Nanoseconds overhead);
+
62
+
67 const std::string& GetKernelName() const;
+
68
+
73 const std::vector<ComputationResult>& GetResults() const;
+
74
+ +
80
+ +
86
+ +
92
+ +
98
+ +
105
+ +
112
+ +
118
+ +
124
+
129 bool IsValid() const;
+
130
+ +
137
+
138private:
+
139 KernelConfiguration m_Configuration;
+
140 std::vector<ComputationResult> m_Results;
+
141 std::string m_KernelName;
+
142 Nanoseconds m_ExtraDuration;
+
143 Nanoseconds m_ExtraOverhead;
+
144 ResultStatus m_Status;
+
145};
+
146
+
147} // namespace ktt
@@ -185,7 +183,7 @@
KernelResult(const std::string &kernelName, const KernelConfiguration &configuration)
KernelResult(const std::string &kernelName, const KernelConfiguration &configuration, const std::vector< ComputationResult > &results)
bool IsValid() const
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
uint64_t Nanoseconds
Definition: KttTypes.h:63
ResultStatus
Definition: ResultStatus.h:13
@@ -194,7 +192,7 @@ diff --git a/Docs/_kernel_run_mode_8h.html b/Docs/_kernel_run_mode_8h.html new file mode 100644 index 00000000..b1be09a3 --- /dev/null +++ b/Docs/_kernel_run_mode_8h.html @@ -0,0 +1,129 @@ + + + + + + + +Kernel Tuning Toolkit: Source/KernelRunner/KernelRunMode.h File Reference + + + + + + + + + + + + + + +
+
+ + + + + + + +
+
Kernel Tuning Toolkit 2.1 +
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+ +
KernelRunMode.h File Reference
+
+
+ +

Go to the source code of this file.

+ + + + +

+Namespaces

namespace  ktt
 
+ + + +

+Enumerations

enum class  ktt::KernelRunMode { ktt::Running +, ktt::OfflineTuning +, ktt::OnlineTuning +, ktt::ResultValidation + }
 
+

Detailed Description

+

Definition of different kernel running scenarios.

+
+
+ + + + diff --git a/Docs/_kernel_run_mode_8h.js b/Docs/_kernel_run_mode_8h.js new file mode 100644 index 00000000..31f10e0b --- /dev/null +++ b/Docs/_kernel_run_mode_8h.js @@ -0,0 +1,9 @@ +var _kernel_run_mode_8h = +[ + [ "KernelRunMode", "_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35ee", [ + [ "Running", "_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35eea5bda814c4aedb126839228f1a3d92f09", null ], + [ "OfflineTuning", "_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35eeadd15d361ea740ab5e11145f0fa2f9374", null ], + [ "OnlineTuning", "_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35eeabc40db0f46cab2811dede4f5341446fb", null ], + [ "ResultValidation", "_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35eeab2f31c1511a9baf03796731b9fbc2e1b", null ] + ] ] +]; \ No newline at end of file diff --git a/Docs/_kernel_run_mode_8h_source.html b/Docs/_kernel_run_mode_8h_source.html new file mode 100644 index 00000000..4f30d7e1 --- /dev/null +++ b/Docs/_kernel_run_mode_8h_source.html @@ -0,0 +1,131 @@ + + + + + + + +Kernel Tuning Toolkit: Source/KernelRunner/KernelRunMode.h Source File + + + + + + + + + + + + + + +
+
+ + + + + + + +
+
Kernel Tuning Toolkit 2.1 +
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
KernelRunMode.h
+
+
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+
12enum class KernelRunMode
+
13{
+
16 Running,
+
17
+ +
21
+ +
25
+ +
29};
+
30
+
31} // namespace ktt
+
Definition: KttPlatform.h:41
+
KernelRunMode
Definition: KernelRunMode.h:13
+ + + + +
+
+ + + + diff --git a/Docs/_ktt_8h.html b/Docs/_ktt_8h.html index 9d11551f..d0092c13 100644 --- a/Docs/_ktt_8h.html +++ b/Docs/_ktt_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Ktt.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,8 +91,7 @@
-
-
Ktt.h File Reference
+
Ktt.h File Reference
#include <Tuner.h>
@@ -108,14 +106,14 @@

Go to the source code of this file.

Detailed Description

-

Public KTT API header.

+

Public KTT API header.

diff --git a/Docs/_ktt_8h_source.html b/Docs/_ktt_8h_source.html index 8be759c7..223a16a6 100644 --- a/Docs/_ktt_8h_source.html +++ b/Docs/_ktt_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Ktt.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,25 +91,24 @@
-
-
Ktt.h
+
Ktt.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <Tuner.h>
-
7 
- - - -
11 
- - - - -
16 
-
17 #include <Api/ComputeInterface.h>
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <Tuner.h>
+
7
+ + + +
11
+ + + + +
16
+ @@ -126,7 +124,7 @@ diff --git a/Docs/_ktt_exception_8h.html b/Docs/_ktt_exception_8h.html index cde7af0a..1e0e5d2a 100644 --- a/Docs/_ktt_exception_8h.html +++ b/Docs/_ktt_exception_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/KttException.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
KttException.h File Reference
+
KttException.h File Reference
#include <exception>
@@ -106,25 +104,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::KttException
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Error handling for KTT framework.

+

Error handling for KTT framework.

diff --git a/Docs/_ktt_exception_8h_source.html b/Docs/_ktt_exception_8h_source.html index a4b79133..5daf9761 100644 --- a/Docs/_ktt_exception_8h_source.html +++ b/Docs/_ktt_exception_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/KttException.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,44 +91,43 @@
-
-
KttException.h
+
KttException.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <exception>
-
7 #include <string>
-
8 
-
9 #include <Api/ExceptionReason.h>
-
10 #include <KttPlatform.h>
-
11 
-
12 namespace ktt
-
13 {
-
14 
-
18 class KttException : public std::exception
-
19 {
-
20 public:
-
26  KTT_API KttException(const std::string& message, const ExceptionReason reason = ExceptionReason::General);
-
27 
-
32  KTT_API const char* what() const noexcept override;
-
33 
-
38  KTT_API ExceptionReason GetReason() const;
-
39 
-
40 private:
-
41  std::string m_Message;
-
42  ExceptionReason m_Reason;
-
43 };
-
44 
-
45 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <exception>
+
7#include <string>
+
8
+ +
10#include <KttPlatform.h>
+
11
+
12namespace ktt
+
13{
+
14
+
18class KttException : public std::exception
+
19{
+
20public:
+
26 KTT_API KttException(const std::string& message, const ExceptionReason reason = ExceptionReason::General);
+
27
+
32 KTT_API const char* what() const noexcept override;
+
33
+
38 KTT_API ExceptionReason GetReason() const;
+
39
+
40private:
+
41 std::string m_Message;
+
42 ExceptionReason m_Reason;
+
43};
+
44
+
45} // namespace ktt
Definition: KttException.h:19
KTT_API ExceptionReason GetReason() const
KTT_API KttException(const std::string &message, const ExceptionReason reason=ExceptionReason::General)
KTT_API const char * what() const noexcept override
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
ExceptionReason
Definition: ExceptionReason.h:13
@@ -138,7 +136,7 @@ diff --git a/Docs/_ktt_platform_8h.html b/Docs/_ktt_platform_8h.html index 3b0f5c81..f9dae0fb 100644 --- a/Docs/_ktt_platform_8h.html +++ b/Docs/_ktt_platform_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KttPlatform.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -96,8 +95,7 @@ Namespaces | Macros | Functions -
-
KttPlatform.h File Reference
+
KttPlatform.h File Reference
#include <cstdint>
@@ -105,24 +103,30 @@

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
- - + + + + - + - +

+

Macros

+
#define KTT_API
 
+#define KTT_VIRTUAL_API
 
+#define KTT_VISIBILITY_HIDDEN   __attribute__((visibility("hidden")))
 
#define KTT_VERSION_MAJOR   2
 
#define KTT_VERSION_MINOR   0
#define KTT_VERSION_MINOR   1
 
#define KTT_VERSION_PATCH   1
#define KTT_VERSION_PATCH   0
 
- @@ -130,9 +134,9 @@

+

Functions

KTT_API uint32_t ktt::GetKttVersion ()
 
 

Detailed Description

-

Preprocessor definitions which ensure compatibility for multiple compilers and KTT version definitions.

+

Preprocessor definitions which ensure compatibility for multiple compilers and KTT version definitions.

Macro Definition Documentation

- +

◆ KTT_VERSION_MAJOR

- +

◆ KTT_VERSION_MINOR

- +
#define KTT_VERSION_MINOR   0#define KTT_VERSION_MINOR   1
-

Minor version of KTT framework. Second number in KTT version description.

+

Minor version of KTT framework. Second number in KTT version description.

- +

◆ KTT_VERSION_PATCH

- +
#define KTT_VERSION_PATCH   1#define KTT_VERSION_PATCH   0
-

Patch version of KTT framework. Third number in KTT version description.

+

Patch version of KTT framework. Third number in KTT version description.

@@ -183,7 +187,7 @@

diff --git a/Docs/_ktt_platform_8h.js b/Docs/_ktt_platform_8h.js index 50572550..b0533b95 100644 --- a/Docs/_ktt_platform_8h.js +++ b/Docs/_ktt_platform_8h.js @@ -1,6 +1,5 @@ var _ktt_platform_8h = [ - [ "KTT_API", "_ktt_platform_8h.html#ab86eda35e4f644e86fbaf235b9a5490b", null ], [ "KTT_VERSION_MAJOR", "_ktt_platform_8h.html#ad2cfb9fcbae19b1303040b0e2e8584b3", null ], [ "KTT_VERSION_MINOR", "_ktt_platform_8h.html#ae5ff38ea2d15f4dbba741735cc2c5959", null ], [ "KTT_VERSION_PATCH", "_ktt_platform_8h.html#ac72a954d39b9511660df6fd96f698c2e", null ], diff --git a/Docs/_ktt_platform_8h_source.html b/Docs/_ktt_platform_8h_source.html index e45a854d..e6e023dd 100644 --- a/Docs/_ktt_platform_8h_source.html +++ b/Docs/_ktt_platform_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KttPlatform.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,45 +91,49 @@
-
-
KttPlatform.h
+
KttPlatform.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 #include <string>
-
8 
-
9 #ifndef KTT_API
-
10 #if defined(_MSC_VER)
-
11  #pragma warning(disable : 4251) // Irrelevant MSVC warning as long as exported classes have no public attributes.
-
12 
-
13  #if defined(KTT_LIBRARY)
-
14  #define KTT_API __declspec(dllexport)
-
15  #else
-
16  #define KTT_API __declspec(dllimport)
-
17  #endif // KTT_LIBRARY
-
18 #else
-
19  #define KTT_API
-
20 #endif // _MSC_VER
-
21 #endif // KTT_API
-
22 
-
25 #define KTT_VERSION_MAJOR 2
-
26 
-
29 #define KTT_VERSION_MINOR 0
-
30 
-
33 #define KTT_VERSION_PATCH 1
-
34 
-
35 namespace ktt
-
36 {
-
37 
-
42 KTT_API uint32_t GetKttVersion();
-
43 
-
48 KTT_API std::string GetKttVersionString();
-
49 
-
50 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7#include <string>
+
8
+
9#ifndef KTT_API
+
10#if defined(_MSC_VER)
+
11 #pragma warning(disable : 4251) // Irrelevant MSVC warning as long as exported classes have no public attributes.
+
12
+
13 #if defined(KTT_LIBRARY)
+
14 #define KTT_API __declspec(dllexport)
+
15 #else
+
16 #define KTT_API __declspec(dllimport)
+
17 #endif // KTT_LIBRARY
+
18
+
19 #define KTT_VIRTUAL_API virtual
+
20 #define KTT_VISIBILITY_HIDDEN
+
21#else
+
22 #define KTT_API
+
23 #define KTT_VIRTUAL_API
+
24 #define KTT_VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
+
25#endif // _MSC_VER
+
26#endif // KTT_API
+
27
+
30#define KTT_VERSION_MAJOR 2
+
31
+
34#define KTT_VERSION_MINOR 1
+
35
+
38#define KTT_VERSION_PATCH 0
+
39
+
40namespace ktt
+
41{
+
42
+
47KTT_API uint32_t GetKttVersion();
+
48
+
53KTT_API std::string GetKttVersionString();
+
54
+
55} // namespace ktt
+
Definition: KttPlatform.h:41
KTT_API uint32_t GetKttVersion()
KTT_API std::string GetKttVersionString()
@@ -139,7 +142,7 @@ diff --git a/Docs/_ktt_types_8h.html b/Docs/_ktt_types_8h.html index db7a7007..d62ff8df 100644 --- a/Docs/_ktt_types_8h.html +++ b/Docs/_ktt_types_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KttTypes.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -96,8 +95,7 @@ Namespaces | Typedefs | Variables -
-
KttTypes.h File Reference
+
KttTypes.h File Reference
#include <cstdint>
@@ -111,12 +109,12 @@

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
- @@ -130,7 +128,7 @@ - + @@ -161,8 +159,10 @@

+

Typedefs

using ktt::PlatformIndex = uint32_t
 
 
using ktt::ArgumentId = uint64_t
 
using ktt::ParameterInput = std::vector< std::pair< std::string, std::variant< uint64_t, double > >>
using ktt::ParameterInput = std::vector< std::pair< std::string, std::variant< uint64_t, double > > >
 
using ktt::UserData = std::map< std::string, std::string >
 
using ktt::ComputeBuffer = void *
 
- + + @@ -173,14 +173,14 @@

+

Variables

const QueueId ktt::InvalidQueueId = std::numeric_limits<QueueId>::max()
 
const KernelDefinitionId ktt::InvalidKernelDefinitionId = std::numeric_limits<KernelDefinitionId>::max()
 
const KernelId ktt::InvalidKernelId = std::numeric_limits<KernelId>::max()
 

Detailed Description

-

Definitions of KTT type aliases and constants.

+

Definitions of KTT type aliases and constants.

diff --git a/Docs/_ktt_types_8h.js b/Docs/_ktt_types_8h.js index fa4e1aaa..0042453b 100644 --- a/Docs/_ktt_types_8h.js +++ b/Docs/_ktt_types_8h.js @@ -24,5 +24,6 @@ var _ktt_types_8h = [ "InvalidArgumentId", "_ktt_types_8h.html#a06314e7380eb0baeb140510fcae36003", null ], [ "InvalidDuration", "_ktt_types_8h.html#a903bdfdf84555ceb9eba5dc578c446bb", null ], [ "InvalidKernelDefinitionId", "_ktt_types_8h.html#a275bc1cfb6ac089034630c96f78285d4", null ], - [ "InvalidKernelId", "_ktt_types_8h.html#af007a5e422c999a5a8f4fd1d875cc7bf", null ] + [ "InvalidKernelId", "_ktt_types_8h.html#af007a5e422c999a5a8f4fd1d875cc7bf", null ], + [ "InvalidQueueId", "_ktt_types_8h.html#ad611ee0e290241acfe5dcec46fce712c", null ] ]; \ No newline at end of file diff --git a/Docs/_ktt_types_8h_source.html b/Docs/_ktt_types_8h_source.html index 00d9293b..68e1e3b6 100644 --- a/Docs/_ktt_types_8h_source.html +++ b/Docs/_ktt_types_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KttTypes.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,91 +91,92 @@
-
-
KttTypes.h
+
KttTypes.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 #include <functional>
-
8 #include <limits>
-
9 #include <map>
-
10 #include <string>
-
11 #include <utility>
-
12 #include <variant>
-
13 #include <vector>
-
14 
-
15 namespace ktt
-
16 {
-
17 
-
18 class ComputeInterface;
-
19 
-
23 using PlatformIndex = uint32_t;
-
24 
-
28 using DeviceIndex = uint32_t;
-
29 
-
33 using QueueId = uint32_t;
-
34 
-
38 using KernelDefinitionId = uint64_t;
-
39 
-
43 using KernelId = uint64_t;
-
44 
-
48 using ArgumentId = uint64_t;
-
49 
-
53 using ParameterInput = std::vector<std::pair<std::string, std::variant<uint64_t, double>>>;
-
54 
-
58 using UserData = std::map<std::string, std::string>;
-
59 
-
63 using Nanoseconds = uint64_t;
-
64 
-
68 using KernelComputeId = std::string;
-
69 
-
73 using ComputeActionId = uint64_t;
-
74 
-
78 using TransferActionId = uint64_t;
-
79 
-
83 using ModifierFunction = std::function<uint64_t(const uint64_t /*defaultSize*/, const std::vector<uint64_t>& /*parameterValues*/)>;
-
84 
-
88 using ConstraintFunction = std::function<bool(const std::vector<uint64_t>& /*parameterValues*/)>;
-
89 
-
93 using KernelLauncher = std::function<void(ComputeInterface& /*interface*/)>;
-
94 
-
98 using ReferenceComputation = std::function<void(void* /*buffer*/)>;
-
99 
-
103 using ValueComparator = std::function<bool(const void* /*result*/, const void* /*reference*/)>;
-
104 
-
108 using UnifiedBufferMemory = void*;
-
109 
-
113 using ComputeContext = void*;
-
114 
-
118 using ComputeQueue = void*;
-
119 
-
123 using ComputeBuffer = void*;
-
124 
-
127 inline const KernelDefinitionId InvalidKernelDefinitionId = std::numeric_limits<KernelDefinitionId>::max();
-
128 
-
131 inline const KernelId InvalidKernelId = std::numeric_limits<KernelId>::max();
-
132 
-
135 inline const ArgumentId InvalidArgumentId = std::numeric_limits<ArgumentId>::max();
-
136 
-
139 inline const Nanoseconds InvalidDuration = std::numeric_limits<Nanoseconds>::max();
-
140 
-
141 } // namespace ktt
-
Definition: ComputeInterface.h:24
-
Definition: KttPlatform.h:36
-
const ArgumentId InvalidArgumentId
Definition: KttTypes.h:135
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7#include <functional>
+
8#include <limits>
+
9#include <map>
+
10#include <string>
+
11#include <utility>
+
12#include <variant>
+
13#include <vector>
+
14
+
15namespace ktt
+
16{
+
17
+
18class ComputeInterface;
+
19
+
23using PlatformIndex = uint32_t;
+
24
+
28using DeviceIndex = uint32_t;
+
29
+
33using QueueId = uint32_t;
+
34
+
38using KernelDefinitionId = uint64_t;
+
39
+
43using KernelId = uint64_t;
+
44
+
48using ArgumentId = uint64_t;
+
49
+
53using ParameterInput = std::vector<std::pair<std::string, std::variant<uint64_t, double>>>;
+
54
+
58using UserData = std::map<std::string, std::string>;
+
59
+
63using Nanoseconds = uint64_t;
+
64
+
68using KernelComputeId = std::string;
+
69
+
73using ComputeActionId = uint64_t;
+
74
+
78using TransferActionId = uint64_t;
+
79
+
83using ModifierFunction = std::function<uint64_t(const uint64_t /*defaultSize*/, const std::vector<uint64_t>& /*parameterValues*/)>;
+
84
+
88using ConstraintFunction = std::function<bool(const std::vector<uint64_t>& /*parameterValues*/)>;
+
89
+
93using KernelLauncher = std::function<void(ComputeInterface& /*interface*/)>;
+
94
+
98using ReferenceComputation = std::function<void(void* /*buffer*/)>;
+
99
+
103using ValueComparator = std::function<bool(const void* /*result*/, const void* /*reference*/)>;
+
104
+ +
109
+
113using ComputeContext = void*;
+
114
+
118using ComputeQueue = void*;
+
119
+
123using ComputeBuffer = void*;
+
124
+
127inline const QueueId InvalidQueueId = std::numeric_limits<QueueId>::max();
+
128
+
131inline const KernelDefinitionId InvalidKernelDefinitionId = std::numeric_limits<KernelDefinitionId>::max();
+
132
+
135inline const KernelId InvalidKernelId = std::numeric_limits<KernelId>::max();
+
136
+
139inline const ArgumentId InvalidArgumentId = std::numeric_limits<ArgumentId>::max();
+
140
+
143inline const Nanoseconds InvalidDuration = std::numeric_limits<Nanoseconds>::max();
+
144
+
145} // namespace ktt
+
Definition: ComputeInterface.h:25
+
Definition: KttPlatform.h:41
+
const ArgumentId InvalidArgumentId
Definition: KttTypes.h:139
uint32_t DeviceIndex
Definition: KttTypes.h:28
void * UnifiedBufferMemory
Definition: KttTypes.h:108
-
const KernelDefinitionId InvalidKernelDefinitionId
Definition: KttTypes.h:127
+
const KernelDefinitionId InvalidKernelDefinitionId
Definition: KttTypes.h:131
uint64_t ComputeActionId
Definition: KttTypes.h:73
uint32_t QueueId
Definition: KttTypes.h:33
void * ComputeContext
Definition: KttTypes.h:113
uint64_t KernelId
Definition: KttTypes.h:43
std::function< bool(const std::vector< uint64_t > &)> ConstraintFunction
Definition: KttTypes.h:88
uint64_t Nanoseconds
Definition: KttTypes.h:63
-
const Nanoseconds InvalidDuration
Definition: KttTypes.h:139
+
const Nanoseconds InvalidDuration
Definition: KttTypes.h:143
uint64_t TransferActionId
Definition: KttTypes.h:78
void * ComputeQueue
Definition: KttTypes.h:118
std::function< void(void *)> ReferenceComputation
Definition: KttTypes.h:98
@@ -184,10 +184,11 @@
std::function< bool(const void *, const void *)> ValueComparator
Definition: KttTypes.h:103
void * ComputeBuffer
Definition: KttTypes.h:123
uint32_t PlatformIndex
Definition: KttTypes.h:23
+
const QueueId InvalidQueueId
Definition: KttTypes.h:127
std::function< uint64_t(const uint64_t, const std::vector< uint64_t > &)> ModifierFunction
Definition: KttTypes.h:83
uint64_t ArgumentId
Definition: KttTypes.h:48
-
std::vector< std::pair< std::string, std::variant< uint64_t, double > >> ParameterInput
Definition: KttTypes.h:53
-
const KernelId InvalidKernelId
Definition: KttTypes.h:131
+
std::vector< std::pair< std::string, std::variant< uint64_t, double > > > ParameterInput
Definition: KttTypes.h:53
+
const KernelId InvalidKernelId
Definition: KttTypes.h:135
uint64_t KernelDefinitionId
Definition: KttTypes.h:38
std::map< std::string, std::string > UserData
Definition: KttTypes.h:58
std::function< void(ComputeInterface &)> KernelLauncher
Definition: KttTypes.h:93
@@ -197,7 +198,7 @@ diff --git a/Docs/_logging_level_8h.html b/Docs/_logging_level_8h.html index 2171f356..4ace52e8 100644 --- a/Docs/_logging_level_8h.html +++ b/Docs/_logging_level_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Utility/Logger/LoggingLevel.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
LoggingLevel.h File Reference
+
LoggingLevel.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::LoggingLevel {
  ktt::Off @@ -121,14 +119,14 @@
 

Detailed Description

-

Verbosity level of KTT logger.

+

Verbosity level of KTT logger.

diff --git a/Docs/_logging_level_8h_source.html b/Docs/_logging_level_8h_source.html index 3a8b9798..508786d8 100644 --- a/Docs/_logging_level_8h_source.html +++ b/Docs/_logging_level_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Utility/Logger/LoggingLevel.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,31 +91,30 @@
-
-
LoggingLevel.h
+
LoggingLevel.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class LoggingLevel
-
13 {
-
16  Off,
-
17 
-
20  Error,
-
21 
-
24  Warning,
-
25 
-
28  Info,
-
29 
-
32  Debug
-
33 };
-
34 
-
35 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+
12enum class LoggingLevel
+
13{
+
16 Off,
+
17
+
20 Error,
+
21
+
24 Warning,
+
25
+
28 Info,
+
29
+
32 Debug
+
33};
+
34
+
35} // namespace ktt
+
Definition: KttPlatform.h:41
LoggingLevel
Definition: LoggingLevel.h:13
@@ -129,7 +127,7 @@ diff --git a/Docs/_mcmc_searcher_8h.html b/Docs/_mcmc_searcher_8h.html index 226a8e71..16f15738 100644 --- a/Docs/_mcmc_searcher_8h.html +++ b/Docs/_mcmc_searcher_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Searcher/McmcSearcher.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,38 +94,37 @@ -
-
McmcSearcher.h File Reference
+
McmcSearcher.h File Reference
#include <cstddef>
+#include <map>
#include <random>
#include <set>
-#include <vector>
#include <Api/Searcher/Searcher.h>
#include <KttPlatform.h>

Go to the source code of this file.

-

+

Classes

class  ktt::McmcSearcher
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Searcher which explores configurations using Markov chain Monte Carlo method.

+

Searcher which explores configurations using Markov chain Monte Carlo method.

diff --git a/Docs/_mcmc_searcher_8h_source.html b/Docs/_mcmc_searcher_8h_source.html index d34b4128..05654c14 100644 --- a/Docs/_mcmc_searcher_8h_source.html +++ b/Docs/_mcmc_searcher_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Searcher/McmcSearcher.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,79 +91,77 @@
-
-
McmcSearcher.h
+
McmcSearcher.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstddef>
-
7 #include <random>
-
8 #include <set>
-
9 #include <vector>
-
10 
-
11 #include <Api/Searcher/Searcher.h>
-
12 #include <KttPlatform.h>
-
13 
-
14 namespace ktt
-
15 {
-
16 
-
20 class KTT_API McmcSearcher : public Searcher
-
21 {
-
22 public:
-
27  McmcSearcher(const std::vector<double>& start);
-
28 
-
29  void OnInitialize() override;
-
30  void OnReset() override;
-
31 
-
32  bool CalculateNextConfiguration(const KernelResult& previousResult) override;
- -
34 
-
35 private:
-
36  uint64_t m_Index;
-
37  size_t m_VisitedStatesCount;
-
38  size_t m_OriginState;
-
39  size_t m_CurrentState;
-
40  size_t m_Boot;
-
41  double m_BestTime;
-
42 
-
43  std::vector<double> m_Start;
-
44  std::vector<double> m_ExecutionTimes;
-
45  std::set<size_t> m_UnexploredIndices;
-
46 
-
47  std::default_random_engine m_Generator;
-
48  std::uniform_int_distribution<size_t> m_IntDistribution;
-
49  std::uniform_real_distribution<double> m_ProbabilityDistribution;
-
50 
-
51  inline static size_t m_MaximumDifferences = 2;
-
52  inline static size_t m_BootIterations = 10;
-
53  inline static double m_EscapeProbability = 0.02;
-
54 
-
55  std::vector<size_t> GetNeighbours(const size_t referenceId) const;
-
56  size_t SearchStateIndex(const std::vector<double>& state) const;
-
57 };
-
58 
-
59 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstddef>
+
7#include <map>
+
8#include <random>
+
9#include <set>
+
10
+ +
12#include <KttPlatform.h>
+
13
+
14namespace ktt
+
15{
+
16
+
20class KTT_API McmcSearcher : public Searcher
+
21{
+
22public:
+ +
28
+
33 [[deprecated("Use constructor which accepts kernel configuration.")]] McmcSearcher(const std::vector<double>& start);
+
34
+
35 void OnInitialize() override;
+
36 void OnReset() override;
+
37
+
38 bool CalculateNextConfiguration(const KernelResult& previousResult) override;
+ +
40
+
41private:
+
42 uint64_t m_Index;
+
43 size_t m_VisitedStatesCount;
+
44 size_t m_OriginState;
+
45 size_t m_CurrentState;
+
46 size_t m_Boot;
+
47 double m_BestTime;
+
48
+
49 KernelConfiguration m_Start;
+
50 std::map<size_t, double> m_ExecutionTimes;
+
51
+
52 std::default_random_engine m_Generator;
+
53 std::uniform_int_distribution<size_t> m_IntDistribution;
+
54 std::uniform_real_distribution<double> m_ProbabilityDistribution;
+
55
+
56 inline static size_t m_MaximumDifferences = 2;
+
57 inline static size_t m_BootIterations = 10;
+
58 inline static double m_EscapeProbability = 0.02;
+
59};
+
60
+
61} // namespace ktt
Definition: KernelConfiguration.h:19
Definition: KernelResult.h:21
Definition: McmcSearcher.h:21
+
McmcSearcher(const KernelConfiguration &start={})
KernelConfiguration GetCurrentConfiguration() const override
bool CalculateNextConfiguration(const KernelResult &previousResult) override
void OnReset() override
McmcSearcher(const std::vector< double > &start)
void OnInitialize() override
Definition: Searcher.h:23
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_modifier_action_8h.html b/Docs/_modifier_action_8h.html index 0393e35c..4256487b 100644 --- a/Docs/_modifier_action_8h.html +++ b/Docs/_modifier_action_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Kernel/ModifierAction.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ModifierAction.h File Reference
+
ModifierAction.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ModifierAction {
  ktt::Add @@ -121,14 +119,14 @@
 

Detailed Description

-

Modifier action for kernel parameters.

+

Modifier action for kernel parameters.

diff --git a/Docs/_modifier_action_8h_source.html b/Docs/_modifier_action_8h_source.html index 4dff1abb..c5d191c2 100644 --- a/Docs/_modifier_action_8h_source.html +++ b/Docs/_modifier_action_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Kernel/ModifierAction.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,31 +91,30 @@
-
-
ModifierAction.h
+
ModifierAction.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class ModifierAction
-
13 {
-
16  Add,
-
17 
-
20  Subtract,
-
21 
-
24  Multiply,
-
25 
-
28  Divide,
-
29 
- -
33 };
-
34 
-
35 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
13{
+
16 Add,
+
17
+ +
21
+ +
25
+
28 Divide,
+
29
+ +
33};
+
34
+
35} // namespace ktt
+
Definition: KttPlatform.h:41
ModifierAction
Definition: ModifierAction.h:13
@@ -129,7 +127,7 @@ diff --git a/Docs/_modifier_dimension_8h.html b/Docs/_modifier_dimension_8h.html index 6f023b89..4fe43e18 100644 --- a/Docs/_modifier_dimension_8h.html +++ b/Docs/_modifier_dimension_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Kernel/ModifierDimension.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ModifierDimension.h File Reference
+
ModifierDimension.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ModifierDimension { ktt::X , ktt::Y @@ -116,14 +114,14 @@
 

Detailed Description

-

Modifier dimension for kernel parameters.

+

Modifier dimension for kernel parameters.

diff --git a/Docs/_modifier_dimension_8h_source.html b/Docs/_modifier_dimension_8h_source.html index 297d0850..a463d79b 100644 --- a/Docs/_modifier_dimension_8h_source.html +++ b/Docs/_modifier_dimension_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Kernel/ModifierDimension.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,27 +91,26 @@
-
-
ModifierDimension.h
+
ModifierDimension.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
- -
14 {
-
17  X,
-
18 
-
21  Y,
-
22 
-
25  Z
-
26 };
-
27 
-
28 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
14{
+
17 X,
+
18
+
21 Y,
+
22
+
25 Z
+
26};
+
27
+
28} // namespace ktt
+
Definition: KttPlatform.h:41
ModifierDimension
Definition: ModifierDimension.h:14
@@ -123,7 +121,7 @@ diff --git a/Docs/_modifier_type_8h.html b/Docs/_modifier_type_8h.html index be7a0aa2..621a377e 100644 --- a/Docs/_modifier_type_8h.html +++ b/Docs/_modifier_type_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Kernel/ModifierType.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ModifierType.h File Reference
+
ModifierType.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ModifierType { ktt::Global , ktt::Local @@ -115,14 +113,14 @@
 

Detailed Description

-

Modifier type for kernel parameters.

+

Modifier type for kernel parameters.

diff --git a/Docs/_modifier_type_8h_source.html b/Docs/_modifier_type_8h_source.html index ad25049f..742fff13 100644 --- a/Docs/_modifier_type_8h_source.html +++ b/Docs/_modifier_type_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Kernel/ModifierType.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,25 +91,24 @@
-
-
ModifierType.h
+
ModifierType.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class ModifierType
-
13 {
-
16  Global,
-
17 
-
20  Local
-
21 };
-
22 
-
23 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+
12enum class ModifierType
+
13{
+
16 Global,
+
17
+
20 Local
+
21};
+
22
+
23} // namespace ktt
+
Definition: KttPlatform.h:41
ModifierType
Definition: ModifierType.h:13
@@ -120,7 +118,7 @@ diff --git a/Docs/_output_format_8h.html b/Docs/_output_format_8h.html index 2411d3e5..7412dff4 100644 --- a/Docs/_output_format_8h.html +++ b/Docs/_output_format_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Output/OutputFormat.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
OutputFormat.h File Reference
+
OutputFormat.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::OutputFormat { ktt::JSON , ktt::XML @@ -115,14 +113,14 @@
 

Detailed Description

-

Format of tuner output.

+

Format of tuner output.

diff --git a/Docs/_output_format_8h_source.html b/Docs/_output_format_8h_source.html index f0290190..f99e2b08 100644 --- a/Docs/_output_format_8h_source.html +++ b/Docs/_output_format_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Output/OutputFormat.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,25 +91,24 @@
-
-
OutputFormat.h
+
OutputFormat.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class OutputFormat
-
13 {
-
16  JSON,
-
17 
-
20  XML
-
21 };
-
22 
-
23 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+
12enum class OutputFormat
+
13{
+
16 JSON,
+
17
+
20 XML
+
21};
+
22
+
23} // namespace ktt
+
Definition: KttPlatform.h:41
OutputFormat
Definition: OutputFormat.h:13
@@ -120,7 +118,7 @@ diff --git a/Docs/_parameter_pair_8h.html b/Docs/_parameter_pair_8h.html index cfbe4394..900d3504 100644 --- a/Docs/_parameter_pair_8h.html +++ b/Docs/_parameter_pair_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Configuration/ParameterPair.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ParameterPair.h File Reference
+
ParameterPair.h File Reference
#include <cstdint>
@@ -108,25 +106,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::ParameterPair
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Value for one kernel parameter.

+

Value for one kernel parameter.

diff --git a/Docs/_parameter_pair_8h_source.html b/Docs/_parameter_pair_8h_source.html index 4d7ce402..2c481dc4 100644 --- a/Docs/_parameter_pair_8h_source.html +++ b/Docs/_parameter_pair_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Configuration/ParameterPair.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,64 +91,63 @@
-
-
ParameterPair.h
+
ParameterPair.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 #include <string>
-
8 #include <variant>
-
9 #include <vector>
-
10 
-
11 #include <KttPlatform.h>
-
12 
-
13 namespace ktt
-
14 {
-
15 
-
19 class KTT_API ParameterPair
-
20 {
-
21 public:
- -
26 
-
32  explicit ParameterPair(const std::string& name, const uint64_t value);
-
33 
-
39  explicit ParameterPair(const std::string& name, const double value);
-
40 
-
45  void SetValue(const uint64_t value);
-
46 
-
51  void SetValue(const double value);
-
52 
-
57  const std::string& GetName() const;
-
58 
-
63  std::string GetString() const;
-
64 
-
69  std::string GetValueString() const;
-
70 
-
75  uint64_t GetValue() const;
-
76 
-
81  double GetValueDouble() const;
-
82 
-
87  bool HasValueDouble() const;
-
88 
-
94  bool HasSameValue(const ParameterPair& other) const;
-
95 
-
102  template <typename T>
-
103  static T GetParameterValue(const std::vector<ParameterPair>& pairs, const std::string& name);
-
104 
-
112  template <typename T>
-
113  static std::vector<T> GetParameterValues(const std::vector<ParameterPair>& pairs, const std::vector<std::string>& names);
-
114 
-
115 private:
-
116  std::string m_Name;
-
117  std::variant<uint64_t, double> m_Value;
-
118 };
-
119 
-
120 } // namespace ktt
-
121 
-
122 #include <Api/Configuration/ParameterPair.inl>
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7#include <string>
+
8#include <variant>
+
9#include <vector>
+
10
+
11#include <KttPlatform.h>
+
12
+
13namespace ktt
+
14{
+
15
+
19class KTT_API ParameterPair
+
20{
+
21public:
+ +
26
+
32 explicit ParameterPair(const std::string& name, const uint64_t value);
+
33
+
39 explicit ParameterPair(const std::string& name, const double value);
+
40
+
45 void SetValue(const uint64_t value);
+
46
+
51 void SetValue(const double value);
+
52
+
57 const std::string& GetName() const;
+
58
+
63 std::string GetString() const;
+
64
+
69 std::string GetValueString() const;
+
70
+
75 uint64_t GetValue() const;
+
76
+
81 double GetValueDouble() const;
+
82
+
87 bool HasValueDouble() const;
+
88
+
94 bool HasSameValue(const ParameterPair& other) const;
+
95
+
102 template <typename T>
+
103 static T GetParameterValue(const std::vector<ParameterPair>& pairs, const std::string& name);
+
104
+
112 template <typename T>
+
113 static std::vector<T> GetParameterValues(const std::vector<ParameterPair>& pairs, const std::vector<std::string>& names);
+
114
+
115private:
+
116 std::string m_Name;
+
117 std::variant<uint64_t, double> m_Value;
+
118};
+
119
+
120} // namespace ktt
+
121
+
122#include <Api/Configuration/ParameterPair.inl>
Definition: ParameterPair.h:20
std::string GetValueString() const
@@ -166,14 +164,14 @@
uint64_t GetValue() const
static T GetParameterValue(const std::vector< ParameterPair > &pairs, const std::string &name)
ParameterPair(const std::string &name, const uint64_t value)
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_platform_info_8h.html b/Docs/_platform_info_8h.html index df70a332..341ba411 100644 --- a/Docs/_platform_info_8h.html +++ b/Docs/_platform_info_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Info/PlatformInfo.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
PlatformInfo.h File Reference
+
PlatformInfo.h File Reference
#include <string>
@@ -105,25 +103,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::PlatformInfo
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Information about compute API platforms.

+

Information about compute API platforms.

diff --git a/Docs/_platform_info_8h_source.html b/Docs/_platform_info_8h_source.html index 8e5acaed..2b1cd413 100644 --- a/Docs/_platform_info_8h_source.html +++ b/Docs/_platform_info_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Info/PlatformInfo.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,53 +91,52 @@
-
-
PlatformInfo.h
+
PlatformInfo.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <string>
-
7 
-
8 #include <KttPlatform.h>
-
9 #include <KttTypes.h>
-
10 
-
11 namespace ktt
-
12 {
-
13 
-
17 class KTT_API PlatformInfo
-
18 {
-
19 public:
-
25  explicit PlatformInfo(const PlatformIndex index, const std::string& name);
-
26 
- -
32 
-
37  const std::string& GetName() const;
-
38 
-
43  const std::string& GetVendor() const;
-
44 
-
49  const std::string& GetVersion() const;
-
50 
-
55  const std::string& GetExtensions() const;
-
56 
-
61  std::string GetString() const;
-
62 
-
67  void SetVendor(const std::string& vendor);
-
68 
-
73  void SetVersion(const std::string& version);
-
74 
-
79  void SetExtensions(const std::string& extensions);
-
80 
-
81 private:
-
82  PlatformIndex m_Index;
-
83  std::string m_Name;
-
84  std::string m_Vendor;
-
85  std::string m_Version;
-
86  std::string m_Extensions;
-
87 };
-
88 
-
89 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <string>
+
7
+
8#include <KttPlatform.h>
+
9#include <KttTypes.h>
+
10
+
11namespace ktt
+
12{
+
13
+
17class KTT_API PlatformInfo
+
18{
+
19public:
+
25 explicit PlatformInfo(const PlatformIndex index, const std::string& name);
+
26
+ +
32
+
37 const std::string& GetName() const;
+
38
+
43 const std::string& GetVendor() const;
+
44
+
49 const std::string& GetVersion() const;
+
50
+
55 const std::string& GetExtensions() const;
+
56
+
61 std::string GetString() const;
+
62
+
67 void SetVendor(const std::string& vendor);
+
68
+
73 void SetVersion(const std::string& version);
+
74
+
79 void SetExtensions(const std::string& extensions);
+
80
+
81private:
+
82 PlatformIndex m_Index;
+
83 std::string m_Name;
+
84 std::string m_Vendor;
+
85 std::string m_Version;
+
86 std::string m_Extensions;
+
87};
+
88
+
89} // namespace ktt
Definition: PlatformInfo.h:18
@@ -152,7 +150,7 @@
const std::string & GetVersion() const
PlatformIndex GetIndex() const
void SetVersion(const std::string &version)
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
uint32_t PlatformIndex
Definition: KttTypes.h:23
@@ -160,7 +158,7 @@ diff --git a/Docs/_profiling_counter_type_8h.html b/Docs/_profiling_counter_type_8h.html index 5e159189..9d6da4b6 100644 --- a/Docs/_profiling_counter_type_8h.html +++ b/Docs/_profiling_counter_type_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/ProfilingCounterType.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ProfilingCounterType.h File Reference
+
ProfilingCounterType.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ProfilingCounterType {
  ktt::Int @@ -122,14 +120,14 @@
 

Detailed Description

-

Definition of enum which specifies data type of a profiling counter.

+

Definition of enum which specifies data type of a profiling counter.

diff --git a/Docs/_profiling_counter_type_8h_source.html b/Docs/_profiling_counter_type_8h_source.html index a83a6a31..adf95d6a 100644 --- a/Docs/_profiling_counter_type_8h_source.html +++ b/Docs/_profiling_counter_type_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/ProfilingCounterType.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,33 +91,32 @@
-
-
ProfilingCounterType.h
+
ProfilingCounterType.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
- -
13 {
-
16  Int,
-
17 
- -
21 
-
24  Double,
-
25 
-
28  Percent,
-
29 
-
32  Throughput,
-
33 
- -
38 };
-
39 
-
40 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
13{
+
16 Int,
+
17
+ +
21
+
24 Double,
+
25
+
28 Percent,
+
29
+ +
33
+ +
38};
+
39
+
40} // namespace ktt
+
Definition: KttPlatform.h:41
ProfilingCounterType
Definition: ProfilingCounterType.h:13
@@ -132,7 +130,7 @@ diff --git a/Docs/_random_searcher_8h.html b/Docs/_random_searcher_8h.html index de86569d..302e5c2d 100644 --- a/Docs/_random_searcher_8h.html +++ b/Docs/_random_searcher_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Searcher/RandomSearcher.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
RandomSearcher.h File Reference
+
RandomSearcher.h File Reference
#include <Api/Searcher/Searcher.h>
@@ -104,25 +102,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::RandomSearcher
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Searcher which explores configurations in random order.

+

Searcher which explores configurations in random order.

diff --git a/Docs/_random_searcher_8h_source.html b/Docs/_random_searcher_8h_source.html index d0f2df61..ab8a078e 100644 --- a/Docs/_random_searcher_8h_source.html +++ b/Docs/_random_searcher_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Searcher/RandomSearcher.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,34 +91,33 @@
-
-
RandomSearcher.h
+
RandomSearcher.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
- -
7 #include <KttPlatform.h>
-
8 
-
9 namespace ktt
-
10 {
-
11 
-
15 class KTT_API RandomSearcher : public Searcher
-
16 {
-
17 public:
- -
22 
-
23  void OnInitialize() override;
-
24 
-
25  bool CalculateNextConfiguration(const KernelResult& previousResult) override;
- -
27 
-
28 private:
-
29  KernelConfiguration m_CurrentConfiguration;
-
30 };
-
31 
-
32 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+ +
7#include <KttPlatform.h>
+
8
+
9namespace ktt
+
10{
+
11
+
15class KTT_API RandomSearcher : public Searcher
+
16{
+
17public:
+ +
22
+
23 void OnInitialize() override;
+
24
+
25 bool CalculateNextConfiguration(const KernelResult& previousResult) override;
+ +
27
+
28private:
+
29 KernelConfiguration m_CurrentConfiguration;
+
30};
+
31
+
32} // namespace ktt
Definition: KernelConfiguration.h:19
@@ -130,14 +128,14 @@
void OnInitialize() override
KernelConfiguration GetCurrentConfiguration() const override
Definition: Searcher.h:23
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_result_status_8h.html b/Docs/_result_status_8h.html index 5439e106..b5aa3871 100644 --- a/Docs/_result_status_8h.html +++ b/Docs/_result_status_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/ResultStatus.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ResultStatus.h File Reference
+
ResultStatus.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ResultStatus {
  ktt::Ok @@ -121,14 +119,14 @@
 

Detailed Description

-

Status of a kernel result.

+

Status of a kernel result.

diff --git a/Docs/_result_status_8h_source.html b/Docs/_result_status_8h_source.html index 9cc05abe..ac773631 100644 --- a/Docs/_result_status_8h_source.html +++ b/Docs/_result_status_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output/ResultStatus.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,44 +91,43 @@
-
-
ResultStatus.h
+
ResultStatus.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class ResultStatus
-
13 {
-
16  Ok,
-
17 
- -
21 
- -
25 
- -
29 
- -
33 };
-
34 
-
35 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+
12enum class ResultStatus
+
13{
+
16 Ok,
+
17
+ +
21
+ +
25
+ +
29
+ +
33};
+
34
+
35} // namespace ktt
+
Definition: KttPlatform.h:41
ResultStatus
Definition: ResultStatus.h:13
- +
diff --git a/Docs/_searcher_8h.html b/Docs/_searcher_8h.html index 01121b76..a5c75569 100644 --- a/Docs/_searcher_8h.html +++ b/Docs/_searcher_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Searcher/Searcher.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
Searcher.h File Reference
+
Searcher.h File Reference
#include <cstdint>
@@ -108,25 +106,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::Searcher
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Interface for implementing kernel configuration searchers.

+

Interface for implementing kernel configuration searchers.

diff --git a/Docs/_searcher_8h_source.html b/Docs/_searcher_8h_source.html index e0a89d32..dba27b35 100644 --- a/Docs/_searcher_8h_source.html +++ b/Docs/_searcher_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Searcher/Searcher.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,65 +91,64 @@
-
-
Searcher.h
+
Searcher.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 #include <set>
-
8 #include <vector>
-
9 
- -
11 #include <Api/Output/KernelResult.h>
-
12 #include <KttPlatform.h>
-
13 
-
14 namespace ktt
-
15 {
-
16 
-
17 class ConfigurationData;
-
18 
-
22 class KTT_API Searcher
-
23 {
-
24 public:
-
29  virtual ~Searcher() = default;
-
30 
-
35  virtual void OnInitialize();
-
36 
-
41  virtual void OnReset();
-
42 
-
50  virtual bool CalculateNextConfiguration(const KernelResult& previousResult) = 0;
-
51 
- -
58 
- -
63 
-
70  KernelConfiguration GetConfiguration(const uint64_t index) const;
-
71 
-
77  uint64_t GetIndex(const KernelConfiguration& configuration) const;
-
78 
- -
84 
-
95  std::vector<KernelConfiguration> GetNeighbourConfigurations(const KernelConfiguration& configuration,
-
96  const uint64_t maxDifferences, const size_t maxNeighbours = 3) const;
-
97 
-
102  uint64_t GetConfigurationsCount() const;
-
103 
-
108  const std::set<uint64_t>& GetExploredIndices() const;
-
109 
-
114  bool IsInitialized() const;
-
115 
-
120  void Initialize(const ConfigurationData& data);
-
121 
-
125  void Reset();
-
126 
-
127 private:
-
128  const ConfigurationData* m_Data;
-
129 };
-
130 
-
131 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7#include <set>
+
8#include <vector>
+
9
+ +
11#include <Api/Output/KernelResult.h>
+
12#include <KttPlatform.h>
+
13
+
14namespace ktt
+
15{
+
16
+
17class ConfigurationData;
+
18
+
22class KTT_API Searcher
+
23{
+
24public:
+
29 virtual ~Searcher() = default;
+
30
+
35 virtual void OnInitialize();
+
36
+
41 virtual void OnReset();
+
42
+
50 virtual bool CalculateNextConfiguration(const KernelResult& previousResult) = 0;
+
51
+ +
58
+ +
63
+
70 KernelConfiguration GetConfiguration(const uint64_t index) const;
+
71
+
77 uint64_t GetIndex(const KernelConfiguration& configuration) const;
+
78
+ +
84
+
95 std::vector<KernelConfiguration> GetNeighbourConfigurations(const KernelConfiguration& configuration,
+
96 const uint64_t maxDifferences, const size_t maxNeighbours = 3) const;
+
97
+
102 uint64_t GetConfigurationsCount() const;
+
103
+
108 const std::set<uint64_t>& GetExploredIndices() const;
+
109
+
114 bool IsInitialized() const;
+
115
+
120 void Initialize(const ConfigurationData& data);
+
121
+
125 void Reset();
+
126
+
127private:
+
128 const ConfigurationData* m_Data;
+
129};
+
130
+
131} // namespace ktt
Definition: KernelConfiguration.h:19
@@ -171,14 +169,14 @@
virtual bool CalculateNextConfiguration(const KernelResult &previousResult)=0
KernelConfiguration GetConfiguration(const uint64_t index) const
uint64_t GetIndex(const KernelConfiguration &configuration) const
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_stop_condition_8h.html b/Docs/_stop_condition_8h.html index 2b9bc90d..5a0125dc 100644 --- a/Docs/_stop_condition_8h.html +++ b/Docs/_stop_condition_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/StopCondition.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
StopCondition.h File Reference
+
StopCondition.h File Reference
#include <cstdint>
@@ -106,25 +104,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::StopCondition
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Interface for implementing tuning stop conditions.

+

Interface for implementing tuning stop conditions.

diff --git a/Docs/_stop_condition_8h_source.html b/Docs/_stop_condition_8h_source.html index f75dddbc..511c97a7 100644 --- a/Docs/_stop_condition_8h_source.html +++ b/Docs/_stop_condition_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/StopCondition.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,37 +91,36 @@
-
-
StopCondition.h
+
StopCondition.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <cstdint>
-
7 #include <string>
-
8 
-
9 #include <Api/Output/KernelResult.h>
-
10 #include <KttPlatform.h>
-
11 
-
12 namespace ktt
-
13 {
-
14 
-
18 class KTT_API StopCondition
-
19 {
-
20 public:
-
25  virtual ~StopCondition() = default;
-
26 
-
31  virtual bool IsFulfilled() const = 0;
-
32 
-
37  virtual void Initialize(const uint64_t configurationsCount) = 0;
-
38 
-
43  virtual void Update(const KernelResult& result) = 0;
-
44 
-
49  virtual std::string GetStatusString() const = 0;
-
50 };
-
51 
-
52 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <cstdint>
+
7#include <string>
+
8
+
9#include <Api/Output/KernelResult.h>
+
10#include <KttPlatform.h>
+
11
+
12namespace ktt
+
13{
+
14
+
18class KTT_API StopCondition
+
19{
+
20public:
+
25 virtual ~StopCondition() = default;
+
26
+
31 virtual bool IsFulfilled() const = 0;
+
32
+
37 virtual void Initialize(const uint64_t configurationsCount) = 0;
+
38
+
43 virtual void Update(const KernelResult& result) = 0;
+
44
+
49 virtual std::string GetStatusString() const = 0;
+
50};
+
51
+
52} // namespace ktt
Definition: KernelResult.h:21
Definition: StopCondition.h:19
@@ -131,14 +129,14 @@
virtual bool IsFulfilled() const =0
virtual void Initialize(const uint64_t configurationsCount)=0
virtual void Update(const KernelResult &result)=0
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_time_unit_8h.html b/Docs/_time_unit_8h.html index 92a7f584..8ef98da0 100644 --- a/Docs/_time_unit_8h.html +++ b/Docs/_time_unit_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Output/TimeConfiguration/TimeUnit.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
TimeUnit.h File Reference
+
TimeUnit.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::TimeUnit { ktt::Nanoseconds , ktt::Microseconds @@ -117,14 +115,14 @@
 

Detailed Description

-

Time unit used during logging and output operations.

+

Time unit used during logging and output operations.

diff --git a/Docs/_time_unit_8h_source.html b/Docs/_time_unit_8h_source.html index 626a1e2e..502556f3 100644 --- a/Docs/_time_unit_8h_source.html +++ b/Docs/_time_unit_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Output/TimeConfiguration/TimeUnit.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,41 +91,40 @@
-
-
TimeUnit.h
+
TimeUnit.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class TimeUnit
-
13 {
- -
17 
- -
21 
- -
25 
-
28  Seconds
-
29 };
-
30 
-
31 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+
12enum class TimeUnit
+
13{
+ +
17
+ +
21
+ +
25
+ +
29};
+
30
+
31} // namespace ktt
+
Definition: KttPlatform.h:41
TimeUnit
Definition: TimeUnit.h:13
- +
uint64_t Nanoseconds
Definition: KttTypes.h:63
diff --git a/Docs/_tuner_8h.html b/Docs/_tuner_8h.html index c0d7d591..aadd6437 100644 --- a/Docs/_tuner_8h.html +++ b/Docs/_tuner_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Tuner.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
Tuner.h File Reference
+
Tuner.h File Reference
#include <memory>
@@ -134,25 +132,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::Tuner
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Main part of public API of KTT framework.

+

Main part of public API of KTT framework.

diff --git a/Docs/_tuner_8h_source.html b/Docs/_tuner_8h_source.html index 59fb7cf4..ea4fc885 100644 --- a/Docs/_tuner_8h_source.html +++ b/Docs/_tuner_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Tuner.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,215 +91,240 @@
-
-
Tuner.h
+
Tuner.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <memory>
-
7 #include <ostream>
-
8 #include <string>
-
9 #include <vector>
-
10 
-
11 // Compatibility for multiple platforms
-
12 #include <KttPlatform.h>
-
13 
-
14 // Data types and enums
- - -
17 #include <Kernel/ModifierAction.h>
- -
19 #include <Kernel/ModifierType.h>
- - - - - - - - -
28 #include <Output/OutputFormat.h>
- -
30 #include <KttTypes.h>
-
31 
-
32 // Data holders
- - -
35 #include <Api/Info/DeviceInfo.h>
-
36 #include <Api/Info/PlatformInfo.h>
- -
38 #include <Api/Output/KernelResult.h>
-
39 
-
40 // Tuner customization
-
41 #include <Api/Searcher/Searcher.h>
- - -
44 
-
45 // Half floating-point data type support
-
46 #include <Utility/External/half.hpp>
-
47 
-
51 namespace ktt
-
52 {
-
53 
-
54 class TunerCore;
-
55 
-
59 class KTT_API Tuner
-
60 {
-
61 public:
-
70  explicit Tuner(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api);
-
71 
-
83  explicit Tuner(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api, const uint32_t computeQueueCount);
-
84 
-
91  explicit Tuner(const ComputeApi api, const ComputeApiInitializer& initializer);
-
92 
-
96  ~Tuner();
-
97 
-
109  KernelDefinitionId AddKernelDefinition(const std::string& name, const std::string& source, const DimensionVector& globalSize,
-
110  const DimensionVector& localSize, const std::vector<std::string>& typeNames = {});
-
111 
-
123  KernelDefinitionId AddKernelDefinitionFromFile(const std::string& name, const std::string& filePath,
-
124  const DimensionVector& globalSize, const DimensionVector& localSize, const std::vector<std::string>& typeNames = {});
-
125 
- -
132 
-
139  void SetArguments(const KernelDefinitionId id, const std::vector<ArgumentId>& argumentIds);
-
140 
-
147  KernelId CreateSimpleKernel(const std::string& name, const KernelDefinitionId definitionId);
-
148 
-
157  KernelId CreateCompositeKernel(const std::string& name, const std::vector<KernelDefinitionId>& definitionIds,
-
158  KernelLauncher launcher = nullptr);
-
159 
-
165  void RemoveKernel(const KernelId id);
-
166 
-
174  void SetLauncher(const KernelId id, KernelLauncher launcher);
-
175 
-
188  void AddParameter(const KernelId id, const std::string& name, const std::vector<uint64_t>& values, const std::string& group = "");
-
189 
-
202  void AddParameter(const KernelId id, const std::string& name, const std::vector<double>& values, const std::string& group = "");
-
203 
-
220  void AddThreadModifier(const KernelId id, const std::vector<KernelDefinitionId>& definitionIds, const ModifierType type,
-
221  const ModifierDimension dimension, const std::vector<std::string>& parameters, ModifierFunction function);
-
222 
-
236  void AddThreadModifier(const KernelId id, const std::vector<KernelDefinitionId>& definitionIds, const ModifierType type,
-
237  const ModifierDimension dimension, const std::string& parameter, const ModifierAction action);
-
238 
-
249  void AddConstraint(const KernelId id, const std::vector<std::string>& parameters, ConstraintFunction function);
-
250 
-
258  void SetProfiledDefinitions(const KernelId id, const std::vector<KernelDefinitionId>& definitionIds);
-
259 
-
269  template <typename T>
-
270  ArgumentId AddArgumentVector(const std::vector<T>& data, const ArgumentAccessType accessType);
-
271 
-
287  template <typename T>
-
288  ArgumentId AddArgumentVector(std::vector<T>& data, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation,
-
289  const ArgumentManagementType managementType, const bool referenceUserData);
-
290 
-
303  template <typename T>
-
304  ArgumentId AddArgumentVector(ComputeBuffer buffer, const size_t bufferSize, const ArgumentAccessType accessType,
-
305  const ArgumentMemoryLocation memoryLocation);
-
306 
-
312  template <typename T>
- -
314 
-
324  template <typename T>
-
325  ArgumentId AddArgumentLocal(const size_t localMemorySize);
-
326 
-
332  void RemoveArgument(const ArgumentId id);
-
333 
-
340  void SetReadOnlyArgumentCache(const bool flag);
-
341 
-
351  KernelResult Run(const KernelId id, const KernelConfiguration& configuration, const std::vector<BufferOutputDescriptor>& output);
-
352 
-
361  void SetProfiling(const bool flag);
-
362 
-
371  void SetValidationMethod(const ValidationMethod method, const double toleranceThreshold);
-
372 
- -
379 
-
385  void SetValidationRange(const ArgumentId id, const size_t range);
-
386 
-
395  void SetValueComparator(const ArgumentId id, ValueComparator comparator);
-
396 
- -
406 
-
415  void SetReferenceKernel(const ArgumentId id, const KernelId referenceId, const KernelConfiguration& configuration);
-
416 
-
425  std::vector<KernelResult> Tune(const KernelId id);
-
426 
-
436  std::vector<KernelResult> Tune(const KernelId id, std::unique_ptr<StopCondition> stopCondition);
-
437 
-
453  KernelResult TuneIteration(const KernelId id, const std::vector<BufferOutputDescriptor>& output,
-
454  const bool recomputeReference = false);
-
455 
-
468  std::vector<KernelResult> SimulateKernelTuning(const KernelId id, const std::vector<KernelResult>& results,
-
469  const uint64_t iterations = 0);
-
470 
-
476  void SetSearcher(const KernelId id, std::unique_ptr<Searcher> searcher);
-
477 
-
482  void ClearData(const KernelId id);
-
483 
- -
491 
- -
500 
-
509  std::string GetKernelSource(const KernelId id, const KernelConfiguration& configuration) const;
-
510 
-
518  std::string GetKernelDefinitionSource(const KernelDefinitionId id, const KernelConfiguration& configuration) const;
-
519 
-
524  static void SetTimeUnit(const TimeUnit unit);
-
525 
-
535  void SaveResults(const std::vector<KernelResult>& results, const std::string& filePath, const OutputFormat format,
-
536  const UserData& data = {}) const;
-
537 
-
546  std::vector<KernelResult> LoadResults(const std::string& filePath, const OutputFormat format) const;
-
547 
-
557  std::vector<KernelResult> LoadResults(const std::string& filePath, const OutputFormat format, UserData& data) const;
-
558 
-
562  void Synchronize();
-
563 
-
572  void SetProfilingCounters(const std::vector<std::string>& counters);
-
573 
-
582  void SetCompilerOptions(const std::string& options);
-
583 
- -
592 
-
599  void SetAutomaticGlobalSizeCorrection(const bool flag);
-
600 
-
607  void SetKernelCacheCapacity(const uint64_t capacity);
-
608 
-
613  std::vector<PlatformInfo> GetPlatformInfo() const;
-
614 
-
620  std::vector<DeviceInfo> GetDeviceInfo(const PlatformIndex platform) const;
-
621 
- -
627 
-
632  static void SetLoggingLevel(const LoggingLevel level);
-
633 
-
638  static void SetLoggingTarget(std::ostream& outputTarget);
-
639 
-
644  static void SetLoggingTarget(const std::string& filePath);
-
645 
-
646 private:
-
647  std::unique_ptr<TunerCore> m_Tuner;
-
648 
-
649  ArgumentId AddArgumentWithReferencedData(const size_t elementSize, const ArgumentDataType dataType,
-
650  const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType,
-
651  const ArgumentManagementType managementType, void* data, const size_t dataSize);
-
652  ArgumentId AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType,
-
653  const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType,
-
654  const ArgumentManagementType managementType, const void* data, const size_t dataSize);
-
655  ArgumentId AddUserArgument(ComputeBuffer buffer, const size_t elementSize, const ArgumentDataType dataType,
-
656  const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const size_t dataSize);
-
657 
-
658  template <typename T>
-
659  ArgumentDataType DeriveArgumentDataType() const;
-
660 };
-
661 
-
662 } // namespace ktt
-
663 
-
664 #include <Tuner.inl>
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <memory>
+
7#include <ostream>
+
8#include <string>
+
9#include <vector>
+
10
+
11// Compatibility for multiple platforms
+
12#include <KttPlatform.h>
+
13
+
14// Data types and enums
+ + + + +
19#include <Kernel/ModifierType.h>
+ + + + + + + + +
28#include <Output/OutputFormat.h>
+ +
30#include <KttTypes.h>
+
31
+
32// Data holders
+ + +
35#include <Api/Info/DeviceInfo.h>
+ + +
38#include <Api/Output/KernelResult.h>
+
39
+
40// Tuner customization
+ + + +
44
+
45// Half floating-point data type support
+
46#include <Utility/External/half.hpp>
+
47
+
51namespace ktt
+
52{
+
53
+
54class TunerCore;
+
55
+
59class KTT_API Tuner
+
60{
+
61public:
+
70 explicit Tuner(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api);
+
71
+
83 explicit Tuner(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api, const uint32_t computeQueueCount);
+
84
+
91 explicit Tuner(const ComputeApi api, const ComputeApiInitializer& initializer);
+
92
+
101 explicit Tuner(const ComputeApi api, const ComputeApiInitializer& initializer, std::vector<QueueId>& assignedQueueIds);
+
102
+ +
107
+
119 KernelDefinitionId AddKernelDefinition(const std::string& name, const std::string& source, const DimensionVector& globalSize,
+
120 const DimensionVector& localSize, const std::vector<std::string>& typeNames = {});
+
121
+
133 KernelDefinitionId AddKernelDefinitionFromFile(const std::string& name, const std::string& filePath,
+
134 const DimensionVector& globalSize, const DimensionVector& localSize, const std::vector<std::string>& typeNames = {});
+
135
+
142 KernelDefinitionId GetKernelDefinitionId(const std::string& name, const std::vector<std::string>& typeNames = {}) const;
+
143
+ +
150
+
157 void SetArguments(const KernelDefinitionId id, const std::vector<ArgumentId>& argumentIds);
+
158
+
165 KernelId CreateSimpleKernel(const std::string& name, const KernelDefinitionId definitionId);
+
166
+
176 KernelId CreateCompositeKernel(const std::string& name, const std::vector<KernelDefinitionId>& definitionIds,
+
177 KernelLauncher launcher = nullptr);
+
178
+
184 void RemoveKernel(const KernelId id);
+
185
+
193 void SetLauncher(const KernelId id, KernelLauncher launcher);
+
194
+
207 void AddParameter(const KernelId id, const std::string& name, const std::vector<uint64_t>& values, const std::string& group = "");
+
208
+
221 void AddParameter(const KernelId id, const std::string& name, const std::vector<double>& values, const std::string& group = "");
+
222
+
239 void AddThreadModifier(const KernelId id, const std::vector<KernelDefinitionId>& definitionIds, const ModifierType type,
+
240 const ModifierDimension dimension, const std::vector<std::string>& parameters, ModifierFunction function);
+
241
+
255 void AddThreadModifier(const KernelId id, const std::vector<KernelDefinitionId>& definitionIds, const ModifierType type,
+
256 const ModifierDimension dimension, const std::string& parameter, const ModifierAction action);
+
257
+
268 void AddConstraint(const KernelId id, const std::vector<std::string>& parameters, ConstraintFunction function);
+
269
+
277 void SetProfiledDefinitions(const KernelId id, const std::vector<KernelDefinitionId>& definitionIds);
+
278
+
288 template <typename T>
+
289 ArgumentId AddArgumentVector(const std::vector<T>& data, const ArgumentAccessType accessType);
+
290
+
306 template <typename T>
+
307 ArgumentId AddArgumentVector(std::vector<T>& data, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation,
+
308 const ArgumentManagementType managementType, const bool referenceUserData);
+
309
+
322 template <typename T>
+
323 ArgumentId AddArgumentVector(ComputeBuffer buffer, const size_t bufferSize, const ArgumentAccessType accessType,
+
324 const ArgumentMemoryLocation memoryLocation);
+
325
+
341 ArgumentId AddArgumentVector(ComputeBuffer buffer, const size_t bufferSize, const size_t elementSize,
+
342 const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation);
+
343
+
349 template <typename T>
+ +
351
+
359 ArgumentId AddArgumentScalar(const void* data, const size_t dataSize);
+
360
+
370 template <typename T>
+
371 ArgumentId AddArgumentLocal(const size_t localMemorySize);
+
372
+
380 template <typename T>
+
381 ArgumentId AddArgumentSymbol(const T& data, const std::string& symbolName = "");
+
382
+ +
389
+
396 void SetReadOnlyArgumentCache(const bool flag);
+
397
+
407 KernelResult Run(const KernelId id, const KernelConfiguration& configuration, const std::vector<BufferOutputDescriptor>& output);
+
408
+
417 void SetProfiling(const bool flag);
+
418
+
427 void SetValidationMethod(const ValidationMethod method, const double toleranceThreshold);
+
428
+ +
435
+
441 void SetValidationRange(const ArgumentId id, const size_t range);
+
442
+ +
452
+ +
462
+
471 void SetReferenceKernel(const ArgumentId id, const KernelId referenceId, const KernelConfiguration& configuration);
+
472
+
481 std::vector<KernelResult> Tune(const KernelId id);
+
482
+
492 std::vector<KernelResult> Tune(const KernelId id, std::unique_ptr<StopCondition> stopCondition);
+
493
+
509 KernelResult TuneIteration(const KernelId id, const std::vector<BufferOutputDescriptor>& output,
+
510 const bool recomputeReference = false);
+
511
+
524 std::vector<KernelResult> SimulateKernelTuning(const KernelId id, const std::vector<KernelResult>& results,
+
525 const uint64_t iterations = 0);
+
526
+
532 void SetSearcher(const KernelId id, std::unique_ptr<Searcher> searcher);
+
533
+
538 void ClearData(const KernelId id);
+
539
+ +
547
+ +
556
+
565 std::string GetKernelSource(const KernelId id, const KernelConfiguration& configuration) const;
+
566
+
574 std::string GetKernelDefinitionSource(const KernelDefinitionId id, const KernelConfiguration& configuration) const;
+
575
+
580 static void SetTimeUnit(const TimeUnit unit);
+
581
+
591 void SaveResults(const std::vector<KernelResult>& results, const std::string& filePath, const OutputFormat format,
+
592 const UserData& data = {}) const;
+
593
+
602 std::vector<KernelResult> LoadResults(const std::string& filePath, const OutputFormat format) const;
+
603
+
613 std::vector<KernelResult> LoadResults(const std::string& filePath, const OutputFormat format, UserData& data) const;
+
614
+ +
622
+ +
628
+ +
634
+ +
640
+
645 void SynchronizeQueue(const QueueId id);
+
646
+ +
651
+ +
656
+
660 [[deprecated("Use SynchronizeDevice() or SynchronizeQueues() method instead.")]] void Synchronize();
+
661
+
670 void SetProfilingCounters(const std::vector<std::string>& counters);
+
671
+
680 void SetCompilerOptions(const std::string& options);
+
681
+ +
690
+
697 void SetAutomaticGlobalSizeCorrection(const bool flag);
+
698
+
705 void SetKernelCacheCapacity(const uint64_t capacity);
+
706
+
711 std::vector<PlatformInfo> GetPlatformInfo() const;
+
712
+
718 std::vector<DeviceInfo> GetDeviceInfo(const PlatformIndex platform) const;
+
719
+ +
725
+
730 static void SetLoggingLevel(const LoggingLevel level);
+
731
+
736 static void SetLoggingTarget(std::ostream& outputTarget);
+
737
+
742 static void SetLoggingTarget(const std::string& filePath);
+
743
+
744private:
+
745 std::unique_ptr<TunerCore> m_Tuner;
+
746
+
747 KTT_VIRTUAL_API ArgumentId AddArgumentWithReferencedData(const size_t elementSize, const ArgumentDataType dataType,
+
748 const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType,
+
749 const ArgumentManagementType managementType, void* data, const size_t dataSize);
+
750 KTT_VIRTUAL_API ArgumentId AddArgumentWithOwnedData(const size_t elementSize, const ArgumentDataType dataType,
+
751 const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const ArgumentMemoryType memoryType,
+
752 const ArgumentManagementType managementType, const void* data, const size_t dataSize, const std::string& symbolName = "");
+
753 KTT_VIRTUAL_API ArgumentId AddUserArgument(ComputeBuffer buffer, const size_t elementSize, const ArgumentDataType dataType,
+
754 const ArgumentMemoryLocation memoryLocation, const ArgumentAccessType accessType, const size_t dataSize);
+
755
+
756 template <typename T>
+
757 ArgumentDataType DeriveArgumentDataType() const;
+
758};
+
759
+
760} // namespace ktt
+
761
+
762#include <Tuner.inl>
@@ -332,6 +356,7 @@
Definition: KernelConfiguration.h:19
Definition: KernelResult.h:21
Definition: Tuner.h:60
+
ArgumentId AddArgumentScalar(const void *data, const size_t dataSize)
void SetProfiling(const bool flag)
void SetKernelCacheCapacity(const uint64_t capacity)
Tuner(const ComputeApi api, const ComputeApiInitializer &initializer)
@@ -344,11 +369,15 @@
std::vector< KernelResult > Tune(const KernelId id, std::unique_ptr< StopCondition > stopCondition)
KernelId CreateCompositeKernel(const std::string &name, const std::vector< KernelDefinitionId > &definitionIds, KernelLauncher launcher=nullptr)
void SetArguments(const KernelDefinitionId id, const std::vector< ArgumentId > &argumentIds)
+
void RemoveComputeQueue(const QueueId id)
std::string GetKernelDefinitionSource(const KernelDefinitionId id, const KernelConfiguration &configuration) const
ArgumentId AddArgumentVector(std::vector< T > &data, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation, const ArgumentManagementType managementType, const bool referenceUserData)
+
void WaitForTransferAction(const TransferActionId id)
+
ArgumentId AddArgumentVector(ComputeBuffer buffer, const size_t bufferSize, const size_t elementSize, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation)
KernelConfiguration GetBestConfiguration(const KernelId id) const
void SetValueComparator(const ArgumentId id, ValueComparator comparator)
DeviceInfo GetCurrentDeviceInfo() const
+
void WaitForComputeAction(const ComputeActionId id)
ArgumentId AddArgumentVector(const std::vector< T > &data, const ArgumentAccessType accessType)
std::vector< DeviceInfo > GetDeviceInfo(const PlatformIndex platform) const
std::vector< KernelResult > LoadResults(const std::string &filePath, const OutputFormat format) const
@@ -359,6 +388,7 @@
static void SetTimeUnit(const TimeUnit unit)
void SetCompilerOptions(const std::string &options)
void SetProfiledDefinitions(const KernelId id, const std::vector< KernelDefinitionId > &definitionIds)
+
Tuner(const ComputeApi api, const ComputeApiInitializer &initializer, std::vector< QueueId > &assignedQueueIds)
void SetValidationMode(const ValidationMode mode)
void RemoveKernelDefinition(const KernelDefinitionId id)
KernelResult Run(const KernelId id, const KernelConfiguration &configuration, const std::vector< BufferOutputDescriptor > &output)
@@ -374,8 +404,12 @@
void RemoveArgument(const ArgumentId id)
void RemoveKernel(const KernelId id)
void SetReferenceComputation(const ArgumentId id, ReferenceComputation computation)
+
QueueId AddComputeQueue(ComputeQueue queue)
KernelDefinitionId AddKernelDefinition(const std::string &name, const std::string &source, const DimensionVector &globalSize, const DimensionVector &localSize, const std::vector< std::string > &typeNames={})
+
KernelDefinitionId GetKernelDefinitionId(const std::string &name, const std::vector< std::string > &typeNames={}) const
+
void SynchronizeQueue(const QueueId id)
void AddThreadModifier(const KernelId id, const std::vector< KernelDefinitionId > &definitionIds, const ModifierType type, const ModifierDimension dimension, const std::string &parameter, const ModifierAction action)
+
void SynchronizeDevice()
ArgumentId AddArgumentVector(ComputeBuffer buffer, const size_t bufferSize, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation)
void ClearData(const KernelId id)
void AddParameter(const KernelId id, const std::string &name, const std::vector< uint64_t > &values, const std::string &group="")
@@ -386,25 +420,31 @@
KernelResult TuneIteration(const KernelId id, const std::vector< BufferOutputDescriptor > &output, const bool recomputeReference=false)
void SetGlobalSizeType(const GlobalSizeType type)
std::vector< KernelResult > SimulateKernelTuning(const KernelId id, const std::vector< KernelResult > &results, const uint64_t iterations=0)
+
void SynchronizeQueues()
KernelDefinitionId AddKernelDefinitionFromFile(const std::string &name, const std::string &filePath, const DimensionVector &globalSize, const DimensionVector &localSize, const std::vector< std::string > &typeNames={})
void AddThreadModifier(const KernelId id, const std::vector< KernelDefinitionId > &definitionIds, const ModifierType type, const ModifierDimension dimension, const std::vector< std::string > &parameters, ModifierFunction function)
static void SetLoggingTarget(const std::string &filePath)
void SetLauncher(const KernelId id, KernelLauncher launcher)
+
ArgumentId AddArgumentSymbol(const T &data, const std::string &symbolName="")
std::string GetKernelSource(const KernelId id, const KernelConfiguration &configuration) const
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
TimeUnit
Definition: TimeUnit.h:13
GlobalSizeType
Definition: GlobalSizeType.h:13
LoggingLevel
Definition: LoggingLevel.h:13
ValidationMethod
Definition: ValidationMethod.h:13
uint32_t DeviceIndex
Definition: KttTypes.h:28
+
uint64_t ComputeActionId
Definition: KttTypes.h:73
ValidationMode
Definition: ValidationMode.h:15
+
uint32_t QueueId
Definition: KttTypes.h:33
ArgumentAccessType
Definition: ArgumentAccessType.h:14
ModifierDimension
Definition: ModifierDimension.h:14
uint64_t KernelId
Definition: KttTypes.h:43
std::function< bool(const std::vector< uint64_t > &)> ConstraintFunction
Definition: KttTypes.h:88
ArgumentManagementType
Definition: ArgumentManagementType.h:13
ArgumentDataType
Definition: ArgumentDataType.h:13
+
uint64_t TransferActionId
Definition: KttTypes.h:78
ComputeApi
Definition: ComputeApi.h:13
+
void * ComputeQueue
Definition: KttTypes.h:118
ArgumentMemoryType
Definition: ArgumentMemoryType.h:14
std::function< void(void *)> ReferenceComputation
Definition: KttTypes.h:98
ModifierAction
Definition: ModifierAction.h:13
@@ -416,7 +456,7 @@
uint32_t PlatformIndex
Definition: KttTypes.h:23
std::function< uint64_t(const uint64_t, const std::vector< uint64_t > &)> ModifierFunction
Definition: KttTypes.h:83
uint64_t ArgumentId
Definition: KttTypes.h:48
-
std::vector< std::pair< std::string, std::variant< uint64_t, double > >> ParameterInput
Definition: KttTypes.h:53
+
std::vector< std::pair< std::string, std::variant< uint64_t, double > > > ParameterInput
Definition: KttTypes.h:53
uint64_t KernelDefinitionId
Definition: KttTypes.h:38
std::map< std::string, std::string > UserData
Definition: KttTypes.h:58
std::function< void(ComputeInterface &)> KernelLauncher
Definition: KttTypes.h:93
@@ -426,7 +466,7 @@ diff --git a/Docs/_tuning_duration_8h.html b/Docs/_tuning_duration_8h.html index 3b656959..9ae2ae41 100644 --- a/Docs/_tuning_duration_8h.html +++ b/Docs/_tuning_duration_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/TuningDuration.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
TuningDuration.h File Reference
+
TuningDuration.h File Reference
#include <chrono>
@@ -105,25 +103,25 @@

Go to the source code of this file.

-

+

Classes

class  ktt::TuningDuration
 
- - +

+

Namespaces

 ktt
namespace  ktt
 

Detailed Description

-

Stop condition based on total tuning duration.

+

Stop condition based on total tuning duration.

diff --git a/Docs/_tuning_duration_8h_source.html b/Docs/_tuning_duration_8h_source.html index a25f5b93..82ca4233 100644 --- a/Docs/_tuning_duration_8h_source.html +++ b/Docs/_tuning_duration_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition/TuningDuration.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,38 +91,37 @@
-
-
TuningDuration.h
+
TuningDuration.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <chrono>
-
7 
- -
9 #include <KttPlatform.h>
-
10 
-
11 namespace ktt
-
12 {
-
13 
-
17 class KTT_API TuningDuration : public StopCondition
-
18 {
-
19 public:
-
25  explicit TuningDuration(const double duration);
-
26 
-
27  bool IsFulfilled() const override;
-
28  void Initialize(const uint64_t configurationsCount) override;
-
29  void Update(const KernelResult& result) override;
-
30  std::string GetStatusString() const override;
-
31 
-
32 private:
-
33  std::chrono::steady_clock::time_point m_InitialTime;
-
34  double m_PassedTime;
-
35  double m_TargetTime;
-
36 };
-
37 
-
38 } // namespace ktt
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <chrono>
+
7
+ +
9#include <KttPlatform.h>
+
10
+
11namespace ktt
+
12{
+
13
+
17class KTT_API TuningDuration : public StopCondition
+
18{
+
19public:
+
25 explicit TuningDuration(const double duration);
+
26
+
27 bool IsFulfilled() const override;
+
28 void Initialize(const uint64_t configurationsCount) override;
+
29 void Update(const KernelResult& result) override;
+
30 std::string GetStatusString() const override;
+
31
+
32private:
+
33 std::chrono::steady_clock::time_point m_InitialTime;
+
34 double m_PassedTime;
+
35 double m_TargetTime;
+
36};
+
37
+
38} // namespace ktt
Definition: KernelResult.h:21
@@ -134,14 +132,14 @@
std::string GetStatusString() const override
void Initialize(const uint64_t configurationsCount) override
TuningDuration(const double duration)
-
Definition: KttPlatform.h:36
+
Definition: KttPlatform.h:41
diff --git a/Docs/_validation_method_8h.html b/Docs/_validation_method_8h.html index 6370aa06..c00066d5 100644 --- a/Docs/_validation_method_8h.html +++ b/Docs/_validation_method_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelRunner/ValidationMethod.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,19 +94,18 @@ -
-
ValidationMethod.h File Reference
+
ValidationMethod.h File Reference

Go to the source code of this file.

- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ValidationMethod { ktt::AbsoluteDifference , ktt::SideBySideComparison @@ -116,14 +114,14 @@
 

Detailed Description

-

Definition of a method used during validation of floating-point output arguments.

+

Definition of a method used during validation of floating-point output arguments.

diff --git a/Docs/_validation_method_8h_source.html b/Docs/_validation_method_8h_source.html index b06d7504..26c7ff1e 100644 --- a/Docs/_validation_method_8h_source.html +++ b/Docs/_validation_method_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelRunner/ValidationMethod.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,27 +91,26 @@
-
-
ValidationMethod.h
+
ValidationMethod.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 namespace ktt
-
7 {
-
8 
-
12 enum class ValidationMethod
-
13 {
- -
17 
- -
21 
- -
26 };
-
27 
-
28 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6namespace ktt
+
7{
+
8
+ +
13{
+ +
17
+ +
21
+ +
26};
+
27
+
28} // namespace ktt
+
Definition: KttPlatform.h:41
ValidationMethod
Definition: ValidationMethod.h:13
@@ -123,7 +121,7 @@ diff --git a/Docs/_validation_mode_8h.html b/Docs/_validation_mode_8h.html index 13dab058..a469506d 100644 --- a/Docs/_validation_mode_8h.html +++ b/Docs/_validation_mode_8h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelRunner/ValidationMode.h File Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -96,25 +95,24 @@ Classes | Namespaces | Enumerations -
-
ValidationMode.h File Reference
+
ValidationMode.h File Reference
#include <Utility/BitfieldEnum.h>

Go to the source code of this file.

-

+

Classes

struct  ktt::EnableBitfieldOperators< ValidationMode >
 
- - +

+

Namespaces

 ktt
namespace  ktt
 
-

+

Enumerations

enum class  ktt::ValidationMode {
  ktt::None = 0 @@ -128,14 +126,14 @@
 

Detailed Description

-

Definition of kernel output validation in different scenarios.

+

Definition of kernel output validation in different scenarios.

diff --git a/Docs/_validation_mode_8h.js b/Docs/_validation_mode_8h.js index afcee50f..dfc4c0e7 100644 --- a/Docs/_validation_mode_8h.js +++ b/Docs/_validation_mode_8h.js @@ -1,6 +1,6 @@ var _validation_mode_8h = [ - [ "EnableBitfieldOperators< ValidationMode >", "structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html", "structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4" ], + [ "ktt::EnableBitfieldOperators< ValidationMode >", "structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html", "structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4" ], [ "ValidationMode", "_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04", [ [ "None", "_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04a6adf97f83acf6453d4a6a4b1070f3754", null ], [ "Running", "_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04a5bda814c4aedb126839228f1a3d92f09", null ], diff --git a/Docs/_validation_mode_8h_source.html b/Docs/_validation_mode_8h_source.html index 10d808e0..43a5d8fa 100644 --- a/Docs/_validation_mode_8h_source.html +++ b/Docs/_validation_mode_8h_source.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelRunner/ValidationMode.h Source File @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,52 +91,51 @@
-
-
ValidationMode.h
+
ValidationMode.h
-Go to the documentation of this file.
1 
-
4 #pragma once
-
5 
-
6 #include <Utility/BitfieldEnum.h>
-
7 
-
8 namespace ktt
-
9 {
-
10 
-
14 enum class ValidationMode
-
15 {
-
18  None = 0,
-
19 
-
22  Running = (1 << 0),
-
23 
-
26  OfflineTuning = (1 << 1),
-
27 
-
30  OnlineTuning = (1 << 2),
-
31 
- -
35 };
-
36 
-
39 template <>
-
40 struct EnableBitfieldOperators<ValidationMode>
-
41 {
-
44  static const bool m_Enable = true;
-
45 };
-
46 
-
47 } // namespace ktt
-
Definition: KttPlatform.h:36
+Go to the documentation of this file.
1
+
4#pragma once
+
5
+
6#include <Utility/BitfieldEnum.h>
+
7
+
8namespace ktt
+
9{
+
10
+ +
15{
+
18 None = 0,
+
19
+
22 Running = (1 << 0),
+
23
+
26 OfflineTuning = (1 << 1),
+
27
+
30 OnlineTuning = (1 << 2),
+
31
+ +
35};
+
36
+
39template <>
+
40struct EnableBitfieldOperators<ValidationMode>
+
41{
+
44 static const bool m_Enable = true;
+
45};
+
46
+
47} // namespace ktt
+
Definition: KttPlatform.h:41
ValidationMode
Definition: ValidationMode.h:15
- - - + + +
diff --git a/Docs/annotated.html b/Docs/annotated.html index 4eb6b43f..84745bc5 100644 --- a/Docs/annotated.html +++ b/Docs/annotated.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,38 +91,37 @@
-
-
Class List
+
Class List
@@ -131,7 +129,7 @@ diff --git a/Docs/annotated_dup.js b/Docs/annotated_dup.js index 5112a5b6..d9a738b4 100644 --- a/Docs/annotated_dup.js +++ b/Docs/annotated_dup.js @@ -1,30 +1,30 @@ var annotated_dup = [ [ "ktt", "namespacektt.html", [ - [ "Tuner", "classktt_1_1_tuner.html", "classktt_1_1_tuner" ], + [ "BufferOutputDescriptor", "classktt_1_1_buffer_output_descriptor.html", "classktt_1_1_buffer_output_descriptor" ], + [ "ComputationResult", "classktt_1_1_computation_result.html", "classktt_1_1_computation_result" ], [ "ComputeApiInitializer", "classktt_1_1_compute_api_initializer.html", "classktt_1_1_compute_api_initializer" ], [ "ComputeInterface", "classktt_1_1_compute_interface.html", "classktt_1_1_compute_interface" ], - [ "KttException", "classktt_1_1_ktt_exception.html", "classktt_1_1_ktt_exception" ], - [ "DimensionVector", "classktt_1_1_dimension_vector.html", "classktt_1_1_dimension_vector" ], - [ "KernelConfiguration", "classktt_1_1_kernel_configuration.html", "classktt_1_1_kernel_configuration" ], - [ "ParameterPair", "classktt_1_1_parameter_pair.html", "classktt_1_1_parameter_pair" ], + [ "ConfigurationCount", "classktt_1_1_configuration_count.html", "classktt_1_1_configuration_count" ], + [ "ConfigurationDuration", "classktt_1_1_configuration_duration.html", "classktt_1_1_configuration_duration" ], + [ "ConfigurationFraction", "classktt_1_1_configuration_fraction.html", "classktt_1_1_configuration_fraction" ], + [ "DeterministicSearcher", "classktt_1_1_deterministic_searcher.html", "classktt_1_1_deterministic_searcher" ], [ "DeviceInfo", "classktt_1_1_device_info.html", "classktt_1_1_device_info" ], - [ "PlatformInfo", "classktt_1_1_platform_info.html", "classktt_1_1_platform_info" ], - [ "BufferOutputDescriptor", "classktt_1_1_buffer_output_descriptor.html", "classktt_1_1_buffer_output_descriptor" ], - [ "ComputationResult", "classktt_1_1_computation_result.html", "classktt_1_1_computation_result" ], + [ "DimensionVector", 
"classktt_1_1_dimension_vector.html", "classktt_1_1_dimension_vector" ], + [ "EnableBitfieldOperators< ValidationMode >", "structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html", "structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4" ], [ "KernelCompilationData", "structktt_1_1_kernel_compilation_data.html", "structktt_1_1_kernel_compilation_data" ], + [ "KernelConfiguration", "classktt_1_1_kernel_configuration.html", "classktt_1_1_kernel_configuration" ], [ "KernelProfilingCounter", "classktt_1_1_kernel_profiling_counter.html", "classktt_1_1_kernel_profiling_counter" ], [ "KernelProfilingData", "classktt_1_1_kernel_profiling_data.html", "classktt_1_1_kernel_profiling_data" ], [ "KernelResult", "classktt_1_1_kernel_result.html", "classktt_1_1_kernel_result" ], - [ "DeterministicSearcher", "classktt_1_1_deterministic_searcher.html", "classktt_1_1_deterministic_searcher" ], + [ "KttException", "classktt_1_1_ktt_exception.html", "classktt_1_1_ktt_exception" ], [ "McmcSearcher", "classktt_1_1_mcmc_searcher.html", "classktt_1_1_mcmc_searcher" ], + [ "ParameterPair", "classktt_1_1_parameter_pair.html", "classktt_1_1_parameter_pair" ], + [ "PlatformInfo", "classktt_1_1_platform_info.html", "classktt_1_1_platform_info" ], [ "RandomSearcher", "classktt_1_1_random_searcher.html", "classktt_1_1_random_searcher" ], [ "Searcher", "classktt_1_1_searcher.html", "classktt_1_1_searcher" ], - [ "ConfigurationCount", "classktt_1_1_configuration_count.html", "classktt_1_1_configuration_count" ], - [ "ConfigurationDuration", "classktt_1_1_configuration_duration.html", "classktt_1_1_configuration_duration" ], - [ "ConfigurationFraction", "classktt_1_1_configuration_fraction.html", "classktt_1_1_configuration_fraction" ], [ "StopCondition", "classktt_1_1_stop_condition.html", "classktt_1_1_stop_condition" ], - [ "TuningDuration", "classktt_1_1_tuning_duration.html", "classktt_1_1_tuning_duration" ], - [ "EnableBitfieldOperators< ValidationMode >", 
"structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html", "structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4" ] + [ "Tuner", "classktt_1_1_tuner.html", "classktt_1_1_tuner" ], + [ "TuningDuration", "classktt_1_1_tuning_duration.html", "classktt_1_1_tuning_duration" ] ] ] ]; \ No newline at end of file diff --git a/Docs/classes.html b/Docs/classes.html index 1c499d6c..958582aa 100644 --- a/Docs/classes.html +++ b/Docs/classes.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Index @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,41 +91,40 @@
-
-
Class Index
+
Class Index
@@ -134,7 +132,7 @@ diff --git a/Docs/classktt_1_1_buffer_output_descriptor-members.html b/Docs/classktt_1_1_buffer_output_descriptor-members.html index d4fce4b0..f4ef43a8 100644 --- a/Docs/classktt_1_1_buffer_output_descriptor-members.html +++ b/Docs/classktt_1_1_buffer_output_descriptor-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,24 +91,23 @@
-
-
ktt::BufferOutputDescriptor Member List
+
ktt::BufferOutputDescriptor Member List

This is the complete list of members for ktt::BufferOutputDescriptor, including all inherited members.

- + - +
BufferOutputDescriptor(const ArgumentId id, void *outputDestination)ktt::BufferOutputDescriptorexplicit
BufferOutputDescriptor(const ArgumentId id, void *outputDestination, const size_t outputSize)ktt::BufferOutputDescriptorexplicit
BufferOutputDescriptor(const ArgumentId id, void *outputDestination, const size_t outputSize)ktt::BufferOutputDescriptorexplicit
GetArgumentId() constktt::BufferOutputDescriptor
GetOutputDestination() constktt::BufferOutputDescriptor
GetOutputDestination() constktt::BufferOutputDescriptor
GetOutputSize() constktt::BufferOutputDescriptor
diff --git a/Docs/classktt_1_1_buffer_output_descriptor.html b/Docs/classktt_1_1_buffer_output_descriptor.html index 5021ff85..7fb1dab7 100644 --- a/Docs/classktt_1_1_buffer_output_descriptor.html +++ b/Docs/classktt_1_1_buffer_output_descriptor.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::BufferOutputDescriptor Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::BufferOutputDescriptor Class Reference
+
ktt::BufferOutputDescriptor Class Reference

#include <BufferOutputDescriptor.h>

- @@ -116,9 +114,9 @@

+

Public Member Functions

 BufferOutputDescriptor (const ArgumentId id, void *outputDestination)
 
 

Detailed Description

-

Class which can be used to retrieve kernel argument data when calling certain KTT API methods.

+

Class which can be used to retrieve kernel argument data when calling certain KTT API methods.

Constructor & Destructor Documentation

- +

◆ BufferOutputDescriptor() [1/2]

@@ -151,7 +149,7 @@

-

Constructor, which creates new output descriptor object for specified kernel argument.

Parameters
+

Constructor, which creates new output descriptor object for specified kernel argument.

Parameters
@@ -161,7 +159,7 @@

+

◆ BufferOutputDescriptor() [2/2]

idId of vector argument which will be retrieved.
outputDestinationPointer to destination where vector argument data will be copied. Destination buffer size needs to be equal or greater than argument size.
@@ -212,7 +210,7 @@

Member Function Documentation

- +

◆ GetArgumentId()

- +

◆ GetOutputDestination()

- +

◆ GetOutputSize()

@@ -275,7 +273,7 @@

diff --git a/Docs/classktt_1_1_computation_result-members.html b/Docs/classktt_1_1_computation_result-members.html index 7719d918..2aa79aed 100644 --- a/Docs/classktt_1_1_computation_result-members.html +++ b/Docs/classktt_1_1_computation_result-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@

idId of vector argument which will be retrieved.
outputDestinationPointer to destination where vector argument data will be copied. Destination buffer size needs to be equal or greater than specified output size.
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,37 +91,36 @@
-
-
ktt::ComputationResult Member List
+
ktt::ComputationResult Member List

This is the complete list of members for ktt::ComputationResult, including all inherited members.

- + - + - + - + - + - + - + - + - +
ComputationResult()ktt::ComputationResult
ComputationResult(const std::string &kernelFunction)ktt::ComputationResultexplicit
ComputationResult(const std::string &kernelFunction)ktt::ComputationResultexplicit
ComputationResult(const ComputationResult &other)ktt::ComputationResult
GetCompilationData() constktt::ComputationResult
GetCompilationData() constktt::ComputationResult
GetDuration() constktt::ComputationResult
GetGlobalSize() constktt::ComputationResult
GetGlobalSize() constktt::ComputationResult
GetKernelFunction() constktt::ComputationResult
GetLocalSize() constktt::ComputationResult
GetLocalSize() constktt::ComputationResult
GetOverhead() constktt::ComputationResult
GetProfilingData() constktt::ComputationResult
GetProfilingData() constktt::ComputationResult
HasCompilationData() constktt::ComputationResult
HasProfilingData() constktt::ComputationResult
HasProfilingData() constktt::ComputationResult
HasRemainingProfilingRuns() constktt::ComputationResult
operator=(const ComputationResult &other)ktt::ComputationResult
operator=(const ComputationResult &other)ktt::ComputationResult
SetCompilationData(std::unique_ptr< KernelCompilationData > data)ktt::ComputationResult
SetDurationData(const Nanoseconds duration, const Nanoseconds overhead)ktt::ComputationResult
SetDurationData(const Nanoseconds duration, const Nanoseconds overhead)ktt::ComputationResult
SetProfilingData(std::unique_ptr< KernelProfilingData > data)ktt::ComputationResult
SetSizeData(const DimensionVector &globalSize, const DimensionVector &localSize)ktt::ComputationResult
SetSizeData(const DimensionVector &globalSize, const DimensionVector &localSize)ktt::ComputationResult
diff --git a/Docs/classktt_1_1_computation_result.html b/Docs/classktt_1_1_computation_result.html index 27b3a6d3..78dcf1ca 100644 --- a/Docs/classktt_1_1_computation_result.html +++ b/Docs/classktt_1_1_computation_result.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::ComputationResult Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::ComputationResult Class Reference
+
ktt::ComputationResult Class Reference

#include <ComputationResult.h>

- @@ -142,9 +140,9 @@

+

Public Member Functions

 ComputationResult ()
 
 

Detailed Description

-

Class which holds result data from a single kernel definition run such as duration, overhead and profiling data.

+

Class which holds result data from a single kernel definition run such as duration, overhead and profiling data.

Constructor & Destructor Documentation

- +

◆ ComputationResult() [1/3]

- +

◆ ComputationResult() [2/3]

- +

◆ GetGlobalSize()

- +

◆ GetKernelFunction()

- +

◆ GetLocalSize()

- +

◆ GetOverhead()

- +

◆ GetProfilingData()

@@ -341,11 +339,11 @@

-

Retrieves kernel profiling data. Should only be called after prior check for valid data.

Returns
Profiling data generated by the compute API. See KernelProfilingData for more information.
+

Retrieves kernel profiling data. Should only be called after prior check for valid data.

Returns
Profiling data generated by the compute API. See KernelProfilingData for more information.

- +

◆ HasCompilationData()

- +

◆ HasProfilingData()

- +

◆ HasRemainingProfilingRuns()

- +

◆ operator=()

@@ -414,7 +412,7 @@

-

Copy assignment operator.

Parameters
+

Copy assignment operator.

Parameters
otherResult from which the data will be copied.
@@ -423,7 +421,7 @@

+

◆ SetCompilationData()

@@ -438,7 +436,7 @@

-

Fills compilation data for the result.

Parameters
+

Fills compilation data for the result.

Parameters
dataCompilation data generated by the compute API. See KernelCompilationData for more information.
@@ -447,7 +445,7 @@

+

◆ SetDurationData()

@@ -472,7 +470,7 @@

-

Fills duration data for the result.

Parameters
+

Fills duration data for the result.

Parameters
@@ -482,7 +480,7 @@

+

◆ SetProfilingData()

durationRaw kernel duration, usually reported by the underlying compute API.
overheadOverhead related to kernel launch such as kernel function compilation.
dataProfiling data generated by the compute API. See KernelProfilingData for more information.
@@ -506,7 +504,7 @@

+

◆ SetSizeData()

@@ -531,7 +529,7 @@

-

Fills thread size data for the result.

Parameters
+

Fills thread size data for the result.

Parameters
@@ -550,7 +548,7 @@

diff --git a/Docs/classktt_1_1_compute_api_initializer-members.html b/Docs/classktt_1_1_compute_api_initializer-members.html index 841c8558..9bb4ec4e 100644 --- a/Docs/classktt_1_1_compute_api_initializer-members.html +++ b/Docs/classktt_1_1_compute_api_initializer-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@

globalSizeGlobal thread size with which the kernel was launched.
localSizeLocal thread size with which the kernel was launched.
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,22 +91,21 @@
-
-
ktt::ComputeApiInitializer Member List
+
ktt::ComputeApiInitializer Member List

This is the complete list of members for ktt::ComputeApiInitializer, including all inherited members.

- +
ComputeApiInitializer(ComputeContext context, const std::vector< ComputeQueue > &queues)ktt::ComputeApiInitializerexplicit
GetContext() constktt::ComputeApiInitializer
GetContext() constktt::ComputeApiInitializer
GetQueues() constktt::ComputeApiInitializer
diff --git a/Docs/classktt_1_1_compute_api_initializer.html b/Docs/classktt_1_1_compute_api_initializer.html index ca7dd569..c4b613ae 100644 --- a/Docs/classktt_1_1_compute_api_initializer.html +++ b/Docs/classktt_1_1_compute_api_initializer.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::ComputeApiInitializer Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::ComputeApiInitializer Class Reference
+
ktt::ComputeApiInitializer Class Reference

#include <ComputeApiInitializer.h>

- @@ -112,9 +110,9 @@

+

Public Member Functions

 ComputeApiInitializer (ComputeContext context, const std::vector< ComputeQueue > &queues)
 
 

Detailed Description

-

Class which can be used to initialize tuner with custom compute device context and queues.

+

Class which can be used to initialize tuner with custom compute device context and queues.

Constructor & Destructor Documentation

- +

◆ ComputeApiInitializer()

@@ -147,7 +145,7 @@

-

Constructor which creates new initializer.

Parameters
+

Constructor which creates new initializer.

Parameters
@@ -158,7 +156,7 @@

Member Function Documentation

- +

◆ GetContext()

- +

◆ GetQueues()

@@ -203,7 +201,7 @@

diff --git a/Docs/classktt_1_1_compute_interface-members.html b/Docs/classktt_1_1_compute_interface-members.html index 5ffa0244..11c86043 100644 --- a/Docs/classktt_1_1_compute_interface-members.html +++ b/Docs/classktt_1_1_compute_interface-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@

contextUser-provided context. Depending on compute API, it can be either CUcontext or cl_context handle.
queuesUser-provided queues. Depending on compute API, it can be a vector of either CUstream or cl_command_queue handles. In case of OpenCL API, the queues must be created with CL_QUEUE_PROFILING_ENABLE flag. The number of queues must be at least 1.
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,53 +91,54 @@
-
-
ktt::ComputeInterface Member List
+
ktt::ComputeInterface Member List

This is the complete list of members for ktt::ComputeInterface, including all inherited members.

- + - + - + - + - + - + - - - - - - - - - - - - - + + + + + + + + + + + + + + + - + - + - + - +
ChangeArguments(const KernelDefinitionId id, const std::vector< ArgumentId > &arguments)=0ktt::ComputeInterfacepure virtual
ClearBuffer(const ArgumentId id)=0ktt::ComputeInterfacepure virtual
ClearBuffer(const ArgumentId id)=0ktt::ComputeInterfacepure virtual
CopyBuffer(const ArgumentId destination, const ArgumentId source, const size_t dataSize=0)=0ktt::ComputeInterfacepure virtual
CopyBufferAsync(const ArgumentId destination, const ArgumentId source, const QueueId queue, const size_t dataSize=0)=0ktt::ComputeInterfacepure virtual
CopyBufferAsync(const ArgumentId destination, const ArgumentId source, const QueueId queue, const size_t dataSize=0)=0ktt::ComputeInterfacepure virtual
DownloadBuffer(const ArgumentId id, void *destination, const size_t dataSize=0)=0ktt::ComputeInterfacepure virtual
DownloadBufferAsync(const ArgumentId id, const QueueId queue, void *destination, const size_t dataSize=0)=0ktt::ComputeInterfacepure virtual
DownloadBufferAsync(const ArgumentId id, const QueueId queue, void *destination, const size_t dataSize=0)=0ktt::ComputeInterfacepure virtual
GetAllQueues() const =0ktt::ComputeInterfacepure virtual
GetCurrentConfiguration() const =0ktt::ComputeInterfacepure virtual
GetCurrentConfiguration() const =0ktt::ComputeInterfacepure virtual
GetCurrentGlobalSize(const KernelDefinitionId id) const =0ktt::ComputeInterfacepure virtual
GetCurrentLocalSize(const KernelDefinitionId id) const =0ktt::ComputeInterfacepure virtual
GetCurrentLocalSize(const KernelDefinitionId id) const =0ktt::ComputeInterfacepure virtual
GetDefaultQueue() const =0ktt::ComputeInterfacepure virtual
GetRemainingProfilingRuns(const KernelDefinitionId id) const =0ktt::ComputeInterfacepure virtual
GetRemainingProfilingRuns(const KernelDefinitionId id) const =0ktt::ComputeInterfacepure virtual
GetRemainingProfilingRuns() const =0ktt::ComputeInterfacepure virtual
GetUnifiedMemoryBufferHandle(const ArgumentId id, UnifiedBufferMemory &memoryHandle)=0ktt::ComputeInterfacepure virtual
HasBuffer(const ArgumentId id)=0ktt::ComputeInterfacepure virtual
ResizeBuffer(const ArgumentId id, const size_t newDataSize, const bool preserveData)=0ktt::ComputeInterfacepure virtual
RunKernel(const KernelDefinitionId id)=0ktt::ComputeInterfacepure virtual
RunKernel(const KernelDefinitionId id, const DimensionVector &globalSize, const DimensionVector &localSize)=0ktt::ComputeInterfacepure virtual
RunKernelAsync(const KernelDefinitionId id, const QueueId queue)=0ktt::ComputeInterfacepure virtual
RunKernelAsync(const KernelDefinitionId id, const QueueId queue, const DimensionVector &globalSize, const DimensionVector &localSize)=0ktt::ComputeInterfacepure virtual
RunKernelWithProfiling(const KernelDefinitionId id)=0ktt::ComputeInterfacepure virtual
RunKernelWithProfiling(const KernelDefinitionId id, const DimensionVector &globalSize, const DimensionVector &localSize)=0ktt::ComputeInterfacepure virtual
SwapArguments(const KernelDefinitionId id, const ArgumentId first, const ArgumentId second)=0ktt::ComputeInterfacepure virtual
SynchronizeDevice()=0ktt::ComputeInterfacepure virtual
SynchronizeQueue(const QueueId queue)=0ktt::ComputeInterfacepure virtual
UpdateBuffer(const ArgumentId id, const void *data, const size_t dataSize=0)=0ktt::ComputeInterfacepure virtual
GetRunMode() const =0ktt::ComputeInterfacepure virtual
GetUnifiedMemoryBufferHandle(const ArgumentId id, UnifiedBufferMemory &memoryHandle)=0ktt::ComputeInterfacepure virtual
HasBuffer(const ArgumentId id)=0ktt::ComputeInterfacepure virtual
ResizeBuffer(const ArgumentId id, const size_t newDataSize, const bool preserveData)=0ktt::ComputeInterfacepure virtual
RunKernel(const KernelDefinitionId id)=0ktt::ComputeInterfacepure virtual
RunKernel(const KernelDefinitionId id, const DimensionVector &globalSize, const DimensionVector &localSize)=0ktt::ComputeInterfacepure virtual
RunKernelAsync(const KernelDefinitionId id, const QueueId queue)=0ktt::ComputeInterfacepure virtual
RunKernelAsync(const KernelDefinitionId id, const QueueId queue, const DimensionVector &globalSize, const DimensionVector &localSize)=0ktt::ComputeInterfacepure virtual
RunKernelWithProfiling(const KernelDefinitionId id)=0ktt::ComputeInterfacepure virtual
RunKernelWithProfiling(const KernelDefinitionId id, const DimensionVector &globalSize, const DimensionVector &localSize)=0ktt::ComputeInterfacepure virtual
SwapArguments(const KernelDefinitionId id, const ArgumentId first, const ArgumentId second)=0ktt::ComputeInterfacepure virtual
SynchronizeDevice()=0ktt::ComputeInterfacepure virtual
SynchronizeQueue(const QueueId queue)=0ktt::ComputeInterfacepure virtual
SynchronizeQueues()=0ktt::ComputeInterfacepure virtual
UpdateBuffer(const ArgumentId id, const void *data, const size_t dataSize=0)=0ktt::ComputeInterfacepure virtual
UpdateBufferAsync(const ArgumentId id, const QueueId queue, const void *data, const size_t dataSize=0)=0ktt::ComputeInterfacepure virtual
UpdateLocalArgument(const ArgumentId id, const size_t dataSize)=0ktt::ComputeInterfacepure virtual
UpdateLocalArgument(const ArgumentId id, const size_t dataSize)=0ktt::ComputeInterfacepure virtual
UpdateScalarArgument(const ArgumentId id, const void *data)=0ktt::ComputeInterfacepure virtual
UploadBuffer(const ArgumentId id)=0ktt::ComputeInterfacepure virtual
UploadBuffer(const ArgumentId id)=0ktt::ComputeInterfacepure virtual
UploadBufferAsync(const ArgumentId id, const QueueId queue)=0ktt::ComputeInterfacepure virtual
WaitForComputeAction(const ComputeActionId id)=0ktt::ComputeInterfacepure virtual
WaitForComputeAction(const ComputeActionId id)=0ktt::ComputeInterfacepure virtual
WaitForTransferAction(const TransferActionId id)=0ktt::ComputeInterfacepure virtual
~ComputeInterface()=defaultktt::ComputeInterfacevirtual
~ComputeInterface()=defaultktt::ComputeInterfacevirtual
diff --git a/Docs/classktt_1_1_compute_interface.html b/Docs/classktt_1_1_compute_interface.html index 8f6e3e5f..885c9948 100644 --- a/Docs/classktt_1_1_compute_interface.html +++ b/Docs/classktt_1_1_compute_interface.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::ComputeInterface Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::ComputeInterface Class Referenceabstract
+
ktt::ComputeInterface Class Referenceabstract

#include <ComputeInterface.h>

- @@ -130,6 +128,8 @@ + + @@ -138,6 +138,8 @@ + + @@ -174,9 +176,9 @@

+

Public Member Functions

virtual ~ComputeInterface ()=default
 
 
virtual void SynchronizeQueue (const QueueId queue)=0
 
virtual void SynchronizeQueues ()=0
 
virtual void SynchronizeDevice ()=0
 
virtual const DimensionVectorGetCurrentGlobalSize (const KernelDefinitionId id) const =0
 
virtual const KernelConfigurationGetCurrentConfiguration () const =0
 
virtual KernelRunMode GetRunMode () const =0
 
virtual void ChangeArguments (const KernelDefinitionId id, const std::vector< ArgumentId > &arguments)=0
 
virtual void SwapArguments (const KernelDefinitionId id, const ArgumentId first, const ArgumentId second)=0
 

Detailed Description

-

Interface for customizing kernel runs in order to run some part of computation on CPU, utilize iterative kernel launches, composite kernels and more. In order to use this functionality, custom kernel launcher function must be defined for the corresponding kernel.

+

Interface for customizing kernel runs in order to run some part of computation on CPU, utilize iterative kernel launches, composite kernels and more. In order to use this functionality, custom kernel launcher function must be defined for the corresponding kernel.

Constructor & Destructor Documentation

- +

◆ ~ComputeInterface()

Member Function Documentation

-
+

◆ ChangeArguments()

@@ -236,7 +238,7 @@

-

Changes kernel arguments for the specified kernel definitions under currently launched kernel.

Parameters
+

Changes kernel arguments for the specified kernel definitions under currently launched kernel.

Parameters
@@ -246,7 +248,7 @@

+

◆ ClearBuffer()

@@ -269,7 +271,7 @@

-

Removes compute buffer for the specified vector argument. This method should be used mainly with arguments with ArgumentManagementType set to User.

Parameters
+

Removes compute buffer for the specified vector argument. This method should be used mainly with arguments with ArgumentManagementType set to User.

Parameters

idId of kernel definition for which the arguments will be changed. The specified definition must be included in the currently launched kernel.
argumentsIds of arguments to be used by the specified kernel definition. The order of ids must match the order of arguments inside kernel function. The provided ids must be unique.
idId of vector argument whose buffer will be removed.
@@ -278,7 +280,7 @@

+

◆ CopyBuffer()

@@ -317,7 +319,7 @@

-

Copies part of the compute buffer of source vector argument to compute buffer of destination vector argument.

Parameters
+

Copies part of the compute buffer of source vector argument to compute buffer of destination vector argument.

Parameters
@@ -328,7 +330,7 @@

+

◆ CopyBufferAsync()

destinationId of destination vector argument.
sourceId of source vector argument.
@@ -382,11 +384,11 @@

Returns
Id of asynchronous action corresponding to the issued data transfer command. The action must be waited for with WaitForTransferAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded kernel durations may occur.
+
Returns
Id of asynchronous action corresponding to the issued data transfer command. The action must be waited for with e.g., WaitForTransferAction(), SynchronizeQueue() methods. Otherwise, problems such as incorrectly recorded transfer durations may occur.
- +

◆ DownloadBuffer()

destinationId of destination vector argument.
sourceId of source vector argument.
@@ -436,7 +438,7 @@

+

◆ DownloadBufferAsync()

idId of vector argument which will be downloaded.
destinationBuffer where the argument data will be downloaded. Its size must be equal or greater than the specified data size.
@@ -490,11 +492,11 @@

Returns
Id of asynchronous action corresponding to the issued data transfer command. The action must be waited for with WaitForTransferAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded kernel durations may occur.
+
Returns
Id of asynchronous action corresponding to the issued data transfer command. The action must be waited for, e.g., by calling WaitForTransferAction() or SynchronizeQueue(). Otherwise, problems such as incorrectly recorded transfer durations may occur.
- +

◆ GetAllQueues()

- +

◆ GetCurrentConfiguration()

@@ -542,11 +544,11 @@

-

Returns configuration of the currently launched kernel.

Returns
Configuration of the currently launched kernel. See KernelConfiguration for more information.
+

Returns configuration of the currently launched kernel.

Returns
Configuration of the currently launched kernel. See KernelConfiguration for more information.

- +

◆ GetCurrentGlobalSize()

idId of vector argument which will be downloaded.
queueId of queue in which the command to download argument will be submitted.
idId of kernel definition for which the global size will be retrieved. The specified definition must be included in the currently launched kernel.
@@ -579,7 +581,7 @@

+

◆ GetCurrentLocalSize()

- +

◆ GetRemainingProfilingRuns() [1/2]

- +

◆ GetRemainingProfilingRuns() [2/2]

@@ -687,7 +689,7 @@

-

Retrieves number of remaining profiling runs that are needed to collect all the profiling counters for the specified kernel definition.

Parameters
+

Retrieves number of remaining profiling runs that are needed to collect all the profiling counters for the specified kernel definition.

Parameters
idId of kernel definition for which the number of remaining profiling runs will be retrieved. The specified definition must be included in the currently launched kernel.
@@ -697,7 +699,33 @@

+ +

◆ GetRunMode()

+ +
+
+ + + + + +
+ + + + + + + +
KernelRunMode ktt::ComputeInterface::GetRunMode () const
+
+pure virtual
+
+

Returns run mode of the currently launched kernel.

Returns
Run mode of the currently launched kernel. See KernelRunMode for more information.
+ +
+
+

◆ GetUnifiedMemoryBufferHandle()

@@ -730,7 +758,7 @@

-

Retrieves memory handle for the specified unified memory argument. The handle can be used to manipulate argument memory on host side. Example usage: ktt::UnifiedBufferMemory memory; GetUnifiedMemoryBufferHandle(..., memory); float* floatArray = static_cast<float*>(memory);

Parameters
+

Retrieves memory handle for the specified unified memory argument. The handle can be used to manipulate argument memory on host side. Example usage: ktt::UnifiedBufferMemory memory; GetUnifiedMemoryBufferHandle(..., memory); float* floatArray = static_cast<float*>(memory);

Parameters
@@ -740,7 +768,7 @@

+

◆ HasBuffer()

idId of vector argument whose memory handle will be retrieved.
memoryHandleLocation where the memory handle will be stored.
idId of vector argument to check.
@@ -773,7 +801,7 @@

+

◆ ResizeBuffer()

@@ -812,7 +840,7 @@

-

Resizes compute buffer for the specified vector argument.

Parameters
+

Resizes compute buffer for the specified vector argument.

Parameters
@@ -823,7 +851,7 @@

+

◆ RunKernel() [1/2]

idId of vector argument whose buffer will be resized.
newDataSizeSize in bytes for the resized buffer.
idId of kernel definition which will be run. The specified definition must be included in the currently launched kernel.
@@ -855,7 +883,7 @@

+

◆ RunKernel() [2/2]

- +

◆ RunKernelAsync() [2/2]

@@ -994,7 +1022,7 @@

-

Runs the specified kernel definition using provided thread sizes. The kernel will be launched asynchronously in the specified queue.

Parameters
+

Runs the specified kernel definition using provided thread sizes. The kernel will be launched asynchronously in the specified queue.

Parameters
@@ -1003,11 +1031,11 @@

Returns
Id of asynchronous action corresponding to the issued kernel run command. The action must be waited for with WaitForComputeAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded kernel durations may occur.
+
Returns
Id of asynchronous action corresponding to the issued kernel run command. The action must be waited for, e.g., by calling WaitForComputeAction() or SynchronizeQueue(). Otherwise, problems such as incorrectly recorded kernel durations may occur.
- +

◆ RunKernelWithProfiling() [1/2]

idId of kernel definition which will be run. The specified definition must be included in the currently launched kernel.
queueId of queue in which the command to run kernel will be submitted.
idId of kernel definition which will be run. The specified definition must be included in the currently launched kernel.
@@ -1039,7 +1067,7 @@

+

◆ RunKernelWithProfiling() [2/2]

@@ -1078,7 +1106,7 @@

-

Runs the specified kernel definition using provided thread sizes. Collection of kernel profiling counters will be enabled for this run which means that performance will be decreased. Running kernels with profiling will always cause implicit device synchronization before and after the kernel run is finished.

Parameters
+

Runs the specified kernel definition using provided thread sizes. Collection of kernel profiling counters will be enabled for this run which means that performance will be decreased. Running kernels with profiling will always cause implicit device synchronization before and after the kernel run is finished.

Parameters
@@ -1089,7 +1117,7 @@

+

◆ SwapArguments()

idId of kernel definition which will be run. The specified definition must be included in the currently launched kernel.
globalSizeDimensions for global size with which the kernel will be run.
@@ -1139,7 +1167,7 @@

+

◆ SynchronizeDevice()

- +

◆ SynchronizeQueue()

idId of kernel definition for which the arguments will be swapped. The specified definition must be included in the currently launched kernel.
firstId of the first argument which will be swapped.
queueId of queue which will be synchronized.
@@ -1197,7 +1225,33 @@

+ +

◆ SynchronizeQueues()

+ +
+
+ + + + + +
+ + + + + + + +
void ktt::ComputeInterface::SynchronizeQueues ()
+
+pure virtual
+
+

Blocks until all commands submitted to all device queues are completed.

+ +
+
+

◆ UpdateBuffer()

@@ -1236,7 +1290,7 @@

-

Updates data in compute buffer of the specified vector argument.

Parameters
+

Updates data in compute buffer of the specified vector argument.

Parameters
@@ -1247,7 +1301,7 @@

+

◆ UpdateBufferAsync()

idId of vector argument which will be updated.
dataPointer to new data for vector argument. Its size must be equal or greater than the specified data size. The data must have matching kernel argument data type.
@@ -1301,11 +1355,11 @@

Returns
Id of asynchronous action corresponding to the issued data transfer command. The action must be waited for with WaitForTransferAction(), SynchronizeQueue() or SynchronizeDevice() methods. Otherwise, problems such as incorrectly recorded kernel durations may occur.
+
Returns
Id of asynchronous action corresponding to the issued data transfer command. The action must be waited for, e.g., by calling WaitForTransferAction() or SynchronizeQueue(). Otherwise, problems such as incorrectly recorded transfer durations may occur.
- +

◆ UpdateLocalArgument()

idId of vector argument which will be updated.
queueId of queue in which the command to update argument will be submitted.
@@ -1348,7 +1402,7 @@

+

◆ UpdateScalarArgument()

idId of local memory argument which will be updated.
dataSizeNew size in bytes for the argument.
@@ -1391,7 +1445,7 @@

+

◆ UploadBuffer()

@@ -1414,7 +1468,7 @@

-

Uploads the specified vector argument into compute buffer. This method should be used mainly with arguments with ArgumentManagementType set to User.

Parameters
+

Uploads the specified vector argument into compute buffer. This method should be used mainly with arguments with ArgumentManagementType set to User.

Parameters

idId of scalar argument which will be updated.
dataPointer to new data for scalar argument. The data must have matching kernel argument data type.
idId of vector argument which will be uploaded.
@@ -1423,7 +1477,7 @@

+

◆ UploadBufferAsync()

- +

◆ WaitForComputeAction()

@@ -1490,7 +1544,7 @@

-

Blocks until the specified compute action is finished.

Parameters
+

Blocks until the specified compute action is finished.

Parameters
idId of compute action to wait for.
@@ -1499,7 +1553,7 @@

+

◆ WaitForTransferAction()

diff --git a/Docs/classktt_1_1_compute_interface.js b/Docs/classktt_1_1_compute_interface.js index cea79dbc..b180fd75 100644 --- a/Docs/classktt_1_1_compute_interface.js +++ b/Docs/classktt_1_1_compute_interface.js @@ -14,6 +14,7 @@ var classktt_1_1_compute_interface = [ "GetDefaultQueue", "classktt_1_1_compute_interface.html#abe3d80e67b9daa10aee4cb1ec288139b", null ], [ "GetRemainingProfilingRuns", "classktt_1_1_compute_interface.html#a52e7566928e755696360228d2e4cbd76", null ], [ "GetRemainingProfilingRuns", "classktt_1_1_compute_interface.html#a8d1a5aad6165ffdba835d40443651c97", null ], + [ "GetRunMode", "classktt_1_1_compute_interface.html#a8f087328c03c3264e040fb9165805d73", null ], [ "GetUnifiedMemoryBufferHandle", "classktt_1_1_compute_interface.html#a5a075573b67aa482409c25a9412ef024", null ], [ "HasBuffer", "classktt_1_1_compute_interface.html#abe4d68e723947c2493f74e1e3d89d742", null ], [ "ResizeBuffer", "classktt_1_1_compute_interface.html#a0bd5e6cad0cabf6d747e35acc3ce1b55", null ], @@ -26,6 +27,7 @@ var classktt_1_1_compute_interface = [ "SwapArguments", "classktt_1_1_compute_interface.html#a8b7e7a9742a11f1ef0ae207d3130312f", null ], [ "SynchronizeDevice", "classktt_1_1_compute_interface.html#a98c87a68ccfd62d97570bb499447a32b", null ], [ "SynchronizeQueue", "classktt_1_1_compute_interface.html#a0f15c5a48f396ab20feb26a23baf3482", null ], + [ "SynchronizeQueues", "classktt_1_1_compute_interface.html#a794206c1530523c7a00d8da20582c37f", null ], [ "UpdateBuffer", "classktt_1_1_compute_interface.html#a4d543715c40557ac22aef493595ee9f2", null ], [ "UpdateBufferAsync", "classktt_1_1_compute_interface.html#ae82a93622fddc93bd6abe262b3b4f6c0", null ], [ "UpdateLocalArgument", "classktt_1_1_compute_interface.html#abc9416b26a01b2ce04d334a379e3fa95", null ], diff --git a/Docs/classktt_1_1_configuration_count-members.html b/Docs/classktt_1_1_configuration_count-members.html index a526c0b8..e04dc448 100644 --- a/Docs/classktt_1_1_configuration_count-members.html +++ 
b/Docs/classktt_1_1_configuration_count-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,25 +91,24 @@
-
-
ktt::ConfigurationCount Member List
+
ktt::ConfigurationCount Member List

This is the complete list of members for ktt::ConfigurationCount, including all inherited members.

- + - + - +
ConfigurationCount(const uint64_t count)ktt::ConfigurationCountexplicit
GetStatusString() const overridektt::ConfigurationCountvirtual
GetStatusString() const overridektt::ConfigurationCountvirtual
Initialize(const uint64_t configurationsCount) overridektt::ConfigurationCountvirtual
IsFulfilled() const overridektt::ConfigurationCountvirtual
IsFulfilled() const overridektt::ConfigurationCountvirtual
Update(const KernelResult &result) overridektt::ConfigurationCountvirtual
~StopCondition()=defaultktt::StopConditionvirtual
~StopCondition()=defaultktt::StopConditionvirtual
diff --git a/Docs/classktt_1_1_configuration_count.html b/Docs/classktt_1_1_configuration_count.html index 4cb17c5d..dfbe1834 100644 --- a/Docs/classktt_1_1_configuration_count.html +++ b/Docs/classktt_1_1_configuration_count.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::ConfigurationCount Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::ConfigurationCount Class Reference
+
ktt::ConfigurationCount Class Reference
@@ -111,7 +109,7 @@
- @@ -126,11 +124,19 @@ + + + + + + + +

+

Public Member Functions

 ConfigurationCount (const uint64_t count)
 
- Public Member Functions inherited from ktt::StopCondition
virtual ~StopCondition ()=default
 
virtual bool IsFulfilled () const =0
 
virtual void Initialize (const uint64_t configurationsCount)=0
 
virtual void Update (const KernelResult &result)=0
 
virtual std::string GetStatusString () const =0
 

Detailed Description

-

Class which implements stop condition based on count of explored configurations.

+

Class which implements stop condition based on count of explored configurations.

Constructor & Destructor Documentation

- +

◆ ConfigurationCount()

- +

◆ Initialize()

- +

◆ Update()

diff --git a/Docs/classktt_1_1_configuration_duration-members.html b/Docs/classktt_1_1_configuration_duration-members.html index 1218886f..e719e8b8 100644 --- a/Docs/classktt_1_1_configuration_duration-members.html +++ b/Docs/classktt_1_1_configuration_duration-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,25 +91,24 @@
-
-
ktt::ConfigurationDuration Member List
+
ktt::ConfigurationDuration Member List

This is the complete list of members for ktt::ConfigurationDuration, including all inherited members.

- + - + - +
ConfigurationDuration(const double duration)ktt::ConfigurationDurationexplicit
GetStatusString() const overridektt::ConfigurationDurationvirtual
GetStatusString() const overridektt::ConfigurationDurationvirtual
Initialize(const uint64_t configurationsCount) overridektt::ConfigurationDurationvirtual
IsFulfilled() const overridektt::ConfigurationDurationvirtual
IsFulfilled() const overridektt::ConfigurationDurationvirtual
Update(const KernelResult &result) overridektt::ConfigurationDurationvirtual
~StopCondition()=defaultktt::StopConditionvirtual
~StopCondition()=defaultktt::StopConditionvirtual
diff --git a/Docs/classktt_1_1_configuration_duration.html b/Docs/classktt_1_1_configuration_duration.html index 969a5f97..b00c0ed2 100644 --- a/Docs/classktt_1_1_configuration_duration.html +++ b/Docs/classktt_1_1_configuration_duration.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::ConfigurationDuration Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::ConfigurationDuration Class Reference
+
ktt::ConfigurationDuration Class Reference
@@ -111,7 +109,7 @@
- @@ -126,11 +124,19 @@ + + + + + + + +

+

Public Member Functions

 ConfigurationDuration (const double duration)
 
- Public Member Functions inherited from ktt::StopCondition
virtual ~StopCondition ()=default
 
virtual bool IsFulfilled () const =0
 
virtual void Initialize (const uint64_t configurationsCount)=0
 
virtual void Update (const KernelResult &result)=0
 
virtual std::string GetStatusString () const =0
 

Detailed Description

-

Class which implements stop condition based on computation duration of a configuration.

+

Class which implements stop condition based on computation duration of a configuration.

Constructor & Destructor Documentation

- +

◆ ConfigurationDuration()

- +

◆ Initialize()

- +

◆ Update()

diff --git a/Docs/classktt_1_1_configuration_fraction-members.html b/Docs/classktt_1_1_configuration_fraction-members.html index 479a0a90..24b414e1 100644 --- a/Docs/classktt_1_1_configuration_fraction-members.html +++ b/Docs/classktt_1_1_configuration_fraction-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,25 +91,24 @@
-
-
ktt::ConfigurationFraction Member List
+
ktt::ConfigurationFraction Member List

This is the complete list of members for ktt::ConfigurationFraction, including all inherited members.

- + - + - +
ConfigurationFraction(const double fraction)ktt::ConfigurationFractionexplicit
GetStatusString() const overridektt::ConfigurationFractionvirtual
GetStatusString() const overridektt::ConfigurationFractionvirtual
Initialize(const uint64_t configurationsCount) overridektt::ConfigurationFractionvirtual
IsFulfilled() const overridektt::ConfigurationFractionvirtual
IsFulfilled() const overridektt::ConfigurationFractionvirtual
Update(const KernelResult &result) overridektt::ConfigurationFractionvirtual
~StopCondition()=defaultktt::StopConditionvirtual
~StopCondition()=defaultktt::StopConditionvirtual
diff --git a/Docs/classktt_1_1_configuration_fraction.html b/Docs/classktt_1_1_configuration_fraction.html index 9a622f9e..ac514802 100644 --- a/Docs/classktt_1_1_configuration_fraction.html +++ b/Docs/classktt_1_1_configuration_fraction.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::ConfigurationFraction Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::ConfigurationFraction Class Reference
+
ktt::ConfigurationFraction Class Reference
@@ -111,7 +109,7 @@
- @@ -126,11 +124,19 @@ + + + + + + + +

+

Public Member Functions

 ConfigurationFraction (const double fraction)
 
- Public Member Functions inherited from ktt::StopCondition
virtual ~StopCondition ()=default
 
virtual bool IsFulfilled () const =0
 
virtual void Initialize (const uint64_t configurationsCount)=0
 
virtual void Update (const KernelResult &result)=0
 
virtual std::string GetStatusString () const =0
 

Detailed Description

-

Class which implements stop condition based on fraction of explored configurations.

+

Class which implements stop condition based on fraction of explored configurations.

Constructor & Destructor Documentation

- +

◆ ConfigurationFraction()

- +

◆ Initialize()

- +

◆ Update()

diff --git a/Docs/classktt_1_1_deterministic_searcher-members.html b/Docs/classktt_1_1_deterministic_searcher-members.html index 49fe49dd..e78b4d78 100644 --- a/Docs/classktt_1_1_deterministic_searcher-members.html +++ b/Docs/classktt_1_1_deterministic_searcher-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,35 +91,34 @@
-
-
ktt::DeterministicSearcher Member List
+
ktt::DeterministicSearcher Member List

This is the complete list of members for ktt::DeterministicSearcher, including all inherited members.

- + - + - + - + - + - + - + - +
CalculateNextConfiguration(const KernelResult &previousResult) overridektt::DeterministicSearchervirtual
DeterministicSearcher()ktt::DeterministicSearcher
DeterministicSearcher()ktt::DeterministicSearcher
GetConfiguration(const uint64_t index) constktt::Searcher
GetConfigurationsCount() constktt::Searcher
GetConfigurationsCount() constktt::Searcher
GetCurrentConfiguration() const overridektt::DeterministicSearchervirtual
GetExploredIndices() constktt::Searcher
GetExploredIndices() constktt::Searcher
GetIndex(const KernelConfiguration &configuration) constktt::Searcher
GetNeighbourConfigurations(const KernelConfiguration &configuration, const uint64_t maxDifferences, const size_t maxNeighbours=3) constktt::Searcher
GetNeighbourConfigurations(const KernelConfiguration &configuration, const uint64_t maxDifferences, const size_t maxNeighbours=3) constktt::Searcher
GetRandomConfiguration() constktt::Searcher
Initialize(const ConfigurationData &data)ktt::Searcher
Initialize(const ConfigurationData &data)ktt::Searcher
IsInitialized() constktt::Searcher
OnInitialize()ktt::Searchervirtual
OnInitialize()ktt::Searchervirtual
OnReset() overridektt::DeterministicSearchervirtual
Reset()ktt::Searcher
Reset()ktt::Searcher
Searcher()ktt::Searcher
~Searcher()=defaultktt::Searchervirtual
~Searcher()=defaultktt::Searchervirtual
diff --git a/Docs/classktt_1_1_deterministic_searcher.html b/Docs/classktt_1_1_deterministic_searcher.html index 25347e64..f09e552c 100644 --- a/Docs/classktt_1_1_deterministic_searcher.html +++ b/Docs/classktt_1_1_deterministic_searcher.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::DeterministicSearcher Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::DeterministicSearcher Class Reference
+
ktt::DeterministicSearcher Class Reference
@@ -111,7 +109,7 @@
- @@ -126,6 +124,12 @@ + + + + + + @@ -148,9 +152,9 @@

+

Public Member Functions

 DeterministicSearcher ()
 
 
virtual void OnInitialize ()
 
virtual void OnReset ()
 
virtual bool CalculateNextConfiguration (const KernelResult &previousResult)=0
 
virtual KernelConfiguration GetCurrentConfiguration () const =0
 
 Searcher ()
 
KernelConfiguration GetConfiguration (const uint64_t index) const
 

Detailed Description

-

Searcher which explores configurations in deterministic order.

+

Searcher which explores configurations in deterministic order.

Constructor & Destructor Documentation

- +

◆ DeterministicSearcher()

Member Function Documentation

-
+

◆ CalculateNextConfiguration()

- +

◆ OnReset()

diff --git a/Docs/classktt_1_1_device_info-members.html b/Docs/classktt_1_1_device_info-members.html index eed2600f..c210d873 100644 --- a/Docs/classktt_1_1_device_info-members.html +++ b/Docs/classktt_1_1_device_info-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,40 +91,39 @@
-
-
ktt::DeviceInfo Member List
+
ktt::DeviceInfo Member List

This is the complete list of members for ktt::DeviceInfo, including all inherited members.

- + - + - + - + - + - + - + - + - + - +
DeviceInfo(const DeviceIndex index, const std::string &name)ktt::DeviceInfoexplicit
GetDeviceType() constktt::DeviceInfo
GetDeviceType() constktt::DeviceInfo
GetDeviceTypeString() constktt::DeviceInfo
GetExtensions() constktt::DeviceInfo
GetExtensions() constktt::DeviceInfo
GetGlobalMemorySize() constktt::DeviceInfo
GetIndex() constktt::DeviceInfo
GetIndex() constktt::DeviceInfo
GetLocalMemorySize() constktt::DeviceInfo
GetMaxComputeUnits() constktt::DeviceInfo
GetMaxComputeUnits() constktt::DeviceInfo
GetMaxConstantBufferSize() constktt::DeviceInfo
GetMaxWorkGroupSize() constktt::DeviceInfo
GetMaxWorkGroupSize() constktt::DeviceInfo
GetName() constktt::DeviceInfo
GetString() constktt::DeviceInfo
GetString() constktt::DeviceInfo
GetVendor() constktt::DeviceInfo
SetDeviceType(const DeviceType deviceType)ktt::DeviceInfo
SetDeviceType(const DeviceType deviceType)ktt::DeviceInfo
SetExtensions(const std::string &extensions)ktt::DeviceInfo
SetGlobalMemorySize(const uint64_t globalMemorySize)ktt::DeviceInfo
SetGlobalMemorySize(const uint64_t globalMemorySize)ktt::DeviceInfo
SetLocalMemorySize(const uint64_t localMemorySize)ktt::DeviceInfo
SetMaxComputeUnits(const uint32_t maxComputeUnits)ktt::DeviceInfo
SetMaxComputeUnits(const uint32_t maxComputeUnits)ktt::DeviceInfo
SetMaxConstantBufferSize(const uint64_t maxConstantBufferSize)ktt::DeviceInfo
SetMaxWorkGroupSize(const uint64_t maxWorkGroupSize)ktt::DeviceInfo
SetMaxWorkGroupSize(const uint64_t maxWorkGroupSize)ktt::DeviceInfo
SetVendor(const std::string &vendor)ktt::DeviceInfo
diff --git a/Docs/classktt_1_1_device_info.html b/Docs/classktt_1_1_device_info.html index 3c9229b0..6b1b672e 100644 --- a/Docs/classktt_1_1_device_info.html +++ b/Docs/classktt_1_1_device_info.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::DeviceInfo Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::DeviceInfo Class Reference
+
ktt::DeviceInfo Class Reference

#include <DeviceInfo.h>

- @@ -148,9 +146,9 @@

+

Public Member Functions

 DeviceInfo (const DeviceIndex index, const std::string &name)
 
 

Detailed Description

-

Class which holds information about a compute API device.

+

Class which holds information about a compute API device.

Constructor & Destructor Documentation

- +

◆ DeviceInfo()

@@ -183,7 +181,7 @@

-

Constructor which creates new device info object.

Parameters
+

Constructor which creates new device info object.

Parameters
@@ -194,7 +192,7 @@

Member Function Documentation

- +

◆ GetDeviceType()

@@ -208,11 +206,11 @@

-

Getter for type of device. See DeviceType for more information.

Returns
Type of device.
+

Getter for type of device. See DeviceType for more information.

Returns
Type of device.

- +

◆ GetDeviceTypeString()

@@ -226,11 +224,11 @@

-

Getter for type of device converted to string. See DeviceType for more information.

Returns
Type of device converted to string.
+

Getter for type of device converted to string. See DeviceType for more information.

Returns
Type of device converted to string.

- +

◆ GetExtensions()

- +

◆ GetGlobalMemorySize()

- +

◆ GetIndex()

- +

◆ GetLocalMemorySize()

- +

◆ GetMaxComputeUnits()

- +

◆ GetMaxConstantBufferSize()

- +

◆ GetMaxWorkGroupSize()

- +

◆ GetName()

- +

◆ GetString()

- +

◆ GetVendor()

- +

◆ SetDeviceType()

indexIndex of device assigned by KTT framework.
nameName of device retrieved from compute API.
deviceTypeType of device.
@@ -434,7 +432,7 @@

+

◆ SetExtensions()

@@ -449,7 +447,7 @@

-

Setter for list of supported device extensions.

Parameters
+

Setter for list of supported device extensions.

Parameters
extensionsList of supported device extensions.
@@ -458,7 +456,7 @@

+

◆ SetGlobalMemorySize()

@@ -473,7 +471,7 @@

-

Setter for global memory size of device.

Parameters
+

Setter for global memory size of device.

Parameters
globalMemorySizeGlobal memory size of device.
@@ -482,7 +480,7 @@

+

◆ SetLocalMemorySize()

@@ -497,7 +495,7 @@

-

Setter for local memory size of device.

Parameters
+

Setter for local memory size of device.

Parameters
localMemorySizeLocal memory size of device.
@@ -506,7 +504,7 @@

+

◆ SetMaxComputeUnits()

@@ -521,7 +519,7 @@

-

Setter for maximum compute units count of device.

Parameters
+

Setter for maximum compute units count of device.

Parameters
maxComputeUnitsMaximum compute units count of device.
@@ -530,7 +528,7 @@

+

◆ SetMaxConstantBufferSize()

@@ -545,7 +543,7 @@

-

Setter for constant memory size of device.

Parameters
+

Setter for constant memory size of device.

Parameters
maxConstantBufferSizeConstant memory size of device.
@@ -554,7 +552,7 @@

+

◆ SetMaxWorkGroupSize()

@@ -569,7 +567,7 @@

-

Setter for maximum work-group size of device.

Parameters
+

Setter for maximum work-group size of device.

Parameters
maxWorkGroupSizeMaximum work-group size of device.
@@ -578,7 +576,7 @@

+

◆ SetVendor()

diff --git a/Docs/classktt_1_1_dimension_vector-members.html b/Docs/classktt_1_1_dimension_vector-members.html index 35c44005..1d9c2d6e 100644 --- a/Docs/classktt_1_1_dimension_vector-members.html +++ b/Docs/classktt_1_1_dimension_vector-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,41 +91,40 @@
-
-
ktt::DimensionVector Member List
+
ktt::DimensionVector Member List

This is the complete list of members for ktt::DimensionVector, including all inherited members.

- + - + - + - + - + - + - + - + - + - + - +
DimensionVector()ktt::DimensionVector
DimensionVector(const size_t sizeX)ktt::DimensionVectorexplicit
DimensionVector(const size_t sizeX)ktt::DimensionVectorexplicit
DimensionVector(const size_t sizeX, const size_t sizeY)ktt::DimensionVectorexplicit
DimensionVector(const size_t sizeX, const size_t sizeY, const size_t sizeZ)ktt::DimensionVectorexplicit
DimensionVector(const size_t sizeX, const size_t sizeY, const size_t sizeZ)ktt::DimensionVectorexplicit
DimensionVector(const std::vector< size_t > &vector)ktt::DimensionVectorexplicit
Divide(const DimensionVector &divisor)ktt::DimensionVector
Divide(const DimensionVector &divisor)ktt::DimensionVector
GetSize(const ModifierDimension modifierDimension) constktt::DimensionVector
GetSizeX() constktt::DimensionVector
GetSizeX() constktt::DimensionVector
GetSizeY() constktt::DimensionVector
GetSizeZ() constktt::DimensionVector
GetSizeZ() constktt::DimensionVector
GetString() constktt::DimensionVector
GetTotalSize() constktt::DimensionVector
GetTotalSize() constktt::DimensionVector
GetVector() constktt::DimensionVector
ModifyByValue(const size_t value, const ModifierAction modifierAction, const ModifierDimension modifierDimension)ktt::DimensionVector
ModifyByValue(const size_t value, const ModifierAction modifierAction, const ModifierDimension modifierDimension)ktt::DimensionVector
Multiply(const DimensionVector &factor)ktt::DimensionVector
operator!=(const DimensionVector &other) constktt::DimensionVector
operator!=(const DimensionVector &other) constktt::DimensionVector
operator==(const DimensionVector &other) constktt::DimensionVector
RoundUp(const DimensionVector &multiple)ktt::DimensionVector
RoundUp(const DimensionVector &multiple)ktt::DimensionVector
SetSize(const ModifierDimension modifierDimension, const size_t size)ktt::DimensionVector
SetSizeX(const size_t sizeX)ktt::DimensionVector
SetSizeX(const size_t sizeX)ktt::DimensionVector
SetSizeY(const size_t sizeY)ktt::DimensionVector
SetSizeZ(const size_t sizeZ)ktt::DimensionVector
SetSizeZ(const size_t sizeZ)ktt::DimensionVector
diff --git a/Docs/classktt_1_1_dimension_vector.html b/Docs/classktt_1_1_dimension_vector.html index eabecbb2..e44cc08b 100644 --- a/Docs/classktt_1_1_dimension_vector.html +++ b/Docs/classktt_1_1_dimension_vector.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::DimensionVector Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::DimensionVector Class Reference
+
ktt::DimensionVector Class Reference

#include <DimensionVector.h>

- @@ -150,9 +148,9 @@

+

Public Member Functions

 DimensionVector ()
 
 

Detailed Description

-

Class which holds information about either global or local thread size of a single kernel.

+

Class which holds information about either global or local thread size of a single kernel.

Constructor & Destructor Documentation

- +

◆ DimensionVector() [1/5]

- +

◆ DimensionVector() [2/5]

@@ -193,7 +191,7 @@

-

Constructor which creates dimension vector with specified thread size in dimension x and thread sizes in other dimensions set to 1.

Parameters
+

Constructor which creates dimension vector with specified thread size in dimension x and thread sizes in other dimensions set to 1.

Parameters
sizeXThread size in dimension x.
@@ -202,7 +200,7 @@

+

◆ DimensionVector() [3/5]

@@ -235,7 +233,7 @@

-

Constructor which creates dimension vector with specified thread sizes in dimensions x and y and thread size in dimension z set to 1.

Parameters
+

Constructor which creates dimension vector with specified thread sizes in dimensions x and y and thread size in dimension z set to 1.

Parameters
@@ -245,7 +243,7 @@

+

◆ DimensionVector() [4/5]

sizeXThread size in dimension x.
sizeYThread size in dimension y.
@@ -295,7 +293,7 @@

+

◆ DimensionVector() [5/5]

sizeXThread size in dimension x.
sizeYThread size in dimension y.
vectorSource vector for dimension vector thread sizes.
@@ -328,7 +326,7 @@

Member Function Documentation

- +

◆ Divide()

- +

◆ GetSizeZ()

- +

◆ GetString()

- +

◆ GetTotalSize()

- +

◆ GetVector()

- +

◆ ModifyByValue()

- +

◆ operator==()

- +

◆ RoundUp()

@@ -604,7 +602,7 @@

-

Rounds up thread sizes to be multiple of values provided by specified dimension vector.

Parameters
+

Rounds up thread sizes to be multiple of values provided by specified dimension vector.

Parameters
multipleSource of values for thread size round up.
@@ -613,7 +611,7 @@

+

◆ SetSize()

@@ -638,7 +636,7 @@

-

Setter for thread size in specified dimension.

Parameters
+

Setter for thread size in specified dimension.

Parameters
@@ -648,7 +646,7 @@

+

◆ SetSizeX()

modifierDimensionSpecifies which dimension size will be set.
sizeThread size in specified dimension.
sizeXThread size in dimension x.
@@ -672,7 +670,7 @@

+

◆ SetSizeY()

@@ -687,7 +685,7 @@

-

Setter for thread size in dimension y.

Parameters
+

Setter for thread size in dimension y.

Parameters
sizeYThread size in dimension y.
@@ -696,7 +694,7 @@

+

◆ SetSizeZ()

diff --git a/Docs/classktt_1_1_kernel_configuration-members.html b/Docs/classktt_1_1_kernel_configuration-members.html index f27093c0..ead710e9 100644 --- a/Docs/classktt_1_1_kernel_configuration-members.html +++ b/Docs/classktt_1_1_kernel_configuration-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,29 +91,28 @@
-
-
ktt::KernelConfiguration Member List
+
ktt::KernelConfiguration Member List

This is the complete list of members for ktt::KernelConfiguration, including all inherited members.

- + - + - + - + - +
GenerateNeighbours(const std::string &parameter, const std::vector< ParameterPair > &pairs) constktt::KernelConfiguration
GeneratePrefix() constktt::KernelConfiguration
GeneratePrefix() constktt::KernelConfiguration
GetPairs() constktt::KernelConfiguration
GetString() constktt::KernelConfiguration
GetString() constktt::KernelConfiguration
IsValid() constktt::KernelConfiguration
KernelConfiguration()ktt::KernelConfiguration
KernelConfiguration()ktt::KernelConfiguration
KernelConfiguration(const std::vector< ParameterPair > &pairs)ktt::KernelConfigurationexplicit
Merge(const KernelConfiguration &other)ktt::KernelConfiguration
Merge(const KernelConfiguration &other)ktt::KernelConfiguration
operator!=(const KernelConfiguration &other) constktt::KernelConfiguration
operator==(const KernelConfiguration &other) constktt::KernelConfiguration
operator==(const KernelConfiguration &other) constktt::KernelConfiguration
diff --git a/Docs/classktt_1_1_kernel_configuration.html b/Docs/classktt_1_1_kernel_configuration.html index 19b757f2..6a6548e8 100644 --- a/Docs/classktt_1_1_kernel_configuration.html +++ b/Docs/classktt_1_1_kernel_configuration.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::KernelConfiguration Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::KernelConfiguration Class Reference
+
ktt::KernelConfiguration Class Reference

#include <KernelConfiguration.h>

- @@ -126,9 +124,9 @@

+

Public Member Functions

 KernelConfiguration ()
 
 

Detailed Description

-

Class which describes single kernel tuning configuration.

+

Class which describes single kernel tuning configuration.

Constructor & Destructor Documentation

- +

◆ KernelConfiguration() [1/2]

- +

◆ KernelConfiguration() [2/2]

@@ -169,7 +167,7 @@

-

Constructs kernel configuration with the specified parameter pairs.

Parameters
+

Constructs kernel configuration with the specified parameter pairs.

Parameters
pairsValues of tuning parameters for the configuration.
@@ -179,7 +177,7 @@

Member Function Documentation

- +

◆ GenerateNeighbours()

@@ -204,7 +202,7 @@

-

Generates neighbour configurations which differ in the specified parameter.

Parameters
+

Generates neighbour configurations which differ in the specified parameter.

Parameters
@@ -215,7 +213,7 @@

+

◆ GeneratePrefix()

- +

◆ GetPairs()

@@ -247,11 +245,11 @@

-

Returns values of tuning parameters for kernel configuration.

Returns
Values of tuning parameters. See ParameterPair for more information.
+

Returns values of tuning parameters for kernel configuration.

Returns
Values of tuning parameters. See ParameterPair for more information.

- +

◆ GetString()

- +

◆ IsValid()

- +

◆ Merge()

parameterParameter which will be different in the generated configurations. All other parameters will remain identical.
pairsAll valid pairs for the previously specified parameter.
otherConfiguration that will be merged into this one.
@@ -311,7 +309,7 @@

+

◆ operator!=()

@@ -326,7 +324,7 @@

-

Checks whether kernel configuration is equal to other. I.e., it has different parameter pairs or the pairs have different values.

Parameters
+

Checks whether kernel configuration is not equal to other. I.e., it has different parameter pairs or the pairs have different values.

Parameters
otherComparison target.
@@ -336,7 +334,7 @@

+

◆ operator==()

diff --git a/Docs/classktt_1_1_kernel_profiling_counter-members.html b/Docs/classktt_1_1_kernel_profiling_counter-members.html index d0f0196a..81d9e702 100644 --- a/Docs/classktt_1_1_kernel_profiling_counter-members.html +++ b/Docs/classktt_1_1_kernel_profiling_counter-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,31 +91,30 @@
-
-
ktt::KernelProfilingCounter Member List
+
ktt::KernelProfilingCounter Member List

This is the complete list of members for ktt::KernelProfilingCounter, including all inherited members.

- + - + - + - + - + - +
GetName() constktt::KernelProfilingCounter
GetType() constktt::KernelProfilingCounter
GetType() constktt::KernelProfilingCounter
GetValueDouble() constktt::KernelProfilingCounter
GetValueInt() constktt::KernelProfilingCounter
GetValueInt() constktt::KernelProfilingCounter
GetValueUint() constktt::KernelProfilingCounter
KernelProfilingCounter()=defaultktt::KernelProfilingCounter
KernelProfilingCounter()=defaultktt::KernelProfilingCounter
KernelProfilingCounter(const std::string &name, const ProfilingCounterType type, const int64_t value)ktt::KernelProfilingCounterexplicit
KernelProfilingCounter(const std::string &name, const ProfilingCounterType type, const uint64_t value)ktt::KernelProfilingCounterexplicit
KernelProfilingCounter(const std::string &name, const ProfilingCounterType type, const uint64_t value)ktt::KernelProfilingCounterexplicit
KernelProfilingCounter(const std::string &name, const ProfilingCounterType type, const double value)ktt::KernelProfilingCounterexplicit
operator!=(const KernelProfilingCounter &other) constktt::KernelProfilingCounter
operator!=(const KernelProfilingCounter &other) constktt::KernelProfilingCounter
operator<(const KernelProfilingCounter &other) constktt::KernelProfilingCounter
operator==(const KernelProfilingCounter &other) constktt::KernelProfilingCounter
operator==(const KernelProfilingCounter &other) constktt::KernelProfilingCounter
diff --git a/Docs/classktt_1_1_kernel_profiling_counter.html b/Docs/classktt_1_1_kernel_profiling_counter.html index 970e9a98..ee111ad1 100644 --- a/Docs/classktt_1_1_kernel_profiling_counter.html +++ b/Docs/classktt_1_1_kernel_profiling_counter.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::KernelProfilingCounter Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::KernelProfilingCounter Class Reference
+
ktt::KernelProfilingCounter Class Reference

#include <KernelProfilingCounter.h>

- @@ -130,9 +128,9 @@

+

Public Member Functions

 KernelProfilingCounter ()=default
 
 

Detailed Description

-

Class which holds information about single profiling counter.

+

Class which holds information about single profiling counter.

Constructor & Destructor Documentation

- +

◆ KernelProfilingCounter() [1/4]

- +

◆ KernelProfilingCounter() [2/4]

@@ -197,7 +195,7 @@

-

Constructor which initializes a profiling counter with specified name, value and type.

Parameters
+

Constructor which initializes a profiling counter with specified name, value and type.

Parameters
@@ -208,7 +206,7 @@

+

◆ KernelProfilingCounter() [3/4]

nameName of a profiling counter.
typeType of a profiling counter. See ProfilingCounterType for more information.
@@ -258,7 +256,7 @@

+

◆ KernelProfilingCounter() [4/4]

nameName of a profiling counter.
typeType of a profiling counter. See ProfilingCounterType for more information.
@@ -309,7 +307,7 @@

Member Function Documentation

- +

◆ GetName()

- +

◆ GetType()

@@ -341,11 +339,11 @@

-

Getter for type of a profiling counter. Type of a profiling counter is used to determine which field inside ProfilingCounterValue needs to accessed in order to retrieve a valid value.

Returns
Type of a profiling counter. See ProfilingCounterType for more information.
+

Getter for type of a profiling counter. Type of a profiling counter is used to determine which field inside ProfilingCounterValue needs to be accessed in order to retrieve a valid value.

Returns
Type of a profiling counter. See ProfilingCounterType for more information.

- +

◆ GetValueDouble()

- +

◆ GetValueInt()

- +

◆ GetValueUint()

- +

◆ operator!=()

nameName of a profiling counter.
typeType of a profiling counter. See ProfilingCounterType for more information.
otherComparison target.
@@ -424,7 +422,7 @@

+

◆ operator<()

@@ -439,7 +437,7 @@

-

Checks whether profiling counter is lesser than other. I.e., its name is lesser than other's name.

Parameters
+

Checks whether profiling counter is less than other. I.e., its name is less than other's name.

Parameters
otherComparison target.
@@ -449,7 +447,7 @@

+

◆ operator==()

diff --git a/Docs/classktt_1_1_kernel_profiling_data-members.html b/Docs/classktt_1_1_kernel_profiling_data-members.html index 772c0134..bd7761b0 100644 --- a/Docs/classktt_1_1_kernel_profiling_data-members.html +++ b/Docs/classktt_1_1_kernel_profiling_data-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,31 +91,30 @@
-
-
ktt::KernelProfilingData Member List
+
ktt::KernelProfilingData Member List

This is the complete list of members for ktt::KernelProfilingData, including all inherited members.

- + - + - + - + - + - +
AddCounter(const KernelProfilingCounter &counter)ktt::KernelProfilingData
DecreaseRemainingProfilingRuns()ktt::KernelProfilingData
DecreaseRemainingProfilingRuns()ktt::KernelProfilingData
GetCounter(const std::string &name) constktt::KernelProfilingData
GetCounters() constktt::KernelProfilingData
GetCounters() constktt::KernelProfilingData
GetRemainingProfilingRuns() constktt::KernelProfilingData
HasCounter(const std::string &name) constktt::KernelProfilingData
HasCounter(const std::string &name) constktt::KernelProfilingData
HasRemainingProfilingRuns() constktt::KernelProfilingData
IsValid() constktt::KernelProfilingData
IsValid() constktt::KernelProfilingData
KernelProfilingData()=defaultktt::KernelProfilingData
KernelProfilingData(const uint64_t remainingRuns)ktt::KernelProfilingDataexplicit
KernelProfilingData(const uint64_t remainingRuns)ktt::KernelProfilingDataexplicit
KernelProfilingData(const std::vector< KernelProfilingCounter > &counters)ktt::KernelProfilingDataexplicit
SetCounters(const std::vector< KernelProfilingCounter > &counters)ktt::KernelProfilingData
SetCounters(const std::vector< KernelProfilingCounter > &counters)ktt::KernelProfilingData
diff --git a/Docs/classktt_1_1_kernel_profiling_data.html b/Docs/classktt_1_1_kernel_profiling_data.html index 2b2526b8..c7eda043 100644 --- a/Docs/classktt_1_1_kernel_profiling_data.html +++ b/Docs/classktt_1_1_kernel_profiling_data.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::KernelProfilingData Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::KernelProfilingData Class Reference
+
ktt::KernelProfilingData Class Reference

#include <KernelProfilingData.h>

- @@ -130,9 +128,9 @@

+

Public Member Functions

 KernelProfilingData ()=default
 
 

Detailed Description

-

Class which holds profiling information about a kernel run under specific configuration.

+

Class which holds profiling information about a kernel run under specific configuration.

Constructor & Destructor Documentation

- +

◆ KernelProfilingData() [1/3]

- +

◆ KernelProfilingData() [2/3]

@@ -181,7 +179,7 @@

-

Constructor which creates invalid profiling data and initializes number of remaining kernel runs needed to gather valid profiling counters.

Parameters
+

Constructor which creates invalid profiling data and initializes number of remaining kernel runs needed to gather valid profiling counters.

Parameters
remainingRunsNumber of remaining kernel runs needed to gather valid profiling counters.
@@ -190,7 +188,7 @@

+

◆ KernelProfilingData() [3/3]

@@ -213,7 +211,7 @@

-

Constructor which creates valid profiling data and fills the structure with provided profiling counters.

Parameters
+

Constructor which creates valid profiling data and fills the structure with provided profiling counters.

Parameters
countersVector of profiling counters.
@@ -223,7 +221,7 @@

Member Function Documentation

- +

◆ AddCounter()

- +

◆ GetCounter()

- +

◆ GetRemainingProfilingRuns()

- +

◆ HasCounter()

- +

◆ IsValid()

- +

◆ SetCounters()

diff --git a/Docs/classktt_1_1_kernel_result-members.html b/Docs/classktt_1_1_kernel_result-members.html index 1af92b3b..6255860f 100644 --- a/Docs/classktt_1_1_kernel_result-members.html +++ b/Docs/classktt_1_1_kernel_result-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,37 +91,36 @@
-
-
ktt::KernelResult Member List
+
ktt::KernelResult Member List

This is the complete list of members for ktt::KernelResult, including all inherited members.

- + - + - + - + - + - + - + - + - +
GetConfiguration() constktt::KernelResult
GetExtraDuration() constktt::KernelResult
GetExtraDuration() constktt::KernelResult
GetExtraOverhead() constktt::KernelResult
GetKernelDuration() constktt::KernelResult
GetKernelDuration() constktt::KernelResult
GetKernelName() constktt::KernelResult
GetKernelOverhead() constktt::KernelResult
GetKernelOverhead() constktt::KernelResult
GetResults() constktt::KernelResult
GetStatus() constktt::KernelResult
GetStatus() constktt::KernelResult
GetTotalDuration() constktt::KernelResult
GetTotalOverhead() constktt::KernelResult
GetTotalOverhead() constktt::KernelResult
HasRemainingProfilingRuns() constktt::KernelResult
IsValid() constktt::KernelResult
IsValid() constktt::KernelResult
KernelResult()ktt::KernelResult
KernelResult(const std::string &kernelName, const KernelConfiguration &configuration)ktt::KernelResultexplicit
KernelResult(const std::string &kernelName, const KernelConfiguration &configuration)ktt::KernelResultexplicit
KernelResult(const std::string &kernelName, const KernelConfiguration &configuration, const std::vector< ComputationResult > &results)ktt::KernelResultexplicit
SetExtraDuration(const Nanoseconds duration)ktt::KernelResult
SetExtraDuration(const Nanoseconds duration)ktt::KernelResult
SetExtraOverhead(const Nanoseconds overhead)ktt::KernelResult
SetStatus(const ResultStatus status)ktt::KernelResult
SetStatus(const ResultStatus status)ktt::KernelResult
diff --git a/Docs/classktt_1_1_kernel_result.html b/Docs/classktt_1_1_kernel_result.html index bfb5fa21..36df6867 100644 --- a/Docs/classktt_1_1_kernel_result.html +++ b/Docs/classktt_1_1_kernel_result.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::KernelResult Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::KernelResult Class Reference
+
ktt::KernelResult Class Reference

#include <KernelResult.h>

- @@ -142,9 +140,9 @@

+

Public Member Functions

 KernelResult ()
 
 

Detailed Description

-

Class which holds aggregate result data from kernel computation such as individual computation results, configuration and status.

+

Class which holds aggregate result data from kernel computation such as individual computation results, configuration and status.

Constructor & Destructor Documentation

- +

◆ KernelResult() [1/3]

- +

◆ KernelResult() [2/3]

@@ -195,7 +193,7 @@

-

Constructor which creates kernel result for the specified kernel and configuration.

Parameters
+

Constructor which creates kernel result for the specified kernel and configuration.

Parameters
@@ -205,7 +203,7 @@

+

◆ KernelResult() [3/3]

kernelNameName of a kernel tied to the result.
configurationConfiguration tied to the result.
@@ -256,7 +254,7 @@

Member Function Documentation

- +

◆ GetConfiguration()

- +

◆ GetExtraDuration()

- +

◆ GetExtraOverhead()

- +

◆ GetKernelDuration()

- +

◆ GetKernelName()

- +

◆ GetKernelOverhead()

- +

◆ GetResults()

@@ -378,11 +376,11 @@

-

Retrieves partial results from computations performed as part of the kernel launch.

Returns
Partial results from computations. See ComputationResult for more information.
+

Retrieves partial results from computations performed as part of the kernel launch.

Returns
Partial results from computations. See ComputationResult for more information.

- +

◆ GetStatus()

@@ -396,11 +394,11 @@

-

Retrieves status of the kernel result.

Returns
Status of the kernel result. See ResultStatus for more information.
+

Retrieves status of the kernel result.

Returns
Status of the kernel result. See ResultStatus for more information.

- +

◆ GetTotalDuration()

- +

◆ GetTotalOverhead()

- +

◆ HasRemainingProfilingRuns()

- +

◆ IsValid()

- +

◆ SetExtraDuration()

kernelNameName of a kernel tied to the result.
configurationConfiguration tied to the result.
durationExtra computation duration.
@@ -496,7 +494,7 @@

+

◆ SetExtraOverhead()

@@ -511,7 +509,7 @@

-

Sets extra overhead. This includes for example duration of buffer transfers performed in custom kernel launcher.

Parameters
+

Sets extra overhead. This includes for example duration of buffer transfers performed in custom kernel launcher.

Parameters
overheadExtra computation overhead.
@@ -520,7 +518,7 @@

+

◆ SetStatus()

diff --git a/Docs/classktt_1_1_ktt_exception-members.html b/Docs/classktt_1_1_ktt_exception-members.html index 7b6a43ca..1a8d2297 100644 --- a/Docs/classktt_1_1_ktt_exception-members.html +++ b/Docs/classktt_1_1_ktt_exception-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,22 +91,21 @@
-
-
ktt::KttException Member List
+
ktt::KttException Member List

This is the complete list of members for ktt::KttException, including all inherited members.

- +
GetReason() constktt::KttException
KttException(const std::string &message, const ExceptionReason reason=ExceptionReason::General)ktt::KttException
KttException(const std::string &message, const ExceptionReason reason=ExceptionReason::General)ktt::KttException
what() const noexcept overridektt::KttException
diff --git a/Docs/classktt_1_1_ktt_exception.html b/Docs/classktt_1_1_ktt_exception.html index 5ad04f05..a6af8e08 100644 --- a/Docs/classktt_1_1_ktt_exception.html +++ b/Docs/classktt_1_1_ktt_exception.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::KttException Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::KttException Class Reference
+
ktt::KttException Class Reference
@@ -108,7 +106,7 @@
- @@ -118,9 +116,9 @@

+

Public Member Functions

KTT_API KttException (const std::string &message, const ExceptionReason reason=ExceptionReason::General)
 
 

Detailed Description

-

Exception thrown when invalid usage of KTT API is detected.

+

Exception thrown when invalid usage of KTT API is detected.

Constructor & Destructor Documentation

- +

◆ KttException()

@@ -145,7 +143,7 @@

-

Creates new exception with the specified error message.

Parameters
+

Creates new exception with the specified error message.

Parameters
@@ -156,7 +154,7 @@

Member Function Documentation

- +

◆ GetReason()

- +

◆ what()

@@ -209,7 +207,7 @@

diff --git a/Docs/classktt_1_1_mcmc_searcher-members.html b/Docs/classktt_1_1_mcmc_searcher-members.html index a187e960..5eeb00a2 100644 --- a/Docs/classktt_1_1_mcmc_searcher-members.html +++ b/Docs/classktt_1_1_mcmc_searcher-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@

messageHolds message describing why the exception was thrown.
reasonReason why the exception was thrown.
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,35 +91,35 @@
-
-
ktt::McmcSearcher Member List
+
ktt::McmcSearcher Member List

This is the complete list of members for ktt::McmcSearcher, including all inherited members.

- + - + - + - + - - - - - - - + + + + + + + +
CalculateNextConfiguration(const KernelResult &previousResult) overridektt::McmcSearchervirtual
GetConfiguration(const uint64_t index) constktt::Searcher
GetConfiguration(const uint64_t index) constktt::Searcher
GetConfigurationsCount() constktt::Searcher
GetCurrentConfiguration() const overridektt::McmcSearchervirtual
GetCurrentConfiguration() const overridektt::McmcSearchervirtual
GetExploredIndices() constktt::Searcher
GetIndex(const KernelConfiguration &configuration) constktt::Searcher
GetIndex(const KernelConfiguration &configuration) constktt::Searcher
GetNeighbourConfigurations(const KernelConfiguration &configuration, const uint64_t maxDifferences, const size_t maxNeighbours=3) constktt::Searcher
GetRandomConfiguration() constktt::Searcher
GetRandomConfiguration() constktt::Searcher
Initialize(const ConfigurationData &data)ktt::Searcher
IsInitialized() constktt::Searcher
McmcSearcher(const std::vector< double > &start)ktt::McmcSearcher
OnInitialize() overridektt::McmcSearchervirtual
OnReset() overridektt::McmcSearchervirtual
Reset()ktt::Searcher
Searcher()ktt::Searcher
~Searcher()=defaultktt::Searchervirtual
IsInitialized() constktt::Searcher
McmcSearcher(const KernelConfiguration &start={})ktt::McmcSearcher
McmcSearcher(const std::vector< double > &start)ktt::McmcSearcher
OnInitialize() overridektt::McmcSearchervirtual
OnReset() overridektt::McmcSearchervirtual
Reset()ktt::Searcher
Searcher()ktt::Searcher
~Searcher()=defaultktt::Searchervirtual
diff --git a/Docs/classktt_1_1_mcmc_searcher.html b/Docs/classktt_1_1_mcmc_searcher.html index 384ad7b5..c33e7100 100644 --- a/Docs/classktt_1_1_mcmc_searcher.html +++ b/Docs/classktt_1_1_mcmc_searcher.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::McmcSearcher Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::McmcSearcher Class Reference
+
ktt::McmcSearcher Class Reference
@@ -111,8 +109,10 @@
- + + @@ -126,6 +126,14 @@ + + + + + + + + @@ -148,10 +156,34 @@

+

Public Member Functions

 McmcSearcher (const KernelConfiguration &start={})
 
 McmcSearcher (const std::vector< double > &start)
 
void OnInitialize () override
- Public Member Functions inherited from ktt::Searcher
virtual ~Searcher ()=default
 
virtual void OnInitialize ()
 
virtual void OnReset ()
 
virtual bool CalculateNextConfiguration (const KernelResult &previousResult)=0
 
virtual KernelConfiguration GetCurrentConfiguration () const =0
 
 Searcher ()
 
KernelConfiguration GetConfiguration (const uint64_t index) const
 

Detailed Description

-

Searcher which explores configurations using Markov chain Monte Carlo method.

+

Searcher which explores configurations using Markov chain Monte Carlo method.

Constructor & Destructor Documentation

- -

◆ McmcSearcher()

+ +

◆ McmcSearcher() [1/2]

+ +
+
+ + + + + + + + +
ktt::McmcSearcher::McmcSearcher (const KernelConfigurationstart = {})
+
+

Initializes MCMC searcher.

Parameters
+ + +
startOptional parameter which specifies the starting point for MCMC searcher.
+
+
+ +
+
+ +

◆ McmcSearcher() [2/2]

@@ -165,7 +197,7 @@

-

Initializes MCMC searcher.

Parameters
+

Initializes MCMC searcher.

Parameters
startOptional parameter which specifies starting point for MCMC searcher.
@@ -175,7 +207,7 @@

Member Function Documentation

- +

◆ CalculateNextConfiguration()

- +

◆ OnInitialize()

- +

◆ OnReset()

diff --git a/Docs/classktt_1_1_mcmc_searcher.js b/Docs/classktt_1_1_mcmc_searcher.js index 38dbed47..b01cc0fe 100644 --- a/Docs/classktt_1_1_mcmc_searcher.js +++ b/Docs/classktt_1_1_mcmc_searcher.js @@ -1,5 +1,6 @@ var classktt_1_1_mcmc_searcher = [ + [ "McmcSearcher", "classktt_1_1_mcmc_searcher.html#a26787c6d480b599445f7ba1028e5afae", null ], [ "McmcSearcher", "classktt_1_1_mcmc_searcher.html#a8df1133d653a1e9dbad3c813efb42f73", null ], [ "CalculateNextConfiguration", "classktt_1_1_mcmc_searcher.html#a7686a12442d4c52eba6cf0c50f707934", null ], [ "GetCurrentConfiguration", "classktt_1_1_mcmc_searcher.html#a6f2ddec84f5b53535e8ef162569080a7", null ], diff --git a/Docs/classktt_1_1_parameter_pair-members.html b/Docs/classktt_1_1_parameter_pair-members.html index ecf3609b..68764e5f 100644 --- a/Docs/classktt_1_1_parameter_pair-members.html +++ b/Docs/classktt_1_1_parameter_pair-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,33 +91,32 @@
-
-
ktt::ParameterPair Member List
+
ktt::ParameterPair Member List

This is the complete list of members for ktt::ParameterPair, including all inherited members.

- + - + - + - + - + - + - +
GetName() constktt::ParameterPair
GetParameterValue(const std::vector< ParameterPair > &pairs, const std::string &name)ktt::ParameterPairstatic
GetParameterValue(const std::vector< ParameterPair > &pairs, const std::string &name)ktt::ParameterPairstatic
GetParameterValues(const std::vector< ParameterPair > &pairs, const std::vector< std::string > &names)ktt::ParameterPairstatic
GetString() constktt::ParameterPair
GetString() constktt::ParameterPair
GetValue() constktt::ParameterPair
GetValueDouble() constktt::ParameterPair
GetValueDouble() constktt::ParameterPair
GetValueString() constktt::ParameterPair
HasSameValue(const ParameterPair &other) constktt::ParameterPair
HasSameValue(const ParameterPair &other) constktt::ParameterPair
HasValueDouble() constktt::ParameterPair
ParameterPair()ktt::ParameterPair
ParameterPair()ktt::ParameterPair
ParameterPair(const std::string &name, const uint64_t value)ktt::ParameterPairexplicit
ParameterPair(const std::string &name, const double value)ktt::ParameterPairexplicit
ParameterPair(const std::string &name, const double value)ktt::ParameterPairexplicit
SetValue(const uint64_t value)ktt::ParameterPair
SetValue(const double value)ktt::ParameterPair
SetValue(const double value)ktt::ParameterPair
diff --git a/Docs/classktt_1_1_parameter_pair.html b/Docs/classktt_1_1_parameter_pair.html index 3c5803b3..1a26437b 100644 --- a/Docs/classktt_1_1_parameter_pair.html +++ b/Docs/classktt_1_1_parameter_pair.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::ParameterPair Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -96,14 +95,13 @@ Public Member Functions | Static Public Member Functions | List of all members -
-
ktt::ParameterPair Class Reference
+
ktt::ParameterPair Class Reference

#include <ParameterPair.h>

- @@ -130,7 +128,7 @@

+

Public Member Functions

 ParameterPair ()
 
bool HasSameValue (const ParameterPair &other) const
 
- @@ -140,9 +138,9 @@

+

Static Public Member Functions

template<typename T >
static T GetParameterValue (const std::vector< ParameterPair > &pairs, const std::string &name)
 

Detailed Description

-

Class which holds single value for one kernel parameter.

+

Class which holds single value for one kernel parameter.

Constructor & Destructor Documentation

- +

◆ ParameterPair() [1/3]

- +

◆ ParameterPair() [2/3]

@@ -193,7 +191,7 @@

-

Constructor which creates parameter pair for integer parameter.

Parameters
+

Constructor which creates parameter pair for integer parameter.

Parameters
@@ -203,7 +201,7 @@

+

◆ ParameterPair() [3/3]

nameName of a kernel parameter tied to the pair.
valueValue of a parameter.
@@ -247,7 +245,7 @@

Member Function Documentation

- +

◆ GetName()

- +

◆ GetParameterValue()

nameName of a kernel parameter tied to the pair.
valueValue of a parameter.
@@ -311,7 +309,7 @@

+

◆ GetParameterValues()

pairsPairs which will be searched for the specified parameter.
nameParameter whose value will be returned.
@@ -357,7 +355,7 @@

+

◆ GetString()

- +

◆ GetValue()

- +

◆ GetValueDouble()

- +

◆ GetValueString()

- +

◆ HasSameValue()

pairsPairs which will be searched for the specified parameters.
namesParameters whose values will be returned.
otherSource for other value.
@@ -454,7 +452,7 @@

+

◆ HasValueDouble()

- +

◆ SetValue() [1/2]

@@ -487,7 +485,7 @@

-

Setter for value of a floating-point parameter.

Parameters
+

Setter for value of a floating-point parameter.

Parameters
valueNew value of a floating-point parameter.
@@ -496,7 +494,7 @@

+

◆ SetValue() [2/2]

diff --git a/Docs/classktt_1_1_platform_info-members.html b/Docs/classktt_1_1_platform_info-members.html index 75d2abd1..6f2e05f7 100644 --- a/Docs/classktt_1_1_platform_info-members.html +++ b/Docs/classktt_1_1_platform_info-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,29 +91,28 @@
-
-
ktt::PlatformInfo Member List
+
ktt::PlatformInfo Member List

This is the complete list of members for ktt::PlatformInfo, including all inherited members.

- + - + - + - + - +
GetExtensions() constktt::PlatformInfo
GetIndex() constktt::PlatformInfo
GetIndex() constktt::PlatformInfo
GetName() constktt::PlatformInfo
GetString() constktt::PlatformInfo
GetString() constktt::PlatformInfo
GetVendor() constktt::PlatformInfo
GetVersion() constktt::PlatformInfo
GetVersion() constktt::PlatformInfo
PlatformInfo(const PlatformIndex index, const std::string &name)ktt::PlatformInfoexplicit
SetExtensions(const std::string &extensions)ktt::PlatformInfo
SetExtensions(const std::string &extensions)ktt::PlatformInfo
SetVendor(const std::string &vendor)ktt::PlatformInfo
SetVersion(const std::string &version)ktt::PlatformInfo
SetVersion(const std::string &version)ktt::PlatformInfo
diff --git a/Docs/classktt_1_1_platform_info.html b/Docs/classktt_1_1_platform_info.html index aa8fa052..c0bdedd2 100644 --- a/Docs/classktt_1_1_platform_info.html +++ b/Docs/classktt_1_1_platform_info.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::PlatformInfo Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,14 +94,13 @@ -
-
ktt::PlatformInfo Class Reference
+
ktt::PlatformInfo Class Reference

#include <PlatformInfo.h>

- @@ -126,9 +124,9 @@

+

Public Member Functions

 PlatformInfo (const PlatformIndex index, const std::string &name)
 
 

Detailed Description

-

Class which holds information about a compute API platform.

+

Class which holds information about a compute API platform.

Constructor & Destructor Documentation

- +

◆ PlatformInfo()

@@ -161,7 +159,7 @@

-

Constructor, which creates new platform info object.

Parameters
+

Constructor, which creates new platform info object.

Parameters
@@ -172,7 +170,7 @@

Member Function Documentation

- +

◆ GetExtensions()

- +

◆ GetIndex()

- +

◆ GetName()

- +

◆ GetString()

- +

◆ GetVendor()

- +

◆ GetVersion()

- +

◆ SetExtensions()

index: Index of platform assigned by KTT framework.
name: Name of platform retrieved from compute API.
extensions: List of supported platform extensions.
@@ -304,7 +302,7 @@

+

◆ SetVendor()

@@ -319,7 +317,7 @@

-

Setter for name of platform vendor.

Parameters
+

Setter for name of platform vendor.

Parameters
vendor: Name of platform vendor.
@@ -328,7 +326,7 @@

+

◆ SetVersion()

diff --git a/Docs/classktt_1_1_random_searcher-members.html b/Docs/classktt_1_1_random_searcher-members.html index 40135981..003e0983 100644 --- a/Docs/classktt_1_1_random_searcher-members.html +++ b/Docs/classktt_1_1_random_searcher-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,35 +91,34 @@
-
-
ktt::RandomSearcher Member List
+
ktt::RandomSearcher Member List

This is the complete list of members for ktt::RandomSearcher, including all inherited members.

- + - + - + - + - + - + - + - +
CalculateNextConfiguration(const KernelResult &previousResult) overridektt::RandomSearchervirtual
GetConfiguration(const uint64_t index) constktt::Searcher
GetConfiguration(const uint64_t index) constktt::Searcher
GetConfigurationsCount() constktt::Searcher
GetCurrentConfiguration() const overridektt::RandomSearchervirtual
GetCurrentConfiguration() const overridektt::RandomSearchervirtual
GetExploredIndices() constktt::Searcher
GetIndex(const KernelConfiguration &configuration) constktt::Searcher
GetIndex(const KernelConfiguration &configuration) constktt::Searcher
GetNeighbourConfigurations(const KernelConfiguration &configuration, const uint64_t maxDifferences, const size_t maxNeighbours=3) constktt::Searcher
GetRandomConfiguration() constktt::Searcher
GetRandomConfiguration() constktt::Searcher
Initialize(const ConfigurationData &data)ktt::Searcher
IsInitialized() constktt::Searcher
IsInitialized() constktt::Searcher
OnInitialize() overridektt::RandomSearchervirtual
OnReset()ktt::Searchervirtual
OnReset()ktt::Searchervirtual
RandomSearcher()ktt::RandomSearcher
Reset()ktt::Searcher
Reset()ktt::Searcher
Searcher()ktt::Searcher
~Searcher()=defaultktt::Searchervirtual
~Searcher()=defaultktt::Searchervirtual
diff --git a/Docs/classktt_1_1_random_searcher.html b/Docs/classktt_1_1_random_searcher.html index 3a6f3855..2a0070ad 100644 --- a/Docs/classktt_1_1_random_searcher.html +++ b/Docs/classktt_1_1_random_searcher.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::RandomSearcher Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::RandomSearcher Class Reference
+
ktt::RandomSearcher Class Reference
@@ -111,7 +109,7 @@
- @@ -124,8 +122,14 @@ + + + + + + @@ -148,9 +152,9 @@

+

Public Member Functions

 RandomSearcher ()
 
- Public Member Functions inherited from ktt::Searcher
virtual ~Searcher ()=default
 
virtual void OnInitialize ()
 
virtual void OnReset ()
 
virtual bool CalculateNextConfiguration (const KernelResult &previousResult)=0
 
virtual KernelConfiguration GetCurrentConfiguration () const =0
 
 Searcher ()
 
KernelConfiguration GetConfiguration (const uint64_t index) const
 

Detailed Description

-

Searcher which explores configurations in random order.

+

Searcher which explores configurations in random order.

Constructor & Destructor Documentation

- +

◆ RandomSearcher()

Member Function Documentation

-
+

◆ CalculateNextConfiguration()

- +

◆ OnInitialize()

diff --git a/Docs/classktt_1_1_searcher-members.html b/Docs/classktt_1_1_searcher-members.html index 3c4797a5..52f2a3de 100644 --- a/Docs/classktt_1_1_searcher-members.html +++ b/Docs/classktt_1_1_searcher-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,34 +91,33 @@
-
-
ktt::Searcher Member List
+
ktt::Searcher Member List

This is the complete list of members for ktt::Searcher, including all inherited members.

- + - + - + - + - + - + - +
CalculateNextConfiguration(const KernelResult &previousResult)=0ktt::Searcherpure virtual
GetConfiguration(const uint64_t index) constktt::Searcher
GetConfiguration(const uint64_t index) constktt::Searcher
GetConfigurationsCount() constktt::Searcher
GetCurrentConfiguration() const =0ktt::Searcherpure virtual
GetCurrentConfiguration() const =0ktt::Searcherpure virtual
GetExploredIndices() constktt::Searcher
GetIndex(const KernelConfiguration &configuration) constktt::Searcher
GetIndex(const KernelConfiguration &configuration) constktt::Searcher
GetNeighbourConfigurations(const KernelConfiguration &configuration, const uint64_t maxDifferences, const size_t maxNeighbours=3) constktt::Searcher
GetRandomConfiguration() constktt::Searcher
GetRandomConfiguration() constktt::Searcher
Initialize(const ConfigurationData &data)ktt::Searcher
IsInitialized() constktt::Searcher
IsInitialized() constktt::Searcher
OnInitialize()ktt::Searchervirtual
OnReset()ktt::Searchervirtual
OnReset()ktt::Searchervirtual
Reset()ktt::Searcher
Searcher()ktt::Searcher
Searcher()ktt::Searcher
~Searcher()=defaultktt::Searchervirtual
diff --git a/Docs/classktt_1_1_searcher.html b/Docs/classktt_1_1_searcher.html index deef119c..00a39443 100644 --- a/Docs/classktt_1_1_searcher.html +++ b/Docs/classktt_1_1_searcher.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::Searcher Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::Searcher Class Referenceabstract
+
ktt::Searcher Class Referenceabstract
@@ -113,7 +111,7 @@
- @@ -147,9 +145,9 @@

+

Public Member Functions

virtual ~Searcher ()=default
 
 

Detailed Description

-

Class which is used to decide which kernel configuration will be run next during the kernel tuning process.

+

Class which is used to decide which kernel configuration will be run next during the kernel tuning process.

Constructor & Destructor Documentation

- +

◆ ~Searcher()

@@ -171,11 +169,11 @@

-

Searcher destructor. Inheriting class can override destructor with custom implementation. Default implementation is provided by KTT framework.

+

Searcher destructor. Inheriting class can override destructor with custom implementation. Default implementation is provided by KTT framework.

- +

◆ Searcher()

Member Function Documentation

-
+

◆ CalculateNextConfiguration()

- +

◆ GetConfiguration()

- +

◆ GetCurrentConfiguration()

- +

◆ GetExploredIndices()

- +

◆ GetIndex()

@@ -333,7 +331,7 @@

-

Returns index of the specified configuration.

Parameters
+

Returns index of the specified configuration.

Parameters
configuration: Configuration for which the index will be retrieved.
@@ -343,7 +341,7 @@

+

◆ GetNeighbourConfigurations()

- +

◆ OnInitialize()

- +

◆ OnReset()

- +

◆ Reset()

@@ -529,7 +527,7 @@

diff --git a/Docs/classktt_1_1_stop_condition-members.html b/Docs/classktt_1_1_stop_condition-members.html index faada3bc..484df56b 100644 --- a/Docs/classktt_1_1_stop_condition-members.html +++ b/Docs/classktt_1_1_stop_condition-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,24 +91,23 @@
-
-
ktt::StopCondition Member List
+
ktt::StopCondition Member List

This is the complete list of members for ktt::StopCondition, including all inherited members.

- + - +
GetStatusString() const =0ktt::StopConditionpure virtual
Initialize(const uint64_t configurationsCount)=0ktt::StopConditionpure virtual
Initialize(const uint64_t configurationsCount)=0ktt::StopConditionpure virtual
IsFulfilled() const =0ktt::StopConditionpure virtual
Update(const KernelResult &result)=0ktt::StopConditionpure virtual
Update(const KernelResult &result)=0ktt::StopConditionpure virtual
~StopCondition()=defaultktt::StopConditionvirtual
diff --git a/Docs/classktt_1_1_stop_condition.html b/Docs/classktt_1_1_stop_condition.html index fadbc5ff..a6ebedb2 100644 --- a/Docs/classktt_1_1_stop_condition.html +++ b/Docs/classktt_1_1_stop_condition.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::StopCondition Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::StopCondition Class Referenceabstract
+
ktt::StopCondition Class Referenceabstract
@@ -114,7 +112,7 @@
- @@ -128,9 +126,9 @@

+

Public Member Functions

virtual ~StopCondition ()=default
 
 

Detailed Description

-

Class which can be used to stop the tuning process when certain condition is satisfied.

+

Class which can be used to stop the tuning process when certain condition is satisfied.

Constructor & Destructor Documentation

- +

◆ ~StopCondition()

Member Function Documentation

-
+

◆ GetStatusString()

- +

◆ Initialize()

- +

◆ IsFulfilled()

- +

◆ Update()

@@ -290,7 +288,7 @@

diff --git a/Docs/classktt_1_1_tuner-members.html b/Docs/classktt_1_1_tuner-members.html index d8aae924..f28f1720 100644 --- a/Docs/classktt_1_1_tuner-members.html +++ b/Docs/classktt_1_1_tuner-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,78 +91,89 @@
-
-
ktt::Tuner Member List
+
ktt::Tuner Member List

This is the complete list of members for ktt::Tuner, including all inherited members.

- + + + - + - + + + - + - + - + - + - + - + - - - - - - - + + + + + + + + + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - + + + + + + + + + + + + +
AddArgumentLocal(const size_t localMemorySize)ktt::Tuner
AddArgumentScalar(const T &data)ktt::Tuner
AddArgumentScalar(const T &data)ktt::Tuner
AddArgumentScalar(const void *data, const size_t dataSize)ktt::Tuner
AddArgumentSymbol(const T &data, const std::string &symbolName="")ktt::Tuner
AddArgumentVector(const std::vector< T > &data, const ArgumentAccessType accessType)ktt::Tuner
AddArgumentVector(std::vector< T > &data, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation, const ArgumentManagementType managementType, const bool referenceUserData)ktt::Tuner
AddArgumentVector(std::vector< T > &data, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation, const ArgumentManagementType managementType, const bool referenceUserData)ktt::Tuner
AddArgumentVector(ComputeBuffer buffer, const size_t bufferSize, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation)ktt::Tuner
AddConstraint(const KernelId id, const std::vector< std::string > &parameters, ConstraintFunction function)ktt::Tuner
AddArgumentVector(ComputeBuffer buffer, const size_t bufferSize, const size_t elementSize, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation)ktt::Tuner
AddComputeQueue(ComputeQueue queue)ktt::Tuner
AddConstraint(const KernelId id, const std::vector< std::string > &parameters, ConstraintFunction function)ktt::Tuner
AddKernelDefinition(const std::string &name, const std::string &source, const DimensionVector &globalSize, const DimensionVector &localSize, const std::vector< std::string > &typeNames={})ktt::Tuner
AddKernelDefinitionFromFile(const std::string &name, const std::string &filePath, const DimensionVector &globalSize, const DimensionVector &localSize, const std::vector< std::string > &typeNames={})ktt::Tuner
AddKernelDefinitionFromFile(const std::string &name, const std::string &filePath, const DimensionVector &globalSize, const DimensionVector &localSize, const std::vector< std::string > &typeNames={})ktt::Tuner
AddParameter(const KernelId id, const std::string &name, const std::vector< uint64_t > &values, const std::string &group="")ktt::Tuner
AddParameter(const KernelId id, const std::string &name, const std::vector< double > &values, const std::string &group="")ktt::Tuner
AddParameter(const KernelId id, const std::string &name, const std::vector< double > &values, const std::string &group="")ktt::Tuner
AddThreadModifier(const KernelId id, const std::vector< KernelDefinitionId > &definitionIds, const ModifierType type, const ModifierDimension dimension, const std::vector< std::string > &parameters, ModifierFunction function)ktt::Tuner
AddThreadModifier(const KernelId id, const std::vector< KernelDefinitionId > &definitionIds, const ModifierType type, const ModifierDimension dimension, const std::string &parameter, const ModifierAction action)ktt::Tuner
AddThreadModifier(const KernelId id, const std::vector< KernelDefinitionId > &definitionIds, const ModifierType type, const ModifierDimension dimension, const std::string &parameter, const ModifierAction action)ktt::Tuner
ClearData(const KernelId id)ktt::Tuner
CreateCompositeKernel(const std::string &name, const std::vector< KernelDefinitionId > &definitionIds, KernelLauncher launcher=nullptr)ktt::Tuner
CreateCompositeKernel(const std::string &name, const std::vector< KernelDefinitionId > &definitionIds, KernelLauncher launcher=nullptr)ktt::Tuner
CreateConfiguration(const KernelId id, const ParameterInput &parameters) constktt::Tuner
CreateSimpleKernel(const std::string &name, const KernelDefinitionId definitionId)ktt::Tuner
CreateSimpleKernel(const std::string &name, const KernelDefinitionId definitionId)ktt::Tuner
GetBestConfiguration(const KernelId id) constktt::Tuner
GetCurrentDeviceInfo() constktt::Tuner
GetCurrentDeviceInfo() constktt::Tuner
GetDeviceInfo(const PlatformIndex platform) constktt::Tuner
GetKernelDefinitionSource(const KernelDefinitionId id, const KernelConfiguration &configuration) constktt::Tuner
GetKernelSource(const KernelId id, const KernelConfiguration &configuration) constktt::Tuner
GetPlatformInfo() constktt::Tuner
LoadResults(const std::string &filePath, const OutputFormat format) constktt::Tuner
LoadResults(const std::string &filePath, const OutputFormat format, UserData &data) constktt::Tuner
RemoveArgument(const ArgumentId id)ktt::Tuner
RemoveKernel(const KernelId id)ktt::Tuner
GetKernelDefinitionId(const std::string &name, const std::vector< std::string > &typeNames={}) constktt::Tuner
GetKernelDefinitionSource(const KernelDefinitionId id, const KernelConfiguration &configuration) constktt::Tuner
GetKernelSource(const KernelId id, const KernelConfiguration &configuration) constktt::Tuner
GetPlatformInfo() constktt::Tuner
LoadResults(const std::string &filePath, const OutputFormat format) constktt::Tuner
LoadResults(const std::string &filePath, const OutputFormat format, UserData &data) constktt::Tuner
RemoveArgument(const ArgumentId id)ktt::Tuner
RemoveComputeQueue(const QueueId id)ktt::Tuner
RemoveKernel(const KernelId id)ktt::Tuner
RemoveKernelDefinition(const KernelDefinitionId id)ktt::Tuner
Run(const KernelId id, const KernelConfiguration &configuration, const std::vector< BufferOutputDescriptor > &output)ktt::Tuner
Run(const KernelId id, const KernelConfiguration &configuration, const std::vector< BufferOutputDescriptor > &output)ktt::Tuner
SaveResults(const std::vector< KernelResult > &results, const std::string &filePath, const OutputFormat format, const UserData &data={}) constktt::Tuner
SetArguments(const KernelDefinitionId id, const std::vector< ArgumentId > &argumentIds)ktt::Tuner
SetArguments(const KernelDefinitionId id, const std::vector< ArgumentId > &argumentIds)ktt::Tuner
SetAutomaticGlobalSizeCorrection(const bool flag)ktt::Tuner
SetCompilerOptions(const std::string &options)ktt::Tuner
SetCompilerOptions(const std::string &options)ktt::Tuner
SetGlobalSizeType(const GlobalSizeType type)ktt::Tuner
SetKernelCacheCapacity(const uint64_t capacity)ktt::Tuner
SetKernelCacheCapacity(const uint64_t capacity)ktt::Tuner
SetLauncher(const KernelId id, KernelLauncher launcher)ktt::Tuner
SetLoggingLevel(const LoggingLevel level)ktt::Tunerstatic
SetLoggingLevel(const LoggingLevel level)ktt::Tunerstatic
SetLoggingTarget(std::ostream &outputTarget)ktt::Tunerstatic
SetLoggingTarget(const std::string &filePath)ktt::Tunerstatic
SetLoggingTarget(const std::string &filePath)ktt::Tunerstatic
SetProfiledDefinitions(const KernelId id, const std::vector< KernelDefinitionId > &definitionIds)ktt::Tuner
SetProfiling(const bool flag)ktt::Tuner
SetProfiling(const bool flag)ktt::Tuner
SetProfilingCounters(const std::vector< std::string > &counters)ktt::Tuner
SetReadOnlyArgumentCache(const bool flag)ktt::Tuner
SetReadOnlyArgumentCache(const bool flag)ktt::Tuner
SetReferenceComputation(const ArgumentId id, ReferenceComputation computation)ktt::Tuner
SetReferenceKernel(const ArgumentId id, const KernelId referenceId, const KernelConfiguration &configuration)ktt::Tuner
SetReferenceKernel(const ArgumentId id, const KernelId referenceId, const KernelConfiguration &configuration)ktt::Tuner
SetSearcher(const KernelId id, std::unique_ptr< Searcher > searcher)ktt::Tuner
SetTimeUnit(const TimeUnit unit)ktt::Tunerstatic
SetTimeUnit(const TimeUnit unit)ktt::Tunerstatic
SetValidationMethod(const ValidationMethod method, const double toleranceThreshold)ktt::Tuner
SetValidationMode(const ValidationMode mode)ktt::Tuner
SetValidationMode(const ValidationMode mode)ktt::Tuner
SetValidationRange(const ArgumentId id, const size_t range)ktt::Tuner
SetValueComparator(const ArgumentId id, ValueComparator comparator)ktt::Tuner
SetValueComparator(const ArgumentId id, ValueComparator comparator)ktt::Tuner
SimulateKernelTuning(const KernelId id, const std::vector< KernelResult > &results, const uint64_t iterations=0)ktt::Tuner
Synchronize()ktt::Tuner
Tune(const KernelId id)ktt::Tuner
Tune(const KernelId id, std::unique_ptr< StopCondition > stopCondition)ktt::Tuner
TuneIteration(const KernelId id, const std::vector< BufferOutputDescriptor > &output, const bool recomputeReference=false)ktt::Tuner
Tuner(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api)ktt::Tunerexplicit
Tuner(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api, const uint32_t computeQueueCount)ktt::Tunerexplicit
Tuner(const ComputeApi api, const ComputeApiInitializer &initializer)ktt::Tunerexplicit
Synchronize()ktt::Tuner
SynchronizeDevice()ktt::Tuner
SynchronizeQueue(const QueueId id)ktt::Tuner
SynchronizeQueues()ktt::Tuner
Tune(const KernelId id)ktt::Tuner
Tune(const KernelId id, std::unique_ptr< StopCondition > stopCondition)ktt::Tuner
TuneIteration(const KernelId id, const std::vector< BufferOutputDescriptor > &output, const bool recomputeReference=false)ktt::Tuner
Tuner(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api)ktt::Tunerexplicit
Tuner(const PlatformIndex platform, const DeviceIndex device, const ComputeApi api, const uint32_t computeQueueCount)ktt::Tunerexplicit
Tuner(const ComputeApi api, const ComputeApiInitializer &initializer)ktt::Tunerexplicit
Tuner(const ComputeApi api, const ComputeApiInitializer &initializer, std::vector< QueueId > &assignedQueueIds)ktt::Tunerexplicit
WaitForComputeAction(const ComputeActionId id)ktt::Tuner
WaitForTransferAction(const TransferActionId id)ktt::Tuner
~Tuner()ktt::Tuner
diff --git a/Docs/classktt_1_1_tuner.html b/Docs/classktt_1_1_tuner.html index f782a1d1..11ff4d15 100644 --- a/Docs/classktt_1_1_tuner.html +++ b/Docs/classktt_1_1_tuner.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::Tuner Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -96,14 +95,13 @@ Public Member Functions | Static Public Member Functions | List of all members -
-
ktt::Tuner Class Reference
+
ktt::Tuner Class Reference

#include <Tuner.h>

- @@ -111,12 +109,16 @@ + + + + @@ -150,12 +152,19 @@ + + + + + + + @@ -202,6 +211,20 @@ + + + + + + + + + + + + + + @@ -221,7 +244,7 @@

+

Public Member Functions

 Tuner (const PlatformIndex platform, const DeviceIndex device, const ComputeApi api)
 
 
 Tuner (const ComputeApi api, const ComputeApiInitializer &initializer)
 
 Tuner (const ComputeApi api, const ComputeApiInitializer &initializer, std::vector< QueueId > &assignedQueueIds)
 
 ~Tuner ()
 
KernelDefinitionId AddKernelDefinition (const std::string &name, const std::string &source, const DimensionVector &globalSize, const DimensionVector &localSize, const std::vector< std::string > &typeNames={})
 
KernelDefinitionId AddKernelDefinitionFromFile (const std::string &name, const std::string &filePath, const DimensionVector &globalSize, const DimensionVector &localSize, const std::vector< std::string > &typeNames={})
 
KernelDefinitionId GetKernelDefinitionId (const std::string &name, const std::vector< std::string > &typeNames={}) const
 
void RemoveKernelDefinition (const KernelDefinitionId id)
 
void SetArguments (const KernelDefinitionId id, const std::vector< ArgumentId > &argumentIds)
template<typename T >
ArgumentId AddArgumentVector (ComputeBuffer buffer, const size_t bufferSize, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation)
 
ArgumentId AddArgumentVector (ComputeBuffer buffer, const size_t bufferSize, const size_t elementSize, const ArgumentAccessType accessType, const ArgumentMemoryLocation memoryLocation)
 
template<typename T >
ArgumentId AddArgumentScalar (const T &data)
 
ArgumentId AddArgumentScalar (const void *data, const size_t dataSize)
 
template<typename T >
ArgumentId AddArgumentLocal (const size_t localMemorySize)
 
template<typename T >
ArgumentId AddArgumentSymbol (const T &data, const std::string &symbolName="")
 
void RemoveArgument (const ArgumentId id)
 
void SetReadOnlyArgumentCache (const bool flag)
 
std::vector< KernelResult > LoadResults (const std::string &filePath, const OutputFormat format, UserData &data) const
 
QueueId AddComputeQueue (ComputeQueue queue)
 
void RemoveComputeQueue (const QueueId id)
 
void WaitForComputeAction (const ComputeActionId id)
 
void WaitForTransferAction (const TransferActionId id)
 
void SynchronizeQueue (const QueueId id)
 
void SynchronizeQueues ()
 
void SynchronizeDevice ()
 
void Synchronize ()
 
void SetProfilingCounters (const std::vector< std::string > &counters)
DeviceInfo GetCurrentDeviceInfo () const
 
- @@ -233,10 +256,10 @@

+

Static Public Member Functions

static void SetTimeUnit (const TimeUnit unit)
 
 

Detailed Description

-

Class which serves as the main part of public API of KTT framework.

+

Class which serves as the main part of public API of KTT framework.

Constructor & Destructor Documentation

- -

◆ Tuner() [1/3]

+ +

◆ Tuner() [1/4]

@@ -274,7 +297,7 @@

-

Creates tuner for the specified platform, device and compute API. All compute commands are submitted to a single queue. Indices for available platforms and devices can be retrieved by using GetPlatformInfo() and GetDeviceInfo() methods. If the specified compute API is CUDA or Vulkan, platform index is ignored.

Parameters
+

Creates tuner for the specified platform, device and compute API. All compute commands are submitted to a single queue. Indices for available platforms and devices can be retrieved by using GetPlatformInfo() and GetDeviceInfo() methods. If the specified compute API is CUDA or Vulkan, platform index is ignored.

Parameters
@@ -285,8 +308,8 @@

-

◆ Tuner() [2/3]

+ +

◆ Tuner() [2/4]

@@ -330,7 +353,7 @@

-

Creates tuner for the specified platform, device and compute API. Multiple compute queues can be created, based on the specified count. Compute commands to different queues can be submitted by utilizing KernelLauncher and ComputeInterface. Indices for available platforms and devices can be retrieved by using GetPlatformInfo() and GetDeviceInfo() methods. If the specified compute API is CUDA or Vulkan, platform index is ignored.

Parameters
+

Creates tuner for the specified platform, device and compute API. Multiple compute queues can be created, based on the specified count. Compute commands to different queues can be submitted by utilizing KernelLauncher and ComputeInterface. Indices for available platforms and devices can be retrieved by using GetPlatformInfo() and GetDeviceInfo() methods. If the specified compute API is CUDA or Vulkan, platform index is ignored.

Parameters

platform: Index for platform used by the tuner.
device: Index for device used by the tuner.
@@ -342,8 +365,8 @@

-

◆ Tuner() [3/3]

+ +

◆ Tuner() [3/4]

platform: Index for platform used by the tuner.
device: Index for device used by the tuner.
@@ -385,7 +408,57 @@

+ +

◆ Tuner() [4/4]

+ +
+
+
api: Compute API used by the tuner.
initializer: Custom compute API initializer. See ComputeApiInitializer for more information.
+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
ktt::Tuner::Tuner (const ComputeApi api,
const ComputeApiInitializer & initializer,
std::vector< QueueId > & assignedQueueIds 
)
+
+explicit
+

+

Creates tuner for the specified compute API using custom initializer. The initializer contains user-provided compute device context and queues. The ids assigned to queues will be added to the provided vector.

Parameters
+ + + + +
api: Compute API used by the tuner.
initializer: Custom compute API initializer. See ComputeApiInitializer for more information.
assignedQueueIds: Ids assigned to compute queues inside initializer by the tuner. The order of assigned ids matches the order of queues inside initializer.
+
+
+ +
+
+

◆ ~Tuner()

@@ -399,12 +472,12 @@

-

Tuner destructor.

+

Tuner destructor.

Member Function Documentation

- +

◆ AddArgumentLocal()

@@ -421,7 +494,7 @@

-

Adds new local memory (shared memory in CUDA) argument to the tuner. All local memory arguments are read-only and cannot be initialized from host memory. In case of CUDA API usage, local memory arguments cannot be directly set as kernel function arguments. Setting a local memory argument to kernel in CUDA means that corresponding amount of memory will be allocated for kernel to use. In that case, all local memory argument ids should be specified at the end of the vector when calling SetArguments() method.

Parameters
+

Adds new local memory (shared memory in CUDA) argument to the tuner. All local memory arguments are read-only and cannot be initialized from host memory. In case of CUDA API usage, local memory arguments cannot be directly set as kernel function arguments. Setting a local memory argument to kernel in CUDA means that corresponding amount of memory will be allocated for kernel to use. In that case, all local memory argument ids should be specified at the end of the vector when calling SetArguments() method.

Parameters
localMemorySize: Size of kernel argument in bytes.
@@ -431,8 +504,8 @@

-

◆ AddArgumentScalar()

+ +

◆ AddArgumentScalar() [1/2]

+ +

◆ AddArgumentScalar() [2/2]

+ +
+
+ + + + + + + + + + + + + + + + + + +
ArgumentId ktt::Tuner::AddArgumentScalar (const void * data,
const size_t dataSize 
)
+
+

Adds new scalar argument to the tuner. All scalar arguments are read-only. This method can be utilized when templated version of scalar argument addition cannot be used.

Parameters
+ + + +
data: Pointer to memory with kernel argument data.
dataSize: Size of data in bytes (e.g., 4 for 32-bit float).
+
+
+
Returns
Id assigned to kernel argument by tuner. The id can be used in other API methods.
+ +
+
+ +

◆ AddArgumentSymbol()

+ +
+
+
+template<typename T >
+ + + + + + + + + + + + + + + + + + +
template< typename T > ArgumentId ktt::Tuner::AddArgumentSymbol (const T & data,
const std::string & symbolName = "" 
)
+
+

Adds new symbol argument to the tuner.

Parameters
+
data: Kernel argument data. The data type must be trivially copyable. Bool, reference or pointer types are not supported.
symbolName: Name of the corresponding symbol in kernel source code. Only utilized when tuner is using CUDA API. The symbol name must be unique.
@@ -458,8 +605,8 @@

-

◆ AddArgumentVector() [1/3]

+ +

◆ AddArgumentVector() [1/4]

+ +

◆ AddArgumentVector() [2/4]

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ArgumentId ktt::Tuner::AddArgumentVector (ComputeBuffer buffer,
const size_t bufferSize,
const size_t elementSize,
const ArgumentAccessType accessType,
const ArgumentMemoryLocation memoryLocation 
)
+
+

Adds new vector argument to the tuner. The argument buffer is created and managed by the user and, depending on the compute API, can be either a CUdeviceptr or a cl_mem handle. The tuner will not destroy the argument. This method can be utilized when the templated version of argument addition cannot be used. When using validation for arguments added through this method, a value comparator must always be provided.

Parameters
+
buffer: User-provided memory buffer.
bufferSize: Size of the provided user buffer in bytes.
elementSize: Size of a single element inside buffer in bytes (e.g., 4 for 32-bit float).
accessType: Access type specifies whether argument is used for input or output. See ArgumentAccessType for more information.
memoryLocation: Memory location specifies whether argument data will be accessed from device or host memory during its usage by compute API. See ArgumentMemoryLocation for more information.
@@ -510,8 +714,8 @@

-

◆ AddArgumentVector() [2/3]

+ +

◆ AddArgumentVector() [3/4]

@@ -537,7 +741,7 @@

-

Adds new vector argument to the tuner. Makes copy of argument data, so the source data vector remains unaffected by tuner operations. Argument data will be accessed from device memory during its usage by compute API. The compute API buffer will be automatically created and managed by the KTT framework.

Parameters
+

Adds new vector argument to the tuner. Makes copy of argument data, so the source data vector remains unaffected by tuner operations. Argument data will be accessed from device memory during its usage by compute API. The compute API buffer will be automatically created and managed by the KTT framework.

Parameters
@@ -548,8 +752,8 @@

-

◆ AddArgumentVector() [3/3]

+ +

◆ AddArgumentVector() [4/4]

data: Kernel argument data. The data type must be trivially copyable. Bool, reference or pointer types are not supported.
accessType: Access type specifies whether argument is used for input or output. See ArgumentAccessType for more information.
@@ -607,7 +811,32 @@

+ +

◆ AddComputeQueue()

+ +
+
+
data: Kernel argument data. The data type must be trivially copyable. Bool, reference or pointer types are not supported.
accessType: Access type specifies whether argument is used for input or output. See ArgumentAccessType for more information.
+ + + + + + + +
QueueId ktt::Tuner::AddComputeQueue (ComputeQueue queue)
+

+

Adds the specified compute queue to the tuner. New queues can only be added if tuner was initialized with compute API initializer.

Parameters
+ + +
queue: Queue which will be added. The queue should be tied to the context specified inside compute API initializer.
+
+
+
Returns
Id assigned to queue by the tuner.
+ +
+
+

◆ AddConstraint()

@@ -638,7 +867,7 @@

-

Adds constraint for the specified kernel. Constraints are used to prevent generating of configurations with conflicting combinations of parameter values.

Parameters
+

Adds constraint for the specified kernel. Constraints are used to prevent generating of configurations with conflicting combinations of parameter values.

Parameters
@@ -649,7 +878,7 @@

+

◆ AddKernelDefinition()

idId of kernel for which the constraint will be added.
parametersNames of kernel parameters which will be affected by the constraint function. The order of parameter names corresponds to the order of parameter values inside the constraint function vector argument. Note that constraints can only be added between parameters which belong into the same group. The corresponding parameters must be added to the tuner with AddParameter() before calling this method.
@@ -706,7 +935,7 @@

+

◆ AddKernelDefinitionFromFile()

nameName of a kernel function inside kernel source code. The name must be unique.
sourceKernel source code written in the corresponding compute API language.
@@ -763,7 +992,7 @@

+

◆ AddParameter() [1/2]

nameName of a kernel function inside kernel source code. The name must be unique.
filePathPath to file with kernel source code written in the corresponding compute API language.
@@ -812,7 +1041,7 @@

+

◆ AddParameter() [2/2]

idId of kernel for which the parameter will be added.
nameName of a parameter. Parameter names for a single kernel must be unique.
@@ -861,7 +1090,7 @@

+

◆ AddThreadModifier() [1/2]

idId of kernel for which the parameter will be added.
nameName of a parameter. Parameter names for a single kernel must be unique.
@@ -924,7 +1153,7 @@

+

◆ AddThreadModifier() [2/2]

idId of kernel for which the modifier will be set.
definitionIdsKernel definitions whose thread sizes will be affected by the thread modifier.
@@ -987,7 +1216,7 @@

+

◆ ClearData()

idId of kernel for which the modifier will be set.
definitionIdsKernel definitions whose thread sizes will be affected by the thread modifier.
idId of kernel whose data will be cleared.
@@ -1011,7 +1240,7 @@

+

◆ CreateCompositeKernel()

@@ -1042,7 +1271,7 @@

-

Creates composite kernel from the specified definitions. Note that kernel launcher is required in order to launch kernels with multiple definitions.

Parameters
+

Creates composite kernel from the specified definitions. Note that kernel launcher is required in order to launch kernels with multiple definitions.

Parameters
@@ -1054,7 +1283,7 @@

+

◆ CreateConfiguration()

nameKernel name used during logging and output operations. The name must be unique.
definitionIdsIds of kernel definitions which will be utilized by the kernel.
@@ -1090,7 +1319,7 @@

+

◆ CreateSimpleKernel()

idId of kernel for which the configuration will be created.
parametersVector of parameter names and their values from which the configuration is generated. If certain parameters are omitted, their first specified values are added to the configuration.
@@ -1126,7 +1355,7 @@

+

◆ GetBestConfiguration()

nameKernel name used during logging and output operations. The name must be unique.
definitionIdId of kernel definition which will be utilized by the kernel.
idId of kernel for which the best configuration will be returned.
@@ -1151,7 +1380,7 @@

+

◆ GetCurrentDeviceInfo()

@@ -1165,11 +1394,11 @@

-

Retrieves detailed information about device used by the tuner. See DeviceInfo for more information.

Returns
Information about device used by the tuner.
+

Retrieves detailed information about device used by the tuner. See DeviceInfo for more information.

Returns
Information about device used by the tuner.

- +

◆ GetDeviceInfo()

@@ -1184,7 +1413,7 @@

-

Retrieves detailed information about all available devices on the specified platform. See DeviceInfo for more information.

Parameters
+

Retrieves detailed information about all available devices on the specified platform. See DeviceInfo for more information.

Parameters
platformIndex of platform for which the device information will be retrieved.
@@ -1194,7 +1423,43 @@

+ +

◆ GetKernelDefinitionId()

+ +
+
+ + + + + + + + + + + + + + + + + + +
KernelDefinitionId ktt::Tuner::GetKernelDefinitionId (const std::string & name,
const std::vector< std::string > & typeNames = {} 
) const
+
+

Retrieves kernel definition id from the tuner based on provided name and template arguments.

Parameters
+ + + +
nameName of a kernel definition.
typeNamesNames of types which were used to instantiate kernel template. Only supported in CUDA kernels.
+
+
+
Returns
Id of the corresponding kernel definition. If no such definition exists, InvalidKernelDefinitionId will be returned.
+ +
+
+

◆ GetKernelDefinitionSource()

@@ -1219,7 +1484,7 @@

-

Returns kernel source with preprocessor definitions for the specified kernel definition based on provided configuration.

Parameters
+

Returns kernel source with preprocessor definitions for the specified kernel definition based on provided configuration.

Parameters
@@ -1230,7 +1495,7 @@

+

◆ GetKernelSource()

idId of kernel definition for which the source is returned.
configurationKernel configuration for which the source will be generated. See KernelConfiguration for more information.
@@ -1266,7 +1531,7 @@

+

◆ GetPlatformInfo()

@@ -1280,11 +1545,11 @@

-

Retrieves detailed information about all available platforms. See PlatformInfo for more information.

Returns
Information about all available platforms.
+

Retrieves detailed information about all available platforms. See PlatformInfo for more information.

Returns
Information about all available platforms.

- +

◆ LoadResults() [1/2]

@@ -1309,7 +1574,7 @@

-

Loads kernel results from the specified file. The file must be previously created by the tuner method SaveResults() with corresponding output format.

Parameters
+

Loads kernel results from the specified file. The file must be previously created by the tuner method SaveResults() with corresponding output format.

Parameters

idId of kernel for which the source is returned.
configurationKernel configuration for which the source will be generated. See KernelConfiguration for more information.
@@ -1320,7 +1585,7 @@

+

◆ LoadResults() [2/2]

@@ -1351,7 +1616,7 @@

-

Loads kernel results from the specified file. The file must be previously created by the tuner method SaveResults() with corresponding output format.

Parameters
+

Loads kernel results from the specified file. The file must be previously created by the tuner method SaveResults() with corresponding output format.

Parameters

filePathFile from which the results will be loaded. The file extension is added automatically based on the specified format.
formatFormat in which the results are stored. See OutputFormat for more information.
@@ -1363,7 +1628,7 @@

+

◆ RemoveArgument()

filePathFile from which the results will be loaded. The file extension is added automatically based on the specified format.
formatFormat in which the results are stored. See OutputFormat for more information.
idId of the argument which will be removed.
@@ -1387,7 +1652,31 @@

+ +

◆ RemoveComputeQueue()

+ +
+
+ + + + + + + + +
void ktt::Tuner::RemoveComputeQueue (const QueueId id)
+
+

Removes the specified compute queue from the tuner. Only queues added by user can be removed.

Parameters
+ + +
idId of compute queue which will be removed.
+
+
+ +
+
+

◆ RemoveKernel()

@@ -1402,7 +1691,7 @@

-

Removes kernel with the specified id from the tuner. If the kernel is used as a reference kernel, the corresponding kernel argument output validation will be disabled.

Parameters
+

Removes kernel with the specified id from the tuner. If the kernel is used as a reference kernel, the corresponding kernel argument output validation will be disabled.

Parameters
idId of the kernel which will be removed.
@@ -1411,7 +1700,7 @@

+

◆ RemoveKernelDefinition()

@@ -1426,7 +1715,7 @@

-

Removes kernel definition with the specified id from the tuner. Note that definition can only be removed if it is not associated with any kernel.

Parameters
+

Removes kernel definition with the specified id from the tuner. Note that definition can only be removed if it is not associated with any kernel.

Parameters
idId of the kernel definition which will be removed.
@@ -1435,7 +1724,7 @@

+

◆ Run()

@@ -1466,7 +1755,7 @@

-

Runs kernel using the specified configuration.

Parameters
+

Runs kernel using the specified configuration.

Parameters
@@ -1478,7 +1767,7 @@

+

◆ SaveResults()

idId of kernel which will be run.
configurationConfiguration under which the kernel will be launched. See KernelConfiguration for more information.
@@ -1527,7 +1816,7 @@

+

◆ SetArguments()

resultsResults which will be saved.
filePathFile where the results will be saved. The file extension is added automatically based on the specified format.
@@ -1562,7 +1851,7 @@

+

◆ SetAutomaticGlobalSizeCorrection()

idId of a kernel definition for which the arguments will be set.
argumentIdsIds of arguments to be used by the specified definition. The order of ids must match the order of kernel arguments inside kernel function. The provided ids must be unique.
flagIf true, automatic global size correction will be enabled. It will be disabled otherwise.
@@ -1586,7 +1875,7 @@

+

◆ SetCompilerOptions()

@@ -1601,7 +1890,7 @@

-

Sets compute API compiler options to specified options. There are no default options for OpenCL backend. By default for CUDA backend it adds the compiler option "--gpu-architecture=compute_xx", where xx is the compute capability retrieved from the device. For the list of OpenCL compiler options, see: https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clBuildProgram.html For the list of CUDA compiler options, see: http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#nvcc-command-options

Parameters
+

Sets compute API compiler options to specified options. There are no default options for OpenCL backend. By default for CUDA backend it adds the compiler option "--gpu-architecture=compute_xx", where xx is the compute capability retrieved from the device. For the list of OpenCL compiler options, see: https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/clBuildProgram.html For the list of CUDA compiler options, see: http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#nvcc-command-options

Parameters
optionsCompute API compiler options. If multiple options are used, they need to be separated by a single space character.
@@ -1610,7 +1899,7 @@

+

◆ SetGlobalSizeType()

@@ -1625,7 +1914,7 @@

-

Sets global size specification type to specified compute API style. In OpenCL, NDrange size is specified as number of work-items in a work-group multiplied by number of work-groups. In CUDA, grid size is specified as number of blocks. This method makes it possible to use OpenCL style in CUDA and vice versa. Default global size type is the one corresponding to the compute API used by the tuner.

Parameters
+

Sets global size specification type to specified compute API style. In OpenCL, NDrange size is specified as number of work-items in a work-group multiplied by number of work-groups. In CUDA, grid size is specified as number of blocks. This method makes it possible to use OpenCL style in CUDA and vice versa. Default global size type is the one corresponding to the compute API used by the tuner.

Parameters
typeGlobal size type which will be set for tuner. See GlobalSizeType for more information.
@@ -1634,7 +1923,7 @@

+

◆ SetKernelCacheCapacity()

@@ -1649,7 +1938,7 @@

-

Sets capacity of compiled kernel cache used by the tuner. The cache contains recently compiled kernels which are prepared to be launched immediately, eliminating compilation overhead. Using the cache can significantly improve tuner performance during online tuning or iterative kernel running with custom KernelLauncher. Default cache size is 10.

Parameters
+

Sets capacity of compiled kernel cache used by the tuner. The cache contains recently compiled kernels which are prepared to be launched immediately, eliminating compilation overhead. Using the cache can significantly improve tuner performance during online tuning or iterative kernel running with custom KernelLauncher. Default cache size is 10.

Parameters
capacityControls kernel cache capacity. If zero, kernel cache is completely disabled.
@@ -1658,7 +1947,7 @@

+

◆ SetLauncher()

@@ -1683,7 +1972,7 @@

-

Specifies kernel launcher for a kernel. Kernel launcher enables customization of kernel execution. This is useful in multiple cases. E.g., running part of the computation in C++ code, utilizing iterative kernel launches or kernels with multiple definitions.

Parameters
+

Specifies kernel launcher for a kernel. Kernel launcher enables customization of kernel execution. This is useful in multiple cases. E.g., running part of the computation in C++ code, utilizing iterative kernel launches or kernels with multiple definitions.

Parameters
@@ -1693,7 +1982,7 @@

+

◆ SetLoggingLevel()

idId of kernel for which launcher will be set.
launcherLauncher for the specified kernel. See ComputeInterface for further information.
levelLogging level which will be used by tuner. See LoggingLevel for more information.
@@ -1725,7 +2014,7 @@

+

◆ SetLoggingTarget() [1/2]

@@ -1748,7 +2037,7 @@

-

Sets the target for info messages logging to specified file. Default logging target is std::clog.

Parameters
+

Sets the target for info messages logging to specified file. Default logging target is std::clog.

Parameters
filePathPath to file where tuner info messages will be printed.
@@ -1757,7 +2046,7 @@

+

◆ SetLoggingTarget() [2/2]

@@ -1780,7 +2069,7 @@

-

Sets the target for info messages logging to specified output stream. Default logging target is std::clog.

Parameters
+

Sets the target for info messages logging to specified output stream. Default logging target is std::clog.

Parameters
outputTargetLocation where tuner info messages will be printed.
@@ -1789,7 +2078,7 @@

+

◆ SetProfiledDefinitions()

@@ -1814,7 +2103,7 @@

-

Enables profiling of specified kernel definitions. This is useful if only some definitions inside the kernel need to be profiled. By default, profiling is enabled only for the first definition specified during kernel creation. Note that this method has effect only if kernel profiling functionality is enabled. See SetKernelProfiling() method for more information.

Parameters
+

Enables profiling of specified kernel definitions. This is useful if only some definitions inside the kernel need to be profiled. By default, profiling is enabled only for the first definition specified during kernel creation. Note that this method has effect only if kernel profiling functionality is enabled. See SetKernelProfiling() method for more information.

Parameters
@@ -1824,7 +2113,7 @@

+

◆ SetProfiling()

idId of kernel for which the profiled definitions will be set.
definitionIdsIds of definitions inside the kernel for which the profiling will be enabled.
flagIf true, kernel profiling is enabled. It is disabled otherwise.
@@ -1848,7 +2137,7 @@

+

◆ SetProfilingCounters()

@@ -1863,7 +2152,7 @@

-

Specifies profiling counters that will be collected during kernel profiling. Note that not all profiling counters are available on all devices. For the list of old CUDA CUPTI profiling counters, see: https://docs.nvidia.com/cupti/Cupti/r_main.html#metrics-reference For the list of new CUDA CUPTI profiling counters, see: https://docs.nvidia.com/cupti/Cupti/r_main.html#r_host_raw_metrics_api For the list of AMD GPA profiling counters, see: https://gpuperfapi.readthedocs.io/en/latest/counters.html

Parameters
+

Specifies profiling counters that will be collected during kernel profiling. Note that not all profiling counters are available on all devices. For the list of old CUDA CUPTI profiling counters, see: https://docs.nvidia.com/cupti/Cupti/r_main.html#metrics-reference For the list of new CUDA CUPTI profiling counters, see: https://docs.nvidia.com/cupti/Cupti/r_main.html#r_host_raw_metrics_api For the list of AMD GPA profiling counters, see: https://gpuperfapi.readthedocs.io/en/latest/counters.html

Parameters
countersNames of counters that will be collected during kernel profiling.
@@ -1872,7 +2161,7 @@

+

◆ SetReadOnlyArgumentCache()

@@ -1887,7 +2176,7 @@

-

Toggles caching of read-only kernel arguments which have management type set to framework. This can significantly speed up tuning, since arguments are uploaded into compute API buffers only once. Caching is enabled by default. Users who wish to modify read-only arguments inside kernel launcher may wish to disable this behaviour.

Parameters
+

Toggles caching of read-only kernel arguments which have management type set to framework. This can significantly speed up tuning, since arguments are uploaded into compute API buffers only once. Caching is enabled by default. Users who wish to modify read-only arguments inside kernel launcher may want to disable this behaviour.

Parameters
flagIf true, read-only argument caching is enabled. It is disabled otherwise.
@@ -1896,7 +2185,7 @@

+

◆ SetReferenceComputation()

@@ -1921,7 +2210,7 @@

-

Sets reference computation for the specified argument. Reference computation output will be compared to tuned kernel output in order to ensure correctness of computation.

Parameters
+

Sets reference computation for the specified argument. Reference computation output will be compared to tuned kernel output in order to ensure correctness of computation.

Parameters
@@ -1931,7 +2220,7 @@

+

◆ SetReferenceKernel()

idId of argument for which reference computation will be set. Only vector arguments which are not read-only can be validated.
computationFunction which receives memory buffer on input where it stores its computed reference result. The size of buffer matches the size of kernel argument in bytes. If a custom validation range was set, the size of buffer matches the specified range.
@@ -1973,7 +2262,7 @@

+

◆ SetSearcher()

@@ -1998,7 +2287,7 @@

-

Sets searcher which will be used during kernel tuning. If no searcher is specified, DeterministicSearcher will be used.

Parameters
+

Sets searcher which will be used during kernel tuning. If no searcher is specified, DeterministicSearcher will be used.

Parameters

idId of argument for which reference kernel will be set. Only vector arguments which are not read-only can be validated.
referenceIdId of reference kernel.
@@ -2008,7 +2297,7 @@

+

◆ SetTimeUnit()

idId of kernel for which searcher will be set.
searcherSearcher which decides which kernel configuration will be launched next. See Searcher for more information.
unitTime unit which will be used for printing of results. See TimeUnit for more information.
@@ -2040,7 +2329,7 @@

+

◆ SetValidationMethod()

@@ -2065,7 +2354,7 @@

-

Sets validation method and tolerance threshold for floating-point argument validation. Default validation method is side by side comparison. Default tolerance threshold is 1e-4.

Parameters
+

Sets validation method and tolerance threshold for floating-point argument validation. Default validation method is side by side comparison. Default tolerance threshold is 1e-4.

Parameters
@@ -2075,7 +2364,7 @@

+

◆ SetValidationMode()

methodValidation method which will be used for floating-point argument validation. See ValidationMethod for more information.
toleranceThresholdOutput validation threshold. If difference between tuned kernel output and reference output is within the threshold, the tuned kernel output will be considered correct.
modeBitfield of modes under which kernel output validation is enabled. See ValidationMode for more information.
@@ -2099,7 +2388,7 @@

+

◆ SetValidationRange()

@@ -2124,7 +2413,7 @@

-

Sets validation range for the specified argument. The entire argument is validated by default.

Parameters
+

Sets validation range for the specified argument. The entire argument is validated by default.

Parameters
@@ -2134,7 +2423,7 @@

+

◆ SetValueComparator()

idId of argument for which the validation range will be set. Only vector arguments which are not read-only can be validated.
rangeNumber of argument elements which will be validated, starting from the first element.
@@ -2169,7 +2458,7 @@

+

◆ SimulateKernelTuning()

@@ -2200,7 +2489,7 @@

-

Performs simulated tuning process for the specified kernel. The kernel is not tuned, execution times are read from the provided results. Creates configuration space based on combinations of provided kernel parameters and constraints. The configurations will be launched in order that depends on specified Searcher. This method can be used to test behaviour and performance of newly implemented searchers. The provided results should correspond to the results output by the same kernel during regular tuning.

Parameters
+

Performs simulated tuning process for the specified kernel. The kernel is not tuned, execution times are read from the provided results. Creates configuration space based on combinations of provided kernel parameters and constraints. The configurations will be launched in order that depends on specified Searcher. This method can be used to test behaviour and performance of newly implemented searchers. The provided results should correspond to the results output by the same kernel during regular tuning.

Parameters

idId of argument for which the comparator will be set. Only vector arguments which are not read-only can be validated.
comparatorFunction which receives two elements with data type matching the type of specified kernel argument and returns true if the elements are equal. Returns false otherwise.
@@ -2212,7 +2501,7 @@

+

◆ Synchronize()

+ + +

◆ SynchronizeDevice()

+ +
+
+

idId of the kernel for simulated tuning.
resultsResults from which the kernel execution times will be retrieved.
+ + + + + + +
void ktt::Tuner::SynchronizeDevice ()
+

+

Blocks until all commands submitted to KTT device are completed.

+ +
+

+ +

◆ SynchronizeQueue()

+ +
+
+ + + + + + + + +
void ktt::Tuner::SynchronizeQueue (const QueueId id)
+
+

Blocks until all commands submitted to the specified KTT device queue are completed.

Parameters
+ + +
idId of queue which will be synchronized.
+
+
- + +

◆ SynchronizeQueues()

+ +
+
+ + + + + + + +
void ktt::Tuner::SynchronizeQueues ()
+
+

Blocks until all commands submitted to all KTT device queues are completed.

+ +
+
+

◆ Tune() [1/2]

@@ -2245,7 +2594,7 @@

-

Performs the tuning process for specified kernel. Creates configuration space based on combinations of provided kernel parameters and constraints. The configurations will be launched in order that depends on the specified Searcher. Tuning will end when all configurations are explored.

Parameters
+

Performs the tuning process for specified kernel. Creates configuration space based on combinations of provided kernel parameters and constraints. The configurations will be launched in order that depends on the specified Searcher. Tuning will end when all configurations are explored.

Parameters
idId of the tuned kernel.
@@ -2255,7 +2604,7 @@

+

◆ Tune() [2/2]

@@ -2280,7 +2629,7 @@

-

Performs the tuning process for specified kernel. Creates configuration space based on combinations of provided kernel parameters and constraints. The configurations will be launched in order that depends on the specified Searcher. Tuning will end either when all configurations are explored or when the specified stop condition is fulfilled.

Parameters
+

Performs the tuning process for specified kernel. Creates configuration space based on combinations of provided kernel parameters and constraints. The configurations will be launched in order that depends on the specified Searcher. Tuning will end either when all configurations are explored or when the specified stop condition is fulfilled.

Parameters
@@ -2291,7 +2640,7 @@

+

◆ TuneIteration()

idId of the tuned kernel.
stopConditionCondition which decides whether to continue the tuning process. See StopCondition for more information.
@@ -2332,6 +2681,54 @@

Returns
Result containing information about kernel computation in specific configuration. See KernelResult for more information.
+ + + +

◆ WaitForComputeAction()

+ +
+
+
idId of the tuned kernel.
outputUser-provided memory locations for kernel arguments which should be retrieved. See BufferOutputDescriptor for more information.
+ + + + + + + +
void ktt::Tuner::WaitForComputeAction (const ComputeActionId id)
+

+

Blocks until the specified compute action is finished.

Parameters
+ + +
idId of compute action to wait for.
+
+
+ +
+

+ +

◆ WaitForTransferAction()

+ +
+
+ + + + + + + + +
void ktt::Tuner::WaitForTransferAction (const TransferActionId id)
+
+

Blocks until the specified buffer transfer action is finished.

Parameters
+ + +
idId of transfer action to wait for.
+
+
+

The documentation for this class was generated from the following file:
    @@ -2343,7 +2740,7 @@

      - +

diff --git a/Docs/classktt_1_1_tuner.js b/Docs/classktt_1_1_tuner.js index 59ba5171..85556eb3 100644 --- a/Docs/classktt_1_1_tuner.js +++ b/Docs/classktt_1_1_tuner.js @@ -3,12 +3,17 @@ var classktt_1_1_tuner = [ "Tuner", "classktt_1_1_tuner.html#a63c326b21d847e1ec8d2d63cfb2bb698", null ], [ "Tuner", "classktt_1_1_tuner.html#a86b6039192d8740b2a457479e584cdc8", null ], [ "Tuner", "classktt_1_1_tuner.html#a04863df669de4665b5687174ef53908d", null ], + [ "Tuner", "classktt_1_1_tuner.html#a704d563ca882d602f1f4e374fe03fb19", null ], [ "~Tuner", "classktt_1_1_tuner.html#a09198b07768176d263dbee8a6c54b377", null ], [ "AddArgumentLocal", "classktt_1_1_tuner.html#a7f1f558e170b15a155be9c7fac64d3a2", null ], [ "AddArgumentScalar", "classktt_1_1_tuner.html#a8aafe5e4d2e7d89fb1a59ad9069e3f88", null ], + [ "AddArgumentScalar", "classktt_1_1_tuner.html#a0014a51fff4d963f7eccba50ae4ecb28", null ], + [ "AddArgumentSymbol", "classktt_1_1_tuner.html#af45fabe98321bfe3f51bf5011897c69e", null ], [ "AddArgumentVector", "classktt_1_1_tuner.html#aa74cb565dee534d538c85d2d57a4b3f5", null ], + [ "AddArgumentVector", "classktt_1_1_tuner.html#a34b8bb17df50309173a07303fb1b10a7", null ], [ "AddArgumentVector", "classktt_1_1_tuner.html#a4a8fdad5788a9f1c6bb71b731c17d89c", null ], [ "AddArgumentVector", "classktt_1_1_tuner.html#a2b3b932d8a87ff1e2a20b1c791af38dc", null ], + [ "AddComputeQueue", "classktt_1_1_tuner.html#a9894503f98831c5c3f391c5bb5729ed4", null ], [ "AddConstraint", "classktt_1_1_tuner.html#a5cfc141a0f3d3dbb237a76c394d760b8", null ], [ "AddKernelDefinition", "classktt_1_1_tuner.html#a98d47480e40733abfad1780f599a4be8", null ], [ "AddKernelDefinitionFromFile", "classktt_1_1_tuner.html#ad3c8e80a4eefa9dd00b475b237a40e9b", null ], @@ -23,12 +28,14 @@ var classktt_1_1_tuner = [ "GetBestConfiguration", "classktt_1_1_tuner.html#a360361884d6c490d557d9926da3b813d", null ], [ "GetCurrentDeviceInfo", "classktt_1_1_tuner.html#a3a08f0fef2d05fd93913f6a461c8d77a", null ], [ "GetDeviceInfo", 
"classktt_1_1_tuner.html#a4bc8f3f6cca5a25c91a26a78d1581e3f", null ], + [ "GetKernelDefinitionId", "classktt_1_1_tuner.html#a9db4f79c0e1d7cc2e2eb3e194e6a0cb8", null ], [ "GetKernelDefinitionSource", "classktt_1_1_tuner.html#a226f0185c96bcc8f54b1b9deb9d6b4d8", null ], [ "GetKernelSource", "classktt_1_1_tuner.html#afa014f51574f99a828f040895290f3f4", null ], [ "GetPlatformInfo", "classktt_1_1_tuner.html#a5c02e27e4f5631fa2b45d6ed9f7edea1", null ], [ "LoadResults", "classktt_1_1_tuner.html#a5870dcea68c8006b1ca92c90171018b4", null ], [ "LoadResults", "classktt_1_1_tuner.html#a0e2675eb66dd12edc676df06e4a6db5a", null ], [ "RemoveArgument", "classktt_1_1_tuner.html#a9243c05712e47d182385b2e097403ea5", null ], + [ "RemoveComputeQueue", "classktt_1_1_tuner.html#a2016ef58ab7ade647bf93fa43808c78e", null ], [ "RemoveKernel", "classktt_1_1_tuner.html#a95f62e176f33adafdde70dd077fb9e88", null ], [ "RemoveKernelDefinition", "classktt_1_1_tuner.html#a789c4c550bd615bda43afd152d410b6c", null ], [ "Run", "classktt_1_1_tuner.html#a78ced5f956d194f95d1eab209c0da355", null ], @@ -56,7 +63,12 @@ var classktt_1_1_tuner = [ "SetValueComparator", "classktt_1_1_tuner.html#a385b67cfa5ac085f540fe21c1f461bf2", null ], [ "SimulateKernelTuning", "classktt_1_1_tuner.html#acf0bb2189bf6c1210c42b0f1ddd93399", null ], [ "Synchronize", "classktt_1_1_tuner.html#a0c8667aa5703517c100b261a23eed0dd", null ], + [ "SynchronizeDevice", "classktt_1_1_tuner.html#aa38e76c15db1d25743288ebccd67df3b", null ], + [ "SynchronizeQueue", "classktt_1_1_tuner.html#a9f056f4076db595826eea1bdbacbcdcb", null ], + [ "SynchronizeQueues", "classktt_1_1_tuner.html#ad1cc2c4aac72510f028ada44a0283fab", null ], [ "Tune", "classktt_1_1_tuner.html#a7ed0d38ea099aef96218bd95a98e8a06", null ], [ "Tune", "classktt_1_1_tuner.html#a16040c3a6f5603ef337ef4a7057f9b69", null ], - [ "TuneIteration", "classktt_1_1_tuner.html#ab98c514e83ad83f0ecffb7f1e4b48e34", null ] + [ "TuneIteration", "classktt_1_1_tuner.html#ab98c514e83ad83f0ecffb7f1e4b48e34", null 
], + [ "WaitForComputeAction", "classktt_1_1_tuner.html#a42bfecb3a7da093420dcff86ff3a235f", null ], + [ "WaitForTransferAction", "classktt_1_1_tuner.html#a32e8f697b84556c3164575897f7f891a", null ] ]; \ No newline at end of file diff --git a/Docs/classktt_1_1_tuning_duration-members.html b/Docs/classktt_1_1_tuning_duration-members.html index 17c2950f..c075cf8e 100644 --- a/Docs/classktt_1_1_tuning_duration-members.html +++ b/Docs/classktt_1_1_tuning_duration-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,25 +91,24 @@
-
-
ktt::TuningDuration Member List
+
ktt::TuningDuration Member List

This is the complete list of members for ktt::TuningDuration, including all inherited members.

- + - + - +
GetStatusString() const overridektt::TuningDurationvirtual
Initialize(const uint64_t configurationsCount) overridektt::TuningDurationvirtual
Initialize(const uint64_t configurationsCount) overridektt::TuningDurationvirtual
IsFulfilled() const overridektt::TuningDurationvirtual
TuningDuration(const double duration)ktt::TuningDurationexplicit
TuningDuration(const double duration)ktt::TuningDurationexplicit
Update(const KernelResult &result) overridektt::TuningDurationvirtual
~StopCondition()=defaultktt::StopConditionvirtual
~StopCondition()=defaultktt::StopConditionvirtual
diff --git a/Docs/classktt_1_1_tuning_duration.html b/Docs/classktt_1_1_tuning_duration.html index cda2715a..9ebf28a0 100644 --- a/Docs/classktt_1_1_tuning_duration.html +++ b/Docs/classktt_1_1_tuning_duration.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::TuningDuration Class Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -95,8 +94,7 @@ -
-
ktt::TuningDuration Class Reference
+
ktt::TuningDuration Class Reference
@@ -111,7 +109,7 @@
- @@ -126,11 +124,19 @@ + + + + + + + +

+

Public Member Functions

 TuningDuration (const double duration)
 
- Public Member Functions inherited from ktt::StopCondition
virtual ~StopCondition ()=default
 
virtual bool IsFulfilled () const =0
 
virtual void Initialize (const uint64_t configurationsCount)=0
 
virtual void Update (const KernelResult &result)=0
 
virtual std::string GetStatusString () const =0
 

Detailed Description

-

Class which implements stop condition based on total tuning duration.

+

Class which implements stop condition based on total tuning duration.

Constructor & Destructor Documentation

- +

◆ TuningDuration()

- +

◆ Initialize()

- +

◆ Update()

diff --git a/Docs/dir_0b41b623c32abd77cf87d94fa0e997df.html b/Docs/dir_0b41b623c32abd77cf87d94fa0e997df.html index f1f13c28..e704eeca 100644 --- a/Docs/dir_0b41b623c32abd77cf87d94fa0e997df.html +++ b/Docs/dir_0b41b623c32abd77cf87d94fa0e997df.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/ComputeEngine Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,16 +91,15 @@
-
-
ComputeEngine Directory Reference
+
ComputeEngine Directory Reference
- - + - +

+

Files

file  ComputeApi.h [code]
file  ComputeApi.h [code]
 
file  GlobalSizeType.h [code]
file  GlobalSizeType.h [code]
 
@@ -110,7 +108,7 @@ diff --git a/Docs/dir_4027c6dcc37a2421c6b4a72f50c05b6d.html b/Docs/dir_4027c6dcc37a2421c6b4a72f50c05b6d.html index 590c8c37..1d8a5f77 100644 --- a/Docs/dir_4027c6dcc37a2421c6b4a72f50c05b6d.html +++ b/Docs/dir_4027c6dcc37a2421c6b4a72f50c05b6d.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Output Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,26 +91,27 @@
-
-
Output Directory Reference
+
Output Directory Reference
@@ -120,7 +120,7 @@ diff --git a/Docs/dir_4027c6dcc37a2421c6b4a72f50c05b6d.js b/Docs/dir_4027c6dcc37a2421c6b4a72f50c05b6d.js index 25ef01bc..1db512e6 100644 --- a/Docs/dir_4027c6dcc37a2421c6b4a72f50c05b6d.js +++ b/Docs/dir_4027c6dcc37a2421c6b4a72f50c05b6d.js @@ -1,19 +1,19 @@ var dir_4027c6dcc37a2421c6b4a72f50c05b6d = [ [ "BufferOutputDescriptor.h", "_buffer_output_descriptor_8h.html", [ - [ "BufferOutputDescriptor", "classktt_1_1_buffer_output_descriptor.html", "classktt_1_1_buffer_output_descriptor" ] + [ "ktt::BufferOutputDescriptor", "classktt_1_1_buffer_output_descriptor.html", "classktt_1_1_buffer_output_descriptor" ] ] ], [ "ComputationResult.h", "_computation_result_8h.html", [ - [ "ComputationResult", "classktt_1_1_computation_result.html", "classktt_1_1_computation_result" ] + [ "ktt::ComputationResult", "classktt_1_1_computation_result.html", "classktt_1_1_computation_result" ] ] ], [ "KernelCompilationData.h", "_kernel_compilation_data_8h.html", [ - [ "KernelCompilationData", "structktt_1_1_kernel_compilation_data.html", "structktt_1_1_kernel_compilation_data" ] + [ "ktt::KernelCompilationData", "structktt_1_1_kernel_compilation_data.html", "structktt_1_1_kernel_compilation_data" ] ] ], [ "KernelProfilingCounter.h", "_kernel_profiling_counter_8h.html", [ - [ "KernelProfilingCounter", "classktt_1_1_kernel_profiling_counter.html", "classktt_1_1_kernel_profiling_counter" ] + [ "ktt::KernelProfilingCounter", "classktt_1_1_kernel_profiling_counter.html", "classktt_1_1_kernel_profiling_counter" ] ] ], [ "KernelProfilingData.h", "_kernel_profiling_data_8h.html", [ - [ "KernelProfilingData", "classktt_1_1_kernel_profiling_data.html", "classktt_1_1_kernel_profiling_data" ] + [ "ktt::KernelProfilingData", "classktt_1_1_kernel_profiling_data.html", "classktt_1_1_kernel_profiling_data" ] ] ], [ "KernelResult.h", "_kernel_result_8h_source.html", null ], [ "ProfilingCounterType.h", "_profiling_counter_type_8h.html", "_profiling_counter_type_8h" ], diff --git 
a/Docs/dir_44a111874746047cefd3f7a73e059188.html b/Docs/dir_44a111874746047cefd3f7a73e059188.html index cc8e20c2..a7772581 100644 --- a/Docs/dir_44a111874746047cefd3f7a73e059188.html +++ b/Docs/dir_44a111874746047cefd3f7a73e059188.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Output Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,17 +91,18 @@
-
-
Output Directory Reference
+
Output Directory Reference
- + +

+

Directories

directory  TimeConfiguration
 
- - +

+

Files

file  OutputFormat.h [code]
file  OutputFormat.h [code]
 
@@ -111,7 +111,7 @@ diff --git a/Docs/dir_49c4ffe5cd0c89340eeffad86272a62d.html b/Docs/dir_49c4ffe5cd0c89340eeffad86272a62d.html index 3d7ba233..6e44bb06 100644 --- a/Docs/dir_49c4ffe5cd0c89340eeffad86272a62d.html +++ b/Docs/dir_49c4ffe5cd0c89340eeffad86272a62d.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/StopCondition Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,22 +91,21 @@
-
-
StopCondition Directory Reference
+
StopCondition Directory Reference
@@ -116,7 +114,7 @@ diff --git a/Docs/dir_49c4ffe5cd0c89340eeffad86272a62d.js b/Docs/dir_49c4ffe5cd0c89340eeffad86272a62d.js index 84aa19ca..614f230b 100644 --- a/Docs/dir_49c4ffe5cd0c89340eeffad86272a62d.js +++ b/Docs/dir_49c4ffe5cd0c89340eeffad86272a62d.js @@ -1,18 +1,18 @@ var dir_49c4ffe5cd0c89340eeffad86272a62d = [ [ "ConfigurationCount.h", "_configuration_count_8h.html", [ - [ "ConfigurationCount", "classktt_1_1_configuration_count.html", "classktt_1_1_configuration_count" ] + [ "ktt::ConfigurationCount", "classktt_1_1_configuration_count.html", "classktt_1_1_configuration_count" ] ] ], [ "ConfigurationDuration.h", "_configuration_duration_8h.html", [ - [ "ConfigurationDuration", "classktt_1_1_configuration_duration.html", "classktt_1_1_configuration_duration" ] + [ "ktt::ConfigurationDuration", "classktt_1_1_configuration_duration.html", "classktt_1_1_configuration_duration" ] ] ], [ "ConfigurationFraction.h", "_configuration_fraction_8h.html", [ - [ "ConfigurationFraction", "classktt_1_1_configuration_fraction.html", "classktt_1_1_configuration_fraction" ] + [ "ktt::ConfigurationFraction", "classktt_1_1_configuration_fraction.html", "classktt_1_1_configuration_fraction" ] ] ], [ "StopCondition.h", "_stop_condition_8h.html", [ - [ "StopCondition", "classktt_1_1_stop_condition.html", "classktt_1_1_stop_condition" ] + [ "ktt::StopCondition", "classktt_1_1_stop_condition.html", "classktt_1_1_stop_condition" ] ] ], [ "TuningDuration.h", "_tuning_duration_8h.html", [ - [ "TuningDuration", "classktt_1_1_tuning_duration.html", "classktt_1_1_tuning_duration" ] + [ "ktt::TuningDuration", "classktt_1_1_tuning_duration.html", "classktt_1_1_tuning_duration" ] ] ] ]; \ No newline at end of file diff --git a/Docs/dir_548aaaa9c70dca9b5c9a52a3e1ba8d1d.html b/Docs/dir_548aaaa9c70dca9b5c9a52a3e1ba8d1d.html index 444804cb..a1de9a9a 100644 --- a/Docs/dir_548aaaa9c70dca9b5c9a52a3e1ba8d1d.html +++ b/Docs/dir_548aaaa9c70dca9b5c9a52a3e1ba8d1d.html @@ -2,8 +2,8 @@ - - + + Kernel 
Tuning Toolkit: Docs/Resources Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,8 +91,7 @@
-
-
Resources Directory Reference
+
Resources Directory Reference
@@ -102,7 +100,7 @@ diff --git a/Docs/dir_5baf679cf4943df8904dce5d41d4d3dc.html b/Docs/dir_5baf679cf4943df8904dce5d41d4d3dc.html index fde668a3..8954ee30 100644 --- a/Docs/dir_5baf679cf4943df8904dce5d41d4d3dc.html +++ b/Docs/dir_5baf679cf4943df8904dce5d41d4d3dc.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Docs Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,8 +91,7 @@
-
-
Docs Directory Reference
+
Docs Directory Reference
@@ -102,7 +100,7 @@ diff --git a/Docs/dir_5d7cbe695353800ab9a622e4d83d7d91.html b/Docs/dir_5d7cbe695353800ab9a622e4d83d7d91.html index d4bb45e2..0342cbc4 100644 --- a/Docs/dir_5d7cbe695353800ab9a622e4d83d7d91.html +++ b/Docs/dir_5d7cbe695353800ab9a622e4d83d7d91.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,23 +91,32 @@
-
-
Api Directory Reference
+
Api Directory Reference
- + + + + + + + + + +

+

Directories

directory  Configuration
 
directory  Info
 
directory  Output
 
directory  Searcher
 
directory  StopCondition
 
- - + - + - + - +

+

Files

file  ComputeApiInitializer.h [code]
file  ComputeApiInitializer.h [code]
 
file  ComputeInterface.h [code]
file  ComputeInterface.h [code]
 
file  ExceptionReason.h [code]
file  ExceptionReason.h [code]
 
file  KttException.h [code]
file  KttException.h [code]
 
@@ -117,7 +125,7 @@ diff --git a/Docs/dir_5d7cbe695353800ab9a622e4d83d7d91.js b/Docs/dir_5d7cbe695353800ab9a622e4d83d7d91.js index 425a1311..5d8bcf3d 100644 --- a/Docs/dir_5d7cbe695353800ab9a622e4d83d7d91.js +++ b/Docs/dir_5d7cbe695353800ab9a622e4d83d7d91.js @@ -6,13 +6,13 @@ var dir_5d7cbe695353800ab9a622e4d83d7d91 = [ "Searcher", "dir_c6d7e35021e38f2156a4334dff14b70a.html", "dir_c6d7e35021e38f2156a4334dff14b70a" ], [ "StopCondition", "dir_49c4ffe5cd0c89340eeffad86272a62d.html", "dir_49c4ffe5cd0c89340eeffad86272a62d" ], [ "ComputeApiInitializer.h", "_compute_api_initializer_8h.html", [ - [ "ComputeApiInitializer", "classktt_1_1_compute_api_initializer.html", "classktt_1_1_compute_api_initializer" ] + [ "ktt::ComputeApiInitializer", "classktt_1_1_compute_api_initializer.html", "classktt_1_1_compute_api_initializer" ] ] ], [ "ComputeInterface.h", "_compute_interface_8h.html", [ - [ "ComputeInterface", "classktt_1_1_compute_interface.html", "classktt_1_1_compute_interface" ] + [ "ktt::ComputeInterface", "classktt_1_1_compute_interface.html", "classktt_1_1_compute_interface" ] ] ], [ "ExceptionReason.h", "_exception_reason_8h.html", "_exception_reason_8h" ], [ "KttException.h", "_ktt_exception_8h.html", [ - [ "KttException", "classktt_1_1_ktt_exception.html", "classktt_1_1_ktt_exception" ] + [ "ktt::KttException", "classktt_1_1_ktt_exception.html", "classktt_1_1_ktt_exception" ] ] ] ]; \ No newline at end of file diff --git a/Docs/dir_6130151139ac5df9513b64611b721a4c.html b/Docs/dir_6130151139ac5df9513b64611b721a4c.html index 49f8f8f0..a75d22e7 100644 --- a/Docs/dir_6130151139ac5df9513b64611b721a4c.html +++ b/Docs/dir_6130151139ac5df9513b64611b721a4c.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Kernel Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,18 +91,17 @@
-
-
Kernel Directory Reference
+
Kernel Directory Reference
@@ -112,7 +110,7 @@ diff --git a/Docs/dir_74389ed8173ad57b461b9d623a1f3867.html b/Docs/dir_74389ed8173ad57b461b9d623a1f3867.html index c3ab2e90..2b87b557 100644 --- a/Docs/dir_74389ed8173ad57b461b9d623a1f3867.html +++ b/Docs/dir_74389ed8173ad57b461b9d623a1f3867.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,25 +91,34 @@
-
-
Source Directory Reference
+
Source Directory Reference
- - + + + + + + + + + + +

+

Directories

directory  Utility
directory  Api
 
directory  ComputeEngine
 
directory  Kernel
 
directory  KernelArgument
 
directory  KernelRunner
 
directory  Output
 
- - + - + - + - +

+

Files

file  Ktt.h [code]
file  Ktt.h [code]
 
file  KttPlatform.h [code]
file  KttPlatform.h [code]
 
file  KttTypes.h [code]
file  KttTypes.h [code]
 
file  Tuner.h [code]
file  Tuner.h [code]
 
@@ -119,7 +127,7 @@ diff --git a/Docs/dir_74389ed8173ad57b461b9d623a1f3867.js b/Docs/dir_74389ed8173ad57b461b9d623a1f3867.js index 0f8f342e..59aaeb47 100644 --- a/Docs/dir_74389ed8173ad57b461b9d623a1f3867.js +++ b/Docs/dir_74389ed8173ad57b461b9d623a1f3867.js @@ -11,6 +11,6 @@ var dir_74389ed8173ad57b461b9d623a1f3867 = [ "KttPlatform.h", "_ktt_platform_8h.html", "_ktt_platform_8h" ], [ "KttTypes.h", "_ktt_types_8h.html", "_ktt_types_8h" ], [ "Tuner.h", "_tuner_8h.html", [ - [ "Tuner", "classktt_1_1_tuner.html", "classktt_1_1_tuner" ] + [ "ktt::Tuner", "classktt_1_1_tuner.html", "classktt_1_1_tuner" ] ] ] ]; \ No newline at end of file diff --git a/Docs/dir_80eb1528bafb2f92d16ff19227bfa56f.html b/Docs/dir_80eb1528bafb2f92d16ff19227bfa56f.html index b744623b..84d00d06 100644 --- a/Docs/dir_80eb1528bafb2f92d16ff19227bfa56f.html +++ b/Docs/dir_80eb1528bafb2f92d16ff19227bfa56f.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Output/TimeConfiguration Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,14 +91,13 @@
-
-
TimeConfiguration Directory Reference
+
TimeConfiguration Directory Reference
- - +

+

Files

file  TimeUnit.h [code]
file  TimeUnit.h [code]
 
@@ -108,7 +106,7 @@ diff --git a/Docs/dir_8307084b275792eca5324684a4bab095.html b/Docs/dir_8307084b275792eca5324684a4bab095.html index 4b7b23a8..3b7c41a7 100644 --- a/Docs/dir_8307084b275792eca5324684a4bab095.html +++ b/Docs/dir_8307084b275792eca5324684a4bab095.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelArgument Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,22 +91,21 @@
-
-
KernelArgument Directory Reference
+
KernelArgument Directory Reference
@@ -116,7 +114,7 @@ diff --git a/Docs/dir_9494064b2e4fad632a20bee9cc16985c.html b/Docs/dir_9494064b2e4fad632a20bee9cc16985c.html index f67a78c6..cb79821a 100644 --- a/Docs/dir_9494064b2e4fad632a20bee9cc16985c.html +++ b/Docs/dir_9494064b2e4fad632a20bee9cc16985c.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Info Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,18 +91,17 @@
-
-
Info Directory Reference
+
Info Directory Reference
- - + - + - +

+

Files

file  DeviceInfo.h [code]
file  DeviceInfo.h [code]
 
file  DeviceType.h [code]
file  DeviceType.h [code]
 
file  PlatformInfo.h [code]
file  PlatformInfo.h [code]
 
@@ -112,7 +110,7 @@ diff --git a/Docs/dir_9494064b2e4fad632a20bee9cc16985c.js b/Docs/dir_9494064b2e4fad632a20bee9cc16985c.js index 964f0713..0dbaa629 100644 --- a/Docs/dir_9494064b2e4fad632a20bee9cc16985c.js +++ b/Docs/dir_9494064b2e4fad632a20bee9cc16985c.js @@ -1,10 +1,10 @@ var dir_9494064b2e4fad632a20bee9cc16985c = [ [ "DeviceInfo.h", "_device_info_8h.html", [ - [ "DeviceInfo", "classktt_1_1_device_info.html", "classktt_1_1_device_info" ] + [ "ktt::DeviceInfo", "classktt_1_1_device_info.html", "classktt_1_1_device_info" ] ] ], [ "DeviceType.h", "_device_type_8h.html", "_device_type_8h" ], [ "PlatformInfo.h", "_platform_info_8h.html", [ - [ "PlatformInfo", "classktt_1_1_platform_info.html", "classktt_1_1_platform_info" ] + [ "ktt::PlatformInfo", "classktt_1_1_platform_info.html", "classktt_1_1_platform_info" ] ] ] ]; \ No newline at end of file diff --git a/Docs/dir_99be523abdf748dd5dce316df9c29ee3.html b/Docs/dir_99be523abdf748dd5dce316df9c29ee3.html index 1347f82d..efc76aba 100644 --- a/Docs/dir_99be523abdf748dd5dce316df9c29ee3.html +++ b/Docs/dir_99be523abdf748dd5dce316df9c29ee3.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Configuration Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,18 +91,17 @@
-
-
Configuration Directory Reference
+
Configuration Directory Reference
@@ -112,7 +110,7 @@ diff --git a/Docs/dir_99be523abdf748dd5dce316df9c29ee3.js b/Docs/dir_99be523abdf748dd5dce316df9c29ee3.js index fcbaac66..7710a6a0 100644 --- a/Docs/dir_99be523abdf748dd5dce316df9c29ee3.js +++ b/Docs/dir_99be523abdf748dd5dce316df9c29ee3.js @@ -1,12 +1,12 @@ var dir_99be523abdf748dd5dce316df9c29ee3 = [ [ "DimensionVector.h", "_dimension_vector_8h.html", [ - [ "DimensionVector", "classktt_1_1_dimension_vector.html", "classktt_1_1_dimension_vector" ] + [ "ktt::DimensionVector", "classktt_1_1_dimension_vector.html", "classktt_1_1_dimension_vector" ] ] ], [ "KernelConfiguration.h", "_kernel_configuration_8h.html", [ - [ "KernelConfiguration", "classktt_1_1_kernel_configuration.html", "classktt_1_1_kernel_configuration" ] + [ "ktt::KernelConfiguration", "classktt_1_1_kernel_configuration.html", "classktt_1_1_kernel_configuration" ] ] ], [ "ParameterPair.h", "_parameter_pair_8h.html", [ - [ "ParameterPair", "classktt_1_1_parameter_pair.html", "classktt_1_1_parameter_pair" ] + [ "ktt::ParameterPair", "classktt_1_1_parameter_pair.html", "classktt_1_1_parameter_pair" ] ] ] ]; \ No newline at end of file diff --git a/Docs/dir_a84262e66930ad9071f68edb7a79274b.html b/Docs/dir_a84262e66930ad9071f68edb7a79274b.html index 97247a73..d972ab54 100644 --- a/Docs/dir_a84262e66930ad9071f68edb7a79274b.html +++ b/Docs/dir_a84262e66930ad9071f68edb7a79274b.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Utility Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,13 +91,14 @@
-
-
Utility Directory Reference
+
Utility Directory Reference
- + +

+

Directories

directory  Logger
 
@@ -106,7 +106,7 @@ diff --git a/Docs/dir_b86836361da653cbb02f0dbb253faee5.html b/Docs/dir_b86836361da653cbb02f0dbb253faee5.html index c24db55d..54dc878b 100644 --- a/Docs/dir_b86836361da653cbb02f0dbb253faee5.html +++ b/Docs/dir_b86836361da653cbb02f0dbb253faee5.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/KernelRunner Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,16 +91,17 @@
-
-
KernelRunner Directory Reference
+
KernelRunner Directory Reference
@@ -110,7 +110,7 @@ diff --git a/Docs/dir_b86836361da653cbb02f0dbb253faee5.js b/Docs/dir_b86836361da653cbb02f0dbb253faee5.js index 364d423d..50528da9 100644 --- a/Docs/dir_b86836361da653cbb02f0dbb253faee5.js +++ b/Docs/dir_b86836361da653cbb02f0dbb253faee5.js @@ -1,5 +1,6 @@ var dir_b86836361da653cbb02f0dbb253faee5 = [ + [ "KernelRunMode.h", "_kernel_run_mode_8h.html", "_kernel_run_mode_8h" ], [ "ValidationMethod.h", "_validation_method_8h.html", "_validation_method_8h" ], [ "ValidationMode.h", "_validation_mode_8h.html", "_validation_mode_8h" ] ]; \ No newline at end of file diff --git a/Docs/dir_c6d7e35021e38f2156a4334dff14b70a.html b/Docs/dir_c6d7e35021e38f2156a4334dff14b70a.html index fe7b936d..e856b564 100644 --- a/Docs/dir_c6d7e35021e38f2156a4334dff14b70a.html +++ b/Docs/dir_c6d7e35021e38f2156a4334dff14b70a.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Api/Searcher Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,20 +91,19 @@
-
-
Searcher Directory Reference
+
Searcher Directory Reference
@@ -114,7 +112,7 @@ diff --git a/Docs/dir_c6d7e35021e38f2156a4334dff14b70a.js b/Docs/dir_c6d7e35021e38f2156a4334dff14b70a.js index 90204352..125c9b95 100644 --- a/Docs/dir_c6d7e35021e38f2156a4334dff14b70a.js +++ b/Docs/dir_c6d7e35021e38f2156a4334dff14b70a.js @@ -1,15 +1,15 @@ var dir_c6d7e35021e38f2156a4334dff14b70a = [ [ "DeterministicSearcher.h", "_deterministic_searcher_8h.html", [ - [ "DeterministicSearcher", "classktt_1_1_deterministic_searcher.html", "classktt_1_1_deterministic_searcher" ] + [ "ktt::DeterministicSearcher", "classktt_1_1_deterministic_searcher.html", "classktt_1_1_deterministic_searcher" ] ] ], [ "McmcSearcher.h", "_mcmc_searcher_8h.html", [ - [ "McmcSearcher", "classktt_1_1_mcmc_searcher.html", "classktt_1_1_mcmc_searcher" ] + [ "ktt::McmcSearcher", "classktt_1_1_mcmc_searcher.html", "classktt_1_1_mcmc_searcher" ] ] ], [ "RandomSearcher.h", "_random_searcher_8h.html", [ - [ "RandomSearcher", "classktt_1_1_random_searcher.html", "classktt_1_1_random_searcher" ] + [ "ktt::RandomSearcher", "classktt_1_1_random_searcher.html", "classktt_1_1_random_searcher" ] ] ], [ "Searcher.h", "_searcher_8h.html", [ - [ "Searcher", "classktt_1_1_searcher.html", "classktt_1_1_searcher" ] + [ "ktt::Searcher", "classktt_1_1_searcher.html", "classktt_1_1_searcher" ] ] ] ]; \ No newline at end of file diff --git a/Docs/dir_f3e3b7636ae42a1b29e0b190d8ce34c2.html b/Docs/dir_f3e3b7636ae42a1b29e0b190d8ce34c2.html index 488685b7..c2d6f8ce 100644 --- a/Docs/dir_f3e3b7636ae42a1b29e0b190d8ce34c2.html +++ b/Docs/dir_f3e3b7636ae42a1b29e0b190d8ce34c2.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Source/Utility/Logger Directory Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,14 +91,13 @@
-
-
Logger Directory Reference
+
Logger Directory Reference
- - +

+

Files

file  LoggingLevel.h [code]
file  LoggingLevel.h [code]
 
@@ -108,7 +106,7 @@ diff --git a/Docs/doxygen.css b/Docs/doxygen.css index ffbff022..90367372 100644 --- a/Docs/doxygen.css +++ b/Docs/doxygen.css @@ -1,4 +1,4 @@ -/* The standard CSS for doxygen 1.9.1 */ +/* The standard CSS for doxygen 1.9.3 */ body, table, div, p, dl { font: 400 14px/22px Roboto,sans-serif; @@ -228,6 +228,33 @@ a.codeRef, a.codeRef:visited, a.lineRef, a.lineRef:visited { color: #4665A2; } +a.code.hl_class { /* style for links to class names in code snippets */ } +a.code.hl_struct { /* style for links to struct names in code snippets */ } +a.code.hl_union { /* style for links to union names in code snippets */ } +a.code.hl_interface { /* style for links to interface names in code snippets */ } +a.code.hl_protocol { /* style for links to protocol names in code snippets */ } +a.code.hl_category { /* style for links to category names in code snippets */ } +a.code.hl_exception { /* style for links to exception names in code snippets */ } +a.code.hl_service { /* style for links to service names in code snippets */ } +a.code.hl_singleton { /* style for links to singleton names in code snippets */ } +a.code.hl_concept { /* style for links to concept names in code snippets */ } +a.code.hl_namespace { /* style for links to namespace names in code snippets */ } +a.code.hl_package { /* style for links to package names in code snippets */ } +a.code.hl_define { /* style for links to macro names in code snippets */ } +a.code.hl_function { /* style for links to function names in code snippets */ } +a.code.hl_variable { /* style for links to variable names in code snippets */ } +a.code.hl_typedef { /* style for links to typedef names in code snippets */ } +a.code.hl_enumvalue { /* style for links to enum value names in code snippets */ } +a.code.hl_enumeration { /* style for links to enumeration names in code snippets */ } +a.code.hl_signal { /* style for links to Qt signal names in code snippets */ } +a.code.hl_slot { /* style for links to Qt slot names in code 
snippets */ } +a.code.hl_friend { /* style for links to friend names in code snippets */ } +a.code.hl_dcop { /* style for links to KDE3 DCOP names in code snippets */ } +a.code.hl_property { /* style for links to property names in code snippets */ } +a.code.hl_event { /* style for links to event names in code snippets */ } +a.code.hl_sequence { /* style for links to sequence names in code snippets */ } +a.code.hl_dictionary { /* style for links to dictionary names in code snippets */ } + /* @end */ dl.el { @@ -235,7 +262,7 @@ dl.el { } ul { - overflow: hidden; /*Fixed: list item bullets overlap floating elements*/ + overflow: visible; } #side-nav ul { @@ -313,6 +340,7 @@ div.line.glow { span.lineno { padding-right: 4px; + margin-right: 9px; text-align: right; border-right: 2px solid #0F0; background-color: #E8E8E8; @@ -439,6 +467,12 @@ img.footer { vertical-align: middle; } +.compoundTemplParams { + color: #4665A2; + font-size: 80%; + line-height: 120%; +} + /* @group Code Colorization */ span.keyword { @@ -1322,6 +1356,11 @@ dl.section dd { } +#projectrow +{ + height: 56px; +} + #projectlogo { text-align: center; @@ -1337,18 +1376,19 @@ dl.section dd { #projectalign { vertical-align: middle; + padding-left: 0.5em; } #projectname { - font: 300% Tahoma, Arial,sans-serif; + font: 200% Tahoma, Arial,sans-serif; margin: 0px; padding: 2px 0px; } #projectbrief { - font: 120% Tahoma, Arial,sans-serif; + font: 90% Tahoma, Arial,sans-serif; margin: 0px; padding: 0px; } @@ -1487,6 +1527,10 @@ span.emoji { */ } +span.obfuscator { + display: none; +} + .PageDocRTL-title div.toc li.level1 { margin-left: 0 !important; margin-right: 0; @@ -1541,7 +1585,7 @@ tr.heading h2 { #powerTip { cursor: default; - white-space: nowrap; + /*white-space: nowrap;*/ background-color: white; border: 1px solid gray; border-radius: 4px 4px 4px 4px; @@ -1780,6 +1824,10 @@ table.DocNodeLTR { margin-left: 0; } +code.JavaDocCode + direction:ltr; +} + tt, code, kbd, samp { display: inline-block; diff 
--git a/Docs/files.html b/Docs/files.html index 4889bb75..b4b35bfa 100644 --- a/Docs/files.html +++ b/Docs/files.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: File List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - - - - - - - - - - - - - - + + + + + + + + + + + + + +
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,8 +91,7 @@
-
-
File List
+
File List
Here is a list of all documented files with brief descriptions:
@@ -146,19 +144,20 @@
 ArgumentMemoryLocation.h
 ArgumentMemoryType.h
  KernelRunner
 ValidationMethod.h
 ValidationMode.h
  Output
  TimeConfiguration
 TimeUnit.h
 OutputFormat.h
  Utility
  Logger
 LoggingLevel.h
 Ktt.h
 KttPlatform.h
 KttTypes.h
 Tuner.h
 KernelRunMode.h
 ValidationMethod.h
 ValidationMode.h
  Output
  TimeConfiguration
 TimeUnit.h
 OutputFormat.h
  Utility
  Logger
 LoggingLevel.h
 Ktt.h
 KttPlatform.h
 KttTypes.h
 Tuner.h
@@ -166,7 +165,7 @@ diff --git a/Docs/functions.html b/Docs/functions.html index 2f946d61..7f688d32 100644 --- a/Docs/functions.html +++ b/Docs/functions.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,41 +93,25 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- a -

diff --git a/Docs/functions_b.html b/Docs/functions_b.html index abf20813..9c593564 100644 --- a/Docs/functions_b.html +++ b/Docs/functions_b.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,17 +93,15 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- b -

diff --git a/Docs/functions_c.html b/Docs/functions_c.html index d16327ee..86c2457b 100644 --- a/Docs/functions_c.html +++ b/Docs/functions_c.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,59 +93,28 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- c -

diff --git a/Docs/functions_d.html b/Docs/functions_d.html index 54d6698b..4436a034 100644 --- a/Docs/functions_d.html +++ b/Docs/functions_d.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,35 +93,21 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- d -

diff --git a/Docs/functions_func.html b/Docs/functions_func.html index 302c22c5..25110683 100644 --- a/Docs/functions_func.html +++ b/Docs/functions_func.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,41 +93,25 @@
  -

- a -

diff --git a/Docs/functions_func_b.html b/Docs/functions_func_b.html index e23fe81f..99365b02 100644 --- a/Docs/functions_func_b.html +++ b/Docs/functions_func_b.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,17 +93,15 @@
  -

- b -

diff --git a/Docs/functions_func_c.html b/Docs/functions_func_c.html index 62869bc5..d9765891 100644 --- a/Docs/functions_func_c.html +++ b/Docs/functions_func_c.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,59 +93,28 @@
  -

- c -

diff --git a/Docs/functions_func_d.html b/Docs/functions_func_d.html index 7ad0a3b9..67743d44 100644 --- a/Docs/functions_func_d.html +++ b/Docs/functions_func_d.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,35 +93,21 @@
  -

- d -

diff --git a/Docs/functions_func_g.html b/Docs/functions_func_g.html index 2646eb6d..b8a861e4 100644 --- a/Docs/functions_func_g.html +++ b/Docs/functions_func_g.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,255 +93,89 @@
  -

- g -

diff --git a/Docs/functions_func_h.html b/Docs/functions_func_h.html index 667f58c3..f1f53275 100644 --- a/Docs/functions_func_h.html +++ b/Docs/functions_func_h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,37 +93,21 @@
  -

- h -

diff --git a/Docs/functions_func_i.html b/Docs/functions_func_i.html index 5a58af23..882bd225 100644 --- a/Docs/functions_func_i.html +++ b/Docs/functions_func_i.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,37 +93,18 @@ diff --git a/Docs/functions_func_k.html b/Docs/functions_func_k.html index f44e0efb..bc66dd70 100644 --- a/Docs/functions_func_k.html +++ b/Docs/functions_func_k.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,32 +93,20 @@
  -

- k -

diff --git a/Docs/functions_func_l.html b/Docs/functions_func_l.html index e1013de9..58890784 100644 --- a/Docs/functions_func_l.html +++ b/Docs/functions_func_l.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,17 +93,15 @@
  -

- l -

diff --git a/Docs/functions_func_m.html b/Docs/functions_func_m.html index d99c9331..b9b50904 100644 --- a/Docs/functions_func_m.html +++ b/Docs/functions_func_m.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,26 +93,18 @@
  -

- m -

diff --git a/Docs/functions_func_o.html b/Docs/functions_func_o.html index 028e3c20..c15a44a6 100644 --- a/Docs/functions_func_o.html +++ b/Docs/functions_func_o.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,40 +93,20 @@ diff --git a/Docs/functions_func_p.html b/Docs/functions_func_p.html index 9044b808..01ae26d0 100644 --- a/Docs/functions_func_p.html +++ b/Docs/functions_func_p.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,20 +93,16 @@
  -

- p -

diff --git a/Docs/functions_func_r.html b/Docs/functions_func_r.html index cc5a837b..6891030e 100644 --- a/Docs/functions_func_r.html +++ b/Docs/functions_func_r.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,47 +93,26 @@
  -

- r -

diff --git a/Docs/functions_func_s.html b/Docs/functions_func_s.html index c67dabff..599c9cf4 100644 --- a/Docs/functions_func_s.html +++ b/Docs/functions_func_s.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,163 +93,64 @@
  -

- s -

diff --git a/Docs/functions_func_t.html b/Docs/functions_func_t.html index f430668d..c322e260 100644 --- a/Docs/functions_func_t.html +++ b/Docs/functions_func_t.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,26 +93,18 @@
  -

- t -

diff --git a/Docs/functions_func_u.html b/Docs/functions_func_u.html index 7167f83d..882d9ce4 100644 --- a/Docs/functions_func_u.html +++ b/Docs/functions_func_u.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,39 +93,21 @@
  -

- u -

diff --git a/Docs/functions_func_w.html b/Docs/functions_func_w.html index 856853ed..ef9c47f9 100644 --- a/Docs/functions_func_w.html +++ b/Docs/functions_func_w.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,23 +93,17 @@
  -

- w -

diff --git a/Docs/functions_func_~.html b/Docs/functions_func_~.html index d5ad7a0c..10de30b1 100644 --- a/Docs/functions_func_~.html +++ b/Docs/functions_func_~.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Functions @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,26 +93,18 @@
  -

- ~ -

diff --git a/Docs/functions_g.html b/Docs/functions_g.html index 73fb3180..fceb3e44 100644 --- a/Docs/functions_g.html +++ b/Docs/functions_g.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,255 +93,89 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- g -

diff --git a/Docs/functions_h.html b/Docs/functions_h.html index 913879b9..46b29998 100644 --- a/Docs/functions_h.html +++ b/Docs/functions_h.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + -
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,37 +93,21 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- h -

diff --git a/Docs/functions_i.html b/Docs/functions_i.html index b62aeaeb..32c6100c 100644 --- a/Docs/functions_i.html +++ b/Docs/functions_i.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -1292,24 +1334,24 @@

-

Enum for enabling kernel output validation in different scenarios.

+

Enum for enabling kernel output validation in different scenarios.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,37 +93,18 @@ diff --git a/Docs/functions_k.html b/Docs/functions_k.html index 62e9d57c..e4ff7d11 100644 --- a/Docs/functions_k.html +++ b/Docs/functions_k.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -1261,19 +1303,19 @@

-

Enum for validation method used during validation of floating-point output arguments.

+

Enum for validation method used during validation of floating-point output arguments.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,32 +93,20 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- k -

diff --git a/Docs/functions_l.html b/Docs/functions_l.html index dc929380..9e94700c 100644 --- a/Docs/functions_l.html +++ b/Docs/functions_l.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -1228,21 +1270,21 @@

-

Enum for time unit used during logging and output operations.

+

Enum for time unit used during logging and output operations.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,17 +93,15 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- l -

diff --git a/Docs/functions_m.html b/Docs/functions_m.html index d9af0a60..0d713fda 100644 --- a/Docs/functions_m.html +++ b/Docs/functions_m.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -1193,23 +1235,23 @@

-

Enum which describes status of a kernel result.

+

Enum which describes status of a kernel result.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,44 +93,24 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- m -

diff --git a/Docs/functions_o.html b/Docs/functions_o.html index 9510166e..e847e53e 100644 --- a/Docs/functions_o.html +++ b/Docs/functions_o.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -1156,25 +1198,25 @@

-

Enum which specifies data type of a profiling counter.

+

Enum which specifies data type of a profiling counter.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,40 +93,20 @@ diff --git a/Docs/functions_p.html b/Docs/functions_p.html index 6c654496..4a4faa2d 100644 --- a/Docs/functions_p.html +++ b/Docs/functions_p.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -1127,17 +1169,17 @@

-

Enum for format of tuner output.

+

Enum for format of tuner output.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,20 +93,16 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- p -

diff --git a/Docs/functions_r.html b/Docs/functions_r.html index a2d58524..a6e75a7c 100644 --- a/Docs/functions_r.html +++ b/Docs/functions_r.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -1098,17 +1140,17 @@

-

Enum for modifier type for kernel parameters. Specifies whether kernel parameter value affects corresponding kernel thread size.

+

Enum for modifier type for kernel parameters. Specifies whether kernel parameter value affects corresponding kernel thread size.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,47 +93,26 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- r -

diff --git a/Docs/functions_s.html b/Docs/functions_s.html index 36389e92..e6e8fb69 100644 --- a/Docs/functions_s.html +++ b/Docs/functions_s.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -1067,19 +1109,19 @@

-

Enum for modifier dimension for kernel parameters. Dimensions are utilized during specification of parameters which modify kernel thread sizes.

+

Enum for modifier dimension for kernel parameters. Dimensions are utilized during specification of parameters which modify kernel thread sizes.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,163 +93,64 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- s -

diff --git a/Docs/functions_t.html b/Docs/functions_t.html index d4133801..88903422 100644 --- a/Docs/functions_t.html +++ b/Docs/functions_t.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -1032,23 +1074,23 @@

-

Enum for modifier action for kernel parameters which modify thread size.

+

Enum for modifier action for kernel parameters which modify thread size.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,26 +93,18 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- t -

diff --git a/Docs/functions_u.html b/Docs/functions_u.html index 5257311f..f6ed6d49 100644 --- a/Docs/functions_u.html +++ b/Docs/functions_u.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -997,23 +1039,23 @@

-

Enum for verbosity level of KTT logger. Higher logging levels also include logging of information from lower levels.

+

Enum for verbosity level of KTT logger. Higher logging levels also include logging of information from lower levels.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,39 +93,21 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- u -

diff --git a/Docs/functions_vars.html b/Docs/functions_vars.html index fcd2db89..d7bf5c95 100644 --- a/Docs/functions_vars.html +++ b/Docs/functions_vars.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members - Variables @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - + + +
-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -93,31 +92,19 @@
 
diff --git a/Docs/functions_w.html b/Docs/functions_w.html index f4a2e2db..3ee6ac86 100644 --- a/Docs/functions_w.html +++ b/Docs/functions_w.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -935,19 +944,19 @@

-

Enum which describes reason why KTT exception was thrown.

+

Enum which describes reason why KTT exception was thrown.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,23 +93,17 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- w -

diff --git a/Docs/functions_~.html b/Docs/functions_~.html index c3632d5f..0a9da111 100644 --- a/Docs/functions_~.html +++ b/Docs/functions_~.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -904,19 +913,19 @@

-

Enum for type of compute device. Based on device types available in OpenCL API.

+

Enum for type of compute device. Based on device types available in OpenCL API.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -94,26 +93,18 @@
Here is a list of all documented class members with links to the class documentation for each member:
-

- ~ -

diff --git a/Docs/globals.html b/Docs/globals.html index 68d46386..bb4db512 100644 --- a/Docs/globals.html +++ b/Docs/globals.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: File Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -873,19 +882,19 @@

-

Enum for compute API used by KTT framework. It is utilized during tuner creation.

+

Enum for compute API used by KTT framework. It is utilized during tuner creation.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -93,22 +92,16 @@
Here is a list of all documented file members with links to the documentation:
diff --git a/Docs/globals_defs.html b/Docs/globals_defs.html index 2a6c33a7..5ee0dad0 100644 --- a/Docs/globals_defs.html +++ b/Docs/globals_defs.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: File Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -842,19 +849,21 @@

-

Enum for memory type of kernel arguments. Specifies which compute API function should be used internally by KTT framework to make the argument accessible to kernel functions.

+

Enum for memory type of kernel arguments. Specifies which compute API function should be used internally by KTT framework to make the argument accessible to kernel functions.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -93,22 +92,16 @@
 
diff --git a/Docs/hierarchy.html b/Docs/hierarchy.html index a923d57b..3cd0b54c 100644 --- a/Docs/hierarchy.html +++ b/Docs/hierarchy.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Class Hierarchy @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -809,21 +816,21 @@

-

Enum for memory location of vector kernel arguments. Specifies the memory from which the argument data will be accessed by compute API functions and kernels.

+

Enum for memory location of vector kernel arguments. Specifies the memory from which the argument data will be accessed by compute API functions and kernels.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -92,8 +91,7 @@
-
-
Class Hierarchy
+
Class Hierarchy
This inheritance list is sorted roughly, but not completely, alphabetically:
@@ -131,7 +129,7 @@ diff --git a/Docs/index.html b/Docs/index.html index 89d119aa..e8b65f5b 100644 --- a/Docs/index.html +++ b/Docs/index.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: KTT - Kernel Tuning Toolkit @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
- - + - @@ -780,17 +787,17 @@

-

Enum for management type of kernel arguments. Specifies who is responsible for managing vector kernel arguments.

+

Enum for management type of kernel arguments. Specifies who is responsible for managing vector kernel arguments.

-
Kernel Tuning Toolkit -  2.0.1 +
+
Kernel Tuning Toolkit 2.1
+
- + @@ -72,7 +71,7 @@ @@ -91,69 +90,72 @@ -
-
-
KTT - Kernel Tuning Toolkit
+
+
KTT - Kernel Tuning Toolkit
-

-

KTT is an auto-tuning framework for OpenCL, CUDA kernels and GLSL compute shaders. Version 2.0 which contains major API overhaul as well as new features and improvements is now available.

+

+

KTT is an autotuning framework for OpenCL, CUDA kernels and GLSL compute shaders. Version 2.1 which introduces API bindings for Python and new onboarding guide is now available.

Main features

    -
  • Ability to define kernel tuning parameters such as kernel thread sizes, vector data types and loop unroll factors in order to optimize computation for a particular device.
  • +
  • Ability to define kernel tuning parameters such as kernel thread sizes, vector data types and loop unroll factors to optimize computation for a particular device.
  • Support for iterative kernel launches and composite kernels.
  • Support for multiple compute queues and asynchronous operations.
  • Support for online auto-tuning - kernel tuning combined with regular kernel running.
  • -
  • Ability to automatically ensure correctness of tuned computation with reference kernel or C++ function.
  • +
  • Ability to automatically ensure the correctness of tuned computation with reference kernel or C++ function.
  • Support for multiple compute APIs, switching between CUDA, OpenCL and Vulkan requires only minor changes in C++ code (e.g., changing the kernel source file), no library recompilation is needed.
  • -
  • Large number of customization options, including support for kernel arguments with user-defined data types, ability to change kernel compiler flags and more.
  • +
  • Public API available in C++ (native) and Python (bindings).
  • +
  • Many customization options, including support for kernel arguments with user-defined data types, ability to change kernel compiler flags and more.

Getting started

    -
  • Documentation for KTT API can be found here.
  • +
  • Introductory guide to KTT can be found here.
  • +
  • Full documentation for KTT API can be found here.
  • KTT FAQ can be found here.
  • -
  • The newest release of KTT framework can be found here.
  • -
  • Prebuilt binaries are not provided due to many different combinations of compute APIs and build options available. Please check the Building KTT section for detailed instructions on how to perform a build.
  • +
  • The newest release of the KTT framework can be found here.
  • +
  • Prebuilt binaries are not provided due to many different combinations of compute APIs and build options available. The Building KTT section contains detailed instructions on how to perform a build.

Tutorials

-

Tutorials are short examples which serve as an introduction to KTT framework. Each tutorial covers a specific part of the API. All tutorials are available for both OpenCL and CUDA backends. Most of the tutorials are also available for Vulkan. Tutorials assume that reader has some knowledge about C++ and GPU programming. List of the currently available tutorials:

+

Tutorials are short examples that serve as an introduction to the KTT framework. Each tutorial covers a specific part of the API. All tutorials are available for both OpenCL and CUDA backends. Most of the tutorials are also available for Vulkan. Tutorials assume that the reader has some knowledge about C++ and GPU programming. List of the currently available tutorials:

  • Info: Retrieving information about compute API platforms and devices through KTT API.
  • KernelRunning: Running simple kernel with KTT framework and retrieving output.
  • -
  • KernelTuning: Simple kernel tuning using small number of tuning parameters and reference computation to validate output.
  • +
  • KernelTuning: Simple kernel tuning using a small number of tuning parameters and reference computation to validate output.
  • CustomArgumentTypes: Usage of kernel arguments with custom data types and validating the output with value comparator.
  • ComputeApiInitializer: Providing tuner with custom compute context, queues and buffers.
  • VectorArgumentCustomization: Showcasing different usage options for vector kernel arguments.
  • +
  • PythonInterfaces: Implementing custom searchers and stop conditions in Python, which can afterward be used with the tuner.

Examples

-

Examples showcase how KTT framework could be utilized in real-world scenarios. They are more complex than tutorials and assume that reader is familiar with KTT API. List of some of the currently available examples:

+

Examples showcase how the KTT framework could be utilized in real-world scenarios. They are more complex than tutorials and assume that the reader is familiar with KTT API. List of some of the currently available examples:

  • CoulombSum2d: Tuning of electrostatic potential map computation, focuses on a single slice.
  • -
  • CoulombSum3dIterative: 3D version of previous example, utilizes kernel from 2D version and launches it iteratively.
  • -
  • CoulombSum3d: Alternative to iterative version, utilizes kernel which computes the entire map in single invocation.
  • +
  • CoulombSum3dIterative: 3D version of the previous example, utilizes kernel from 2D version and launches it iteratively.
  • +
  • CoulombSum3d: Alternative to iterative version, utilizes kernel which computes the entire map in a single invocation.
  • Nbody: Tuning of N-body simulation.
  • Reduction: Tuning of vector reduction, launches a kernel iteratively.
  • -
  • Sort: Radix sort example, combines multiple kernels into composite kernel.
  • +
  • Sort: Radix sort example, combines multiple kernels into a composite kernel.
  • Bicg: Biconjugate gradients method example, features reference computation, composite kernels and constraints.

Building KTT

    -
  • KTT can be built as a dynamic (shared) library using command line build tool Premake. Currently supported operating systems are Linux and Windows.
  • +
  • KTT can be built as a dynamic (shared) library using the command line build tool Premake. Currently supported operating systems are Linux and Windows.
  • The prerequisites to build KTT are:
    • C++17 compiler, for example Clang 7.0, GCC 9.1, MSVC 14.16 (Visual Studio 2017) or newer
    • OpenCL, CUDA or Vulkan library, supported SDKs are AMD OCL SDK, Intel SDK for OpenCL, NVIDIA CUDA Toolkit and Vulkan SDK
    • -
    • Premake 5
    • +
    • Command line build tool Premake 5
    • +
    • (Optional) Python 3 with NumPy for Python bindings support
  • Build under Linux (inside KTT root folder):
    • ensure that path to vendor SDK is correctly set in the environment variables
    • run ./premake5 gmake to generate makefile
    • -
    • run cd Build to get inside build directory
    • +
    • run cd Build to get inside the build directory
    • afterwards run make config={configuration}_{architecture} to build the project (e.g., make config=release_x86_64)
  • Build under Windows (inside KTT root folder):
      -
    • ensure that path to vendor SDK is correctly set in the environment variables, this should be done automatically during SDK installation
    • +
    • ensure that path to vendor SDK is correctly set in the environment variables; this should be done automatically during SDK installation
    • run premake5.exe vs20xx (e.g., premake5.exe vs2019) to generate Visual Studio project files
    • open generated solution file and build the project inside Visual Studio
    @@ -163,14 +165,15 @@

    Building KTT

  • --platform=vendor specifies SDK used for building KTT, useful when multiple SDKs are installed
  • --profiling=library enables compilation of kernel profiling functionality using specified library
  • --vulkan enables compilation of experimental Vulkan backend
  • +
  • --python enables compilation of Python bindings
  • --no-examples disables compilation of examples
  • --no-tutorials disables compilation of tutorials
  • --tests enables compilation of unit tests
  • -
  • --no-cuda disables inclusion of CUDA API during compilation, only affects Nvidia platform
  • -
  • --no-opencl disables inclusion of OpenCL API during compilation
  • +
  • --no-cuda disables the inclusion of CUDA API during compilation, only affects Nvidia platform
  • +
  • --no-opencl disables the inclusion of OpenCL API during compilation
-
  • KTT and applications utilizing it rely on external dynamic (shared) libraries in order to work correctly. There are multiple ways to provide access to these libraries, e.g., copying given library inside application folder or adding the containing folder to library path (example for Linux: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/shared/library). Libraries which are bundled with device drivers are usually visible by default. The list of libraries currently utilized by KTT:
      +
    • KTT and applications that utilize it rely on external dynamic (shared) libraries to work correctly. There are multiple ways to provide access to these libraries, e.g., copying a given library inside the application folder or adding the containing folder to the library path (example for Linux: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/shared/library). Libraries which are bundled with device drivers are usually visible by default. The list of libraries currently utilized by KTT:
      • OpenCL distributed with specific device drivers (OpenCL only)
      • cuda distributed with specific device drivers (CUDA only)
      • nvrtc distributed with specific device drivers (CUDA only)
      • @@ -183,18 +186,20 @@

        Building KTT

    +

    Python bindings

    +

    To be able to use KTT Python API, the KTT module must be built with --python option. For the build option to work, access to Python development headers and library must be provided under environment variables PYTHON_HEADERS and PYTHON_LIB respectively. Once the build is finished, in addition to the regular C++ module, a Python module will be created (named pyktt.pyd under Windows, pyktt.so under Linux). This module can be imported into Python programs in the same way as regular modules. Note that Python must have access to all modules which depend on the KTT module (e.g., various profiling libraries), otherwise the loading will fail.

    Related projects

    -

    KTT API is based on CLTune project. Certain parts of the API are similar to CLTune, however internal structure is completely rewritten from scratch. The ClTuneGemm and ClTuneConvolution examples are adopted from CLTune.

    -

    KTT search space generation and tuning configuration storage techniques are derived from ATF project. Certain modifications were made to the original ATF algorithms due to differences in API and available framework features. The examples stored in AtfSamples folder are adopted from ATF.

    +

    KTT API is based on CLTune project. Certain parts of the API are similar to CLTune. However, the internal structure is completely rewritten from scratch. The ClTuneGemm and ClTuneConvolution examples are adopted from CLTune.

    +

    KTT search space generation and tuning configuration storage techniques are derived from ATF project. Due to differences in API and available framework features, certain modifications were made to the original ATF algorithms. The examples stored in AtfSamples folder are adopted from ATF.

    How to cite

    -

    F. Petrovič et al. A benchmark set of highly-efficient CUDA and OpenCL kernels and its dynamic autotuning with Kernel Tuning Toolkit. In Future Generation Computer Systems, Volume 108, 2020.

    +

    F. Petrovič et al. A benchmark set of highly-efficient CUDA and OpenCL kernels and its dynamic autotuning with Kernel Tuning Toolkit. In Future Generation Computer Systems, Volume 108, 2020.

  • diff --git a/Docs/jquery.js b/Docs/jquery.js index 103c32d7..c9ed3d99 100644 --- a/Docs/jquery.js +++ b/Docs/jquery.js @@ -1,5 +1,5 @@ -/*! jQuery v3.4.1 | (c) JS Foundation and other contributors | jquery.org/license */ -!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],E=C.document,r=Object.getPrototypeOf,s=t.slice,g=t.concat,u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType},x=function(e){return null!=e&&e===e.window},c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.4.1",k=function(e,t){return new k.fn.init(e,t)},p=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g;function d(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp($),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+$),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native 
\w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\([\\da-f]{1,6}"+M+"?|("+M+")|.)","ig"),ne=function(e,t,n){var r="0x"+t-65536;return r!=r||n?t:r<0?String.fromCharCode(r+65536):String.fromCharCode(r>>10|55296,1023&r|56320)},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(m.childNodes),m.childNodes),t[m.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&((e?e.ownerDocument||e:m)!==C&&T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!A[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&U.test(t)){(s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=k),o=(l=h(t)).length;while(o--)l[o]="#"+s+" "+xe(l[o]);c=l.join(","),f=ee.test(t)&&ye(e.parentNode)||e}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){A(t,!0)}finally{s===k&&e.removeAttribute("id")}}}return g(t.replace(B,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[k]=!0,e}function ce(e){var 
t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:m;return r!==C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),m!==C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=k,!C.getElementsByName||!C.getElementsByName(k).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof 
t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+k+"-]").length||v.push("~="),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+k+"+*").length||v.push(".#.+[+~]")}),ce(function(e){e.innerHTML="";var 
t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",$)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)===(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e===C||e.ownerDocument===m&&y(m,e)?-1:t===C||t.ownerDocument===m&&y(m,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e===C?-1:t===C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]===m?-1:s[r]===m?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if((e.ownerDocument||e)!==C&&T(e),d.matchesSelector&&E&&!A[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){A(t,!0)}return 
0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=p[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&p(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?k.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?k.grep(e,function(e){return e===n!==r}):"string"!=typeof n?k.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(k.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||q,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:L.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof k?t[0]:t,k.merge(this,k.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),D.test(r[1])&&k.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return 
this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(k):k.makeArray(e,this)}).prototype=k.fn,q=k(E);var H=/^(?:parents|prev(?:Until|All))/,O={children:!0,contents:!0,next:!0,prev:!0};function P(e,t){while((e=e[t])&&1!==e.nodeType);return e}k.fn.extend({has:function(e){var t=k(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i,ge={option:[1,""],thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?k.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;nx",y.noCloneChecked=!!me.cloneNode(!0).lastChild.defaultValue;var Te=/^key/,Ce=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,Ee=/^([^.]*)(?:\.(.+)|)/;function ke(){return!0}function Se(){return!1}function Ne(e,t){return e===function(){try{return E.activeElement}catch(e){}}()==("focus"===t)}function Ae(e,t,n,r,i,o){var a,s;if("object"==typeof t){for(s in"string"!=typeof n&&(r=r||n,n=void 0),t)Ae(e,s,n,r,t[s],o);return e}if(null==r&&null==i?(i=n,r=n=void 0):null==i&&("string"==typeof n?(i=r,r=void 0):(i=r,r=n,n=void 0)),!1===i)i=Se;else if(!i)return e;return 1===o&&(a=i,(i=function(e){return k().off(e),a.apply(this,arguments)}).guid=a.guid||(a.guid=k.guid++)),e.each(function(){k.event.add(this,t,i,r,n)})}function De(e,i,o){o?(Q.set(e,i,!1),k.event.add(e,i,{namespace:!1,handler:function(e){var t,n,r=Q.get(this,i);if(1&e.isTrigger&&this[i]){if(r.length)(k.event.special[i]||{}).delegateType&&e.stopPropagation();else if(r=s.call(arguments),Q.set(this,i,r),t=o(this,i),this[i](),r!==(n=Q.get(this,i))||t?Q.set(this,i,!1):n={},r!==n)return e.stopImmediatePropagation(),e.preventDefault(),n.value}else r.length&&(Q.set(this,i,{value:k.event.trigger(k.extend(r[0],k.Event.prototype),r.slice(1),this)}),e.stopImmediatePropagation())}})):void 0===Q.get(e,i)&&k.event.add(e,i,ke)}k.event={global:{},add:function(t,e,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.get(t);if(v){n.handler&&(n=(o=n).handler,i=o.selector),i&&k.find.matchesSelector(ie,i),n.guid||(n.guid=k.guid++),(u=v.events)||(u=v.events={}),(a=v.handle)||(a=v.handle=function(e){return"undefined"!=typeof k&&k.event.triggered!==e.type?k.event.dispatch.apply(t,arguments):void 
0}),l=(e=(e||"").match(R)||[""]).length;while(l--)d=g=(s=Ee.exec(e[l])||[])[1],h=(s[2]||"").split(".").sort(),d&&(f=k.event.special[d]||{},d=(i?f.delegateType:f.bindType)||d,f=k.event.special[d]||{},c=k.extend({type:d,origType:g,data:r,handler:n,guid:n.guid,selector:i,needsContext:i&&k.expr.match.needsContext.test(i),namespace:h.join(".")},o),(p=u[d])||((p=u[d]=[]).delegateCount=0,f.setup&&!1!==f.setup.call(t,r,h,a)||t.addEventListener&&t.addEventListener(d,a)),f.add&&(f.add.call(t,c),c.handler.guid||(c.handler.guid=n.guid)),i?p.splice(p.delegateCount++,0,c):p.push(c),k.event.global[d]=!0)}},remove:function(e,t,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.hasData(e)&&Q.get(e);if(v&&(u=v.events)){l=(t=(t||"").match(R)||[""]).length;while(l--)if(d=g=(s=Ee.exec(t[l])||[])[1],h=(s[2]||"").split(".").sort(),d){f=k.event.special[d]||{},p=u[d=(r?f.delegateType:f.bindType)||d]||[],s=s[2]&&new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),a=o=p.length;while(o--)c=p[o],!i&&g!==c.origType||n&&n.guid!==c.guid||s&&!s.test(c.namespace)||r&&r!==c.selector&&("**"!==r||!c.selector)||(p.splice(o,1),c.selector&&p.delegateCount--,f.remove&&f.remove.call(e,c));a&&!p.length&&(f.teardown&&!1!==f.teardown.call(e,h,v.handle)||k.removeEvent(e,d,v.handle),delete u[d])}else for(d in u)k.event.remove(e,d+t[l],n,r,!0);k.isEmptyObject(u)&&Q.remove(e,"handle events")}},dispatch:function(e){var t,n,r,i,o,a,s=k.event.fix(e),u=new Array(arguments.length),l=(Q.get(this,"events")||{})[s.type]||[],c=k.event.special[s.type]||{};for(u[0]=s,t=1;t\x20\t\r\n\f]*)[^>]*)\/>/gi,qe=/\s*$/g;function Oe(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&k(e).children("tbody")[0]||e}function Pe(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function Re(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Me(e,t){var n,r,i,o,a,s,u,l;if(1===t.nodeType){if(Q.hasData(e)&&(o=Q.access(e),a=Q.set(t,o),l=o.events))for(i in delete 
a.handle,a.events={},l)for(n=0,r=l[i].length;n")},clone:function(e,t,n){var r,i,o,a,s,u,l,c=e.cloneNode(!0),f=oe(e);if(!(y.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||k.isXMLDoc(e)))for(a=ve(c),r=0,i=(o=ve(e)).length;r").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Vt,Gt=[],Yt=/(=)\?(?=&|$)|\?\?/;k.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Gt.pop()||k.expando+"_"+kt++;return this[e]=!0,e}}),k.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Yt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Yt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Yt,"$1"+r):!1!==e.jsonp&&(e.url+=(St.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||k.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?k(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Gt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Vt=E.implementation.createHTMLDocument("").body).innerHTML="
    ",2===Vt.childNodes.length),k.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=D.exec(e))?[t.createElement(i[1])]:(i=we([e],t,o),o&&o.length&&k(o).remove(),k.merge([],i.childNodes)));var r,i,o},k.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(k.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},k.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){k.fn[t]=function(e){return this.on(t,e)}}),k.expr.pseudos.animated=function(t){return k.grep(k.timers,function(e){return t===e.elem}).length},k.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=k.css(e,"position"),c=k(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=k.css(e,"top"),u=k.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,k.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},k.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){k.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var 
e,t,n,r=this[0],i={top:0,left:0};if("fixed"===k.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===k.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=k(e).offset()).top+=k.css(e,"borderTopWidth",!0),i.left+=k.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-k.css(r,"marginTop",!0),left:t.left-i.left-k.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===k.css(e,"position"))e=e.offsetParent;return e||ie})}}),k.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;k.fn[t]=function(e){return _(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),k.each(["top","left"],function(e,n){k.cssHooks[n]=ze(y.pixelPosition,function(e,t){if(t)return t=_e(e,n),$e.test(t)?k(e).position()[n]+"px":t})}),k.each({Height:"height",Width:"width"},function(a,s){k.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){k.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return _(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?k.css(e,t,i):k.style(e,t,n,i)},s,n?e:void 0,n)}})}),k.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){k.fn[n]=function(e,t){return 0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new 
RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" 
"]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return 
r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var 
t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return 
l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return 
e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function je(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function De(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function qe(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=zt.pop()||S.expando+"_"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Ut.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,"$1"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument("").body).innerHTML="
    ",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var 
e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 
1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0 - - + + Kernel Tuning Toolkit: KTT FAQ @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + - @@ -731,37 +738,37 @@

    -

    Enum for data type of kernel arguments. Specifies the data type of elements inside single kernel argument.

    +

    Enum for data type of kernel arguments. Specifies the data type of elements inside single kernel argument.

    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -91,23 +90,22 @@ -
    -
    -
    KTT FAQ
    +
    +
    KTT FAQ
    -

    This file provides answers to common questions that users might have involving the usage of KTT framework.

    +

    This file provides answers to common questions that users might have involving the usage of KTT framework.

    Building KTT

    -

    Q: During project file generation, Premake prints error that compute API libraries were not found.
    +

    Q: During project file generation, Premake prints error that compute API libraries were not found.
    A: There are two likely reasons for this. First, you need to make sure that compute SDK provided by your device vendor (e.g., CUDA Toolkit) is installed correctly on your machine. Second, you need to set path to the SDK in your environment variables (e.g., setting path to CUDA Toolkit on Linux: export CUDA_PATH=/path/to/cuda/toolkit). On Windows, the path to SDK is usually set automatically during SDK installation.

    -

    Q: I'm getting compilation errors during KTT build.
    +

    Q: I'm getting compilation errors during KTT build.
    A: List of compatible compilers can be found in readme on main KTT Github page. If you are unable to build KTT with compatible compiler (generally any compiler which supports C++17), you can report a bug here.

    Using KTT

    -

    Q: I've ported my native OpenCL / CUDA application to KTT but it crashes at runtime.
    +

    Q: I've ported my native OpenCL / CUDA application to KTT but it crashes at runtime.
    A: KTT checks for correct usage of the API during runtime. If it detects a problem, an exception is thrown. In most cases, the exception also contains an error message which describes the problem. If the exception message is not helpful and you believe that you are using the API correctly, you can report a bug here.

    -

    Q: I have an application which performs some part of the computation directly in C/C++ code and utilizes iterative kernel launches. Can such application be ported to KTT?
    +

    Q: I have an application which performs some part of the computation directly in C/C++ code and utilizes iterative kernel launches. Can such application be ported to KTT?
    A: Yes, in this case you need to utilize kernel launcher and compute interface API, which is fully documented. You can also read some of the examples which already utilize kernel launchers (e.g., Reduction, CoulombSum3dIterative).

    -

    Q: Running my application with KTT uses much more memory than running it natively.
    +

    Q: Running my application with KTT uses much more memory than running it natively.
    A: KTT by default makes a copy of all buffers that are added to the tuner with the AddArgumentVector() method. This makes it safe for the user to modify the original buffer without affecting the tuner. However, it also increases the memory usage. If you are fine with the tuner accessing your buffer directly, you can use the overloaded version of the AddArgumentVector() method, which allows you to customize the handling of buffers by the tuner. You may also want to read the KTT buffer types diagram located here in order to find out the differences between the various buffer configuration options.

    @@ -115,7 +113,7 @@

    Using KTT

    diff --git a/Docs/namespacektt.html b/Docs/namespacektt.html index c3f98cde..a13bafb8 100644 --- a/Docs/namespacektt.html +++ b/Docs/namespacektt.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt Namespace Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + - @@ -698,21 +705,21 @@

    -

    Enum for access type of kernel arguments. Specifies whether argument is used for input or output by compute API kernel functions.

    +

    Enum for access type of kernel arguments. Specifies whether argument is used for input or output by compute API kernel functions.

    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -98,65 +97,64 @@ Enumerations | Functions | Variables -
    -
    ktt Namespace Reference
    +
    ktt Namespace Reference
    - - + + + - + - + - + - + - - - + - + + + - + - - - + - + - + - + - + - +

    +

    Classes

    class  Tuner
    class  BufferOutputDescriptor
     
    class  ComputationResult
     
    class  ComputeApiInitializer
     
    class  ComputeInterface
     
    class  KttException
    class  ConfigurationCount
     
    class  DimensionVector
    class  ConfigurationDuration
     
    class  KernelConfiguration
    class  ConfigurationFraction
     
    class  ParameterPair
    class  DeterministicSearcher
     
    class  DeviceInfo
     
    class  PlatformInfo
     
    class  BufferOutputDescriptor
    class  DimensionVector
     
    class  ComputationResult
    struct  EnableBitfieldOperators< ValidationMode >
     
    struct  KernelCompilationData
     
    class  KernelConfiguration
     
    class  KernelProfilingCounter
     
    class  KernelProfilingData
     
    class  KernelResult
     
    class  DeterministicSearcher
    class  KttException
     
    class  McmcSearcher
     
    class  RandomSearcher
     
    class  Searcher
    class  ParameterPair
     
    class  ConfigurationCount
    class  PlatformInfo
     
    class  ConfigurationDuration
    class  RandomSearcher
     
    class  ConfigurationFraction
    class  Searcher
     
    class  StopCondition
     
    class  TuningDuration
    class  Tuner
     
    struct  EnableBitfieldOperators< ValidationMode >
    class  TuningDuration
     
    - @@ -170,7 +168,7 @@ - + @@ -201,7 +199,7 @@

    +

    Typedefs

    using PlatformIndex = uint32_t
     
     
    using ArgumentId = uint64_t
     
    using ParameterInput = std::vector< std::pair< std::string, std::variant< uint64_t, double > >>
    using ParameterInput = std::vector< std::pair< std::string, std::variant< uint64_t, double > > >
     
    using UserData = std::map< std::string, std::string >
     
    using ComputeBuffer = void *
     
    - + +

    +

    Enumerations

    enum class  ExceptionReason { General , CompilerError @@ -304,8 +302,15 @@
    enum class  ArgumentMemoryType { Scalar , Vector , Local +, Symbol }
     
    enum class  KernelRunMode { Running +, OfflineTuning +, OnlineTuning +, ResultValidation + }
     
    enum class  ValidationMethod { AbsoluteDifference , SideBySideComparison , SideBySideRelativeComparison @@ -342,15 +347,17 @@ }
     
    -

    +

    Functions

    KTT_API uint32_t GetKttVersion ()
     
    KTT_API std::string GetKttVersionString ()
     
    - + + @@ -361,9 +368,9 @@

    +

    Variables

    const QueueId InvalidQueueId = std::numeric_limits<QueueId>::max()
     
    const KernelDefinitionId InvalidKernelDefinitionId = std::numeric_limits<KernelDefinitionId>::max()
     
    const KernelId InvalidKernelId = std::numeric_limits<KernelId>::max()
     

    Detailed Description

    -

    All classes, methods and type aliases related to KTT framework are located inside ktt namespace.

    +

    All classes, methods and type aliases related to KTT framework are located inside ktt namespace.

    Typedef Documentation

    - +

    ◆ ArgumentId

    - +

    ◆ ComputeActionId

    - +

    ◆ ComputeBuffer

    - +

    ◆ ComputeContext

    - +

    ◆ ComputeQueue

    - +

    ◆ ConstraintFunction

    - +

    ◆ DeviceIndex

    - +

    ◆ KernelComputeId

    - +

    ◆ KernelDefinitionId

    - +

    ◆ KernelId

    - +

    ◆ KernelLauncher

    - +

    ◆ ModifierFunction

    - +

    ◆ Nanoseconds

    - +

    ◆ ParameterInput

    - +

    ◆ PlatformIndex

    - +

    ◆ QueueId

    - +

    ◆ ReferenceComputation

    - +

    ◆ TransferActionId

    - +

    ◆ UnifiedBufferMemory

    - +

    ◆ UserData

    - +

    ◆ ValueComparator

    Enumeration Type Documentation

    -
    +

    ◆ ArgumentAccessType

    - - - -
    Enumerator
    Undefined 

    Kernel argument access is undefined.

    +
    Enumerator
    Undefined 

    Kernel argument access is undefined.

    ReadOnly 

    Kernel argument is used for input.

    +
    ReadOnly 

    Kernel argument is used for input.

    WriteOnly 

    Kernel argument is used for output.

    +
    WriteOnly 

    Kernel argument is used for output.

    ReadWrite 

    Kernel argument is used for both input and output.

    +
    ReadWrite 

    Kernel argument is used for both input and output.

    - +

    ◆ ArgumentDataType

    - - - - - - - - - - - -
    Enumerator
    Char 

    8-bit signed integer type.

    +
    Enumerator
    Char 

    8-bit signed integer type.

    UnsignedChar 

    8-bit unsigned integer type.

    +
    UnsignedChar 

    8-bit unsigned integer type.

    Short 

    16-bit signed integer type.

    +
    Short 

    16-bit signed integer type.

    UnsignedShort 

    16-bit unsigned integer type.

    +
    UnsignedShort 

    16-bit unsigned integer type.

    Int 

    32-bit signed integer type.

    +
    Int 

    32-bit signed integer type.

    UnsignedInt 

    32-bit unsigned integer type.

    +
    UnsignedInt 

    32-bit unsigned integer type.

    Long 

    64-bit signed integer type.

    +
    Long 

    64-bit signed integer type.

    UnsignedLong 

    64-bit unsigned integer type.

    +
    UnsignedLong 

    64-bit unsigned integer type.

    Half 

    16-bit floating-point type.

    +
    Half 

    16-bit floating-point type.

    Float 

    32-bit floating-point type.

    +
    Float 

    32-bit floating-point type.

    Double 

    64-bit floating-point type.

    +
    Double 

    64-bit floating-point type.

    Custom 

    Custom data type, usually defined by user. Custom data type has to be trivially copyable. It can be for example struct or class.

    +
    Custom 

    Custom data type, usually defined by user. Custom data type has to be trivially copyable. It can be for example struct or class.

    - +

    ◆ ArgumentManagementType

    - -
    Enumerator
    Framework 

    Vector kernel arguments are managed automatically by the framework.

    +
    Enumerator
    Framework 

    Vector kernel arguments are managed automatically by the framework.

    User 

    Vector kernel arguments are managed by user. This means that user is responsible for uploading and downloading argument data into compute API buffers at the right time. This can be achieved by utilizing ComputeInterface methods such as UploadBuffer() and DownloadBuffer().

    +
    User 

    Vector kernel arguments are managed by user. This means that user is responsible for uploading and downloading argument data into compute API buffers at the right time. This can be achieved by utilizing ComputeInterface methods such as UploadBuffer() and DownloadBuffer().

    - +

    ◆ ArgumentMemoryLocation

    - - - -
    Enumerator
    Undefined 

    Default memory location for non-vector kernel arguments.

    +
    Enumerator
    Undefined 

    Default memory location for non-vector kernel arguments.

    Device 

    Argument data will be accessed from device memory. This is recommended setting for devices with dedicated memory, e.g., discrete GPUs.

    +
    Device 

    Argument data will be accessed from device memory. This is recommended setting for devices with dedicated memory, e.g., discrete GPUs.

    Host 

    Argument data will be accessed from host memory. This is recommended setting for CPUs and devices without dedicated memory, e.g., integrated GPUs.

    +
    Host 

    Argument data will be accessed from host memory. This is recommended setting for CPUs and devices without dedicated memory, e.g., integrated GPUs.

    HostZeroCopy 

    Argument data will be accessed from host memory without explicitly creating additional compute API buffer. This flag cannot be used for writable arguments during regular kernel tuning. It can be used for any arguments during step kernel tuning and kernel running. Note that even when this flag is used, extra buffer copy is still sometimes created internally by compute API. This behaviour depends on particular API and device.

    +
    HostZeroCopy 

    Argument data will be accessed from host memory without explicitly creating additional compute API buffer. This flag cannot be used for writable arguments during regular kernel tuning. It can be used for any arguments during step kernel tuning and kernel running. Note that even when this flag is used, extra buffer copy is still sometimes created internally by compute API. This behaviour depends on particular API and device.

    - +

    ◆ ArgumentMemoryType

    - + - -
    Enumerator
    Scalar 

    Argument is a scalar. Scalar arguments are made visible to kernels as a local copy.

    +
    Enumerator
    Scalar 

    Argument is a scalar. Scalar arguments are made visible to kernels as a local copy.

    +
    Vector 

    Argument is a vector. Vector arguments are made visible to kernels through compute API buffers. See ArgumentMemoryLocation for more information.

    Vector 

    Argument is a vector. Vector arguments are made visible to kernels through compute API buffers. See ArgumentMemoryLocation for more information.

    +
    Local 

    Argument will be located in local memory. Kernel arguments cannot be directly transferred into local memory from host memory. Assigning local memory argument to kernel from KTT API simply means that the compute API will allocate enough local memory to hold number of elements specified by the argument. The memory then needs to be filled with data on kernel side.

    Local 

    Argument will be located in local memory. Kernel arguments cannot be directly transferred into local memory from host memory. Assigning local memory argument to kernel from KTT API simply means that the compute API will allocate enough local memory to hold number of elements specified by the argument. The memory then needs to be filled with data on kernel side.

    +
    Symbol 

    Argument corresponds to the CUDA symbol which resides in global or constant device memory and matches the argument's name. In Vulkan and OpenCL, symbol arguments are treated in the same way as scalars.

    - +

    ◆ ComputeApi

    - - -
    Enumerator
    OpenCL 

    Tuner will use OpenCL as compute API.

    +
    Enumerator
    OpenCL 

    Tuner will use OpenCL as compute API.

    CUDA 

    Tuner will use CUDA as compute API.

    +
    CUDA 

    Tuner will use CUDA as compute API.

    Vulkan 

    Tuner will use Vulkan as compute API.

    +
    Vulkan 

    Tuner will use Vulkan as compute API.

    - +

    ◆ DeviceType

    - - -
    Enumerator
    CPU 

    Device is a CPU.

    +
    Enumerator
    CPU 

    Device is a CPU.

    GPU 

    Device is a GPU. All available devices in CUDA API and Vulkan will have this device type.

    +
    GPU 

    Device is a GPU. All available devices in CUDA API and Vulkan will have this device type.

    Custom 

    Device has type other than CPU or GPU.

    +
    Custom 

    Device has type other than CPU or GPU.

    - +

    ◆ ExceptionReason

    - - -
    Enumerator
    General 

    General issue with KTT API usage.

    +
    Enumerator
    General 

    General issue with KTT API usage.

    CompilerError 

    Kernel source file compilation error.

    +
    CompilerError 

    Kernel source file compilation error.

    DeviceLimitsExceeded 

    Compute device limits were exceeded (e.g., local size was too large, shared memory usage was too high).

    +
    DeviceLimitsExceeded 

    Compute device limits were exceeded (e.g., local size was too large, shared memory usage was too high).

    - +

    ◆ GlobalSizeType

    +strong
    +
    +

    Enum for format of global thread size. Specifies the format of global thread size specified by user during kernel addition.

    + + + + +
    Enumerator
    OpenCL 

    Global thread size uses OpenCL format for NDRange dimensions specification.

    +
    CUDA 

    Global thread size uses CUDA format for grid dimensions specification.

    +
    Vulkan 

    Global thread size uses Vulkan format. This format is the same as CUDA format.

    +
    + +
    + + +

    ◆ KernelRunMode

    + +
    +
    + + + @@ -966,19 +1006,21 @@

    -

    Enum for format of global thread size. Specifies the format of global thread size specified by user during kernel addition.

    +

    Enum for differentiating between different kernel running scenarios.

    + + +
    enum class ktt::KernelRunMode
    - - - +
    Enumerator
    OpenCL 

    Global thread size uses OpenCL format for NDRange dimensions specification.

    +
    Enumerator
    Running 

    Regular kernel running.

    CUDA 

    Global thread size uses CUDA format for grid dimensions specification.

    +
    OfflineTuning 

    Offline kernel tuning.

    Vulkan 

    Global thread size uses Vulkan format. This format is the same as CUDA format.

    +
    OnlineTuning 

    Online kernel tuning.

    +
    ResultValidation 

    Computation of reference output for result validation.

    - +

    ◆ LoggingLevel

    - - - - -
    Enumerator
    Off 

    Logging is completely turned off.

    +
    Enumerator
    Off 

    Logging is completely turned off.

    Error 

    Logs information about major problems which usually lead to application termination.

    +
    Error 

    Logs information about major problems which usually lead to application termination.

    Warning 

    Logs information about minor problems which possibly lead to incorrect application behaviour.

    +
    Warning 

    Logs information about minor problems which possibly lead to incorrect application behaviour.

    Info 

    Logs general information about application status.

    +
    Info 

    Logs general information about application status.

    Debug 

    Logs detailed information which is useful for debugging.

    +
    Debug 

    Logs detailed information which is useful for debugging.

    - +

    ◆ ModifierAction

    - - - - -
    Enumerator
    Add 

    Kernel parameter value will be added to corresponding kernel thread size.

    +
    Enumerator
    Add 

    Kernel parameter value will be added to corresponding kernel thread size.

    Subtract 

    Kernel parameter value will be subtracted from corresponding kernel thread size.

    +
    Subtract 

    Kernel parameter value will be subtracted from corresponding kernel thread size.

    Multiply 

    Corresponding kernel thread size will be multiplied by kernel parameter value.

    +
    Multiply 

    Corresponding kernel thread size will be multiplied by kernel parameter value.

    Divide 

    Corresponding kernel thread size will be divided by kernel parameter value.

    +
    Divide 

    Corresponding kernel thread size will be divided by kernel parameter value.

    DivideCeil 

    Corresponding kernel thread size will be divided by kernel parameter value and then rounded up to its multiple.

    +
    DivideCeil 

    Corresponding kernel thread size will be divided by kernel parameter value and then rounded up to its multiple.

    - +

    ◆ ModifierDimension

    - - -
    Enumerator

    Kernel parameter will modify thread size in dimension X.

    +
    Enumerator

    Kernel parameter will modify thread size in dimension X.

    Kernel parameter will modify thread size in dimension Y.

    +

    Kernel parameter will modify thread size in dimension Y.

    Kernel parameter will modify thread size in dimension Z.

    +

    Kernel parameter will modify thread size in dimension Z.

    - +

    ◆ ModifierType

    - -
    Enumerator
    Global 

    Parameter value affects global kernel thread size.

    +
    Enumerator
    Global 

    Parameter value affects global kernel thread size.

    Local 

    Parameter value affects local kernel thread size.

    +
    Local 

    Parameter value affects local kernel thread size.

    - +

    ◆ OutputFormat

    - -
    Enumerator
    JSON 

    Tuner output has JSON format.

    +
    Enumerator
    JSON 

    Tuner output has JSON format.

    XML 

    Tuner output has XML format.

    +
    XML 

    Tuner output has XML format.

    - +

    ◆ ProfilingCounterType

    - - - - - -
    Enumerator
    Int 

    Profiling counter is a signed 64-bit integer.

    +
    Enumerator
    Int 

    Profiling counter is a signed 64-bit integer.

    UnsignedInt 

    Profiling counter is an unsigned 64-bit integer.

    +
    UnsignedInt 

    Profiling counter is an unsigned 64-bit integer.

    Double 

    Profiling counter is a 64-bit float.

    +
    Double 

    Profiling counter is a 64-bit float.

    Percent 

    Profiling counter is a 64-bit float with a range of values between 0.0 and 100.0 (corresponding to 0% - 100%).

    +
    Percent 

    Profiling counter is a 64-bit float with a range of values between 0.0 and 100.0 (corresponding to 0% - 100%).

    Throughput 

    Profiling counter is an unsigned 64-bit integer. The unit for throughput value is bytes/second.

    +
    Throughput 

    Profiling counter is an unsigned 64-bit integer. The unit for throughput value is bytes/second.

    UtilizationLevel 

    Profiling counter is an unsigned 64-bit integer with a range of values between 0 and 10 (0 corresponds to minimum utilization level, 10 to maximum utilization level).

    +
    UtilizationLevel 

    Profiling counter is an unsigned 64-bit integer with a range of values between 0 and 10 (0 corresponds to minimum utilization level, 10 to maximum utilization level).

    - +

    ◆ ResultStatus

    - - - - -
    Enumerator
    Ok 

    Computation was completed successfully.

    +
    Enumerator
    Ok 

    Computation was completed successfully.

    ComputationFailed 

    Computation failed due to generic compute API error.

    +
    ComputationFailed 

    Computation failed due to generic compute API error.

    ValidationFailed 

    Computation was completed successfully, but its output does not match the expected output.

    +
    ValidationFailed 

    Computation was completed successfully, but its output does not match the expected output.

    CompilationFailed 

    Kernel source file failed to compile.

    +
    CompilationFailed 

    Kernel source file failed to compile.

    DeviceLimitsExceeded 

    Computation could not launch due to device limits being exceeded (e.g., local size was too large).

    +
    DeviceLimitsExceeded 

    Computation could not launch due to device limits being exceeded (e.g., local size was too large).

    - +

    ◆ TimeUnit

    - - - -
    Enumerator
    Nanoseconds 

    Durations will be printed in nanoseconds.

    +
    Enumerator
    Nanoseconds 

    Durations will be printed in nanoseconds.

    Microseconds 

    Durations will be printed in microseconds.

    +
    Microseconds 

    Durations will be printed in microseconds.

    Milliseconds 

    Durations will be printed in milliseconds.

    +
    Milliseconds 

    Durations will be printed in milliseconds.

    Seconds 

    Durations will be printed in seconds.

    +
    Seconds 

    Durations will be printed in seconds.

    - +

    ◆ ValidationMethod

    - - -
    Enumerator
    AbsoluteDifference 

    Calculates sum of differences between each pair of elements, then compares the sum to specified threshold.

    +
    Enumerator
    AbsoluteDifference 

    Calculates sum of differences between each pair of elements, then compares the sum to specified threshold.

    SideBySideComparison 

    Calculates difference for each pair of elements, then compares the difference to specified threshold.

    +
    SideBySideComparison 

    Calculates difference for each pair of elements, then compares the difference to specified threshold.

    SideBySideRelativeComparison 

    Calculates difference for each pair of elements, then compares the difference divided by reference value to the specified threshold.

    +
    SideBySideRelativeComparison 

    Calculates difference for each pair of elements, then compares the difference divided by reference value to the specified threshold.

    - +

    ◆ ValidationMode

    - - - - -
    Enumerator
    None 

    Kernel output validation is completely disabled.

    +
    Enumerator
    None 

    Kernel output validation is completely disabled.

    Running 

    Kernel output is validated during kernel running.

    +
    Running 

    Kernel output is validated during kernel running.

    OfflineTuning 

    Kernel output is validated during offline kernel tuning.

    +
    OfflineTuning 

    Kernel output is validated during offline kernel tuning.

    OnlineTuning 

    Kernel output is validated during online kernel tuning.

    +
    OnlineTuning 

    Kernel output is validated during online kernel tuning.

    All 

    Kernel output is always validated.

    +
    All 

    Kernel output is always validated.

    Function Documentation

    -
    +

    ◆ GetKttVersion()

    - +

    ◆ GetKttVersionString()

    Variable Documentation

    -
    +

    ◆ InvalidArgumentId

    - +

    ◆ InvalidDuration

    - +

    ◆ InvalidKernelDefinitionId

    - +

    ◆ InvalidKernelId

    + + +

    ◆ InvalidQueueId

    + +
    +
    + + + + + +
    + + + + +
    const QueueId ktt::InvalidQueueId = std::numeric_limits<QueueId>::max()
    +
    +inline
    +
    +

    Queue id returned by compute queue addition methods in case of an error.

    @@ -1444,7 +1509,7 @@

      - +
    diff --git a/Docs/namespacemembers.html b/Docs/namespacemembers.html index 9f65413d..88f2c5ad 100644 --- a/Docs/namespacemembers.html +++ b/Docs/namespacemembers.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Namespace Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -94,216 +93,126 @@
    Here is a list of all documented namespace members with links to the namespaces they belong to:
    -

    - a -

      -
    • ArgumentAccessType -: ktt -
    • -
    • ArgumentDataType -: ktt -
    • -
    • ArgumentId -: ktt -
    • -
    • ArgumentManagementType -: ktt -
    • -
    • ArgumentMemoryLocation -: ktt -
    • -
    • ArgumentMemoryType -: ktt -
    • +

      - a -

        +
      • ArgumentAccessType : ktt
      • +
      • ArgumentDataType : ktt
      • +
      • ArgumentId : ktt
      • +
      • ArgumentManagementType : ktt
      • +
      • ArgumentMemoryLocation : ktt
      • +
      • ArgumentMemoryType : ktt
      -

      - c -

        -
      • ComputeActionId -: ktt -
      • -
      • ComputeApi -: ktt -
      • -
      • ComputeBuffer -: ktt -
      • -
      • ComputeContext -: ktt -
      • -
      • ComputeQueue -: ktt -
      • -
      • ConstraintFunction -: ktt -
      • +

        - c -

          +
        • ComputeActionId : ktt
        • +
        • ComputeApi : ktt
        • +
        • ComputeBuffer : ktt
        • +
        • ComputeContext : ktt
        • +
        • ComputeQueue : ktt
        • +
        • ConstraintFunction : ktt
        -

        - d -

          -
        • DeviceIndex -: ktt -
        • -
        • DeviceType -: ktt -
        • +

          - d -

            +
          • DeviceIndex : ktt
          • +
          • DeviceType : ktt
          -

          - e -

            -
          • ExceptionReason -: ktt -
          • +

            - e -

              +
            • ExceptionReason : ktt
            -

            - g -

              -
            • GetKttVersion() -: ktt -
            • -
            • GetKttVersionString() -: ktt -
            • -
            • GlobalSizeType -: ktt -
            • +

              - g -

                +
              • GetKttVersion() : ktt
              • +
              • GetKttVersionString() : ktt
              • +
              • GlobalSizeType : ktt
              -

              - i -

                -
              • InvalidArgumentId -: ktt -
              • -
              • InvalidDuration -: ktt -
              • -
              • InvalidKernelDefinitionId -: ktt -
              • -
              • InvalidKernelId -: ktt -
              • +

                - i -

                  +
                • InvalidArgumentId : ktt
                • +
                • InvalidDuration : ktt
                • +
                • InvalidKernelDefinitionId : ktt
                • +
                • InvalidKernelId : ktt
                • +
                • InvalidQueueId : ktt
                -

                - k -

                  -
                • KernelComputeId -: ktt -
                • -
                • KernelDefinitionId -: ktt -
                • -
                • KernelId -: ktt -
                • -
                • KernelLauncher -: ktt -
                • +

                  - k -

                    +
                  • KernelComputeId : ktt
                  • +
                  • KernelDefinitionId : ktt
                  • +
                  • KernelId : ktt
                  • +
                  • KernelLauncher : ktt
                  • +
                  • KernelRunMode : ktt
                  -

                  - l -

                    -
                  • LoggingLevel -: ktt -
                  • +

                    - l -

                      +
                    • LoggingLevel : ktt
                    -

                    - m -

                      -
                    • ModifierAction -: ktt -
                    • -
                    • ModifierDimension -: ktt -
                    • -
                    • ModifierFunction -: ktt -
                    • -
                    • ModifierType -: ktt -
                    • +

                      - m -

                        +
                      • ModifierAction : ktt
                      • +
                      • ModifierDimension : ktt
                      • +
                      • ModifierFunction : ktt
                      • +
                      • ModifierType : ktt
                      -

                      - n -

                        -
                      • Nanoseconds -: ktt -
                      • +

                        - n -

                          +
                        • Nanoseconds : ktt
                        -

                        - o -

                          -
                        • OutputFormat -: ktt -
                        • +

                          - o -

                            +
                          • OutputFormat : ktt
                          -

                          - p -

                            -
                          • ParameterInput -: ktt -
                          • -
                          • PlatformIndex -: ktt -
                          • -
                          • ProfilingCounterType -: ktt -
                          • +

                            - p -

                              +
                            • ParameterInput : ktt
                            • +
                            • PlatformIndex : ktt
                            • +
                            • ProfilingCounterType : ktt
                            -

                            - q -

                              -
                            • QueueId -: ktt -
                            • +

                              - q -

                                +
                              • QueueId : ktt
                              -

                              - r -

                                -
                              • ReferenceComputation -: ktt -
                              • -
                              • ResultStatus -: ktt -
                              • +

                                - r -

                                  +
                                • ReferenceComputation : ktt
                                • +
                                • ResultStatus : ktt
                                -

                                - t -

                                  -
                                • TimeUnit -: ktt -
                                • -
                                • TransferActionId -: ktt -
                                • +

                                  - t -

                                    +
                                  • TimeUnit : ktt
                                  • +
                                  • TransferActionId : ktt
                                  -

                                  - u -

                                    -
                                  • UnifiedBufferMemory -: ktt -
                                  • -
                                  • UserData -: ktt -
                                  • +

                                    - u -

                                      +
                                    • UnifiedBufferMemory : ktt
                                    • +
                                    • UserData : ktt
                                    -

                                    - v -

                                      -
                                    • ValidationMethod -: ktt -
                                    • -
                                    • ValidationMode -: ktt -
                                    • -
                                    • ValueComparator -: ktt -
                                    • +

                                      - v -

                                        +
                                      • ValidationMethod : ktt
                                      • +
                                      • ValidationMode : ktt
                                      • +
                                      • ValueComparator : ktt
    diff --git a/Docs/namespacemembers_enum.html b/Docs/namespacemembers_enum.html index 68ceef4a..9ff28a9f 100644 --- a/Docs/namespacemembers_enum.html +++ b/Docs/namespacemembers_enum.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Namespace Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -93,70 +92,33 @@
     
      -
    • ArgumentAccessType -: ktt -
    • -
    • ArgumentDataType -: ktt -
    • -
    • ArgumentManagementType -: ktt -
    • -
    • ArgumentMemoryLocation -: ktt -
    • -
    • ArgumentMemoryType -: ktt -
    • -
    • ComputeApi -: ktt -
    • -
    • DeviceType -: ktt -
    • -
    • ExceptionReason -: ktt -
    • -
    • GlobalSizeType -: ktt -
    • -
    • LoggingLevel -: ktt -
    • -
    • ModifierAction -: ktt -
    • -
    • ModifierDimension -: ktt -
    • -
    • ModifierType -: ktt -
    • -
    • OutputFormat -: ktt -
    • -
    • ProfilingCounterType -: ktt -
    • -
    • ResultStatus -: ktt -
    • -
    • TimeUnit -: ktt -
    • -
    • ValidationMethod -: ktt -
    • -
    • ValidationMode -: ktt -
    • +
    • ArgumentAccessType : ktt
    • +
    • ArgumentDataType : ktt
    • +
    • ArgumentManagementType : ktt
    • +
    • ArgumentMemoryLocation : ktt
    • +
    • ArgumentMemoryType : ktt
    • +
    • ComputeApi : ktt
    • +
    • DeviceType : ktt
    • +
    • ExceptionReason : ktt
    • +
    • GlobalSizeType : ktt
    • +
    • KernelRunMode : ktt
    • +
    • LoggingLevel : ktt
    • +
    • ModifierAction : ktt
    • +
    • ModifierDimension : ktt
    • +
    • ModifierType : ktt
    • +
    • OutputFormat : ktt
    • +
    • ProfilingCounterType : ktt
    • +
    • ResultStatus : ktt
    • +
    • TimeUnit : ktt
    • +
    • ValidationMethod : ktt
    • +
    • ValidationMode : ktt
    diff --git a/Docs/namespacemembers_func.html b/Docs/namespacemembers_func.html index e1f673ab..85f02acd 100644 --- a/Docs/namespacemembers_func.html +++ b/Docs/namespacemembers_func.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Namespace Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -93,19 +92,15 @@
     
      -
    • GetKttVersion() -: ktt -
    • -
    • GetKttVersionString() -: ktt -
    • +
    • GetKttVersion() : ktt
    • +
    • GetKttVersionString() : ktt
    diff --git a/Docs/namespacemembers_type.html b/Docs/namespacemembers_type.html index 22eeead5..458b8443 100644 --- a/Docs/namespacemembers_type.html +++ b/Docs/namespacemembers_type.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Namespace Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -93,76 +92,34 @@
     
      -
    • ArgumentId -: ktt -
    • -
    • ComputeActionId -: ktt -
    • -
    • ComputeBuffer -: ktt -
    • -
    • ComputeContext -: ktt -
    • -
    • ComputeQueue -: ktt -
    • -
    • ConstraintFunction -: ktt -
    • -
    • DeviceIndex -: ktt -
    • -
    • KernelComputeId -: ktt -
    • -
    • KernelDefinitionId -: ktt -
    • -
    • KernelId -: ktt -
    • -
    • KernelLauncher -: ktt -
    • -
    • ModifierFunction -: ktt -
    • -
    • Nanoseconds -: ktt -
    • -
    • ParameterInput -: ktt -
    • -
    • PlatformIndex -: ktt -
    • -
    • QueueId -: ktt -
    • -
    • ReferenceComputation -: ktt -
    • -
    • TransferActionId -: ktt -
    • -
    • UnifiedBufferMemory -: ktt -
    • -
    • UserData -: ktt -
    • -
    • ValueComparator -: ktt -
    • +
    • ArgumentId : ktt
    • +
    • ComputeActionId : ktt
    • +
    • ComputeBuffer : ktt
    • +
    • ComputeContext : ktt
    • +
    • ComputeQueue : ktt
    • +
    • ConstraintFunction : ktt
    • +
    • DeviceIndex : ktt
    • +
    • KernelComputeId : ktt
    • +
    • KernelDefinitionId : ktt
    • +
    • KernelId : ktt
    • +
    • KernelLauncher : ktt
    • +
    • ModifierFunction : ktt
    • +
    • Nanoseconds : ktt
    • +
    • ParameterInput : ktt
    • +
    • PlatformIndex : ktt
    • +
    • QueueId : ktt
    • +
    • ReferenceComputation : ktt
    • +
    • TransferActionId : ktt
    • +
    • UnifiedBufferMemory : ktt
    • +
    • UserData : ktt
    • +
    • ValueComparator : ktt
    diff --git a/Docs/namespacemembers_vars.html b/Docs/namespacemembers_vars.html index f3813188..0c5c7f9c 100644 --- a/Docs/namespacemembers_vars.html +++ b/Docs/namespacemembers_vars.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Namespace Members @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -93,25 +92,18 @@
     
      -
    • InvalidArgumentId -: ktt -
    • -
    • InvalidDuration -: ktt -
    • -
    • InvalidKernelDefinitionId -: ktt -
    • -
    • InvalidKernelId -: ktt -
    • +
    • InvalidArgumentId : ktt
    • +
    • InvalidDuration : ktt
    • +
    • InvalidKernelDefinitionId : ktt
    • +
    • InvalidKernelId : ktt
    • +
    • InvalidQueueId : ktt
    diff --git a/Docs/navtree.css b/Docs/navtree.css index 33341a67..d8a311a2 100644 --- a/Docs/navtree.css +++ b/Docs/navtree.css @@ -87,6 +87,7 @@ position: absolute; left: 0px; width: 250px; + overflow : hidden; } .ui-resizable .ui-resizable-handle { diff --git a/Docs/navtree.js b/Docs/navtree.js index 1e272d31..27983687 100644 --- a/Docs/navtree.js +++ b/Docs/navtree.js @@ -325,11 +325,14 @@ function selectAndHighlight(hash,n) $(n.itemDiv).addClass('selected'); $(n.itemDiv).attr('id','selected'); } + var topOffset=5; + if (typeof page_layout!=='undefined' && page_layout==1) { + topOffset+=$('#top').outerHeight(); + } if ($('#nav-tree-contents .item:first').hasClass('selected')) { - $('#nav-sync').css('top','30px'); - } else { - $('#nav-sync').css('top','5px'); + topOffset+=25; } + $('#nav-sync').css('top',topOffset+'px'); showRoot(); } diff --git a/Docs/navtreedata.js b/Docs/navtreedata.js index ce4cf122..a218449b 100644 --- a/Docs/navtreedata.js +++ b/Docs/navtreedata.js @@ -57,8 +57,8 @@ var NAVTREE = var NAVTREEINDEX = [ "_argument_access_type_8h.html", -"classktt_1_1_compute_interface.html", -"classktt_1_1_tuner.html#a77e12637ab4df58139ce03161f6a9a47" +"classktt_1_1_computation_result.html#a9a3fe7b77be8409f7556905ec884e5fc", +"classktt_1_1_tuner.html#a32e8f697b84556c3164575897f7f891a" ]; var SYNCONMSG = 'click to disable panel synchronisation'; diff --git a/Docs/navtreeindex0.js b/Docs/navtreeindex0.js index 7b6dde63..9064e462 100644 --- a/Docs/navtreeindex0.js +++ b/Docs/navtreeindex0.js @@ -37,6 +37,7 @@ var NAVTREEINDEX0 = "_argument_memory_location_8h_source.html":[4,0,0,3,3], "_argument_memory_type_8h.html":[4,0,0,3,4], "_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771":[4,0,0,3,4,0], +"_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771a02c86eb2792f3262c21d030a87e19793":[4,0,0,3,4,0,3], "_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771a509820290d57f333403f490dde7316f4":[4,0,0,3,4,0,2], 
"_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771a57dea6f5039281b7fee517fc43bf3110":[4,0,0,3,4,0,1], "_argument_memory_type_8h.html#a9bff4519ba4718c5713c139e61058771af60357a8d17e45793298323f1b372a74":[4,0,0,3,4,0,0], @@ -94,17 +95,23 @@ var NAVTREEINDEX0 = "_kernel_profiling_data_8h.html":[4,0,0,0,2,4], "_kernel_profiling_data_8h_source.html":[4,0,0,0,2,4], "_kernel_result_8h_source.html":[4,0,0,0,2,5], +"_kernel_run_mode_8h.html":[4,0,0,4,0], +"_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35ee":[4,0,0,4,0,0], +"_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35eea5bda814c4aedb126839228f1a3d92f09":[4,0,0,4,0,0,0], +"_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35eeab2f31c1511a9baf03796731b9fbc2e1b":[4,0,0,4,0,0,3], +"_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35eeabc40db0f46cab2811dede4f5341446fb":[4,0,0,4,0,0,2], +"_kernel_run_mode_8h.html#a405c5e17d3cfbedf3b3eb4e54c7c35eeadd15d361ea740ab5e11145f0fa2f9374":[4,0,0,4,0,0,1], +"_kernel_run_mode_8h_source.html":[4,0,0,4,0], "_ktt_8h.html":[4,0,0,7], "_ktt_8h_source.html":[4,0,0,7], "_ktt_exception_8h.html":[4,0,0,0,8], "_ktt_exception_8h_source.html":[4,0,0,0,8], "_ktt_platform_8h.html":[4,0,0,8], -"_ktt_platform_8h.html#a4b9a881a72b59d26bc084ad29f1fa346":[4,0,0,8,4], -"_ktt_platform_8h.html#ab86eda35e4f644e86fbaf235b9a5490b":[4,0,0,8,0], -"_ktt_platform_8h.html#ac72a954d39b9511660df6fd96f698c2e":[4,0,0,8,3], -"_ktt_platform_8h.html#ad2cfb9fcbae19b1303040b0e2e8584b3":[4,0,0,8,1], -"_ktt_platform_8h.html#adbc2306590f891962c4a46bde02cc466":[4,0,0,8,5], -"_ktt_platform_8h.html#ae5ff38ea2d15f4dbba741735cc2c5959":[4,0,0,8,2], +"_ktt_platform_8h.html#a4b9a881a72b59d26bc084ad29f1fa346":[4,0,0,8,3], +"_ktt_platform_8h.html#ac72a954d39b9511660df6fd96f698c2e":[4,0,0,8,2], +"_ktt_platform_8h.html#ad2cfb9fcbae19b1303040b0e2e8584b3":[4,0,0,8,0], +"_ktt_platform_8h.html#adbc2306590f891962c4a46bde02cc466":[4,0,0,8,4], 
+"_ktt_platform_8h.html#ae5ff38ea2d15f4dbba741735cc2c5959":[4,0,0,8,1], "_ktt_platform_8h_source.html":[4,0,0,8], "_ktt_types_8h.html":[4,0,0,9], "_ktt_types_8h.html#a06314e7380eb0baeb140510fcae36003":[4,0,0,9,21], @@ -125,6 +132,7 @@ var NAVTREEINDEX0 = "_ktt_types_8h.html#ab2cf30aab952f8ea4366979237f9e1e9":[4,0,0,9,20], "_ktt_types_8h.html#ab65fcc4157cfe64ccd1d00b177ccb5af":[4,0,0,9,2], "_ktt_types_8h.html#ace6bcecdbf444cc8adffa172168f8a47":[4,0,0,9,14], +"_ktt_types_8h.html#ad611ee0e290241acfe5dcec46fce712c":[4,0,0,9,25], "_ktt_types_8h.html#ad8b6453e21ff3d1bf4208886d9af8810":[4,0,0,9,11], "_ktt_types_8h.html#aea210ff848d810aa544c225033ac26cb":[4,0,0,9,0], "_ktt_types_8h.html#aef951ec46e383694c8512ddaa6b200d6":[4,0,0,9,13], @@ -205,49 +213,41 @@ var NAVTREEINDEX0 = "_tuner_8h_source.html":[4,0,0,10], "_tuning_duration_8h.html":[4,0,0,0,4,4], "_tuning_duration_8h_source.html":[4,0,0,0,4,4], -"_validation_method_8h.html":[4,0,0,4,0], -"_validation_method_8h.html#a187d4987bb48bc1f78f628c8aa840a20":[4,0,0,4,0,0], -"_validation_method_8h.html#a187d4987bb48bc1f78f628c8aa840a20a02dc3aadaf5d37fb8def639351068c3b":[4,0,0,4,0,0,1], -"_validation_method_8h.html#a187d4987bb48bc1f78f628c8aa840a20a245c25b99b4bbb551a06085f4c852cc7":[4,0,0,4,0,0,0], -"_validation_method_8h.html#a187d4987bb48bc1f78f628c8aa840a20afbd9c2b8d6c9a13483253add7aae125a":[4,0,0,4,0,0,2], -"_validation_method_8h_source.html":[4,0,0,4,0], -"_validation_mode_8h.html":[4,0,0,4,1], -"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04":[4,0,0,4,1,1], -"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04a5bda814c4aedb126839228f1a3d92f09":[4,0,0,4,1,1,1], -"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04a6adf97f83acf6453d4a6a4b1070f3754":[4,0,0,4,1,1,0], -"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04ab1c94ca2fbc3e78fc30069c8d0f01680":[4,0,0,4,1,1,4], -"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04abc40db0f46cab2811dede4f5341446fb":[4,0,0,4,1,1,3], 
-"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04add15d361ea740ab5e11145f0fa2f9374":[4,0,0,4,1,1,2], -"_validation_mode_8h_source.html":[4,0,0,4,1], +"_validation_method_8h.html":[4,0,0,4,1], +"_validation_method_8h.html#a187d4987bb48bc1f78f628c8aa840a20":[4,0,0,4,1,0], +"_validation_method_8h.html#a187d4987bb48bc1f78f628c8aa840a20a02dc3aadaf5d37fb8def639351068c3b":[4,0,0,4,1,0,1], +"_validation_method_8h.html#a187d4987bb48bc1f78f628c8aa840a20a245c25b99b4bbb551a06085f4c852cc7":[4,0,0,4,1,0,0], +"_validation_method_8h.html#a187d4987bb48bc1f78f628c8aa840a20afbd9c2b8d6c9a13483253add7aae125a":[4,0,0,4,1,0,2], +"_validation_method_8h_source.html":[4,0,0,4,1], +"_validation_mode_8h.html":[4,0,0,4,2], +"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04":[4,0,0,4,2,1], +"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04a5bda814c4aedb126839228f1a3d92f09":[4,0,0,4,2,1,1], +"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04a6adf97f83acf6453d4a6a4b1070f3754":[4,0,0,4,2,1,0], +"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04ab1c94ca2fbc3e78fc30069c8d0f01680":[4,0,0,4,2,1,4], +"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04abc40db0f46cab2811dede4f5341446fb":[4,0,0,4,2,1,3], +"_validation_mode_8h.html#a3baf318a03750f7418a5faa051967c04add15d361ea740ab5e11145f0fa2f9374":[4,0,0,4,2,1,2], +"_validation_mode_8h_source.html":[4,0,0,4,2], "annotated.html":[3,0], "classes.html":[3,1], -"classktt_1_1_buffer_output_descriptor.html":[3,0,0,9], -"classktt_1_1_buffer_output_descriptor.html#a41600a29e1ec6f81dcfecc05f0f4fa83":[3,0,0,9,1], -"classktt_1_1_buffer_output_descriptor.html#a6f0f9d43a02f3fc0764f71882c54e218":[3,0,0,9,0], -"classktt_1_1_buffer_output_descriptor.html#a75d9b2637d5e032444607f8bac772428":[3,0,0,9,3], -"classktt_1_1_buffer_output_descriptor.html#a90ac81b68278b4f11549a104691e653a":[3,0,0,9,2], -"classktt_1_1_buffer_output_descriptor.html#ab98dc87e57e161b870e547be4b120ba1":[3,0,0,9,4], 
-"classktt_1_1_computation_result.html":[3,0,0,10], -"classktt_1_1_computation_result.html#a06f99a296e55b0d020bea2483902cc10":[3,0,0,10,13], -"classktt_1_1_computation_result.html#a1053b9bad7ba033e05ba11f6da777433":[3,0,0,10,4], -"classktt_1_1_computation_result.html#a1d9cfbe348b1b781fab1eda2fbff5986":[3,0,0,10,17], -"classktt_1_1_computation_result.html#a1df9421da7d98b039219703fd50367eb":[3,0,0,10,11], -"classktt_1_1_computation_result.html#a235db33fc3db53a69422ba9615367961":[3,0,0,10,9], -"classktt_1_1_computation_result.html#a32e32e26bcda0c6181d88a5ce1365850":[3,0,0,10,12], -"classktt_1_1_computation_result.html#a3ca1be797ffbbdf76c5d6c9274c6412d":[3,0,0,10,15], -"classktt_1_1_computation_result.html#a3ff1503a460eaca31225d188b5171846":[3,0,0,10,8], -"classktt_1_1_computation_result.html#a4c95b075b0b98411f7416245b9584d37":[3,0,0,10,3], -"classktt_1_1_computation_result.html#a5d22628834c0d4498b8f56f1aa065b6a":[3,0,0,10,7], -"classktt_1_1_computation_result.html#a6690facafa70d44b1c4fb9fcad2cacc9":[3,0,0,10,0], -"classktt_1_1_computation_result.html#a6e3dda5e5a9ae57ecd46a9a7a96bee48":[3,0,0,10,5], -"classktt_1_1_computation_result.html#a71a5366522c4ab3390762be4cfd506ba":[3,0,0,10,1], -"classktt_1_1_computation_result.html#a8a03ad8a859224d04b1bebee27750104":[3,0,0,10,6], -"classktt_1_1_computation_result.html#a9a3fe7b77be8409f7556905ec884e5fc":[3,0,0,10,16], -"classktt_1_1_computation_result.html#ab10f828fecb91e5e77bd9c3f55c5986a":[3,0,0,10,14], -"classktt_1_1_computation_result.html#ae614a532b2e4da7d74d6d7ed342e65fc":[3,0,0,10,10], -"classktt_1_1_computation_result.html#aeaf363205b19563407cfa3be25e40102":[3,0,0,10,2], -"classktt_1_1_compute_api_initializer.html":[3,0,0,1], -"classktt_1_1_compute_api_initializer.html#a0c6b772c0164ec1af736b8c8e4787f03":[3,0,0,1,0], -"classktt_1_1_compute_api_initializer.html#a92424e13b0a15d301e321095c4de2c53":[3,0,0,1,2], -"classktt_1_1_compute_api_initializer.html#ab190afaf89b2174d523f2f4cef7fc70d":[3,0,0,1,1] 
+"classktt_1_1_buffer_output_descriptor.html":[3,0,0,0], +"classktt_1_1_buffer_output_descriptor.html#a41600a29e1ec6f81dcfecc05f0f4fa83":[3,0,0,0,1], +"classktt_1_1_buffer_output_descriptor.html#a6f0f9d43a02f3fc0764f71882c54e218":[3,0,0,0,0], +"classktt_1_1_buffer_output_descriptor.html#a75d9b2637d5e032444607f8bac772428":[3,0,0,0,3], +"classktt_1_1_buffer_output_descriptor.html#a90ac81b68278b4f11549a104691e653a":[3,0,0,0,2], +"classktt_1_1_buffer_output_descriptor.html#ab98dc87e57e161b870e547be4b120ba1":[3,0,0,0,4], +"classktt_1_1_computation_result.html":[3,0,0,1], +"classktt_1_1_computation_result.html#a06f99a296e55b0d020bea2483902cc10":[3,0,0,1,13], +"classktt_1_1_computation_result.html#a1053b9bad7ba033e05ba11f6da777433":[3,0,0,1,4], +"classktt_1_1_computation_result.html#a1d9cfbe348b1b781fab1eda2fbff5986":[3,0,0,1,17], +"classktt_1_1_computation_result.html#a1df9421da7d98b039219703fd50367eb":[3,0,0,1,11], +"classktt_1_1_computation_result.html#a235db33fc3db53a69422ba9615367961":[3,0,0,1,9], +"classktt_1_1_computation_result.html#a32e32e26bcda0c6181d88a5ce1365850":[3,0,0,1,12], +"classktt_1_1_computation_result.html#a3ca1be797ffbbdf76c5d6c9274c6412d":[3,0,0,1,15], +"classktt_1_1_computation_result.html#a3ff1503a460eaca31225d188b5171846":[3,0,0,1,8], +"classktt_1_1_computation_result.html#a4c95b075b0b98411f7416245b9584d37":[3,0,0,1,3], +"classktt_1_1_computation_result.html#a5d22628834c0d4498b8f56f1aa065b6a":[3,0,0,1,7], +"classktt_1_1_computation_result.html#a6690facafa70d44b1c4fb9fcad2cacc9":[3,0,0,1,0], +"classktt_1_1_computation_result.html#a6e3dda5e5a9ae57ecd46a9a7a96bee48":[3,0,0,1,5], +"classktt_1_1_computation_result.html#a71a5366522c4ab3390762be4cfd506ba":[3,0,0,1,1], +"classktt_1_1_computation_result.html#a8a03ad8a859224d04b1bebee27750104":[3,0,0,1,6] }; diff --git a/Docs/navtreeindex1.js b/Docs/navtreeindex1.js index 918f67f0..e29a3da3 100644 --- a/Docs/navtreeindex1.js +++ b/Docs/navtreeindex1.js @@ -1,253 +1,253 @@ var NAVTREEINDEX1 = { 
-"classktt_1_1_compute_interface.html":[3,0,0,2], -"classktt_1_1_compute_interface.html#a0bd5e6cad0cabf6d747e35acc3ce1b55":[3,0,0,2,16], -"classktt_1_1_compute_interface.html#a0f12ca50eac7d870be4cdc5c4d76379a":[3,0,0,2,30], -"classktt_1_1_compute_interface.html#a0f15c5a48f396ab20feb26a23baf3482":[3,0,0,2,25], -"classktt_1_1_compute_interface.html#a145a4874546d1edfcee7dfe681cbf537":[3,0,0,2,1], -"classktt_1_1_compute_interface.html#a2dc0fb83da544686a16c7a079afe9c05":[3,0,0,2,33], -"classktt_1_1_compute_interface.html#a4169242f94362edbc4dbd0f7c41127b5":[3,0,0,2,21], -"classktt_1_1_compute_interface.html#a46d0d3e33c0f1e3e23413176a2008e23":[3,0,0,2,9], -"classktt_1_1_compute_interface.html#a4d543715c40557ac22aef493595ee9f2":[3,0,0,2,26], -"classktt_1_1_compute_interface.html#a4dcdbd900b888751626bd6fe0194d615":[3,0,0,2,32], -"classktt_1_1_compute_interface.html#a52e7566928e755696360228d2e4cbd76":[3,0,0,2,12], -"classktt_1_1_compute_interface.html#a56983712bdc1f2aa982933179b569369":[3,0,0,2,29], -"classktt_1_1_compute_interface.html#a5a075573b67aa482409c25a9412ef024":[3,0,0,2,14], -"classktt_1_1_compute_interface.html#a64503830018826609d22e3056de84a18":[3,0,0,2,7], -"classktt_1_1_compute_interface.html#a66938288a86c1d954a8e89f69c85d81b":[3,0,0,2,22], -"classktt_1_1_compute_interface.html#a6735b4d5e9a50034e9acd1ac5b34cd52":[3,0,0,2,6], -"classktt_1_1_compute_interface.html#a6f59d270724997174613f9fd2006e9a4":[3,0,0,2,18], -"classktt_1_1_compute_interface.html#a73fd4aef75c013bcf70a62c0134f985a":[3,0,0,2,3], -"classktt_1_1_compute_interface.html#a8a303fb872e97c6de59cc66ee5aa5b59":[3,0,0,2,4], -"classktt_1_1_compute_interface.html#a8b7e7a9742a11f1ef0ae207d3130312f":[3,0,0,2,23], -"classktt_1_1_compute_interface.html#a8d1a5aad6165ffdba835d40443651c97":[3,0,0,2,13], -"classktt_1_1_compute_interface.html#a8d7451f30d523790bbd42792fdd666ca":[3,0,0,2,2], -"classktt_1_1_compute_interface.html#a907624fb3ca339bfba2e7ea9866c5f69":[3,0,0,2,0], 
-"classktt_1_1_compute_interface.html#a976e07da2b46dc9af1f0b6ce77b50886":[3,0,0,2,10], -"classktt_1_1_compute_interface.html#a98c87a68ccfd62d97570bb499447a32b":[3,0,0,2,24], -"classktt_1_1_compute_interface.html#aa037b0da7fdc94e7deb3bfb0168406ba":[3,0,0,2,19], -"classktt_1_1_compute_interface.html#aa9ef7c867d4180088f06e1c3af231138":[3,0,0,2,17], -"classktt_1_1_compute_interface.html#abc9416b26a01b2ce04d334a379e3fa95":[3,0,0,2,28], -"classktt_1_1_compute_interface.html#abe3d80e67b9daa10aee4cb1ec288139b":[3,0,0,2,11], -"classktt_1_1_compute_interface.html#abe4d68e723947c2493f74e1e3d89d742":[3,0,0,2,15], -"classktt_1_1_compute_interface.html#acdca488b872cba7ab7ba5656e18eced7":[3,0,0,2,20], -"classktt_1_1_compute_interface.html#ad8ca47ec758904973e2ee458276b1335":[3,0,0,2,5], -"classktt_1_1_compute_interface.html#ae82a93622fddc93bd6abe262b3b4f6c0":[3,0,0,2,27], -"classktt_1_1_compute_interface.html#af5a74c4be89520607eb4e45c967a8648":[3,0,0,2,8], -"classktt_1_1_compute_interface.html#afcff8d4aa0d638512d9fefe6aa6fe927":[3,0,0,2,31], -"classktt_1_1_configuration_count.html":[3,0,0,19], -"classktt_1_1_configuration_count.html#a07059a1b39bd501b26bf522921d3a76a":[3,0,0,19,4], -"classktt_1_1_configuration_count.html#a0e0215d55003283a5c6880074f3b17d9":[3,0,0,19,1], -"classktt_1_1_configuration_count.html#a245d4855cbfa0d33022e884689735947":[3,0,0,19,0], -"classktt_1_1_configuration_count.html#ad1e0d855a2e199b4a49441f73fdf4e70":[3,0,0,19,2], -"classktt_1_1_configuration_count.html#ad949547efc769a696c7b8b01b39b2604":[3,0,0,19,3], -"classktt_1_1_configuration_duration.html":[3,0,0,20], -"classktt_1_1_configuration_duration.html#a64dfcad146ec17c95fc3e209972957ca":[3,0,0,20,4], -"classktt_1_1_configuration_duration.html#a75508624ea19dc28478773ecf5c8c858":[3,0,0,20,2], -"classktt_1_1_configuration_duration.html#a7b571c84c3fb14bb0d6e4b0513f0a0e9":[3,0,0,20,3], -"classktt_1_1_configuration_duration.html#a8e4dbd5e3739e62fee835ba6c49896c7":[3,0,0,20,0], 
-"classktt_1_1_configuration_duration.html#a98c21771fcede0945715b5a6057f52cb":[3,0,0,20,1], -"classktt_1_1_configuration_fraction.html":[3,0,0,21], -"classktt_1_1_configuration_fraction.html#a0d2fa91507e789136d17efe9641e13ad":[3,0,0,21,3], -"classktt_1_1_configuration_fraction.html#a35e73692570bf50ddbac98f2f7eeb089":[3,0,0,21,0], -"classktt_1_1_configuration_fraction.html#a3d070ebc565c97ff4197f162cc93e564":[3,0,0,21,1], -"classktt_1_1_configuration_fraction.html#a55ee4a423ff7cb338662eab04f589e1a":[3,0,0,21,4], -"classktt_1_1_configuration_fraction.html#a702ce7ffac703e2c04454d6884fb1aea":[3,0,0,21,2], -"classktt_1_1_deterministic_searcher.html":[3,0,0,15], -"classktt_1_1_deterministic_searcher.html#a4cad95c610c4c18178482e6619a30b3b":[3,0,0,15,0], -"classktt_1_1_deterministic_searcher.html#a57e71d23ca74ea9433cf28e246e0b6cd":[3,0,0,15,1], -"classktt_1_1_deterministic_searcher.html#a92e0dce557dde77550fd378f39400dbb":[3,0,0,15,2], -"classktt_1_1_deterministic_searcher.html#ae5ab31105b6134c72724de59e34cd2bc":[3,0,0,15,3], -"classktt_1_1_device_info.html":[3,0,0,7], -"classktt_1_1_device_info.html#a05e071b6d35a78281006ef47be4a7d9d":[3,0,0,7,20], -"classktt_1_1_device_info.html#a1c2302c00b2b32f1524ee1c961db4541":[3,0,0,7,0], -"classktt_1_1_device_info.html#a1c4ac471ce324ce40e8dbbc5b262ec72":[3,0,0,7,3], -"classktt_1_1_device_info.html#a312f1f6ceb7989ea07f61c3ad8d4b5ba":[3,0,0,7,4], -"classktt_1_1_device_info.html#a31ad37db18d4241525454239b4a1bd00":[3,0,0,7,13], -"classktt_1_1_device_info.html#a36bf9ec1e6410e5e1dac84e87165b4e5":[3,0,0,7,12], -"classktt_1_1_device_info.html#a3f1d17e18716418023284effc24aa111":[3,0,0,7,19], -"classktt_1_1_device_info.html#a3f5816c0e640d42a097e8cf9d4befb5d":[3,0,0,7,7], -"classktt_1_1_device_info.html#a561a22331da58bf1020cb0f429e4645c":[3,0,0,7,18], -"classktt_1_1_device_info.html#a5d2cd6fc287fd4ba033c02482a3123c0":[3,0,0,7,10], -"classktt_1_1_device_info.html#a6242a7047532cbc7d47a9417bd60b0b8":[3,0,0,7,14], 
-"classktt_1_1_device_info.html#a8975df22a6ca7ac94c8306969ed057dc":[3,0,0,7,2], -"classktt_1_1_device_info.html#aa903ae3bed5a25d4595ddc5374b0c5cc":[3,0,0,7,15], -"classktt_1_1_device_info.html#abcd75553d8ad9dcf723ba5ecb834313f":[3,0,0,7,5], -"classktt_1_1_device_info.html#acb74cd5f6bb48d2a46dc076f9456d44d":[3,0,0,7,9], -"classktt_1_1_device_info.html#ada297188614d02e7f0a56be80b66195a":[3,0,0,7,1], -"classktt_1_1_device_info.html#adfc43923135d990de34e915995e9df62":[3,0,0,7,8], -"classktt_1_1_device_info.html#ae3854df8444dbb599cb6bc5e533c6c78":[3,0,0,7,11], -"classktt_1_1_device_info.html#ae4e0a9d8871e13476bad9124c7bbcfa6":[3,0,0,7,16], -"classktt_1_1_device_info.html#aee75f9f96efb77e31900f258f9904b4e":[3,0,0,7,17], -"classktt_1_1_device_info.html#affca51f82c46b3280e8893476c02aab5":[3,0,0,7,6], -"classktt_1_1_dimension_vector.html":[3,0,0,4], -"classktt_1_1_dimension_vector.html#a068cc7be826b284136ca1daee01097be":[3,0,0,4,20], -"classktt_1_1_dimension_vector.html#a18486e4a20c01bb46d25bdefaa8c9c4e":[3,0,0,4,18], -"classktt_1_1_dimension_vector.html#a1bfbb2a72cff5585a671c5272848b3b1":[3,0,0,4,1], -"classktt_1_1_dimension_vector.html#a1dae553a3dfaf8d474a4d3a0ab8a95d4":[3,0,0,4,9], -"classktt_1_1_dimension_vector.html#a329579f2ee5b163c43ec4f806ec7290b":[3,0,0,4,6], -"classktt_1_1_dimension_vector.html#a39f4cb4130c4ead3716e8556679639ca":[3,0,0,4,8], -"classktt_1_1_dimension_vector.html#a3a227c566f2b416f19fdd85c62f4ebf5":[3,0,0,4,17], -"classktt_1_1_dimension_vector.html#a481f83a511def47ab7124ed7461024ce":[3,0,0,4,0], -"classktt_1_1_dimension_vector.html#a48e9f835e901fcce7f57bf17b1aeb05c":[3,0,0,4,3], -"classktt_1_1_dimension_vector.html#a51fdff81bd4c1f32afef8c4d20fb72b0":[3,0,0,4,7], -"classktt_1_1_dimension_vector.html#a548756d91eafa34a429dbc5de0473b74":[3,0,0,4,4], -"classktt_1_1_dimension_vector.html#a5e3e65d17829bbdeb1b08ac965192c3c":[3,0,0,4,14], -"classktt_1_1_dimension_vector.html#a6835327b04036570cad0ed0542a126de":[3,0,0,4,21], 
-"classktt_1_1_dimension_vector.html#a720aceedd128083cfda75a3a1eebab23":[3,0,0,4,15], -"classktt_1_1_dimension_vector.html#a8ca418fd13304e8edc9753eab27a7c24":[3,0,0,4,12], -"classktt_1_1_dimension_vector.html#aad1111e7bef1adc0e781aec708dffe3d":[3,0,0,4,19], -"classktt_1_1_dimension_vector.html#aafe05832a10592824a565e1985919240":[3,0,0,4,13], -"classktt_1_1_dimension_vector.html#ab390902241d5d5d544e67e5a8944d0b4":[3,0,0,4,5], -"classktt_1_1_dimension_vector.html#abc2848fa38c641a5727979b79c479c25":[3,0,0,4,11], -"classktt_1_1_dimension_vector.html#ac40ec611802bf43b67515c04f7b7775f":[3,0,0,4,2], -"classktt_1_1_dimension_vector.html#ad1eeeaf51882f0f41159e7bed569f5c7":[3,0,0,4,10], -"classktt_1_1_dimension_vector.html#adbac0fe41fbe4716c43ab0aed0e9a7ce":[3,0,0,4,16], -"classktt_1_1_kernel_configuration.html":[3,0,0,5], -"classktt_1_1_kernel_configuration.html#a25e068aeddea633414e923b3b01615d5":[3,0,0,5,4], -"classktt_1_1_kernel_configuration.html#a2e97ef5d6ca155af04289536426078da":[3,0,0,5,3], -"classktt_1_1_kernel_configuration.html#a3a5a9d4ffa080cf0e5b6efe8024ef1b9":[3,0,0,5,5], -"classktt_1_1_kernel_configuration.html#a577d70ddf1238d7410315b0906748a65":[3,0,0,5,9], -"classktt_1_1_kernel_configuration.html#a6e8fb7526d1c826742885e60e3560b5a":[3,0,0,5,0], -"classktt_1_1_kernel_configuration.html#a6ee2fd5153a7e5442ed9f36496db5dbd":[3,0,0,5,7], -"classktt_1_1_kernel_configuration.html#a8a3bd003aa6e264a497602688d589bc6":[3,0,0,5,2], -"classktt_1_1_kernel_configuration.html#a9ed979305aafb0bb5eaf4aa14165c922":[3,0,0,5,6], -"classktt_1_1_kernel_configuration.html#acba125886ed2c2642a1aa7c25d4d2d75":[3,0,0,5,8], -"classktt_1_1_kernel_configuration.html#aee63d61d9b9e9ab8c30b8e8160de0b31":[3,0,0,5,1], -"classktt_1_1_kernel_profiling_counter.html":[3,0,0,12], -"classktt_1_1_kernel_profiling_counter.html#a1ef12bbb76f6bfa213a8119ea7a7b049":[3,0,0,12,9], -"classktt_1_1_kernel_profiling_counter.html#a2e74a0754342eaaa59ecec2b59036d38":[3,0,0,12,8], 
-"classktt_1_1_kernel_profiling_counter.html#a30ab328958eeaac77779d35c0fcbf0f2":[3,0,0,12,0], -"classktt_1_1_kernel_profiling_counter.html#a35ea235dae6d237dcffe77913ea5d8d6":[3,0,0,12,4], -"classktt_1_1_kernel_profiling_counter.html#a4039c08d34f68eb8f46663878a0393a3":[3,0,0,12,2], -"classktt_1_1_kernel_profiling_counter.html#a41fe747d9a80d48cac428a4013f44c65":[3,0,0,12,6], -"classktt_1_1_kernel_profiling_counter.html#a4dff75794d30fa5cd21de106d7f4d5a8":[3,0,0,12,5], -"classktt_1_1_kernel_profiling_counter.html#a626d22d51b8544119ffe0c03edded569":[3,0,0,12,10], -"classktt_1_1_kernel_profiling_counter.html#a9bd46a83e38739b0ac41c0f439475331":[3,0,0,12,7], -"classktt_1_1_kernel_profiling_counter.html#aaf6a73668395a8aa160476f5e0895291":[3,0,0,12,3], -"classktt_1_1_kernel_profiling_counter.html#af682e50f61c5e02ea9c9dc4f97f5a496":[3,0,0,12,1], -"classktt_1_1_kernel_profiling_counter.html#afe41f379ca864885d6271604b22d439e":[3,0,0,12,11], -"classktt_1_1_kernel_profiling_data.html":[3,0,0,13], -"classktt_1_1_kernel_profiling_data.html#a073207dfc6c68e07f73d15ae5c03a020":[3,0,0,13,2], -"classktt_1_1_kernel_profiling_data.html#a17fcce3646d228f9e29da5d2553d313e":[3,0,0,13,10], -"classktt_1_1_kernel_profiling_data.html#a5e33137f931a103ed97ede31c81bdbdc":[3,0,0,13,4], -"classktt_1_1_kernel_profiling_data.html#a60a5921a950830d719d07d3a4add9b5b":[3,0,0,13,1], -"classktt_1_1_kernel_profiling_data.html#a6515eaea540104d99c6c340070c3688b":[3,0,0,13,9], -"classktt_1_1_kernel_profiling_data.html#a7d64d12e01400f90cccee547dd67c076":[3,0,0,13,8], -"classktt_1_1_kernel_profiling_data.html#aac88b3b1e5e932bcdd313037593ab914":[3,0,0,13,7], -"classktt_1_1_kernel_profiling_data.html#ab457cb17731ad1b122801011f31d300b":[3,0,0,13,5], -"classktt_1_1_kernel_profiling_data.html#ad231ce41ebbd42c923b4c0ba7c6e7b7d":[3,0,0,13,6], -"classktt_1_1_kernel_profiling_data.html#adccfc87b20595b3467e3bb4d3ca239a8":[3,0,0,13,3], -"classktt_1_1_kernel_profiling_data.html#aef6205745d78ecb389efa38d9942e4ce":[3,0,0,13,0], 
-"classktt_1_1_kernel_profiling_data.html#af3dccefc935dddf9933ba9377da009cb":[3,0,0,13,11], -"classktt_1_1_kernel_result.html":[3,0,0,14], -"classktt_1_1_kernel_result.html#a1fa061eaec6b9f756d59cd6c988f2e69":[3,0,0,14,13], -"classktt_1_1_kernel_result.html#a2c413a2a9e444f2370e244b9a2a216fe":[3,0,0,14,11], -"classktt_1_1_kernel_result.html#a36938e66c910dc9de499faeb6e5b573d":[3,0,0,14,9], -"classktt_1_1_kernel_result.html#a4d7c5c73cb5cd9425f3d48749fd29533":[3,0,0,14,12], -"classktt_1_1_kernel_result.html#a5a6462e356db1bed3df35db5606a4a7c":[3,0,0,14,16], -"classktt_1_1_kernel_result.html#a62d49b89c426b97283ffc72075bd36c7":[3,0,0,14,15], -"classktt_1_1_kernel_result.html#a65c5f8856417eed5bcc1592a0415a3d6":[3,0,0,14,10], -"classktt_1_1_kernel_result.html#a7004a0fd0766610281b6ffdf06411696":[3,0,0,14,3], -"classktt_1_1_kernel_result.html#a85655eb95f6a69d3989acf6aba64d810":[3,0,0,14,8], -"classktt_1_1_kernel_result.html#a9735a1357e010cc89061e6cbf404777d":[3,0,0,14,0], -"classktt_1_1_kernel_result.html#aa8adbadb09278f0f2f9a3d5924e11f59":[3,0,0,14,6], -"classktt_1_1_kernel_result.html#abc545615523a4dea055784660f8fe618":[3,0,0,14,4], -"classktt_1_1_kernel_result.html#ac29366a0b1b5e9a9d0a18750c57f881e":[3,0,0,14,17], -"classktt_1_1_kernel_result.html#ada777a7053bf09118204e846239d6e10":[3,0,0,14,5], -"classktt_1_1_kernel_result.html#ade0c3b9a58e9561e0bc008cd583cd351":[3,0,0,14,7], -"classktt_1_1_kernel_result.html#ae6e6ba75f1db55df46e93ef745135e70":[3,0,0,14,1], -"classktt_1_1_kernel_result.html#aec57eff202632e1a51cde6839a5f59a8":[3,0,0,14,2], -"classktt_1_1_kernel_result.html#aec7ac5fb0ac54f762a33854df14b6eb0":[3,0,0,14,14], -"classktt_1_1_ktt_exception.html":[3,0,0,3], -"classktt_1_1_ktt_exception.html#a47058989269370f6b3ca0933e5462720":[3,0,0,3,1], -"classktt_1_1_ktt_exception.html#ab6dcdf4a93cf0bce9d9f47a79c549f8c":[3,0,0,3,0], -"classktt_1_1_ktt_exception.html#ada9eeeda3a8152e72f1d01c40b2d4608":[3,0,0,3,2], -"classktt_1_1_mcmc_searcher.html":[3,0,0,16], 
-"classktt_1_1_mcmc_searcher.html#a6f2ddec84f5b53535e8ef162569080a7":[3,0,0,16,2], -"classktt_1_1_mcmc_searcher.html#a7686a12442d4c52eba6cf0c50f707934":[3,0,0,16,1], -"classktt_1_1_mcmc_searcher.html#a78ae10efa302515fec75e78e8bcd0dce":[3,0,0,16,4], -"classktt_1_1_mcmc_searcher.html#a8df1133d653a1e9dbad3c813efb42f73":[3,0,0,16,0], -"classktt_1_1_mcmc_searcher.html#a91ca30679a5bafcac5f3dd5818eb2c1f":[3,0,0,16,3], -"classktt_1_1_parameter_pair.html":[3,0,0,6], -"classktt_1_1_parameter_pair.html#a00a92aa5dd7eecf18015c818c8af3a42":[3,0,0,6,9], -"classktt_1_1_parameter_pair.html#a04c8de22404e79dcd78d2259cf3d30e6":[3,0,0,6,8], -"classktt_1_1_parameter_pair.html#a0542a9454902488e990906cc276197ef":[3,0,0,6,2], -"classktt_1_1_parameter_pair.html#a25fec44fa46797a76cbe1cb75ea3dde3":[3,0,0,6,11], -"classktt_1_1_parameter_pair.html#a638129565de501add5ea0a042a41a741":[3,0,0,6,13], -"classktt_1_1_parameter_pair.html#a63d30900d44bcba18d0bbc29f0bea9ac":[3,0,0,6,10], -"classktt_1_1_parameter_pair.html#a70d9cd2866b5a27e05de99de4f0eecd8":[3,0,0,6,6], -"classktt_1_1_parameter_pair.html#a8d6f54576f30ac9214dc8e0bc25eaa0c":[3,0,0,6,5], -"classktt_1_1_parameter_pair.html#a8f4d2b272fb3a3e1a31661cb10d9da17":[3,0,0,6,3], -"classktt_1_1_parameter_pair.html#a98275cb6450f000fe7d11a189094cd19":[3,0,0,6,12], -"classktt_1_1_parameter_pair.html#aa1a0cc60c3c77a523d2a82533e1cba5e":[3,0,0,6,0], -"classktt_1_1_parameter_pair.html#ab5d21e0b7e4459a8ba38c29994e0cbb9":[3,0,0,6,7], -"classktt_1_1_parameter_pair.html#ae5c4d77d766795b1b6213afefbd287df":[3,0,0,6,4], -"classktt_1_1_parameter_pair.html#af60f395c67c664ebc2956cfb117ddd08":[3,0,0,6,1], -"classktt_1_1_platform_info.html":[3,0,0,8], -"classktt_1_1_platform_info.html#a23708c8522cdbff24d11a7674b7fffc1":[3,0,0,8,5], -"classktt_1_1_platform_info.html#a2df54982dc384626b5d729bb4b5b8552":[3,0,0,8,8], -"classktt_1_1_platform_info.html#a34454734b44eb0daff35a691088c70e2":[3,0,0,8,1], 
-"classktt_1_1_platform_info.html#a3b0fb61668b450eb83f2ebd60a96fa7e":[3,0,0,8,3], -"classktt_1_1_platform_info.html#aa211d094d3f5b23565eda47288403cf9":[3,0,0,8,0], -"classktt_1_1_platform_info.html#ac85f24f5c3776ef90fd5db5641dc8806":[3,0,0,8,7], -"classktt_1_1_platform_info.html#acc66684305cc6cf2a0288d6ecd11f23a":[3,0,0,8,4], -"classktt_1_1_platform_info.html#ad4d55a0c9df7fcab4e6db838e8954c99":[3,0,0,8,6], -"classktt_1_1_platform_info.html#af2a945add259928b5f3914fc0d85c323":[3,0,0,8,2], -"classktt_1_1_platform_info.html#afae7a17ec655b25e1486e532ae2a99a1":[3,0,0,8,9], -"classktt_1_1_random_searcher.html":[3,0,0,17], -"classktt_1_1_random_searcher.html#aa7a085151dd0da33343ed98eaac7173f":[3,0,0,17,0], -"classktt_1_1_random_searcher.html#aa8dbbec845feb323a87702e8072bf037":[3,0,0,17,1], -"classktt_1_1_random_searcher.html#ab583154ca3d44951a107f68672a81c4d":[3,0,0,17,3], -"classktt_1_1_random_searcher.html#ac71348413937c204ea7c9e15dbd22b83":[3,0,0,17,2], -"classktt_1_1_searcher.html":[3,0,0,18], -"classktt_1_1_searcher.html#a1bdd6c20c266a2d713f3014fcd31bf49":[3,0,0,18,1], -"classktt_1_1_searcher.html#a4ce9b618216a950a2f678aeca29ad02f":[3,0,0,18,12], -"classktt_1_1_searcher.html#a5f4f5320e51d04d140ecab230a882190":[3,0,0,18,6], -"classktt_1_1_searcher.html#a630d6dbe3300f4399861d175d3dbbb63":[3,0,0,18,9], -"classktt_1_1_searcher.html#a634b39d630532ff59966c16c15294f76":[3,0,0,18,10], -"classktt_1_1_searcher.html#a6b12def7ae2d28bed61f1f75689c4d49":[3,0,0,18,0], -"classktt_1_1_searcher.html#a75fa4a6baf0ce399d38b42b2130eb825":[3,0,0,18,8], -"classktt_1_1_searcher.html#a8d7420d7014a7a647f6a28a669e57530":[3,0,0,18,11], -"classktt_1_1_searcher.html#a97adc4f408d500072d57bc471edc89f9":[3,0,0,18,5], -"classktt_1_1_searcher.html#ab8071e7133a0b489c13595cb7f8f1e04":[3,0,0,18,13], -"classktt_1_1_searcher.html#abd7f41cec1585f973d128ff883124bd5":[3,0,0,18,4], -"classktt_1_1_searcher.html#acf461b7992a2c3dfa7b087b447c42b96":[3,0,0,18,14], 
-"classktt_1_1_searcher.html#ae4f580a9d9531f0a80724fff289645ab":[3,0,0,18,2], -"classktt_1_1_searcher.html#af42b81f88b4ac08e5d219a7fca5b475c":[3,0,0,18,3], -"classktt_1_1_searcher.html#afe2188ab642c82bd1df103662ecb40bd":[3,0,0,18,7], +"classktt_1_1_computation_result.html#a9a3fe7b77be8409f7556905ec884e5fc":[3,0,0,1,16], +"classktt_1_1_computation_result.html#ab10f828fecb91e5e77bd9c3f55c5986a":[3,0,0,1,14], +"classktt_1_1_computation_result.html#ae614a532b2e4da7d74d6d7ed342e65fc":[3,0,0,1,10], +"classktt_1_1_computation_result.html#aeaf363205b19563407cfa3be25e40102":[3,0,0,1,2], +"classktt_1_1_compute_api_initializer.html":[3,0,0,2], +"classktt_1_1_compute_api_initializer.html#a0c6b772c0164ec1af736b8c8e4787f03":[3,0,0,2,0], +"classktt_1_1_compute_api_initializer.html#a92424e13b0a15d301e321095c4de2c53":[3,0,0,2,2], +"classktt_1_1_compute_api_initializer.html#ab190afaf89b2174d523f2f4cef7fc70d":[3,0,0,2,1], +"classktt_1_1_compute_interface.html":[3,0,0,3], +"classktt_1_1_compute_interface.html#a0bd5e6cad0cabf6d747e35acc3ce1b55":[3,0,0,3,17], +"classktt_1_1_compute_interface.html#a0f12ca50eac7d870be4cdc5c4d76379a":[3,0,0,3,32], +"classktt_1_1_compute_interface.html#a0f15c5a48f396ab20feb26a23baf3482":[3,0,0,3,26], +"classktt_1_1_compute_interface.html#a145a4874546d1edfcee7dfe681cbf537":[3,0,0,3,1], +"classktt_1_1_compute_interface.html#a2dc0fb83da544686a16c7a079afe9c05":[3,0,0,3,35], +"classktt_1_1_compute_interface.html#a4169242f94362edbc4dbd0f7c41127b5":[3,0,0,3,22], +"classktt_1_1_compute_interface.html#a46d0d3e33c0f1e3e23413176a2008e23":[3,0,0,3,9], +"classktt_1_1_compute_interface.html#a4d543715c40557ac22aef493595ee9f2":[3,0,0,3,28], +"classktt_1_1_compute_interface.html#a4dcdbd900b888751626bd6fe0194d615":[3,0,0,3,34], +"classktt_1_1_compute_interface.html#a52e7566928e755696360228d2e4cbd76":[3,0,0,3,12], +"classktt_1_1_compute_interface.html#a56983712bdc1f2aa982933179b569369":[3,0,0,3,31], 
+"classktt_1_1_compute_interface.html#a5a075573b67aa482409c25a9412ef024":[3,0,0,3,15], +"classktt_1_1_compute_interface.html#a64503830018826609d22e3056de84a18":[3,0,0,3,7], +"classktt_1_1_compute_interface.html#a66938288a86c1d954a8e89f69c85d81b":[3,0,0,3,23], +"classktt_1_1_compute_interface.html#a6735b4d5e9a50034e9acd1ac5b34cd52":[3,0,0,3,6], +"classktt_1_1_compute_interface.html#a6f59d270724997174613f9fd2006e9a4":[3,0,0,3,19], +"classktt_1_1_compute_interface.html#a73fd4aef75c013bcf70a62c0134f985a":[3,0,0,3,3], +"classktt_1_1_compute_interface.html#a794206c1530523c7a00d8da20582c37f":[3,0,0,3,27], +"classktt_1_1_compute_interface.html#a8a303fb872e97c6de59cc66ee5aa5b59":[3,0,0,3,4], +"classktt_1_1_compute_interface.html#a8b7e7a9742a11f1ef0ae207d3130312f":[3,0,0,3,24], +"classktt_1_1_compute_interface.html#a8d1a5aad6165ffdba835d40443651c97":[3,0,0,3,13], +"classktt_1_1_compute_interface.html#a8d7451f30d523790bbd42792fdd666ca":[3,0,0,3,2], +"classktt_1_1_compute_interface.html#a8f087328c03c3264e040fb9165805d73":[3,0,0,3,14], +"classktt_1_1_compute_interface.html#a907624fb3ca339bfba2e7ea9866c5f69":[3,0,0,3,0], +"classktt_1_1_compute_interface.html#a976e07da2b46dc9af1f0b6ce77b50886":[3,0,0,3,10], +"classktt_1_1_compute_interface.html#a98c87a68ccfd62d97570bb499447a32b":[3,0,0,3,25], +"classktt_1_1_compute_interface.html#aa037b0da7fdc94e7deb3bfb0168406ba":[3,0,0,3,20], +"classktt_1_1_compute_interface.html#aa9ef7c867d4180088f06e1c3af231138":[3,0,0,3,18], +"classktt_1_1_compute_interface.html#abc9416b26a01b2ce04d334a379e3fa95":[3,0,0,3,30], +"classktt_1_1_compute_interface.html#abe3d80e67b9daa10aee4cb1ec288139b":[3,0,0,3,11], +"classktt_1_1_compute_interface.html#abe4d68e723947c2493f74e1e3d89d742":[3,0,0,3,16], +"classktt_1_1_compute_interface.html#acdca488b872cba7ab7ba5656e18eced7":[3,0,0,3,21], +"classktt_1_1_compute_interface.html#ad8ca47ec758904973e2ee458276b1335":[3,0,0,3,5], +"classktt_1_1_compute_interface.html#ae82a93622fddc93bd6abe262b3b4f6c0":[3,0,0,3,29], 
+"classktt_1_1_compute_interface.html#af5a74c4be89520607eb4e45c967a8648":[3,0,0,3,8], +"classktt_1_1_compute_interface.html#afcff8d4aa0d638512d9fefe6aa6fe927":[3,0,0,3,33], +"classktt_1_1_configuration_count.html":[3,0,0,4], +"classktt_1_1_configuration_count.html#a07059a1b39bd501b26bf522921d3a76a":[3,0,0,4,4], +"classktt_1_1_configuration_count.html#a0e0215d55003283a5c6880074f3b17d9":[3,0,0,4,1], +"classktt_1_1_configuration_count.html#a245d4855cbfa0d33022e884689735947":[3,0,0,4,0], +"classktt_1_1_configuration_count.html#ad1e0d855a2e199b4a49441f73fdf4e70":[3,0,0,4,2], +"classktt_1_1_configuration_count.html#ad949547efc769a696c7b8b01b39b2604":[3,0,0,4,3], +"classktt_1_1_configuration_duration.html":[3,0,0,5], +"classktt_1_1_configuration_duration.html#a64dfcad146ec17c95fc3e209972957ca":[3,0,0,5,4], +"classktt_1_1_configuration_duration.html#a75508624ea19dc28478773ecf5c8c858":[3,0,0,5,2], +"classktt_1_1_configuration_duration.html#a7b571c84c3fb14bb0d6e4b0513f0a0e9":[3,0,0,5,3], +"classktt_1_1_configuration_duration.html#a8e4dbd5e3739e62fee835ba6c49896c7":[3,0,0,5,0], +"classktt_1_1_configuration_duration.html#a98c21771fcede0945715b5a6057f52cb":[3,0,0,5,1], +"classktt_1_1_configuration_fraction.html":[3,0,0,6], +"classktt_1_1_configuration_fraction.html#a0d2fa91507e789136d17efe9641e13ad":[3,0,0,6,3], +"classktt_1_1_configuration_fraction.html#a35e73692570bf50ddbac98f2f7eeb089":[3,0,0,6,0], +"classktt_1_1_configuration_fraction.html#a3d070ebc565c97ff4197f162cc93e564":[3,0,0,6,1], +"classktt_1_1_configuration_fraction.html#a55ee4a423ff7cb338662eab04f589e1a":[3,0,0,6,4], +"classktt_1_1_configuration_fraction.html#a702ce7ffac703e2c04454d6884fb1aea":[3,0,0,6,2], +"classktt_1_1_deterministic_searcher.html":[3,0,0,7], +"classktt_1_1_deterministic_searcher.html#a4cad95c610c4c18178482e6619a30b3b":[3,0,0,7,0], +"classktt_1_1_deterministic_searcher.html#a57e71d23ca74ea9433cf28e246e0b6cd":[3,0,0,7,1], 
+"classktt_1_1_deterministic_searcher.html#a92e0dce557dde77550fd378f39400dbb":[3,0,0,7,2], +"classktt_1_1_deterministic_searcher.html#ae5ab31105b6134c72724de59e34cd2bc":[3,0,0,7,3], +"classktt_1_1_device_info.html":[3,0,0,8], +"classktt_1_1_device_info.html#a05e071b6d35a78281006ef47be4a7d9d":[3,0,0,8,20], +"classktt_1_1_device_info.html#a1c2302c00b2b32f1524ee1c961db4541":[3,0,0,8,0], +"classktt_1_1_device_info.html#a1c4ac471ce324ce40e8dbbc5b262ec72":[3,0,0,8,3], +"classktt_1_1_device_info.html#a312f1f6ceb7989ea07f61c3ad8d4b5ba":[3,0,0,8,4], +"classktt_1_1_device_info.html#a31ad37db18d4241525454239b4a1bd00":[3,0,0,8,13], +"classktt_1_1_device_info.html#a36bf9ec1e6410e5e1dac84e87165b4e5":[3,0,0,8,12], +"classktt_1_1_device_info.html#a3f1d17e18716418023284effc24aa111":[3,0,0,8,19], +"classktt_1_1_device_info.html#a3f5816c0e640d42a097e8cf9d4befb5d":[3,0,0,8,7], +"classktt_1_1_device_info.html#a561a22331da58bf1020cb0f429e4645c":[3,0,0,8,18], +"classktt_1_1_device_info.html#a5d2cd6fc287fd4ba033c02482a3123c0":[3,0,0,8,10], +"classktt_1_1_device_info.html#a6242a7047532cbc7d47a9417bd60b0b8":[3,0,0,8,14], +"classktt_1_1_device_info.html#a8975df22a6ca7ac94c8306969ed057dc":[3,0,0,8,2], +"classktt_1_1_device_info.html#aa903ae3bed5a25d4595ddc5374b0c5cc":[3,0,0,8,15], +"classktt_1_1_device_info.html#abcd75553d8ad9dcf723ba5ecb834313f":[3,0,0,8,5], +"classktt_1_1_device_info.html#acb74cd5f6bb48d2a46dc076f9456d44d":[3,0,0,8,9], +"classktt_1_1_device_info.html#ada297188614d02e7f0a56be80b66195a":[3,0,0,8,1], +"classktt_1_1_device_info.html#adfc43923135d990de34e915995e9df62":[3,0,0,8,8], +"classktt_1_1_device_info.html#ae3854df8444dbb599cb6bc5e533c6c78":[3,0,0,8,11], +"classktt_1_1_device_info.html#ae4e0a9d8871e13476bad9124c7bbcfa6":[3,0,0,8,16], +"classktt_1_1_device_info.html#aee75f9f96efb77e31900f258f9904b4e":[3,0,0,8,17], +"classktt_1_1_device_info.html#affca51f82c46b3280e8893476c02aab5":[3,0,0,8,6], +"classktt_1_1_dimension_vector.html":[3,0,0,9], 
+"classktt_1_1_dimension_vector.html#a068cc7be826b284136ca1daee01097be":[3,0,0,9,20], +"classktt_1_1_dimension_vector.html#a18486e4a20c01bb46d25bdefaa8c9c4e":[3,0,0,9,18], +"classktt_1_1_dimension_vector.html#a1bfbb2a72cff5585a671c5272848b3b1":[3,0,0,9,1], +"classktt_1_1_dimension_vector.html#a1dae553a3dfaf8d474a4d3a0ab8a95d4":[3,0,0,9,9], +"classktt_1_1_dimension_vector.html#a329579f2ee5b163c43ec4f806ec7290b":[3,0,0,9,6], +"classktt_1_1_dimension_vector.html#a39f4cb4130c4ead3716e8556679639ca":[3,0,0,9,8], +"classktt_1_1_dimension_vector.html#a3a227c566f2b416f19fdd85c62f4ebf5":[3,0,0,9,17], +"classktt_1_1_dimension_vector.html#a481f83a511def47ab7124ed7461024ce":[3,0,0,9,0], +"classktt_1_1_dimension_vector.html#a48e9f835e901fcce7f57bf17b1aeb05c":[3,0,0,9,3], +"classktt_1_1_dimension_vector.html#a51fdff81bd4c1f32afef8c4d20fb72b0":[3,0,0,9,7], +"classktt_1_1_dimension_vector.html#a548756d91eafa34a429dbc5de0473b74":[3,0,0,9,4], +"classktt_1_1_dimension_vector.html#a5e3e65d17829bbdeb1b08ac965192c3c":[3,0,0,9,14], +"classktt_1_1_dimension_vector.html#a6835327b04036570cad0ed0542a126de":[3,0,0,9,21], +"classktt_1_1_dimension_vector.html#a720aceedd128083cfda75a3a1eebab23":[3,0,0,9,15], +"classktt_1_1_dimension_vector.html#a8ca418fd13304e8edc9753eab27a7c24":[3,0,0,9,12], +"classktt_1_1_dimension_vector.html#aad1111e7bef1adc0e781aec708dffe3d":[3,0,0,9,19], +"classktt_1_1_dimension_vector.html#aafe05832a10592824a565e1985919240":[3,0,0,9,13], +"classktt_1_1_dimension_vector.html#ab390902241d5d5d544e67e5a8944d0b4":[3,0,0,9,5], +"classktt_1_1_dimension_vector.html#abc2848fa38c641a5727979b79c479c25":[3,0,0,9,11], +"classktt_1_1_dimension_vector.html#ac40ec611802bf43b67515c04f7b7775f":[3,0,0,9,2], +"classktt_1_1_dimension_vector.html#ad1eeeaf51882f0f41159e7bed569f5c7":[3,0,0,9,10], +"classktt_1_1_dimension_vector.html#adbac0fe41fbe4716c43ab0aed0e9a7ce":[3,0,0,9,16], +"classktt_1_1_kernel_configuration.html":[3,0,0,12], 
+"classktt_1_1_kernel_configuration.html#a25e068aeddea633414e923b3b01615d5":[3,0,0,12,4], +"classktt_1_1_kernel_configuration.html#a2e97ef5d6ca155af04289536426078da":[3,0,0,12,3], +"classktt_1_1_kernel_configuration.html#a3a5a9d4ffa080cf0e5b6efe8024ef1b9":[3,0,0,12,5], +"classktt_1_1_kernel_configuration.html#a577d70ddf1238d7410315b0906748a65":[3,0,0,12,9], +"classktt_1_1_kernel_configuration.html#a6e8fb7526d1c826742885e60e3560b5a":[3,0,0,12,0], +"classktt_1_1_kernel_configuration.html#a6ee2fd5153a7e5442ed9f36496db5dbd":[3,0,0,12,7], +"classktt_1_1_kernel_configuration.html#a8a3bd003aa6e264a497602688d589bc6":[3,0,0,12,2], +"classktt_1_1_kernel_configuration.html#a9ed979305aafb0bb5eaf4aa14165c922":[3,0,0,12,6], +"classktt_1_1_kernel_configuration.html#acba125886ed2c2642a1aa7c25d4d2d75":[3,0,0,12,8], +"classktt_1_1_kernel_configuration.html#aee63d61d9b9e9ab8c30b8e8160de0b31":[3,0,0,12,1], +"classktt_1_1_kernel_profiling_counter.html":[3,0,0,13], +"classktt_1_1_kernel_profiling_counter.html#a1ef12bbb76f6bfa213a8119ea7a7b049":[3,0,0,13,9], +"classktt_1_1_kernel_profiling_counter.html#a2e74a0754342eaaa59ecec2b59036d38":[3,0,0,13,8], +"classktt_1_1_kernel_profiling_counter.html#a30ab328958eeaac77779d35c0fcbf0f2":[3,0,0,13,0], +"classktt_1_1_kernel_profiling_counter.html#a35ea235dae6d237dcffe77913ea5d8d6":[3,0,0,13,4], +"classktt_1_1_kernel_profiling_counter.html#a4039c08d34f68eb8f46663878a0393a3":[3,0,0,13,2], +"classktt_1_1_kernel_profiling_counter.html#a41fe747d9a80d48cac428a4013f44c65":[3,0,0,13,6], +"classktt_1_1_kernel_profiling_counter.html#a4dff75794d30fa5cd21de106d7f4d5a8":[3,0,0,13,5], +"classktt_1_1_kernel_profiling_counter.html#a626d22d51b8544119ffe0c03edded569":[3,0,0,13,10], +"classktt_1_1_kernel_profiling_counter.html#a9bd46a83e38739b0ac41c0f439475331":[3,0,0,13,7], +"classktt_1_1_kernel_profiling_counter.html#aaf6a73668395a8aa160476f5e0895291":[3,0,0,13,3], +"classktt_1_1_kernel_profiling_counter.html#af682e50f61c5e02ea9c9dc4f97f5a496":[3,0,0,13,1], 
+"classktt_1_1_kernel_profiling_counter.html#afe41f379ca864885d6271604b22d439e":[3,0,0,13,11], +"classktt_1_1_kernel_profiling_data.html":[3,0,0,14], +"classktt_1_1_kernel_profiling_data.html#a073207dfc6c68e07f73d15ae5c03a020":[3,0,0,14,2], +"classktt_1_1_kernel_profiling_data.html#a17fcce3646d228f9e29da5d2553d313e":[3,0,0,14,10], +"classktt_1_1_kernel_profiling_data.html#a5e33137f931a103ed97ede31c81bdbdc":[3,0,0,14,4], +"classktt_1_1_kernel_profiling_data.html#a60a5921a950830d719d07d3a4add9b5b":[3,0,0,14,1], +"classktt_1_1_kernel_profiling_data.html#a6515eaea540104d99c6c340070c3688b":[3,0,0,14,9], +"classktt_1_1_kernel_profiling_data.html#a7d64d12e01400f90cccee547dd67c076":[3,0,0,14,8], +"classktt_1_1_kernel_profiling_data.html#aac88b3b1e5e932bcdd313037593ab914":[3,0,0,14,7], +"classktt_1_1_kernel_profiling_data.html#ab457cb17731ad1b122801011f31d300b":[3,0,0,14,5], +"classktt_1_1_kernel_profiling_data.html#ad231ce41ebbd42c923b4c0ba7c6e7b7d":[3,0,0,14,6], +"classktt_1_1_kernel_profiling_data.html#adccfc87b20595b3467e3bb4d3ca239a8":[3,0,0,14,3], +"classktt_1_1_kernel_profiling_data.html#aef6205745d78ecb389efa38d9942e4ce":[3,0,0,14,0], +"classktt_1_1_kernel_profiling_data.html#af3dccefc935dddf9933ba9377da009cb":[3,0,0,14,11], +"classktt_1_1_kernel_result.html":[3,0,0,15], +"classktt_1_1_kernel_result.html#a1fa061eaec6b9f756d59cd6c988f2e69":[3,0,0,15,13], +"classktt_1_1_kernel_result.html#a2c413a2a9e444f2370e244b9a2a216fe":[3,0,0,15,11], +"classktt_1_1_kernel_result.html#a36938e66c910dc9de499faeb6e5b573d":[3,0,0,15,9], +"classktt_1_1_kernel_result.html#a4d7c5c73cb5cd9425f3d48749fd29533":[3,0,0,15,12], +"classktt_1_1_kernel_result.html#a5a6462e356db1bed3df35db5606a4a7c":[3,0,0,15,16], +"classktt_1_1_kernel_result.html#a62d49b89c426b97283ffc72075bd36c7":[3,0,0,15,15], +"classktt_1_1_kernel_result.html#a65c5f8856417eed5bcc1592a0415a3d6":[3,0,0,15,10], +"classktt_1_1_kernel_result.html#a7004a0fd0766610281b6ffdf06411696":[3,0,0,15,3], 
+"classktt_1_1_kernel_result.html#a85655eb95f6a69d3989acf6aba64d810":[3,0,0,15,8], +"classktt_1_1_kernel_result.html#a9735a1357e010cc89061e6cbf404777d":[3,0,0,15,0], +"classktt_1_1_kernel_result.html#aa8adbadb09278f0f2f9a3d5924e11f59":[3,0,0,15,6], +"classktt_1_1_kernel_result.html#abc545615523a4dea055784660f8fe618":[3,0,0,15,4], +"classktt_1_1_kernel_result.html#ac29366a0b1b5e9a9d0a18750c57f881e":[3,0,0,15,17], +"classktt_1_1_kernel_result.html#ada777a7053bf09118204e846239d6e10":[3,0,0,15,5], +"classktt_1_1_kernel_result.html#ade0c3b9a58e9561e0bc008cd583cd351":[3,0,0,15,7], +"classktt_1_1_kernel_result.html#ae6e6ba75f1db55df46e93ef745135e70":[3,0,0,15,1], +"classktt_1_1_kernel_result.html#aec57eff202632e1a51cde6839a5f59a8":[3,0,0,15,2], +"classktt_1_1_kernel_result.html#aec7ac5fb0ac54f762a33854df14b6eb0":[3,0,0,15,14], +"classktt_1_1_ktt_exception.html":[3,0,0,16], +"classktt_1_1_ktt_exception.html#a47058989269370f6b3ca0933e5462720":[3,0,0,16,1], +"classktt_1_1_ktt_exception.html#ab6dcdf4a93cf0bce9d9f47a79c549f8c":[3,0,0,16,0], +"classktt_1_1_ktt_exception.html#ada9eeeda3a8152e72f1d01c40b2d4608":[3,0,0,16,2], +"classktt_1_1_mcmc_searcher.html":[3,0,0,17], +"classktt_1_1_mcmc_searcher.html#a26787c6d480b599445f7ba1028e5afae":[3,0,0,17,0], +"classktt_1_1_mcmc_searcher.html#a6f2ddec84f5b53535e8ef162569080a7":[3,0,0,17,3], +"classktt_1_1_mcmc_searcher.html#a7686a12442d4c52eba6cf0c50f707934":[3,0,0,17,2], +"classktt_1_1_mcmc_searcher.html#a78ae10efa302515fec75e78e8bcd0dce":[3,0,0,17,5], +"classktt_1_1_mcmc_searcher.html#a8df1133d653a1e9dbad3c813efb42f73":[3,0,0,17,1], +"classktt_1_1_mcmc_searcher.html#a91ca30679a5bafcac5f3dd5818eb2c1f":[3,0,0,17,4], +"classktt_1_1_parameter_pair.html":[3,0,0,18], +"classktt_1_1_parameter_pair.html#a00a92aa5dd7eecf18015c818c8af3a42":[3,0,0,18,9], +"classktt_1_1_parameter_pair.html#a04c8de22404e79dcd78d2259cf3d30e6":[3,0,0,18,8], +"classktt_1_1_parameter_pair.html#a0542a9454902488e990906cc276197ef":[3,0,0,18,2], 
+"classktt_1_1_parameter_pair.html#a25fec44fa46797a76cbe1cb75ea3dde3":[3,0,0,18,11], +"classktt_1_1_parameter_pair.html#a638129565de501add5ea0a042a41a741":[3,0,0,18,13], +"classktt_1_1_parameter_pair.html#a63d30900d44bcba18d0bbc29f0bea9ac":[3,0,0,18,10], +"classktt_1_1_parameter_pair.html#a70d9cd2866b5a27e05de99de4f0eecd8":[3,0,0,18,6], +"classktt_1_1_parameter_pair.html#a8d6f54576f30ac9214dc8e0bc25eaa0c":[3,0,0,18,5], +"classktt_1_1_parameter_pair.html#a8f4d2b272fb3a3e1a31661cb10d9da17":[3,0,0,18,3], +"classktt_1_1_parameter_pair.html#a98275cb6450f000fe7d11a189094cd19":[3,0,0,18,12], +"classktt_1_1_parameter_pair.html#aa1a0cc60c3c77a523d2a82533e1cba5e":[3,0,0,18,0], +"classktt_1_1_parameter_pair.html#ab5d21e0b7e4459a8ba38c29994e0cbb9":[3,0,0,18,7], +"classktt_1_1_parameter_pair.html#ae5c4d77d766795b1b6213afefbd287df":[3,0,0,18,4], +"classktt_1_1_parameter_pair.html#af60f395c67c664ebc2956cfb117ddd08":[3,0,0,18,1], +"classktt_1_1_platform_info.html":[3,0,0,19], +"classktt_1_1_platform_info.html#a23708c8522cdbff24d11a7674b7fffc1":[3,0,0,19,5], +"classktt_1_1_platform_info.html#a2df54982dc384626b5d729bb4b5b8552":[3,0,0,19,8], +"classktt_1_1_platform_info.html#a34454734b44eb0daff35a691088c70e2":[3,0,0,19,1], +"classktt_1_1_platform_info.html#a3b0fb61668b450eb83f2ebd60a96fa7e":[3,0,0,19,3], +"classktt_1_1_platform_info.html#aa211d094d3f5b23565eda47288403cf9":[3,0,0,19,0], +"classktt_1_1_platform_info.html#ac85f24f5c3776ef90fd5db5641dc8806":[3,0,0,19,7], +"classktt_1_1_platform_info.html#acc66684305cc6cf2a0288d6ecd11f23a":[3,0,0,19,4], +"classktt_1_1_platform_info.html#ad4d55a0c9df7fcab4e6db838e8954c99":[3,0,0,19,6], +"classktt_1_1_platform_info.html#af2a945add259928b5f3914fc0d85c323":[3,0,0,19,2], +"classktt_1_1_platform_info.html#afae7a17ec655b25e1486e532ae2a99a1":[3,0,0,19,9], +"classktt_1_1_random_searcher.html":[3,0,0,20], +"classktt_1_1_random_searcher.html#aa7a085151dd0da33343ed98eaac7173f":[3,0,0,20,0], 
+"classktt_1_1_random_searcher.html#aa8dbbec845feb323a87702e8072bf037":[3,0,0,20,1], +"classktt_1_1_random_searcher.html#ab583154ca3d44951a107f68672a81c4d":[3,0,0,20,3], +"classktt_1_1_random_searcher.html#ac71348413937c204ea7c9e15dbd22b83":[3,0,0,20,2], +"classktt_1_1_searcher.html":[3,0,0,21], +"classktt_1_1_searcher.html#a1bdd6c20c266a2d713f3014fcd31bf49":[3,0,0,21,1], +"classktt_1_1_searcher.html#a4ce9b618216a950a2f678aeca29ad02f":[3,0,0,21,12], +"classktt_1_1_searcher.html#a5f4f5320e51d04d140ecab230a882190":[3,0,0,21,6], +"classktt_1_1_searcher.html#a630d6dbe3300f4399861d175d3dbbb63":[3,0,0,21,9], +"classktt_1_1_searcher.html#a634b39d630532ff59966c16c15294f76":[3,0,0,21,10], +"classktt_1_1_searcher.html#a6b12def7ae2d28bed61f1f75689c4d49":[3,0,0,21,0], +"classktt_1_1_searcher.html#a75fa4a6baf0ce399d38b42b2130eb825":[3,0,0,21,8], +"classktt_1_1_searcher.html#a8d7420d7014a7a647f6a28a669e57530":[3,0,0,21,11], +"classktt_1_1_searcher.html#a97adc4f408d500072d57bc471edc89f9":[3,0,0,21,5], +"classktt_1_1_searcher.html#ab8071e7133a0b489c13595cb7f8f1e04":[3,0,0,21,13], +"classktt_1_1_searcher.html#abd7f41cec1585f973d128ff883124bd5":[3,0,0,21,4], +"classktt_1_1_searcher.html#acf461b7992a2c3dfa7b087b447c42b96":[3,0,0,21,14], +"classktt_1_1_searcher.html#ae4f580a9d9531f0a80724fff289645ab":[3,0,0,21,2], +"classktt_1_1_searcher.html#af42b81f88b4ac08e5d219a7fca5b475c":[3,0,0,21,3], +"classktt_1_1_searcher.html#afe2188ab642c82bd1df103662ecb40bd":[3,0,0,21,7], "classktt_1_1_stop_condition.html":[3,0,0,22], "classktt_1_1_stop_condition.html#a03174c2a6251dc4ee72d02a767d86d79":[3,0,0,22,0], "classktt_1_1_stop_condition.html#a0cd1d2182dfa62fd58eaa68617cce041":[3,0,0,22,1], "classktt_1_1_stop_condition.html#a63e13a89b468dce6c66b4e520177048d":[3,0,0,22,3], "classktt_1_1_stop_condition.html#a6bd7c8e1a4a43ebfe47c9d7f81d2ce88":[3,0,0,22,2], "classktt_1_1_stop_condition.html#af53dece69f861c0d30a9ee288c2610f2":[3,0,0,22,4], -"classktt_1_1_tuner.html":[3,0,0,0], 
-"classktt_1_1_tuner.html#a005ffd8e117b1f6f6a564bdbe53e33ca":[3,0,0,0,43], -"classktt_1_1_tuner.html#a018e398312c674b8aca5e7fbfa128871":[3,0,0,0,37], -"classktt_1_1_tuner.html#a04863df669de4665b5687174ef53908d":[3,0,0,0,2], -"classktt_1_1_tuner.html#a09198b07768176d263dbee8a6c54b377":[3,0,0,0,3], -"classktt_1_1_tuner.html#a0c8667aa5703517c100b261a23eed0dd":[3,0,0,0,55], -"classktt_1_1_tuner.html#a0ddedf54b28a454e347f0d40d4c57185":[3,0,0,0,50], -"classktt_1_1_tuner.html#a0e2675eb66dd12edc676df06e4a6db5a":[3,0,0,0,27], -"classktt_1_1_tuner.html#a1044c7e763c75254ddb25ba53a749dd9":[3,0,0,0,47], -"classktt_1_1_tuner.html#a136a3e45466bc0434484b06ae338c3cc":[3,0,0,0,19], -"classktt_1_1_tuner.html#a16040c3a6f5603ef337ef4a7057f9b69":[3,0,0,0,57], -"classktt_1_1_tuner.html#a1a99601a2d8876cecb6a5455a55e8c1e":[3,0,0,0,17], -"classktt_1_1_tuner.html#a1c2b24bebe7d9408c947c30f9471966d":[3,0,0,0,33], -"classktt_1_1_tuner.html#a226f0185c96bcc8f54b1b9deb9d6b4d8":[3,0,0,0,23], -"classktt_1_1_tuner.html#a2b3b932d8a87ff1e2a20b1c791af38dc":[3,0,0,0,8], -"classktt_1_1_tuner.html#a360361884d6c490d557d9926da3b813d":[3,0,0,0,20], -"classktt_1_1_tuner.html#a385b67cfa5ac085f540fe21c1f461bf2":[3,0,0,0,53], -"classktt_1_1_tuner.html#a3a08f0fef2d05fd93913f6a461c8d77a":[3,0,0,0,21], -"classktt_1_1_tuner.html#a4a8fdad5788a9f1c6bb71b731c17d89c":[3,0,0,0,7], -"classktt_1_1_tuner.html#a4bc8f3f6cca5a25c91a26a78d1581e3f":[3,0,0,0,22], -"classktt_1_1_tuner.html#a5870dcea68c8006b1ca92c90171018b4":[3,0,0,0,26], -"classktt_1_1_tuner.html#a5ac6711abae75fff387e8083af75dc79":[3,0,0,0,32], -"classktt_1_1_tuner.html#a5c02e27e4f5631fa2b45d6ed9f7edea1":[3,0,0,0,25], -"classktt_1_1_tuner.html#a5cfc141a0f3d3dbb237a76c394d760b8":[3,0,0,0,9], -"classktt_1_1_tuner.html#a63c326b21d847e1ec8d2d63cfb2bb698":[3,0,0,0,0], -"classktt_1_1_tuner.html#a65b9a2ebdc68c3edc6c9102354f2dcd8":[3,0,0,0,49], -"classktt_1_1_tuner.html#a6cce9a3ee4f44f2e7dbd4e01fc4d9d4b":[3,0,0,0,35], 
-"classktt_1_1_tuner.html#a6d9ed472e7c2bd1b01799bf37802c796":[3,0,0,0,42] +"classktt_1_1_tuner.html":[3,0,0,23], +"classktt_1_1_tuner.html#a0014a51fff4d963f7eccba50ae4ecb28":[3,0,0,23,7], +"classktt_1_1_tuner.html#a005ffd8e117b1f6f6a564bdbe53e33ca":[3,0,0,23,50], +"classktt_1_1_tuner.html#a018e398312c674b8aca5e7fbfa128871":[3,0,0,23,44], +"classktt_1_1_tuner.html#a04863df669de4665b5687174ef53908d":[3,0,0,23,2], +"classktt_1_1_tuner.html#a09198b07768176d263dbee8a6c54b377":[3,0,0,23,4], +"classktt_1_1_tuner.html#a0c8667aa5703517c100b261a23eed0dd":[3,0,0,23,62], +"classktt_1_1_tuner.html#a0ddedf54b28a454e347f0d40d4c57185":[3,0,0,23,57], +"classktt_1_1_tuner.html#a0e2675eb66dd12edc676df06e4a6db5a":[3,0,0,23,33], +"classktt_1_1_tuner.html#a1044c7e763c75254ddb25ba53a749dd9":[3,0,0,23,54], +"classktt_1_1_tuner.html#a136a3e45466bc0434484b06ae338c3cc":[3,0,0,23,24], +"classktt_1_1_tuner.html#a16040c3a6f5603ef337ef4a7057f9b69":[3,0,0,23,67], +"classktt_1_1_tuner.html#a1a99601a2d8876cecb6a5455a55e8c1e":[3,0,0,23,22], +"classktt_1_1_tuner.html#a1c2b24bebe7d9408c947c30f9471966d":[3,0,0,23,40], +"classktt_1_1_tuner.html#a2016ef58ab7ade647bf93fa43808c78e":[3,0,0,23,35], +"classktt_1_1_tuner.html#a226f0185c96bcc8f54b1b9deb9d6b4d8":[3,0,0,23,29], +"classktt_1_1_tuner.html#a2b3b932d8a87ff1e2a20b1c791af38dc":[3,0,0,23,12] }; diff --git a/Docs/navtreeindex2.js b/Docs/navtreeindex2.js index a7f19986..404634f0 100644 --- a/Docs/navtreeindex2.js +++ b/Docs/navtreeindex2.js @@ -1,43 +1,66 @@ var NAVTREEINDEX2 = { -"classktt_1_1_tuner.html#a77e12637ab4df58139ce03161f6a9a47":[3,0,0,0,51], -"classktt_1_1_tuner.html#a789c4c550bd615bda43afd152d410b6c":[3,0,0,0,30], -"classktt_1_1_tuner.html#a78ced5f956d194f95d1eab209c0da355":[3,0,0,0,31], -"classktt_1_1_tuner.html#a7e95025e0f39526e756a947e953afd9a":[3,0,0,0,34], -"classktt_1_1_tuner.html#a7ed0d38ea099aef96218bd95a98e8a06":[3,0,0,0,56], -"classktt_1_1_tuner.html#a7f1f558e170b15a155be9c7fac64d3a2":[3,0,0,0,4], 
-"classktt_1_1_tuner.html#a805fb0026bbd0e2787b75ea058d1f10f":[3,0,0,0,12], -"classktt_1_1_tuner.html#a8503c25c10dc31b9e15e56d7a597856d":[3,0,0,0,39], -"classktt_1_1_tuner.html#a86b6039192d8740b2a457479e584cdc8":[3,0,0,0,1], -"classktt_1_1_tuner.html#a888c5696b3aac3ff9892df7ebbb7022e":[3,0,0,0,18], -"classktt_1_1_tuner.html#a8aafe5e4d2e7d89fb1a59ad9069e3f88":[3,0,0,0,5], -"classktt_1_1_tuner.html#a8d0a53f0d54ac5a285b40386ec5a39a4":[3,0,0,0,52], -"classktt_1_1_tuner.html#a9243c05712e47d182385b2e097403ea5":[3,0,0,0,28], -"classktt_1_1_tuner.html#a95f62e176f33adafdde70dd077fb9e88":[3,0,0,0,29], -"classktt_1_1_tuner.html#a966c08e5ca21c2731288a7b2eb3b1229":[3,0,0,0,46], -"classktt_1_1_tuner.html#a98d47480e40733abfad1780f599a4be8":[3,0,0,0,10], -"classktt_1_1_tuner.html#aa34ed21ac3bdfd2421f7ba0abe62aa7b":[3,0,0,0,14], -"classktt_1_1_tuner.html#aa74cb565dee534d538c85d2d57a4b3f5":[3,0,0,0,6], -"classktt_1_1_tuner.html#aa87f966ad499bd35c2c7526574a7ed34":[3,0,0,0,16], -"classktt_1_1_tuner.html#aaf79f9092d20f151b7d9690ffbdfc8ff":[3,0,0,0,13], -"classktt_1_1_tuner.html#aaff564535e11125821fb39b1bd863fdb":[3,0,0,0,48], -"classktt_1_1_tuner.html#ab5a9ca12b75ea217b0a7f9f68e28639a":[3,0,0,0,44], -"classktt_1_1_tuner.html#ab89be1d46163994257094765fc398fac":[3,0,0,0,41], -"classktt_1_1_tuner.html#ab906111fbc44d2a7be1afad5c9b131a1":[3,0,0,0,45], -"classktt_1_1_tuner.html#ab98c514e83ad83f0ecffb7f1e4b48e34":[3,0,0,0,58], -"classktt_1_1_tuner.html#abd1c985b64a88a2b66835c4afa9fdc8f":[3,0,0,0,36], -"classktt_1_1_tuner.html#acf0bb2189bf6c1210c42b0f1ddd93399":[3,0,0,0,54], -"classktt_1_1_tuner.html#ad3c8e80a4eefa9dd00b475b237a40e9b":[3,0,0,0,11], -"classktt_1_1_tuner.html#adfaa5b962d4742ff4aa70171a990a3dd":[3,0,0,0,15], -"classktt_1_1_tuner.html#af135d787110cbbc19bc66def1b705b4e":[3,0,0,0,40], -"classktt_1_1_tuner.html#af17f07f5fa61fcfcd9dbe77068209992":[3,0,0,0,38], -"classktt_1_1_tuner.html#afa014f51574f99a828f040895290f3f4":[3,0,0,0,24], -"classktt_1_1_tuning_duration.html":[3,0,0,23], 
-"classktt_1_1_tuning_duration.html#a2469d018df99279dbab3c674859f930e":[3,0,0,23,4], -"classktt_1_1_tuning_duration.html#a3ee74e7bdc4cafb98260585ae025fc7c":[3,0,0,23,3], -"classktt_1_1_tuning_duration.html#a6da9bcd238c1ee54f5b50d6834b534c2":[3,0,0,23,1], -"classktt_1_1_tuning_duration.html#acfa2d9803c60e9b684ea903af051d351":[3,0,0,23,2], -"classktt_1_1_tuning_duration.html#aedc68c9e3bf876d3ec2a479592332a37":[3,0,0,23,0], +"classktt_1_1_tuner.html#a32e8f697b84556c3164575897f7f891a":[3,0,0,23,70], +"classktt_1_1_tuner.html#a34b8bb17df50309173a07303fb1b10a7":[3,0,0,23,10], +"classktt_1_1_tuner.html#a360361884d6c490d557d9926da3b813d":[3,0,0,23,25], +"classktt_1_1_tuner.html#a385b67cfa5ac085f540fe21c1f461bf2":[3,0,0,23,60], +"classktt_1_1_tuner.html#a3a08f0fef2d05fd93913f6a461c8d77a":[3,0,0,23,26], +"classktt_1_1_tuner.html#a42bfecb3a7da093420dcff86ff3a235f":[3,0,0,23,69], +"classktt_1_1_tuner.html#a4a8fdad5788a9f1c6bb71b731c17d89c":[3,0,0,23,11], +"classktt_1_1_tuner.html#a4bc8f3f6cca5a25c91a26a78d1581e3f":[3,0,0,23,27], +"classktt_1_1_tuner.html#a5870dcea68c8006b1ca92c90171018b4":[3,0,0,23,32], +"classktt_1_1_tuner.html#a5ac6711abae75fff387e8083af75dc79":[3,0,0,23,39], +"classktt_1_1_tuner.html#a5c02e27e4f5631fa2b45d6ed9f7edea1":[3,0,0,23,31], +"classktt_1_1_tuner.html#a5cfc141a0f3d3dbb237a76c394d760b8":[3,0,0,23,14], +"classktt_1_1_tuner.html#a63c326b21d847e1ec8d2d63cfb2bb698":[3,0,0,23,0], +"classktt_1_1_tuner.html#a65b9a2ebdc68c3edc6c9102354f2dcd8":[3,0,0,23,56], +"classktt_1_1_tuner.html#a6cce9a3ee4f44f2e7dbd4e01fc4d9d4b":[3,0,0,23,42], +"classktt_1_1_tuner.html#a6d9ed472e7c2bd1b01799bf37802c796":[3,0,0,23,49], +"classktt_1_1_tuner.html#a704d563ca882d602f1f4e374fe03fb19":[3,0,0,23,3], +"classktt_1_1_tuner.html#a77e12637ab4df58139ce03161f6a9a47":[3,0,0,23,58], +"classktt_1_1_tuner.html#a789c4c550bd615bda43afd152d410b6c":[3,0,0,23,37], +"classktt_1_1_tuner.html#a78ced5f956d194f95d1eab209c0da355":[3,0,0,23,38], 
+"classktt_1_1_tuner.html#a7e95025e0f39526e756a947e953afd9a":[3,0,0,23,41], +"classktt_1_1_tuner.html#a7ed0d38ea099aef96218bd95a98e8a06":[3,0,0,23,66], +"classktt_1_1_tuner.html#a7f1f558e170b15a155be9c7fac64d3a2":[3,0,0,23,5], +"classktt_1_1_tuner.html#a805fb0026bbd0e2787b75ea058d1f10f":[3,0,0,23,17], +"classktt_1_1_tuner.html#a8503c25c10dc31b9e15e56d7a597856d":[3,0,0,23,46], +"classktt_1_1_tuner.html#a86b6039192d8740b2a457479e584cdc8":[3,0,0,23,1], +"classktt_1_1_tuner.html#a888c5696b3aac3ff9892df7ebbb7022e":[3,0,0,23,23], +"classktt_1_1_tuner.html#a8aafe5e4d2e7d89fb1a59ad9069e3f88":[3,0,0,23,6], +"classktt_1_1_tuner.html#a8d0a53f0d54ac5a285b40386ec5a39a4":[3,0,0,23,59], +"classktt_1_1_tuner.html#a9243c05712e47d182385b2e097403ea5":[3,0,0,23,34], +"classktt_1_1_tuner.html#a95f62e176f33adafdde70dd077fb9e88":[3,0,0,23,36], +"classktt_1_1_tuner.html#a966c08e5ca21c2731288a7b2eb3b1229":[3,0,0,23,53], +"classktt_1_1_tuner.html#a9894503f98831c5c3f391c5bb5729ed4":[3,0,0,23,13], +"classktt_1_1_tuner.html#a98d47480e40733abfad1780f599a4be8":[3,0,0,23,15], +"classktt_1_1_tuner.html#a9db4f79c0e1d7cc2e2eb3e194e6a0cb8":[3,0,0,23,28], +"classktt_1_1_tuner.html#a9f056f4076db595826eea1bdbacbcdcb":[3,0,0,23,64], +"classktt_1_1_tuner.html#aa34ed21ac3bdfd2421f7ba0abe62aa7b":[3,0,0,23,19], +"classktt_1_1_tuner.html#aa38e76c15db1d25743288ebccd67df3b":[3,0,0,23,63], +"classktt_1_1_tuner.html#aa74cb565dee534d538c85d2d57a4b3f5":[3,0,0,23,9], +"classktt_1_1_tuner.html#aa87f966ad499bd35c2c7526574a7ed34":[3,0,0,23,21], +"classktt_1_1_tuner.html#aaf79f9092d20f151b7d9690ffbdfc8ff":[3,0,0,23,18], +"classktt_1_1_tuner.html#aaff564535e11125821fb39b1bd863fdb":[3,0,0,23,55], +"classktt_1_1_tuner.html#ab5a9ca12b75ea217b0a7f9f68e28639a":[3,0,0,23,51], +"classktt_1_1_tuner.html#ab89be1d46163994257094765fc398fac":[3,0,0,23,48], +"classktt_1_1_tuner.html#ab906111fbc44d2a7be1afad5c9b131a1":[3,0,0,23,52], +"classktt_1_1_tuner.html#ab98c514e83ad83f0ecffb7f1e4b48e34":[3,0,0,23,68], 
+"classktt_1_1_tuner.html#abd1c985b64a88a2b66835c4afa9fdc8f":[3,0,0,23,43], +"classktt_1_1_tuner.html#acf0bb2189bf6c1210c42b0f1ddd93399":[3,0,0,23,61], +"classktt_1_1_tuner.html#ad1cc2c4aac72510f028ada44a0283fab":[3,0,0,23,65], +"classktt_1_1_tuner.html#ad3c8e80a4eefa9dd00b475b237a40e9b":[3,0,0,23,16], +"classktt_1_1_tuner.html#adfaa5b962d4742ff4aa70171a990a3dd":[3,0,0,23,20], +"classktt_1_1_tuner.html#af135d787110cbbc19bc66def1b705b4e":[3,0,0,23,47], +"classktt_1_1_tuner.html#af17f07f5fa61fcfcd9dbe77068209992":[3,0,0,23,45], +"classktt_1_1_tuner.html#af45fabe98321bfe3f51bf5011897c69e":[3,0,0,23,8], +"classktt_1_1_tuner.html#afa014f51574f99a828f040895290f3f4":[3,0,0,23,30], +"classktt_1_1_tuning_duration.html":[3,0,0,24], +"classktt_1_1_tuning_duration.html#a2469d018df99279dbab3c674859f930e":[3,0,0,24,4], +"classktt_1_1_tuning_duration.html#a3ee74e7bdc4cafb98260585ae025fc7c":[3,0,0,24,3], +"classktt_1_1_tuning_duration.html#a6da9bcd238c1ee54f5b50d6834b534c2":[3,0,0,24,1], +"classktt_1_1_tuning_duration.html#acfa2d9803c60e9b684ea903af051d351":[3,0,0,24,2], +"classktt_1_1_tuning_duration.html#aedc68c9e3bf876d3ec2a479592332a37":[3,0,0,24,0], "dir_0b41b623c32abd77cf87d94fa0e997df.html":[4,0,0,1], "dir_4027c6dcc37a2421c6b4a72f50c05b6d.html":[4,0,0,0,2], "dir_44a111874746047cefd3f7a73e059188.html":[4,0,0,5], @@ -59,8 +82,8 @@ var NAVTREEINDEX2 = "functions_b.html":[3,3,0,1], "functions_c.html":[3,3,0,2], "functions_d.html":[3,3,0,3], -"functions_func.html":[3,3,1], "functions_func.html":[3,3,1,0], +"functions_func.html":[3,3,1], "functions_func_b.html":[3,3,1,1], "functions_func_c.html":[3,3,1,2], "functions_func_d.html":[3,3,1,3], @@ -105,8 +128,8 @@ var NAVTREEINDEX2 = "namespacemembers_type.html":[2,3], "namespacemembers_vars.html":[2,2], "pages.html":[], -"structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html":[3,0,0,24], -"structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html#a817e62a8f5d838d0c141e7438b9f2b31":[3,0,0,24,0], 
+"structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html":[3,0,0,10], +"structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html#a817e62a8f5d838d0c141e7438b9f2b31":[3,0,0,10,0], "structktt_1_1_kernel_compilation_data.html":[3,0,0,11], "structktt_1_1_kernel_compilation_data.html#a4338861a5206bbda070872aa03f9553e":[3,0,0,11,0], "structktt_1_1_kernel_compilation_data.html#a5fd344b2104da29fb6eac41348aec33f":[3,0,0,11,1], diff --git a/Docs/pages.html b/Docs/pages.html index 1141ad0b..b03dae8e 100644 --- a/Docs/pages.html +++ b/Docs/pages.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Related Pages @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -92,8 +91,7 @@
    -
    -
    Related Pages
    +
    Related Pages
    Here is a list of all related documentation pages:
    @@ -106,7 +104,7 @@ diff --git a/Docs/resize.js b/Docs/resize.js index e1ad0fe3..7fe30d10 100644 --- a/Docs/resize.js +++ b/Docs/resize.js @@ -53,7 +53,7 @@ function initResizable() date.setTime(date.getTime()+(10*365*24*60*60*1000)); // default expiration is one week expiration = date.toGMTString(); } - document.cookie = cookie_namespace + "_" + cookie + "=" + val + "; expires=" + expiration+"; path=/"; + document.cookie = cookie_namespace + "_" + cookie + "=" + val + "; SameSite=Lax; expires=" + expiration+"; path=/"; } function resizeWidth() @@ -75,10 +75,20 @@ function initResizable() { var headerHeight = header.outerHeight(); var footerHeight = footer.outerHeight(); - var windowHeight = $(window).height() - headerHeight - footerHeight; - content.css({height:windowHeight + "px"}); - navtree.css({height:windowHeight + "px"}); - sidenav.css({height:windowHeight + "px"}); + var windowHeight = $(window).height(); + var contentHeight,navtreeHeight,sideNavHeight; + if (typeof page_layout==='undefined' || page_layout==0) { /* DISABLE_INDEX=NO */ + contentHeight = windowHeight - headerHeight - footerHeight; + navtreeHeight = contentHeight; + sideNavHeight = contentHeight; + } else if (page_layout==1) { /* DISABLE_INDEX=YES */ + contentHeight = windowHeight - footerHeight; + navtreeHeight = windowHeight - headerHeight; + sideNavHeight = windowHeight; + } + content.css({height:contentHeight + "px"}); + navtree.css({height:navtreeHeight + "px"}); + sidenav.css({height:sideNavHeight + "px"}); var width=$(window).width(); if (width!=collapsedWidth) { if (width=desktop_vp) { diff --git a/Docs/search/all_0.html b/Docs/search/all_0.html index 1ec5b2d5..c36c9af5 100644 --- a/Docs/search/all_0.html +++ b/Docs/search/all_0.html @@ -2,7 +2,7 @@ - + @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/Docs/search/enums_c.js b/Docs/search/enums_c.js new file mode 100644 index 00000000..a225b658 --- /dev/null +++ b/Docs/search/enums_c.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['validationmethod_0',['ValidationMethod',['../namespacektt.html#a187d4987bb48bc1f78f628c8aa840a20',1,'ktt']]], + ['validationmode_1',['ValidationMode',['../namespacektt.html#a3baf318a03750f7418a5faa051967c04',1,'ktt']]] +]; diff --git a/Docs/search/enumvalues_0.html b/Docs/search/enumvalues_0.html index 0d131d95..7a520ff8 100644 --- a/Docs/search/enumvalues_0.html +++ b/Docs/search/enumvalues_0.html @@ -2,7 +2,7 @@ - + @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -12,14 +12,14 @@
    Loading...
    Searching...
    No Matches
    @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -92,8 +91,7 @@
    -
    -
    ktt::EnableBitfieldOperators< ValidationMode > Member List
    +
    ktt::EnableBitfieldOperators< ValidationMode > Member List
    @@ -105,7 +103,7 @@ diff --git a/Docs/structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html b/Docs/structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html index 1b339ea2..83bad2a2 100644 --- a/Docs/structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html +++ b/Docs/structktt_1_1_enable_bitfield_operators_3_01_validation_mode_01_4.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::EnableBitfieldOperators< ValidationMode > Struct Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -95,22 +94,21 @@ -
    -
    ktt::EnableBitfieldOperators< ValidationMode > Struct Reference
    +
    ktt::EnableBitfieldOperators< ValidationMode > Struct Reference

    #include <ValidationMode.h>

    -

    +

    Static Public Attributes

    static const bool m_Enable = true
     

    Detailed Description

    -

    Validation mode enum supports bitwise operations.

    +

    Validation mode enum supports bitwise operations.

    Member Data Documentation

    - +

    ◆ m_Enable

    @@ -142,7 +140,7 @@

    diff --git a/Docs/structktt_1_1_kernel_compilation_data-members.html b/Docs/structktt_1_1_kernel_compilation_data-members.html index ee284935..3f9d4634 100644 --- a/Docs/structktt_1_1_kernel_compilation_data-members.html +++ b/Docs/structktt_1_1_kernel_compilation_data-members.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: Member List @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -92,25 +91,24 @@
    -
    -
    ktt::KernelCompilationData Member List
    +
    ktt::KernelCompilationData Member List
    diff --git a/Docs/structktt_1_1_kernel_compilation_data.html b/Docs/structktt_1_1_kernel_compilation_data.html index 4206120c..0161ab5a 100644 --- a/Docs/structktt_1_1_kernel_compilation_data.html +++ b/Docs/structktt_1_1_kernel_compilation_data.html @@ -2,8 +2,8 @@ - - + + Kernel Tuning Toolkit: ktt::KernelCompilationData Struct Reference @@ -17,7 +17,7 @@ @@ -28,13 +28,12 @@
    - - + -
    -
    Kernel Tuning Toolkit -  2.0.1 +
    +
    Kernel Tuning Toolkit 2.1
    +
    - + @@ -72,7 +71,7 @@ @@ -96,19 +95,18 @@ Public Member Functions | Public Attributes | List of all members -
    -
    ktt::KernelCompilationData Struct Reference
    +
    ktt::KernelCompilationData Struct Reference

    #include <KernelCompilationData.h>

    -

    +

    Public Member Functions

     KernelCompilationData ()
     
    - @@ -122,9 +120,9 @@

    +

    Public Attributes

    uint64_t m_MaxWorkGroupSize
     
     

    Detailed Description

    -

    Structure which holds compilation information about specific kernel configuration.

    +

    Structure which holds compilation information about specific kernel configuration.

    Constructor & Destructor Documentation

    - +

    ◆ KernelCompilationData()

    Member Data Documentation

    -
    +

    ◆ m_ConstantMemorySize

    - +

    ◆ m_LocalMemorySize

    - +

    ◆ m_MaxWorkGroupSize

    - +

    ◆ m_PrivateMemorySize

    - +

    ◆ m_RegistersCount

    @@ -227,7 +225,7 @@

    diff --git a/Docs/tabs.css b/Docs/tabs.css index 85a0cd5b..00d1c602 100644 --- a/Docs/tabs.css +++ b/Docs/tabs.css @@ -1 +1 @@ -.sm{position:relative;z-index:9999}.sm,.sm ul,.sm li{display:block;list-style:none;margin:0;padding:0;line-height:normal;direction:ltr;text-align:left;-webkit-tap-highlight-color:rgba(0,0,0,0)}.sm-rtl,.sm-rtl ul,.sm-rtl li{direction:rtl;text-align:right}.sm>li>h1,.sm>li>h2,.sm>li>h3,.sm>li>h4,.sm>li>h5,.sm>li>h6{margin:0;padding:0}.sm ul{display:none}.sm li,.sm a{position:relative}.sm a{display:block}.sm a.disabled{cursor:not-allowed}.sm:after{content:"\00a0";display:block;height:0;font:0/0 serif;clear:both;visibility:hidden;overflow:hidden}.sm,.sm *,.sm *:before,.sm *:after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}.sm-dox{background-image:url("tab_b.png")}.sm-dox a,.sm-dox a:focus,.sm-dox a:hover,.sm-dox a:active{padding:0 12px;padding-right:43px;font-family:"Lucida Grande","Geneva","Helvetica",Arial,sans-serif;font-size:13px;font-weight:bold;line-height:36px;text-decoration:none;text-shadow:0 1px 1px rgba(255,255,255,0.9);color:#283a5d;outline:0}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a.current{color:#d23600}.sm-dox a.disabled{color:#bbb}.sm-dox a span.sub-arrow{position:absolute;top:50%;margin-top:-14px;left:auto;right:3px;width:28px;height:28px;overflow:hidden;font:bold 12px/28px monospace!important;text-align:center;text-shadow:none;background:rgba(255,255,255,0.5);-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox a.highlighted span.sub-arrow:before{display:block;content:'-'}.sm-dox>li:first-child>a,.sm-dox>li:first-child>:not(ul) a{-moz-border-radius:5px 5px 0 0;-webkit-border-radius:5px;border-radius:5px 5px 0 0}.sm-dox>li:last-child>a,.sm-dox>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) 
a,.sm-dox>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul{-moz-border-radius:0 0 5px 5px;-webkit-border-radius:0;border-radius:0 0 5px 5px}.sm-dox>li:last-child>a.highlighted,.sm-dox>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted{-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox ul{background:rgba(162,162,162,0.1)}.sm-dox ul a,.sm-dox ul a:focus,.sm-dox ul a:hover,.sm-dox ul a:active{font-size:12px;border-left:8px solid transparent;line-height:36px;text-shadow:none;background-color:white;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 
1px black}.sm-dox ul ul a,.sm-dox ul ul a:hover,.sm-dox ul ul a:focus,.sm-dox ul ul a:active{border-left:16px solid transparent}.sm-dox ul ul ul a,.sm-dox ul ul ul a:hover,.sm-dox ul ul ul a:focus,.sm-dox ul ul ul a:active{border-left:24px solid transparent}.sm-dox ul ul ul ul a,.sm-dox ul ul ul ul a:hover,.sm-dox ul ul ul ul a:focus,.sm-dox ul ul ul ul a:active{border-left:32px solid transparent}.sm-dox ul ul ul ul ul a,.sm-dox ul ul ul ul ul a:hover,.sm-dox ul ul ul ul ul a:focus,.sm-dox ul ul ul ul ul a:active{border-left:40px solid transparent}@media(min-width:768px){.sm-dox ul{position:absolute;width:12em}.sm-dox li{float:left}.sm-dox.sm-rtl li{float:right}.sm-dox ul li,.sm-dox.sm-rtl ul li,.sm-dox.sm-vertical li{float:none}.sm-dox a{white-space:nowrap}.sm-dox ul a,.sm-dox.sm-vertical a{white-space:normal}.sm-dox .sm-nowrap>li>a,.sm-dox .sm-nowrap>li>:not(ul) a{white-space:nowrap}.sm-dox{padding:0 10px;background-image:url("tab_b.png");line-height:36px}.sm-dox a span.sub-arrow{top:50%;margin-top:-2px;right:12px;width:0;height:0;border-width:4px;border-style:solid dashed dashed dashed;border-color:#283a5d transparent transparent transparent;background:transparent;-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox a,.sm-dox a:focus,.sm-dox a:active,.sm-dox a:hover,.sm-dox a.highlighted{padding:0 12px;background-image:url("tab_s.png");background-repeat:no-repeat;background-position:right;-moz-border-radius:0!important;-webkit-border-radius:0;border-radius:0!important}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a:hover span.sub-arrow{border-color:white transparent transparent transparent}.sm-dox a.has-submenu{padding-right:24px}.sm-dox li{border-top:0}.sm-dox>li>ul:before,.sm-dox>li>ul:after{content:'';position:absolute;top:-18px;left:30px;width:0;height:0;overflow:hidden;border-width:9px;border-style:dashed dashed solid dashed;border-color:transparent transparent 
#bbb transparent}.sm-dox>li>ul:after{top:-16px;left:31px;border-width:8px;border-color:transparent transparent #fff transparent}.sm-dox ul{border:1px solid #bbb;padding:5px 0;background:#fff;-moz-border-radius:5px!important;-webkit-border-radius:5px;border-radius:5px!important;-moz-box-shadow:0 5px 9px rgba(0,0,0,0.2);-webkit-box-shadow:0 5px 9px rgba(0,0,0,0.2);box-shadow:0 5px 9px rgba(0,0,0,0.2)}.sm-dox ul a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-color:transparent transparent transparent #555;border-style:dashed dashed dashed solid}.sm-dox ul a,.sm-dox ul a:hover,.sm-dox ul a:focus,.sm-dox ul a:active,.sm-dox ul a.highlighted{color:#555;background-image:none;border:0!important;color:#555;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul a:hover span.sub-arrow{border-color:transparent transparent transparent white}.sm-dox span.scroll-up,.sm-dox span.scroll-down{position:absolute;display:none;visibility:hidden;overflow:hidden;background:#fff;height:36px}.sm-dox span.scroll-up:hover,.sm-dox span.scroll-down:hover{background:#eee}.sm-dox span.scroll-up:hover span.scroll-up-arrow,.sm-dox span.scroll-up:hover span.scroll-down-arrow{border-color:transparent transparent #d23600 transparent}.sm-dox span.scroll-down:hover span.scroll-down-arrow{border-color:#d23600 transparent transparent transparent}.sm-dox span.scroll-up-arrow,.sm-dox span.scroll-down-arrow{position:absolute;top:0;left:50%;margin-left:-6px;width:0;height:0;overflow:hidden;border-width:6px;border-style:dashed dashed solid dashed;border-color:transparent transparent #555 transparent}.sm-dox span.scroll-down-arrow{top:8px;border-style:solid dashed dashed dashed;border-color:#555 transparent transparent transparent}.sm-dox.sm-rtl a.has-submenu{padding-right:12px;padding-left:24px}.sm-dox.sm-rtl a span.sub-arrow{right:auto;left:12px}.sm-dox.sm-rtl.sm-vertical 
a.has-submenu{padding:10px 20px}.sm-dox.sm-rtl.sm-vertical a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-rtl>li>ul:before{left:auto;right:30px}.sm-dox.sm-rtl>li>ul:after{left:auto;right:31px}.sm-dox.sm-rtl ul a.has-submenu{padding:10px 20px!important}.sm-dox.sm-rtl ul a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-vertical{padding:10px 0;-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox.sm-vertical a{padding:10px 20px}.sm-dox.sm-vertical a:hover,.sm-dox.sm-vertical a:focus,.sm-dox.sm-vertical a:active,.sm-dox.sm-vertical a.highlighted{background:#fff}.sm-dox.sm-vertical a.disabled{background-image:url("tab_b.png")}.sm-dox.sm-vertical a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-style:dashed dashed dashed solid;border-color:transparent transparent transparent #555}.sm-dox.sm-vertical>li>ul:before,.sm-dox.sm-vertical>li>ul:after{display:none}.sm-dox.sm-vertical ul a{padding:10px 20px}.sm-dox.sm-vertical ul a:hover,.sm-dox.sm-vertical ul a:focus,.sm-dox.sm-vertical ul a:active,.sm-dox.sm-vertical ul a.highlighted{background:#eee}.sm-dox.sm-vertical ul a.disabled{background:#fff}} \ No newline at end of file +.sm{position:relative;z-index:9999}.sm,.sm ul,.sm li{display:block;list-style:none;margin:0;padding:0;line-height:normal;direction:ltr;text-align:left;-webkit-tap-highlight-color:rgba(0,0,0,0)}.sm-rtl,.sm-rtl ul,.sm-rtl li{direction:rtl;text-align:right}.sm>li>h1,.sm>li>h2,.sm>li>h3,.sm>li>h4,.sm>li>h5,.sm>li>h6{margin:0;padding:0}.sm ul{display:none}.sm li,.sm a{position:relative}.sm a{display:block}.sm a.disabled{cursor:not-allowed}.sm:after{content:"\00a0";display:block;height:0;font:0/0 serif;clear:both;visibility:hidden;overflow:hidden}.sm,.sm *,.sm *:before,.sm 
*:after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}.main-menu-btn{position:relative;display:inline-block;width:36px;height:36px;text-indent:36px;margin-left:8px;white-space:nowrap;overflow:hidden;cursor:pointer;-webkit-tap-highlight-color:rgba(0,0,0,0)}.main-menu-btn-icon,.main-menu-btn-icon:before,.main-menu-btn-icon:after{position:absolute;top:50%;left:2px;height:2px;width:24px;background:#666;-webkit-transition:all .25s;transition:all .25s}.main-menu-btn-icon:before{content:'';top:-7px;left:0}.main-menu-btn-icon:after{content:'';top:7px;left:0}#main-menu-state:checked ~ .main-menu-btn .main-menu-btn-icon{height:0}#main-menu-state:checked ~ .main-menu-btn .main-menu-btn-icon:before{top:0;-webkit-transform:rotate(-45deg);transform:rotate(-45deg)}#main-menu-state:checked ~ .main-menu-btn .main-menu-btn-icon:after{top:0;-webkit-transform:rotate(45deg);transform:rotate(45deg)}#main-menu-state{position:absolute;width:1px;height:1px;margin:-1px;border:0;padding:0;overflow:hidden;clip:rect(1px,1px,1px,1px)}#main-menu-state:not(:checked) ~ #main-menu{display:none}#main-menu-state:checked ~ #main-menu{display:block}@media(min-width:768px){.main-menu-btn{position:absolute;top:-99999px}#main-menu-state:not(:checked) ~ #main-menu{display:block}}.sm-dox{background-image:url("tab_b.png")}.sm-dox a,.sm-dox a:focus,.sm-dox a:hover,.sm-dox a:active{padding:0 12px;padding-right:43px;font-family:"Lucida Grande","Geneva","Helvetica",Arial,sans-serif;font-size:13px;font-weight:bold;line-height:36px;text-decoration:none;text-shadow:0 1px 1px rgba(255,255,255,0.9);color:#283a5d;outline:0}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a.current{color:#d23600}.sm-dox a.disabled{color:#bbb}.sm-dox a span.sub-arrow{position:absolute;top:50%;margin-top:-14px;left:auto;right:3px;width:28px;height:28px;overflow:hidden;font:bold 12px/28px monospace 
!important;text-align:center;text-shadow:none;background:rgba(255,255,255,0.5);-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox a span.sub-arrow:before{display:block;content:'+'}.sm-dox a.highlighted span.sub-arrow:before{display:block;content:'-'}.sm-dox>li:first-child>a,.sm-dox>li:first-child>:not(ul) a{-moz-border-radius:5px 5px 0 0;-webkit-border-radius:5px;border-radius:5px 5px 0 0}.sm-dox>li:last-child>a,.sm-dox>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul{-moz-border-radius:0 0 5px 5px;-webkit-border-radius:0;border-radius:0 0 5px 5px}.sm-dox>li:last-child>a.highlighted,.sm-dox>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) 
a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted{-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox ul{background:rgba(162,162,162,0.1)}.sm-dox ul a,.sm-dox ul a:focus,.sm-dox ul a:hover,.sm-dox ul a:active{font-size:12px;border-left:8px solid transparent;line-height:36px;text-shadow:none;background-color:white;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul ul a,.sm-dox ul ul a:hover,.sm-dox ul ul a:focus,.sm-dox ul ul a:active{border-left:16px solid transparent}.sm-dox ul ul ul a,.sm-dox ul ul ul a:hover,.sm-dox ul ul ul a:focus,.sm-dox ul ul ul a:active{border-left:24px solid transparent}.sm-dox ul ul ul ul a,.sm-dox ul ul ul ul a:hover,.sm-dox ul ul ul ul a:focus,.sm-dox ul ul ul ul a:active{border-left:32px solid transparent}.sm-dox ul ul ul ul ul a,.sm-dox ul ul ul ul ul a:hover,.sm-dox ul ul ul ul ul a:focus,.sm-dox ul ul ul ul ul a:active{border-left:40px solid transparent}@media(min-width:768px){.sm-dox ul{position:absolute;width:12em}.sm-dox li{float:left}.sm-dox.sm-rtl li{float:right}.sm-dox ul li,.sm-dox.sm-rtl ul li,.sm-dox.sm-vertical li{float:none}.sm-dox a{white-space:nowrap}.sm-dox ul a,.sm-dox.sm-vertical a{white-space:normal}.sm-dox .sm-nowrap>li>a,.sm-dox .sm-nowrap>li>:not(ul) a{white-space:nowrap}.sm-dox{padding:0 10px;background-image:url("tab_b.png");line-height:36px}.sm-dox a span.sub-arrow{top:50%;margin-top:-2px;right:12px;width:0;height:0;border-width:4px;border-style:solid dashed dashed dashed;border-color:#283a5d transparent transparent transparent;background:transparent;-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox a,.sm-dox a:focus,.sm-dox a:active,.sm-dox a:hover,.sm-dox a.highlighted{padding:0 
12px;background-image:url("tab_s.png");background-repeat:no-repeat;background-position:right;-moz-border-radius:0 !important;-webkit-border-radius:0;border-radius:0 !important}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a:hover span.sub-arrow{border-color:white transparent transparent transparent}.sm-dox a.has-submenu{padding-right:24px}.sm-dox li{border-top:0}.sm-dox>li>ul:before,.sm-dox>li>ul:after{content:'';position:absolute;top:-18px;left:30px;width:0;height:0;overflow:hidden;border-width:9px;border-style:dashed dashed solid dashed;border-color:transparent transparent #bbb transparent}.sm-dox>li>ul:after{top:-16px;left:31px;border-width:8px;border-color:transparent transparent #fff transparent}.sm-dox ul{border:1px solid #bbb;padding:5px 0;background:#fff;-moz-border-radius:5px !important;-webkit-border-radius:5px;border-radius:5px !important;-moz-box-shadow:0 5px 9px rgba(0,0,0,0.2);-webkit-box-shadow:0 5px 9px rgba(0,0,0,0.2);box-shadow:0 5px 9px rgba(0,0,0,0.2)}.sm-dox ul a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-color:transparent transparent transparent #555;border-style:dashed dashed dashed solid}.sm-dox ul a,.sm-dox ul a:hover,.sm-dox ul a:focus,.sm-dox ul a:active,.sm-dox ul a.highlighted{color:#555;background-image:none;border:0 !important;color:#555;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul a:hover span.sub-arrow{border-color:transparent transparent transparent white}.sm-dox span.scroll-up,.sm-dox span.scroll-down{position:absolute;display:none;visibility:hidden;overflow:hidden;background:#fff;height:36px}.sm-dox span.scroll-up:hover,.sm-dox span.scroll-down:hover{background:#eee}.sm-dox span.scroll-up:hover span.scroll-up-arrow,.sm-dox span.scroll-up:hover span.scroll-down-arrow{border-color:transparent transparent #d23600 
transparent}.sm-dox span.scroll-down:hover span.scroll-down-arrow{border-color:#d23600 transparent transparent transparent}.sm-dox span.scroll-up-arrow,.sm-dox span.scroll-down-arrow{position:absolute;top:0;left:50%;margin-left:-6px;width:0;height:0;overflow:hidden;border-width:6px;border-style:dashed dashed solid dashed;border-color:transparent transparent #555 transparent}.sm-dox span.scroll-down-arrow{top:8px;border-style:solid dashed dashed dashed;border-color:#555 transparent transparent transparent}.sm-dox.sm-rtl a.has-submenu{padding-right:12px;padding-left:24px}.sm-dox.sm-rtl a span.sub-arrow{right:auto;left:12px}.sm-dox.sm-rtl.sm-vertical a.has-submenu{padding:10px 20px}.sm-dox.sm-rtl.sm-vertical a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-rtl>li>ul:before{left:auto;right:30px}.sm-dox.sm-rtl>li>ul:after{left:auto;right:31px}.sm-dox.sm-rtl ul a.has-submenu{padding:10px 20px !important}.sm-dox.sm-rtl ul a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-vertical{padding:10px 0;-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox.sm-vertical a{padding:10px 20px}.sm-dox.sm-vertical a:hover,.sm-dox.sm-vertical a:focus,.sm-dox.sm-vertical a:active,.sm-dox.sm-vertical a.highlighted{background:#fff}.sm-dox.sm-vertical a.disabled{background-image:url("tab_b.png")}.sm-dox.sm-vertical a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-style:dashed dashed dashed solid;border-color:transparent transparent transparent #555}.sm-dox.sm-vertical>li>ul:before,.sm-dox.sm-vertical>li>ul:after{display:none}.sm-dox.sm-vertical ul a{padding:10px 20px}.sm-dox.sm-vertical ul a:hover,.sm-dox.sm-vertical ul a:focus,.sm-dox.sm-vertical ul a:active,.sm-dox.sm-vertical ul a.highlighted{background:#eee}.sm-dox.sm-vertical ul a.disabled{background:#fff}} \ 
No newline at end of file diff --git a/Doxyfile b/Doxyfile index 98886994..9462355e 100644 --- a/Doxyfile +++ b/Doxyfile @@ -1,4 +1,4 @@ -# Doxyfile 1.9.1 +# Doxyfile 1.9.3 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -38,7 +38,7 @@ PROJECT_NAME = "Kernel Tuning Toolkit" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 2.0.1 +PROJECT_NUMBER = 2.1 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -93,14 +93,6 @@ ALLOW_UNICODE_NAMES = NO OUTPUT_LANGUAGE = English -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -258,16 +250,16 @@ TAB_SIZE = 4 # the documentation. An alias has the form: # name=value # For example adding -# "sideeffect=@par Side Effects:\n" +# "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. 
-# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) ALIASES = @@ -312,8 +304,8 @@ OPTIMIZE_OUTPUT_SLICE = NO # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files). For instance to make doxygen treat .inc files @@ -466,7 +458,7 @@ LOOKUP_CACHE_SIZE = 0 # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, -# which efficively disables parallel processing. 
Please report any issues you +# which effectively disables parallel processing. Please report any issues you # encounter. Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. @@ -610,6 +602,12 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -767,7 +765,8 @@ FILE_VERSION_FILTER = # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE @@ -813,18 +812,26 @@ WARNINGS = YES WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. # The default value is: YES. 
WARN_IF_DOC_ERROR = YES +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO @@ -850,7 +857,10 @@ WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard -# error (stderr). +# error (stderr). In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When as file - is +# specified the warning and error messages are written to standard output +# (stdout). 
WARN_LOGFILE = @@ -883,6 +893,7 @@ INPUT = Readme.md \ Source/KernelArgument/ArgumentManagementType.h \ Source/KernelArgument/ArgumentMemoryLocation.h \ Source/KernelArgument/ArgumentMemoryType.h \ + Source/KernelRunner/KernelRunMode.h \ Source/KernelRunner/ValidationMethod.h \ Source/KernelRunner/ValidationMode.h \ Source/Output/TimeConfiguration/TimeUnit.h \ @@ -911,10 +922,10 @@ INPUT_ENCODING = UTF-8 # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), -# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, -# *.ucf, *.qsf and *.ice. +# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, +# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C +# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, +# *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.h @@ -953,7 +964,7 @@ EXCLUDE_PATTERNS = # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test +# ANamespace::AClass, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* @@ -1139,9 +1150,11 @@ VERBATIM_HEADERS = YES CLANG_ASSISTED_PARSING = NO -# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to -# YES then doxygen will add the directory of each input to the include path. +# If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS +# tag is set to YES then doxygen will add the directory of each input to the +# include path. 
# The default value is: YES. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_ADD_INC_PATHS = YES @@ -1276,7 +1289,7 @@ HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see +# this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. @@ -1286,7 +1299,7 @@ HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A +# in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1368,6 +1381,13 @@ GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" +# This tag determines the URL of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDURL = + # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. @@ -1393,8 +1413,12 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: -# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1553,16 +1577,28 @@ DISABLE_INDEX = YES # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. 
GENERATE_TREEVIEW = YES +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # @@ -1587,6 +1623,13 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +OBFUSCATE_EMAILS = YES + # If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see # https://inkscape.org) to generate formulas as SVG images instead of PNGs for @@ -1635,11 +1678,29 @@ FORMULA_MACROFILE = USE_MATHJAX = NO +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + # When MathJax is enabled you can set the default output format to be used for -# the MathJax output. 
See the MathJax site (see: -# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. +# compatibility. This is the name for Mathjax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for NathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for Mathjax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1652,15 +1713,21 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. 
For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = @@ -1840,29 +1907,31 @@ PAPER_TYPE = a4 EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. +# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for +# the generated LaTeX document. The header should contain everything until the +# first chapter. If it is left blank doxygen will generate a standard header. It +# is highly recommended to start with a default header using +# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty +# and then modify the file new_header.tex. See also section "Doxygen usage" for +# information on how to generate the default header that doxygen normally uses. # -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empty -# string, for the replacement values of the other commands the user is referred -# to HTML_HEADER. +# Note: Only use a user-defined header if you know what you are doing! +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. 
The following +# commands have a special meaning inside the header (and footer): For a +# description of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. If it is left blank doxygen will generate a standard footer. See +# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for +# the generated LaTeX document. The footer should contain everything after the +# last chapter. If it is left blank doxygen will generate a standard footer. See # LATEX_HEADER for more information on how to generate a default footer and what -# special commands can be used inside the footer. -# -# Note: Only use a user-defined footer if you know what you are doing! +# special commands can be used inside the footer. See also section "Doxygen +# usage" for information on how to generate the default footer that doxygen +# normally uses. Note: Only use a user-defined footer if you know what you are +# doing! # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_FOOTER = @@ -1907,8 +1976,7 @@ USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode # command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. +# if errors occur, instead of asking the user for help. # The default value is: NO. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1921,16 +1989,6 @@ LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = YES -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. 
-# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE = NO - # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See # https://en.wikipedia.org/wiki/BibTeX and \cite for more info. @@ -2011,16 +2069,6 @@ RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = -# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code -# with syntax highlighting in the RTF output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. - -RTF_SOURCE_CODE = NO - #--------------------------------------------------------------------------- # Configuration options related to the man page output #--------------------------------------------------------------------------- @@ -2117,15 +2165,6 @@ GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the -# program listings (including syntax highlighting and cross-referencing -# information) to the DOCBOOK output. Note that enabling this will significantly -# increase the size of the DOCBOOK output. -# The default value is: NO. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. 
- -DOCBOOK_PROGRAMLISTING = NO - #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- @@ -2304,15 +2343,6 @@ EXTERNAL_PAGES = YES # Configuration options related to the dot tool #--------------------------------------------------------------------------- -# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram -# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to -# NO turns the diagrams off. Note that this option also works with HAVE_DOT -# disabled, but it is recommended to install and use dot, since it yields more -# powerful graphs. -# The default value is: YES. - -CLASS_DIAGRAMS = YES - # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. @@ -2369,11 +2399,14 @@ DOT_FONTSIZE = 10 DOT_FONTPATH = -# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for -# each documented class showing the direct and indirect inheritance relations. -# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# If the CLASS_GRAPH tag is set to YES (or GRAPH) then doxygen will generate a +# graph for each documented class showing the direct and indirect inheritance +# relations. In case HAVE_DOT is set as well dot will be used to draw the graph, +# otherwise the built-in generator will be used. If the CLASS_GRAPH tag is set +# to TEXT the direct and indirect inheritance relations will be shown as texts / +# links. +# Possible values are: NO, YES, TEXT and GRAPH. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. 
CLASS_GRAPH = YES @@ -2502,6 +2535,13 @@ GRAPHICAL_HIERARCHY = YES DIRECTORY_GRAPH = YES +# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels +# of child directories generated in directory dependency graphs by dot. +# Minimum value: 1, maximum value: 25, default value: 1. +# This tag requires that the tag DIRECTORY_GRAPH is set to YES. + +DIR_GRAPH_MAX_DEPTH = 1 + # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. For an explanation of the image formats see the section # output formats in the documentation of the dot tool (Graphviz (see: @@ -2555,10 +2595,10 @@ MSCFILE_DIRS = DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the -# path where java can find the plantuml.jar file. If left blank, it is assumed -# PlantUML is not used or called during a preprocessing step. Doxygen will -# generate a warning when it encounters a \startuml command in this case and -# will not generate output for the diagram. +# path where java can find the plantuml.jar file or to the filename of jar file +# to be used. If left blank, it is assumed PlantUML is not used or called during +# a preprocessing step. Doxygen will generate a warning when it encounters a +# \startuml command in this case and will not generate output for the diagram. PLANTUML_JAR_PATH = @@ -2620,6 +2660,8 @@ DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page # explaining the meaning of the various boxes and arrows in the dot generated # graphs. +# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal +# graphical representation for inheritance and collaboration diagrams is used. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. 
@@ -2628,8 +2670,8 @@ GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. # -# Note: This setting is not only used for dot files but also for msc and -# plantuml temporary files. +# Note: This setting is not only used for dot files but also for msc temporary +# files. # The default value is: YES. DOT_CLEANUP = YES diff --git a/Readme.md b/Readme.md index 82d32323..b8b38524 100644 --- a/Readme.md +++ b/Readme.md @@ -2,8 +2,8 @@ KTT - Kernel Tuning Toolkit =========================== -KTT is an autotuning framework for OpenCL, CUDA kernels and GLSL compute shaders. Version 2.0, which contains -significant API overhaul and new features and improvements, is now available. +KTT is an autotuning framework for OpenCL, CUDA kernels and GLSL compute shaders. Version 2.1 which introduces +API bindings for Python and new onboarding guide is now available. Main features ------------- @@ -21,7 +21,7 @@ kernel compiler flags and more. Getting started --------------- -* Introductory guide to KTT can be found [here](https://github.com/HiPerCoRe/KTT/blob/development/OnboardingGuide.md). +* Introductory guide to KTT can be found [here](https://github.com/HiPerCoRe/KTT/blob/master/OnboardingGuide.md). * Full documentation for KTT API can be found [here](https://hipercore.github.io/KTT/). * KTT FAQ can be found [here](https://hipercore.github.io/KTT/md__docs__resources__faq.html). * The newest release of the KTT framework can be found [here](https://github.com/HiPerCoRe/KTT/releases). diff --git a/Source/KttPlatform.h b/Source/KttPlatform.h index f887c384..45e4075e 100644 --- a/Source/KttPlatform.h +++ b/Source/KttPlatform.h @@ -31,11 +31,11 @@ /** Minor version of KTT framework. Second number in KTT version description. */ -#define KTT_VERSION_MINOR 0 +#define KTT_VERSION_MINOR 1 /** Patch version of KTT framework. Third number in KTT version description. 
*/ -#define KTT_VERSION_PATCH 1 +#define KTT_VERSION_PATCH 0 namespace ktt { diff --git a/Source/Tuner.h b/Source/Tuner.h index 6d8ac084..d7ec642a 100644 --- a/Source/Tuner.h +++ b/Source/Tuner.h @@ -349,7 +349,7 @@ class KTT_API Tuner template ArgumentId AddArgumentScalar(const T& data); - /** @fn ArgumentId AddArgumentScalar(const void* data, const size_t elementSize) + /** @fn ArgumentId AddArgumentScalar(const void* data, const size_t dataSize) * Adds new scalar argument to the tuner. All scalar arguments are read-only. This method can be utilized when templated version * of scalar argument addition cannot be used. * @param data Pointer to memory with kernel argument data. From f41f5f13dc1ce5d245a222f4d0369f453e9d1c1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Filipovi=C4=8D?= Date: Thu, 20 Jan 2022 23:01:30 +0100 Subject: [PATCH 61/63] Update OnboardingGuide.md Small changes in the rest of the text --- OnboardingGuide.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index bd1a271c..fedabdc1 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -39,7 +39,7 @@ and timing of tuned kernels, allows dynamic (online) tuning during program runti * [Kernel launchers](#kernel-launchers) * [Kernel running and tuning modes](#kernel-running-and-tuning-modes) * [Offline tuning](#offline-tuning) - * [Online tuning](#online-tuning) + * [Dynamic tuning](#dynamic-tuning) * [Accuracy of tuning results](#accuracy-of-tuning-results) * [Stop conditions](#stop-conditions) * [Searchers](#searchers) @@ -525,13 +525,13 @@ const std::vector results = tuner.Tune(kernel); tuner.SaveResults(results, "TuningOutput", ktt::OutputFormat::JSON); ``` -#### Online tuning +#### Dynamic tuning -Online tuning combines kernel tuning with regular running. We can retrieve and use the output from each kernel run like during kernel running. 
However, +Dynamic tuning combines kernel tuning with regular running. We can retrieve and use the output from each kernel run like during kernel running. However, we do not specify the configuration under which kernel is run, but the tuner launches a different configuration each time a kernel is launched, similar to offline tuning. This mode does not separate tuning and usage of a tuned kernel but enables both to happen simultaneously. This can be beneficial in situations where offline tuning is impractical (e.g., when the size of kernel input is frequently changed, which causes the optimal configuration -to change as well). If a kernel is launched via online tuning after exploring all configurations, the best configuration is used. +to change as well). If a kernel is launched via dynamic tuning after exploring all configurations, the best configuration is used. ```cpp std::vector output(numberOfElements, 0.0f); @@ -547,8 +547,8 @@ const auto result = tuner.TuneIteration(kernel, {ktt::BufferOutputDescriptor(out In order to identify the best configuration accurately, it is necessary to launch all configurations under the same conditions so that metrics such as kernel function execution times can be objectively compared. This means that tuned kernels should be launched on the target device in isolation. Launching multiple kernels concurrently while performing tuning may cause inaccuracies in collected data. Furthermore, if the size of kernel input is -changed (e.g., during online tuning), we should restart the tuning process from the beginning since the input size often affects the best configuration. -We can achieve the restart by calling the `ClearData` API method. +changed (e.g., during dynamic tuning), we should restart the tuning process from the beginning since the input size often affects the best configuration. +It is the programmer's responsibility to ensure this. We can achieve the restart by calling the `ClearData` API method.
---- @@ -563,13 +563,13 @@ offers the following stop conditions: * TuningDuration - tuning stops after the specified duration has passed. The stop condition API is public, allowing users to create their own stop conditions. All of the built-in conditions are implemented in public API, so -it is possible to modify them as well. +it is possible to modify them as well. TODO: this is the first mention of the public API; it should probably be explained somewhere what the public API is ---- ### Searchers -Searchers decide the order in which kernel configurations are selected and run during offline and online tuning. Having an efficient searcher can significantly +Searchers decide the order in which kernel configurations are selected and run during offline and dynamic tuning. Having an efficient searcher can significantly reduce the time it takes to find a well-performing configuration. Like stop conditions, a searcher is initialized before tuning begins and updated after each tested configuration with access to the `KernelResult` structure from the previous run. Searchers are assigned to kernels individually so that each kernel can have a different searcher. The following searchers are available in KTT API: @@ -603,20 +603,20 @@ same configuration is launched multiple times (e.g., inside kernel launcher or d ### Profiling metrics collection Apart from execution times, KTT can also collect other types of information from kernel runs. This includes low-level profiling metrics from kernel function -executions such as global memory utilization, number of executed instructions and more. These metrics can be utilized e.g., by searchers to find well-performing -configurations faster. The collection of profiling metrics is disabled by default as it changes the default tuning behavior. In order to collect all profiling -metrics, it is usually necessary to run the same kernel function multiple times (the number increases when more metrics are collected).
It furthermore requires -kernels to be run synchronously. Enabling profiling metrics collection thus decreases tuning performance. It is possible to mitigate performance impact by allowing -only specific metrics, which can be done through KTT API. +executions such as global memory utilization, number of executed instructions and more. These metrics can be utilized, e.g., by searchers to find well-performing +configurations faster, or a KTT user may want to collect them to better understand their kernel's performance. The collection of profiling metrics is disabled by +default as it changes the default tuning behavior. In order to collect all profiling metrics, it is usually necessary to run the same kernel function multiple +times (the number increases when more metrics are collected). It furthermore requires kernels to be run synchronously. Enabling profiling metrics collection thus +decreases tuning performance. It is possible to mitigate performance impact by allowing only specific metrics, which can be done through KTT API. Collection of profiling metrics is currently supported for Nvidia devices on CUDA backend and AMD devices on OpenCL backend. Intel devices are unsupported at the moment due to a lack of profiling library support. Profiling metrics can also be collected for composite kernels. Note, however, that the metrics collection is restricted to a single definition within a composite kernel for AMD devices and newer Nvidia devices (Turing and onwards). This is due to profiling library limitations. -#### Interaction with online tuning and kernel running +#### Interaction with dynamic tuning and kernel running -When utilizing kernel running and online tuning, it is possible to decrease further the performance impact of executing the same kernel function multiple times +When utilizing kernel running and dynamic tuning, it is possible to decrease further the performance impact of executing the same kernel function multiple times during profiling.
Rather than performing all of the profiling runs at once, it is possible to split the profiling metric collection over multiple online tuning or kernel running API function invocations and utilize output from each run. The intermediate `KernelResult` structures from such runs will not contain valid profiling metrics, but the other data will remain accurate. Once the profiling for the current configuration is concluded, the final kernel result will have valid @@ -662,8 +662,8 @@ must first remove all kernels which utilize that definition. The native KTT API is available in C++. Users who prefer Python have an option to build KTT as a Python module which can then be imported into Python. The majority of KTT API methods can be afterward called directly from Python while still benefitting from the performance of the KTT module built in C++. It is also possible to implement custom searchers and stop conditions directly in Python. Therefore, users can take advantage of libraries available in Python but not in C++ for more -complex searcher implementations. The majority of functions, enums and classes have the same names and arguments as in C++. A small number of limitations is -described in the follow-up subsection. +complex searcher implementations, e.g., using a python-based machine learning framework. The majority of functions, enums and classes have the same names and +arguments as in C++. A small number of limitations is described in the follow-up subsection. 
#### Python limitations From cd3f64d2ead3456c8e7ec7528cef633fa75eeafe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Mon, 24 Jan 2022 12:38:19 +0100 Subject: [PATCH 62/63] * Minor grammar fixes in onboarding guide --- OnboardingGuide.md | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/OnboardingGuide.md b/OnboardingGuide.md index fedabdc1..be4281f9 100644 --- a/OnboardingGuide.md +++ b/OnboardingGuide.md @@ -12,9 +12,10 @@ Naturally, a batch script can be sufficient for autotuning in a simple use case. usage of an autotuning framework can be beneficial, as it can automatically handle memory objects, detect errors in autotuned kernels or perform autotuning during program runtime. -Kernel Tuning Toolkit is a framework that allows autotuning of compute kernels written in CUDA, OpenCL or Vulkan. It provides -unified interface for those APIs, handles communication between host (CPU) and accelerator (GPU, Xeon Phi, etc.), checks results -and timing of tuned kernels, allows dynamic (online) tuning during program runtime, profiling of autotuned kernels and more. +Kernel Tuning Toolkit (KTT) is a framework that allows autotuning of compute kernels written in CUDA, OpenCL or Vulkan. It provides +a unified interface for those APIs, handles communication between host (CPU) and accelerator (GPU, Xeon Phi, etc.), checks results +and timing of tuned kernels, allows dynamic (online) tuning during program runtime, profiling of autotuned kernels and more. This +functionality is exposed in the public API through which KTT can be integrated into user applications. ---- @@ -66,7 +67,7 @@ versions of our computation to see which value performs best. In practice, the computations are often complex enough to contain multiple parts that can be optimized, leading to a definition of many tuning parameters. 
For example, we may have the previously mentioned loop unroll parameter with -values {1, 2, 4, 8} and another parameter switching two types of data arrangement in memory with values {0, 1}. Combinations of these +values {1, 2, 4, 8} and another parameter that switches two types of data arrangement in memory with values {0, 1}. Combinations of these parameters now define eight different versions of computation. One such combination is called tuning configuration. Together, all tuning configurations define configuration space. The size of the space grows exponentially with the addition of more tuning parameters. KTT framework offers functionality to mitigate this problem which we will discuss in the follow-up sections. @@ -128,7 +129,7 @@ The first step before we can utilize KTT is a creation of a tuner instance. The autotuning logic. The KTT structures such as kernels, kernel arguments and tuning parameters are tied to a specific tuner instance. The simplest tuner constructor requires three parameters - index for a platform, index for a device and compute API that will be utilized (e.g., CUDA, OpenCL). The indices for platforms and devices are assigned by KTT. We can retrieve them through `PlatformInfo` and `DeviceInfo` structures. These -structures also contain some other useful information such as a list of supported extensions, global memory size, a number of available compute units and +structures also contain other useful information such as a list of supported extensions, global memory size, a number of available compute units and more. Note that the assigned indices remain the same when autotuning applications are launched multiple times on the same computer. They only change when the hardware configuration changes (e.g., a new device is added, an old device is removed, a device driver is reinstalled). Also note, that the indices may not be the same across multiple compute APIs (e.g., an index for the same device may be different under OpenCL and CUDA). 
@@ -193,7 +194,7 @@ const ktt::KernelId kernel = tuner.CreateCompositeKernel("Sort", {definition0, d ### Kernel arguments Kernel arguments define the input and output of a kernel. KTT supports multiple forms of kernel arguments such as buffers, scalars and constant memory -arguments. The tuner must receive an argument's description before it can be assigned to a kernel. In case of a buffer argument, this includes the +arguments. The tuner must receive an argument's description before it can be assigned to a kernel. In the case of a buffer argument, this includes the initial data placed inside the buffer before a kernel is launched, its access type (read or write) and the memory location from which kernel accesses the buffer (host or device). Once the information is provided, the tuner returns a handle to the argument. As the code below shows, we can assign arguments to kernel definitions through this handle. KTT supports a wide range of data types for kernel arguments, including all built-in integer and floating-point @@ -241,19 +242,19 @@ Next, it is possible to decide the memory location from which a kernel accesses memory. Users may wish to choose a different location depending on the type of device used for autotuning (e.g., host memory for CPUs, device memory for dedicated GPUs). For host memory, it is additionally possible to use zero-copy optimization. This optimization causes kernels to access the argument data directly instead of creating a separate buffer and thus reduces memory usage. For CUDA and OpenCL 2.0, one additional memory location option exists - unified. -Unified memory buffers can be accessed from both host and kernel side, relying on a device driver to migrate the data automatically. +Unified memory buffers can be accessed from both host and kernel sides, relying on a device driver to migrate the data automatically. 
Management type option specifies whether buffer management is handled automatically by the tuner (e.g., write arguments are automatically reset to initial state before a new kernel configuration is launched, buffers are created and deleted automatically) or by the user. In some advanced cases, users may wish to manage the buffers manually. Note, however, that this requires the usage of kernel launchers which we will discuss later. -The final option for vector arguments is whether the initial data provided by the user should be copied inside the tuner or referenced directly (note that -this is different option than memory location of data accessed by the kernel -- KTT can make its private copy of provided buffer and then copy it to the -host or device to be directly used by the kernel). By default, the data is copied, which is safer (i.e., temporary arguments work correctly) but less memory +The final option for vector arguments is whether the initial data provided by the user should be copied inside the tuner or referenced directly. Note that +this is a different option than the memory location of data accessed by the kernel - KTT can make its private copy of the provided buffer and then copy it to the +host or device to be directly used by the kernel. By default, the data is copied, which is safer (i.e., temporary arguments work correctly) but less memory efficient. If the initial data is provided in the form of an lvalue argument, the tuner can use a direct reference to avoid copying. This requires the user to keep the initial data buffer valid while the tuner uses the argument. -The comprehensive diagram of KTT buffer types is located [here](https://github.com/HiPerCoRe/KTT/tree/master/Docs/Resources/KttBufferTypes.png). +A comprehensive diagram of KTT buffer copy options is located [here](https://github.com/HiPerCoRe/KTT/tree/master/Docs/Resources/KttBufferTypes.png). 
```cpp std::vector input1; @@ -282,7 +283,8 @@ const ktt::ArgumentId resultId = tuner.AddArgumentVector(result, ktt::ArgumentAc #### Local memory arguments Local (shared in CUDA terminology) memory arguments are used to allocate a corresponding amount of cache-like memory, which is shared across all work-items -(threads) inside a work-group (thread block). We just need to specify the data type and total size of allocated memory in bytes. Note that the local (shared) memory of static size can be also allocated inside the kernel code by using __local (__shared__) declaration specifier. +(threads) inside a work-group (thread block). We just need to specify the data type and total size of allocated memory in bytes. The local (shared) memory +of static size can also be allocated inside the kernel code by using the `__local` (`__shared__`) declaration specifier. ```cpp // Allocate local memory for 4 floats and 2 integers. @@ -371,7 +373,9 @@ allows only one tuning parameter to be tied to the modifier. Another option is u parameters. Creating multiple thread modifiers for the same thread size type (global/local) and dimension is possible. In that case, the modifiers will be applied in the order of their addition to the tuner. Similar to constraints, it is possible to tie only integer parameters to thread modifiers. -Note that KTT can be configured to use global and local sizes according to OpenCL standard (global size is overal number of work-items in NDRange, local size is work-group size) or CUDA (global size is number of thread block and local size is number of threads per thread block). In the example below, CUDA standard is used. +KTT can be configured to use global and local sizes according to the OpenCL standard (global size is the overall number of work-items in NDRange, local size is +work-group size) or CUDA (global size is the number of thread blocks, and local size is the number of threads per thread block). In the example below, the CUDA +standard is used. 
```cpp tuner.AddParameter(kernel, "block_size", std::vector{32, 64, 128, 256}); @@ -548,22 +552,22 @@ In order to identify the best configuration accurately, it is necessary to launc kernel function execution times can be objectively compared. This means that tuned kernels should be launched on the target device in isolation. Launching multiple kernels concurrently while performing tuning may cause inaccuracies in collected data. Furthermore, if the size of kernel input is changed (e.g., during dynamic tuning), we should restart the tuning process from the beginning since the input size often affects the best configuration. -It is programmer's responsibility to ensure this. We can achieve the restart by calling the `ClearData` API method. +It is the programmer's responsibility to ensure this. We can achieve the restart by calling the `ClearData` API method. ---- ### Stop conditions We can utilize stop conditions to interrupt offline tuning when certain criteria are met. The stop condition is initialized before offline tuning begins -and updated after each tested configuration. Within the update, the condition has access to the `KernelResult` structure from prior kernel run. KTT currently -offers the following stop conditions: +and updated after each tested configuration. Within the update, the condition has access to the `KernelResult` structure from the prior kernel run. KTT +currently offers the following stop conditions: * ConfigurationCount - tuning stops after reaching the specified number of tested configurations. * ConfigurationDuration - tuning stops after a configuration with execution time below the specified threshold is found. * ConfigurationFraction - tuning stops after exploring the specified fraction of configuration space. * TuningDuration - tuning stops after the specified duration has passed. The stop condition API is public, allowing users to create their own stop conditions. 
All of the built-in conditions are implemented in public API, so -it is possible to modify them as well. TODO: here there is the first mention on public API, it should be probably discussed somewhere what is public API +it is possible to modify them as well. ---- @@ -595,7 +599,7 @@ APIs. APIs such as OpenCL. * `SetKernelCacheCapacity` - changes size of a cache for compiled kernels. KTT utilizes the cache to improve performance when the same kernel function with the same configuration is launched multiple times (e.g., inside kernel launcher or during kernel running). -* `SetLoggingLevel` - controls the amount of logging information printed to the output. Higher levels print more detailed information which aids debugging. +* `SetLoggingLevel` - controls the amount of logging information printed to the output. Higher levels print more detailed information, which aids debugging. * `SetTimeUnit` - specifies time unit used for printing execution times. This affects console output as well as kernel results saved into a file. ---- @@ -604,7 +608,7 @@ same configuration is launched multiple times (e.g., inside kernel launcher or d Apart from execution times, KTT can also collect other types of information from kernel runs. This includes low-level profiling metrics from kernel function executions such as global memory utilization, number of executed instructions and more. These metrics can be utilized, e.g., by searchers to find well-performing -configurations faster, or KTT user may want to collect them to better understand their kernel's performance. The collection of profiling metrics is disabled by +configurations faster, or KTT users may want to collect them to better understand their kernel's performance. The collection of profiling metrics is disabled by default as it changes the default tuning behavior. 
In order to collect all profiling metrics, it is usually necessary to run the same kernel function multiple times (the number increases when more metrics are collected). It furthermore requires kernels to be run synchronously. Enabling profiling metrics collection thus decreases tuning performance. It is possible to mitigate performance impact by allowing only specific metrics, which can be done through KTT API. @@ -662,7 +666,7 @@ must first remove all kernels which utilize that definition. The native KTT API is available in C++. Users who prefer Python have an option to build KTT as a Python module which can then be imported into Python. The majority of KTT API methods can be afterward called directly from Python while still benefitting from the performance of the KTT module built in C++. It is also possible to implement custom searchers and stop conditions directly in Python. Therefore, users can take advantage of libraries available in Python but not in C++ for more -complex searcher implementations, e.g., using a python-based machine learning framework. The majority of functions, enums and classes have the same names and +complex searcher implementations, e.g., using a Python-based machine learning framework. The majority of functions, enums and classes have the same names and arguments as in C++. A small number of limitations is described in the follow-up subsection. 
#### Python limitations From 6c45d204ac535dd104e6a48c128e5039b2dfce84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Petrovi=C4=8D?= Date: Tue, 25 Jan 2022 11:59:45 +0100 Subject: [PATCH 63/63] * Updated gitignore and release date in changelog --- .gitignore | 1 + Changelog.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 4ee9dbf2..2f6d010b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ Build/* premake5.exe premake5 +*.json diff --git a/Changelog.txt b/Changelog.txt index 5390ae2f..d9472923 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,4 @@ -Version 2.1 (24/1/2022) +Version 2.1 (25/1/2022) * Introduced KTT Python bindings making it possible to utilize KTT API in Python * Added onboarding guide for KTT which describes core KTT features and their usage * Added new methods for compute queue management