Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into benchmark-test
Browse files Browse the repository at this point in the history
  • Loading branch information
mewim committed May 14, 2024
2 parents 3accd61 + 44f09ee commit 7292d74
Show file tree
Hide file tree
Showing 243 changed files with 3,361 additions and 1,467 deletions.
32 changes: 29 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.15)

project(Kuzu VERSION 0.4.1.1 LANGUAGES CXX C)
project(Kuzu VERSION 0.4.1.2 LANGUAGES CXX C)

find_package(Threads REQUIRED)

Expand Down Expand Up @@ -167,16 +167,35 @@ option(BUILD_SINGLE_FILE_HEADER "Build single file header. Requires Python >= 3.
# Optional build components; all default off except the core library.
option(BUILD_TESTS "Build C++ tests." FALSE)
option(BUILD_EXTENSION_TESTS "Build C++ extension tests." FALSE)
option(BUILD_KUZU "Build Kuzu." TRUE)
option(ENABLE_BACKTRACES "Enable backtrace printing for exceptions and segfaults" FALSE)

option(BUILD_LCOV "Build coverage report." FALSE)
if(${BUILD_LCOV})
# NOTE(review): deliberately appended to CMAKE_CXX_FLAGS rather than
# add_compile_options so the coverage flags also end up on the link line,
# which gcc/gcov instrumentation requires — confirm before restructuring.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
endif()

# Backtrace support: pulls in cpptrace and defines KUZU_BACKTRACE so the
# sources can print stack traces for uncaught exceptions and segfaults.
if (ENABLE_BACKTRACES)
    # Prefer a system-provided cpptrace. QUIET suppresses the noisy
    # "could not find" warning on every configure when we are going to
    # fall back to FetchContent anyway.
    find_package(cpptrace QUIET)
    if (NOT cpptrace_FOUND)
        include(FetchContent)
        FetchContent_Declare(
            cpptrace
            GIT_REPOSITORY https://github.com/jeremy-rifkin/cpptrace.git
            # Pinned to an exact release tag for reproducible builds;
            # GIT_SHALLOW keeps the clone small.
            GIT_TAG v0.5.4
            GIT_SHALLOW TRUE
        )
        FetchContent_MakeAvailable(cpptrace)
    endif()
    add_compile_definitions(KUZU_BACKTRACE)
endif()

function(add_kuzu_test TEST_NAME)
set(SRCS ${ARGN})
add_executable(${TEST_NAME} ${SRCS})
target_link_libraries(${TEST_NAME} PRIVATE test_helper test_runner graph_test)
if (ENABLE_BACKTRACES)
target_link_libraries(${TEST_NAME} PRIVATE register_backtrace_signal_handler)
endif()
target_include_directories(${TEST_NAME} PRIVATE ${PROJECT_SOURCE_DIR}/test/include)
include(GoogleTest)
gtest_discover_tests(${TEST_NAME} DISCOVERY_TIMEOUT 600 DISCOVERY_MODE PRE_TEST)
Expand All @@ -186,6 +205,9 @@ function(add_kuzu_api_test TEST_NAME)
set(SRCS ${ARGN})
add_executable(${TEST_NAME} ${SRCS})
target_link_libraries(${TEST_NAME} PRIVATE api_graph_test api_test_helper)
if (ENABLE_BACKTRACES)
target_link_libraries(${TEST_NAME} PRIVATE register_backtrace_signal_handler)
endif()
target_include_directories(${TEST_NAME} PRIVATE ${PROJECT_SOURCE_DIR}/test/include)
include(GoogleTest)
gtest_discover_tests(${TEST_NAME})
Expand All @@ -208,7 +230,11 @@ include_directories(third_party/pybind11/include)
include_directories(third_party/pyparse)
include_directories(third_party/re2/include)
include_directories(third_party/serd/include)
include_directories(third_party/spdlog)
# spdlog is only consumed by the test helpers and the benchmark runner, so
# keep it off the include path for plain library builds. A single OR
# condition replaces the previous duplicated if/elseif branches, and the
# unquoted variable names are safe in if() even when the option is undefined
# (the ${...} dereference form errors out for an unset variable).
if (BUILD_TESTS OR BUILD_BENCHMARK)
    include_directories(third_party/spdlog)
endif ()
include_directories(third_party/utf8proc/include)
include_directories(third_party/zstd/include)
include_directories(third_party/httplib)
Expand All @@ -219,7 +245,7 @@ add_subdirectory(third_party)
if(${BUILD_KUZU})
add_definitions(-DKUZU_ROOT_DIRECTORY="${PROJECT_SOURCE_DIR}")
add_definitions(-DKUZU_CMAKE_VERSION="${CMAKE_PROJECT_VERSION}")
add_definitions(-DKUZU_EXTENSION_VERSION="0.3.2")
add_definitions(-DKUZU_EXTENSION_VERSION="0.3.4")

include_directories(src/include)

Expand Down
33 changes: 26 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
clangd tidy clangd-diagnostics \
install \
clean-extension clean-python-api clean-java clean \
extension-test shell-test
extension-test shell-test

.ONESHELL:
.SHELLFLAGS = -ec
Expand Down Expand Up @@ -57,6 +57,9 @@ endif
# Configure + build with CMAKE_BUILD_TYPE=Release (optimized, no debug info).
release:
	$(call run-cmake-release,)

# Configure + build with CMAKE_BUILD_TYPE=RelWithDebInfo: optimized but with
# debug symbols kept, so test-time backtraces stay readable.
relwithdebinfo:
	$(call run-cmake-relwithdebinfo,)

# Configure + build with CMAKE_BUILD_TYPE=Debug.
debug:
	$(call run-cmake-debug,)

Expand Down Expand Up @@ -90,8 +93,8 @@ alldebug:

# Main tests
test:
$(call run-cmake-release, -DBUILD_TESTS=TRUE)
ctest --test-dir build/release/test --output-on-failure -j ${TEST_JOBS}
$(call run-cmake-relwithdebinfo, -DBUILD_TESTS=TRUE -DENABLE_BACKTRACES=TRUE)
ctest --test-dir build/relwithdebinfo/test --output-on-failure -j ${TEST_JOBS}

lcov:
$(call run-cmake-release, -DBUILD_TESTS=TRUE -DBUILD_LCOV=TRUE)
Expand Down Expand Up @@ -161,12 +164,13 @@ example:
$(call run-cmake-release, -DBUILD_EXAMPLES=TRUE)

extension-test:
$(call run-cmake-release, \
$(call run-cmake-relwithdebinfo, \
-DBUILD_EXTENSIONS="httpfs;duckdb;postgres" \
-DBUILD_EXTENSION_TESTS=TRUE \
-DENABLE_ADDRESS_SANITIZER=TRUE \
-DENABLE_BACKTRACES=TRUE \
)
ctest --test-dir build/release/extension --output-on-failure -j ${TEST_JOBS}
ctest --test-dir build/relwithdebinfo/extension --output-on-failure -j ${TEST_JOBS}
aws s3 rm s3://kuzu-dataset-us/${RUN_ID}/ --recursive

extension-debug:
Expand All @@ -182,10 +186,10 @@ extension-release:
)

shell-test:
$(call run-cmake-release, \
$(call run-cmake-relwithdebinfo, \
-DBUILD_SHELL=TRUE \
)
cd tools/shell/test && python3 -m pytest -v
cd tools/shell/test && python3 -m pytest -v

# Clang-related tools and checks

Expand Down Expand Up @@ -214,6 +218,8 @@ install:
# Cleaning
clean-extension:
cmake -E rm -rf extension/httpfs/build
cmake -E rm -rf extension/duckdb/build
cmake -E rm -rf extension/postgres/build

clean-python-api:
cmake -E rm -rf tools/python_api/build
Expand Down Expand Up @@ -247,11 +253,24 @@ define build-cmake-release
$(call build-cmake,release,Release,$1)
endef

# Build step only, into build/relwithdebinfo with config RelWithDebInfo;
# $1 carries extra arguments through to the shared build-cmake helper.
define build-cmake-relwithdebinfo
	$(call build-cmake,relwithdebinfo,RelWithDebInfo,$1)
endef

# Configure step only, into build/release with config Release.
define config-cmake-release
	$(call config-cmake,release,Release,$1)
endef

# Configure step only, into build/relwithdebinfo with config RelWithDebInfo.
define config-cmake-relwithdebinfo
	$(call config-cmake,relwithdebinfo,RelWithDebInfo,$1)
endef

# Convenience wrapper: configure then build the Release tree.
define run-cmake-release
	$(call config-cmake-release,$1)
	$(call build-cmake-release,$1)
endef

# Convenience wrapper: configure then build the RelWithDebInfo tree.
define run-cmake-relwithdebinfo
	$(call config-cmake-relwithdebinfo,$1)
	$(call build-cmake-relwithdebinfo,$1)
endef
Empty file added dataset/binary-demo/.lock
Empty file.
Empty file added dataset/binary-demo/.shadow
Empty file.
Empty file added dataset/binary-demo/.wal
Empty file.
Binary file added dataset/binary-demo/catalog.kz
Binary file not shown.
Binary file added dataset/binary-demo/data.kz
Binary file not shown.
Binary file added dataset/binary-demo/metadata.kz
Binary file not shown.
Binary file added dataset/binary-demo/n-0.hindex
Binary file not shown.
Binary file added dataset/binary-demo/n-0.hindex.ovf
Binary file not shown.
Binary file added dataset/binary-demo/n-1.hindex
Binary file not shown.
Binary file added dataset/binary-demo/n-1.hindex.ovf
Binary file not shown.
Binary file added dataset/binary-demo/nodes.statistics_and_deleted.ids
Binary file not shown.
Binary file added dataset/binary-demo/rels.statistics
Binary file not shown.
1 change: 1 addition & 0 deletions dataset/databases/non-empty-wal/.wal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
aaaaaaaaaaaaaaaa
Empty file.
Empty file added dataset/databases/tinysnb/.wal
Empty file.
Binary file added dataset/databases/tinysnb/catalog.kz
Binary file not shown.
Binary file added dataset/databases/tinysnb/data.kz
Binary file not shown.
Binary file added dataset/databases/tinysnb/metadata.kz
Binary file not shown.
Binary file added dataset/databases/tinysnb/n-0.hindex
Binary file not shown.
Binary file added dataset/databases/tinysnb/n-1.hindex
Binary file not shown.
Binary file added dataset/databases/tinysnb/n-2.hindex
Binary file not shown.
Binary file added dataset/databases/tinysnb/n-2.hindex.ovf
Binary file not shown.
Binary file not shown.
Binary file added dataset/databases/tinysnb/rels.statistics
Binary file not shown.
1 change: 1 addition & 0 deletions extension/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ if ("postgres" IN_LIST BUILD_EXTENSIONS)
endif()

if (${BUILD_EXTENSION_TESTS})
include_directories(${CMAKE_SOURCE_DIR}/third_party/spdlog)
add_definitions(-DTEST_FILES_DIR="extension")
add_subdirectory(${CMAKE_SOURCE_DIR}/test/gtest ${CMAKE_CURRENT_BINARY_DIR}/test/gtest EXCLUDE_FROM_ALL)
# Make gtest available to subdirectories.
Expand Down
13 changes: 13 additions & 0 deletions extension/httpfs/src/httpfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,19 @@ bool HTTPFileSystem::canHandleFile(const std::string& path) const {
return path.rfind("https://", 0) == 0 || path.rfind("http://", 0) == 0;
}

bool HTTPFileSystem::fileOrPathExists(const std::string& path) {
try {
auto fileInfo = openFile(path, O_RDONLY, nullptr, FileLockType::READ_LOCK);
auto httpFileInfo = fileInfo->constPtrCast<HTTPFileInfo>();
if (httpFileInfo->length == 0) {
return false;
}
return true;
} catch (...) {
return false;
}
}

void HTTPFileSystem::readFromFile(common::FileInfo& fileInfo, void* buffer, uint64_t numBytes,
uint64_t position) const {
auto& httpFileInfo = ku_dynamic_cast<FileInfo&, HTTPFileInfo&>(fileInfo);
Expand Down
2 changes: 2 additions & 0 deletions extension/httpfs/src/include/httpfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class HTTPFileSystem : public common::FileSystem {

bool canHandleFile(const std::string& path) const override;

bool fileOrPathExists(const std::string& path) override;

static std::unique_ptr<httplib::Client> getClient(const std::string& host);

static std::unique_ptr<httplib::Headers> getHTTPHeaders(HeaderMap& headerMap);
Expand Down
43 changes: 21 additions & 22 deletions extension/httpfs/src/s3fs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,24 +46,24 @@ S3FileInfo::S3FileInfo(std::string path, common::FileSystem* fileSystem, int fla
uploaderHasException{false} {}

S3FileInfo::~S3FileInfo() {
auto s3fs = ku_dynamic_cast<FileSystem*, S3FileSystem*>(fileSystem);
auto s3FS = fileSystem->ptrCast<S3FileSystem>();
if ((((flags & O_ACCMODE) & O_WRONLY)) && !uploadFinalized) {
s3fs->flushAllBuffers(this);
s3FS->flushAllBuffers(this);
if (numPartsUploaded) {
s3fs->finalizeMultipartUpload(this);
s3FS->finalizeMultipartUpload(this);
}
}
}

void S3FileInfo::initialize() {
HTTPFileInfo::initialize();
auto s3fs = ku_dynamic_cast<const common::FileSystem*, const S3FileSystem*>(fileSystem);
auto s3FS = fileSystem->constPtrCast<S3FileSystem>();
if ((flags & O_ACCMODE) & O_WRONLY) {
auto maxNumParts = uploadParams.maxNumPartsPerFile;
auto requiredPartSize = uploadParams.maxFileSize / maxNumParts;
partSize = std::max<uint64_t>(AWS_MINIMUM_PART_SIZE, requiredPartSize);
KU_ASSERT(partSize * maxNumParts >= uploadParams.maxFileSize);
multipartUploadID = s3fs->initializeMultiPartUpload(this);
multipartUploadID = s3FS->initializeMultiPartUpload(this);
}
}

Expand All @@ -75,7 +75,7 @@ void S3FileInfo::initializeClient() {

std::shared_ptr<S3WriteBuffer> S3FileInfo::getBuffer(uint16_t writeBufferIdx) {
std::unique_lock<std::mutex> lck(writeBuffersLock);
auto s3FS = ku_dynamic_cast<common::FileSystem*, S3FileSystem*>(fileSystem);
auto s3FS = fileSystem->ptrCast<S3FileSystem>();
if (writeBuffers.contains(writeBufferIdx)) {
return writeBuffers.at(writeBufferIdx);
}
Expand Down Expand Up @@ -458,8 +458,8 @@ std::string S3FileSystem::initializeMultiPartUpload(S3FileInfo* fileInfo) const

void S3FileSystem::writeFile(common::FileInfo& fileInfo, const uint8_t* buffer, uint64_t numBytes,
uint64_t offset) const {
auto& s3FileInfo = ku_dynamic_cast<FileInfo&, S3FileInfo&>(fileInfo);
if (!((s3FileInfo.flags & O_ACCMODE) & O_WRONLY)) {
auto s3FileInfo = fileInfo.ptrCast<S3FileInfo>();
if (!((s3FileInfo->flags & O_ACCMODE) & O_WRONLY)) {
throw IOException("Write called on a file which is not open in write mode.");
}
uint64_t numBytesWritten = 0;
Expand All @@ -468,20 +468,20 @@ void S3FileSystem::writeFile(common::FileInfo& fileInfo, const uint8_t* buffer,
// We use amazon multipart upload API which segments an object into a set of parts. Since we
// don't track the usage of individual part, determining whether we can upload a part is
// challenging if we allow non-sequential write.
if (currOffset != s3FileInfo.fileOffset) {
if (currOffset != s3FileInfo->fileOffset) {
throw InternalException("Non-sequential write not supported.");
}
auto writeBufferIdx = currOffset / s3FileInfo.partSize;
auto writeBuffer = s3FileInfo.getBuffer(writeBufferIdx);
auto writeBufferIdx = currOffset / s3FileInfo->partSize;
auto writeBuffer = s3FileInfo->getBuffer(writeBufferIdx);
auto offsetToWrite = currOffset - writeBuffer->startOffset;
auto numBytesToWrite =
std::min<uint64_t>(numBytes - numBytesWritten, s3FileInfo.partSize - offsetToWrite);
std::min<uint64_t>(numBytes - numBytesWritten, s3FileInfo->partSize - offsetToWrite);
memcpy(writeBuffer->getData() + offsetToWrite, buffer + numBytesWritten, numBytesToWrite);
writeBuffer->numBytesWritten += numBytesToWrite;
if (writeBuffer->numBytesWritten >= s3FileInfo.partSize) {
flushBuffer(&s3FileInfo, writeBuffer);
if (writeBuffer->numBytesWritten >= s3FileInfo->partSize) {
flushBuffer(s3FileInfo, writeBuffer);
}
s3FileInfo.fileOffset += numBytesToWrite;
s3FileInfo->fileOffset += numBytesToWrite;
numBytesWritten += numBytesToWrite;
}
}
Expand Down Expand Up @@ -541,8 +541,7 @@ static void verifyUploadResult(const std::string& result, const HTTPResponse& re
}

void S3FileSystem::finalizeMultipartUpload(S3FileInfo* fileInfo) {
auto s3FS =
ku_dynamic_cast<const common::FileSystem*, const S3FileSystem*>(fileInfo->fileSystem);
auto s3FS = fileInfo->fileSystem->constPtrCast<S3FileSystem>();
fileInfo->uploadFinalized = true;
auto finalizeUploadQueryBody = getFinalizeUploadQueryBody(fileInfo);
auto body = finalizeUploadQueryBody.str();
Expand Down Expand Up @@ -653,7 +652,7 @@ HeaderMap S3FileSystem::createS3Header(std::string url, std::string query, std::

std::unique_ptr<HTTPResponse> S3FileSystem::headRequest(common::FileInfo* fileInfo,
const std::string& url, HeaderMap /*headerMap*/) const {
auto& authParams = ku_dynamic_cast<FileInfo*, S3FileInfo*>(fileInfo)->authParams;
auto& authParams = fileInfo->ptrCast<S3FileInfo>()->authParams;
auto parsedS3URL = parseS3URL(url, authParams);
auto httpURL = parsedS3URL.getHTTPURL();
auto headers = createS3Header(parsedS3URL.path, "", parsedS3URL.host, "s3", "HEAD", authParams);
Expand All @@ -663,7 +662,7 @@ std::unique_ptr<HTTPResponse> S3FileSystem::headRequest(common::FileInfo* fileIn
std::unique_ptr<HTTPResponse> S3FileSystem::getRangeRequest(common::FileInfo* fileInfo,
const std::string& url, HeaderMap /*headerMap*/, uint64_t fileOffset, char* buffer,
uint64_t bufferLen) const {
auto& authParams = ku_dynamic_cast<FileInfo*, S3FileInfo*>(fileInfo)->authParams;
auto& authParams = fileInfo->ptrCast<S3FileInfo>()->authParams;
auto parsedS3URL = parseS3URL(url, authParams);
auto s3HTTPUrl = parsedS3URL.getHTTPURL();
auto headers = createS3Header(parsedS3URL.path, "", parsedS3URL.host, "s3", "GET", authParams);
Expand All @@ -675,7 +674,7 @@ std::unique_ptr<HTTPResponse> S3FileSystem::postRequest(common::FileInfo* fileIn
const std::string& url, kuzu::httpfs::HeaderMap /*headerMap*/,
std::unique_ptr<uint8_t[]>& outputBuffer, uint64_t& outputBufferLen, const uint8_t* inputBuffer,
uint64_t inputBufferLen, std::string httpParams) const {
auto& authParams = ku_dynamic_cast<FileInfo*, S3FileInfo*>(fileInfo)->authParams;
auto& authParams = fileInfo->ptrCast<S3FileInfo>()->authParams;
auto parsedS3URL = parseS3URL(url, authParams);
auto httpURL = parsedS3URL.getHTTPURL(httpParams);
auto payloadHash = getPayloadHash(inputBuffer, inputBufferLen);
Expand All @@ -688,7 +687,7 @@ std::unique_ptr<HTTPResponse> S3FileSystem::postRequest(common::FileInfo* fileIn
std::unique_ptr<HTTPResponse> S3FileSystem::putRequest(common::FileInfo* fileInfo,
const std::string& url, kuzu::httpfs::HeaderMap /*headerMap*/, const uint8_t* inputBuffer,
uint64_t inputBufferLen, std::string httpParams) const {
auto& authParams = ku_dynamic_cast<FileInfo*, S3FileInfo*>(fileInfo)->authParams;
auto& authParams = fileInfo->ptrCast<S3FileInfo>()->authParams;
auto parsedS3URL = parseS3URL(url, authParams);
auto httpURL = parsedS3URL.getHTTPURL(httpParams);
auto payloadHash = getPayloadHash(inputBuffer, inputBufferLen);
Expand Down Expand Up @@ -737,7 +736,7 @@ void S3FileSystem::flushBuffer(S3FileInfo* fileInfo,

void S3FileSystem::uploadBuffer(S3FileInfo* fileInfo,
std::shared_ptr<S3WriteBuffer> bufferToUpload) {
auto s3FileSystem = ku_dynamic_cast<FileSystem*, S3FileSystem*>(fileInfo->fileSystem);
auto s3FileSystem = fileInfo->fileSystem->ptrCast<S3FileSystem>();
std::string queryParam =
"partNumber=" + std::to_string(bufferToUpload->partID + 1) + "&" +
"uploadId=" + S3FileSystem::encodeURL(fileInfo->multipartUploadID, true);
Expand Down
Loading

0 comments on commit 7292d74

Please sign in to comment.