Skip to content

Commit

Permalink
Use hash function to generate stable table types (#943)
Browse files Browse the repository at this point in the history
See: https://gaiaplatform.atlassian.net/browse/GAIAPLAT-1389 for details on this workitem.

Some unit tests are updated because the type generation algorithm changed: we can no longer assume that types 1 and 2 always exist.
  • Loading branch information
chuan authored Sep 23, 2021
1 parent 051ac51 commit e92afdc
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 37 deletions.
23 changes: 19 additions & 4 deletions production/catalog/src/ddl_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "gaia_internal/catalog/ddl_executor.hpp"

#include <limits>
#include <memory>
#include <optional>
#include <string>
Expand All @@ -13,11 +14,11 @@
#include <utility>

#include "gaia/common.hpp"
#include "gaia/db/db.hpp"
#include "gaia/exception.hpp"

#include "gaia_internal/catalog/catalog.hpp"
#include "gaia_internal/catalog/gaia_catalog.h"
#include "gaia_internal/common/hash.hpp"
#include "gaia_internal/common/logger_internal.hpp"
#include "gaia_internal/common/retail_assert.hpp"
#include "gaia_internal/common/system_table_types.hpp"
Expand All @@ -32,9 +33,7 @@ using namespace gaia::db;
using namespace gaia::direct_access;

using std::make_unique;
using std::shared_lock;
using std::string;
using std::unique_lock;

namespace gaia
{
Expand Down Expand Up @@ -783,6 +782,19 @@ reference_offset_t ddl_executor_t::find_available_offset(gaia::common::gaia_id_t
find_parent_available_offset(table.outgoing_relationships()));
}

// Derives a table type value from the database name and the table name by
// hashing each name with murmurhash3_x86_32 and combining the two hashes.
//
// db_name:    name of the database that owns the table.
// table_name: name of the table.
// Returns the table-name hash XOR-ed with the database-name hash shifted
// left by one bit.
uint32_t generate_table_type(const string& db_name, const string& table_name)
{
    // The hash function takes its length as an `int`. Identifiers come from
    // flex tokens, which are bounded by the bison/flex input buffer size
    // (YY_BUF_SIZE, 16k with the default settings currently in use), so the
    // checks below only guard the narrowing casts that follow.
    ASSERT_PRECONDITION(db_name.length() <= std::numeric_limits<int>::max(), "The DB name is too long.");
    ASSERT_PRECONDITION(table_name.length() <= std::numeric_limits<int>::max(), "The table name is too long.");

    const auto table_name_length = static_cast<int>(table_name.length());
    const auto db_name_length = static_cast<int>(db_name.length());

    const uint32_t table_name_hash = murmurhash3_x86_32(table_name.data(), table_name_length);
    const uint32_t db_name_hash = murmurhash3_x86_32(db_name.data(), db_name_length);

    // Shift the database hash so that swapping the two names does not
    // produce the same combined value.
    return table_name_hash ^ (db_name_hash << 1);
}

gaia_id_t ddl_executor_t::create_table_impl(
const string& db_name,
const string& table_name,
Expand Down Expand Up @@ -852,7 +864,10 @@ gaia_id_t ddl_executor_t::create_table_impl(
const std::vector<uint8_t> bfbs = generate_bfbs(fbs);
const std::vector<uint8_t> bin = generate_bin(fbs, generate_json(fields));

gaia_type_t table_type = fixed_type == c_invalid_gaia_type ? allocate_type() : fixed_type;
gaia_type_t table_type
= (fixed_type == c_invalid_gaia_type)
? generate_table_type(in_context(db_name), table_name)
: fixed_type;

gaia_id_t table_id = gaia_table_t::insert_row(
table_name.c_str(),
Expand Down
4 changes: 0 additions & 4 deletions production/db/core/tests/db_test_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ namespace gaia::db::test
// The relationship can be both 1:1 or 1:n depending on the test.
// Doctor --> Patient
// This relationship is used in most relationship/references tests.

constexpr common::gaia_type_t c_doctor_type = 1;
constexpr common::gaia_type_t c_patient_type = 2;

constexpr common::reference_offset_t c_first_patient_offset = 0;
constexpr common::reference_offset_t c_next_patient_offset = 0;
constexpr common::reference_offset_t c_parent_doctor_offset = 1;
Expand Down
58 changes: 39 additions & 19 deletions production/db/core/tests/test_relationships.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,32 @@ using namespace gaia::db::test;

class gaia_relationships_test : public db_test_base_t
{
protected:
void SetUp() override
{
db_test_base_t::SetUp();
gaia_id_t doctor_table_id = gaia::catalog::create_table("doctor", {});
gaia_id_t patient_table_id = gaia::catalog::create_table("patient", {});

begin_transaction();
doctor_table_type = gaia::catalog::gaia_table_t::get(doctor_table_id).type();
patient_table_type = gaia::catalog::gaia_table_t::get(patient_table_id).type();
commit_transaction();
}

void TearDown() override
{
clean_type_registry();
db_test_base_t::TearDown();
}

static gaia_type_t doctor_table_type;
static gaia_type_t patient_table_type;
};

gaia_type_t gaia_relationships_test::doctor_table_type = c_invalid_gaia_type;
gaia_type_t gaia_relationships_test::patient_table_type = c_invalid_gaia_type;

// simone: I tried overloading the operator == with no success.
bool compare_relationships(const relationship_t& lhs, const relationship_t& rhs)
{
Expand Down Expand Up @@ -128,15 +148,15 @@ TEST_F(gaia_relationships_test, metadata_one_to_many)
type_registry_t& test_registry = type_registry_t::instance();

relationship_builder_t::one_to_many()
.parent(c_doctor_type)
.child(c_patient_type)
.parent(doctor_table_type)
.child(patient_table_type)
.create_relationship();

auto& parent = test_registry.get(c_doctor_type);
auto& child = test_registry.get(c_patient_type);
auto& parent = test_registry.get(doctor_table_type);
auto& child = test_registry.get(patient_table_type);

ASSERT_EQ(parent.get_type(), c_doctor_type);
ASSERT_EQ(child.get_type(), c_patient_type);
ASSERT_EQ(parent.get_type(), doctor_table_type);
ASSERT_EQ(child.get_type(), patient_table_type);

ASSERT_EQ(parent.num_references(), 1);
ASSERT_EQ(child.num_references(), 2);
Expand All @@ -150,8 +170,8 @@ TEST_F(gaia_relationships_test, metadata_one_to_many)
// Parent and child should be sharing the same relation.
ASSERT_TRUE(compare_relationships(*parent_rel, *child_rel));

ASSERT_EQ(parent_rel->parent_type, c_doctor_type);
ASSERT_EQ(parent_rel->child_type, c_patient_type);
ASSERT_EQ(parent_rel->parent_type, doctor_table_type);
ASSERT_EQ(parent_rel->child_type, patient_table_type);
ASSERT_EQ(parent_rel->first_child_offset, c_first_patient_offset);
ASSERT_EQ(parent_rel->next_child_offset, c_next_patient_offset);
ASSERT_EQ(parent_rel->parent_offset, c_parent_doctor_offset);
Expand All @@ -164,15 +184,15 @@ TEST_F(gaia_relationships_test, metadata_one_to_one)
type_registry_t& test_registry = type_registry_t::instance();

relationship_builder_t::one_to_one()
.parent(c_doctor_type)
.child(c_patient_type)
.parent(doctor_table_type)
.child(patient_table_type)
.create_relationship();

auto& parent = test_registry.get(c_doctor_type);
auto& child = test_registry.get(c_patient_type);
auto& parent = test_registry.get(doctor_table_type);
auto& child = test_registry.get(patient_table_type);

ASSERT_EQ(parent.get_type(), c_doctor_type);
ASSERT_EQ(child.get_type(), c_patient_type);
ASSERT_EQ(parent.get_type(), doctor_table_type);
ASSERT_EQ(child.get_type(), patient_table_type);

ASSERT_EQ(parent.num_references(), 1);
ASSERT_EQ(child.num_references(), 2);
Expand All @@ -186,8 +206,8 @@ TEST_F(gaia_relationships_test, metadata_one_to_one)
// Parent and child should be sharing the same relation.
ASSERT_TRUE(compare_relationships(*parent_rel, *child_rel));

ASSERT_EQ(parent_rel->parent_type, c_doctor_type);
ASSERT_EQ(parent_rel->child_type, c_patient_type);
ASSERT_EQ(parent_rel->parent_type, doctor_table_type);
ASSERT_EQ(parent_rel->child_type, patient_table_type);
ASSERT_EQ(parent_rel->first_child_offset, c_first_patient_offset);
ASSERT_EQ(parent_rel->next_child_offset, c_next_patient_offset);
ASSERT_EQ(parent_rel->parent_offset, c_parent_doctor_offset);
Expand All @@ -200,11 +220,11 @@ TEST_F(gaia_relationships_test, child_relation_do_not_use_next_child)
type_registry_t& test_registry = type_registry_t::instance();

relationship_builder_t::one_to_one()
.parent(c_doctor_type)
.child(c_patient_type)
.parent(doctor_table_type)
.child(patient_table_type)
.create_relationship();

auto& child = test_registry.get(c_patient_type);
auto& child = test_registry.get(patient_table_type);
// although next_patient offset exists in child, it is not the one used
// to identify the relation
auto child_rel = child.find_child_relationship(c_next_patient_offset);
Expand Down
85 changes: 85 additions & 0 deletions production/inc/gaia_internal/common/hash.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/////////////////////////////////////////////
// Copyright (c) Gaia Platform LLC
// All rights reserved.
/////////////////////////////////////////////
#pragma once

#include <cstdint>
#include <cstring>

/**
 * Computes the 32-bit MurmurHash3 (x86_32 variant) of a byte buffer.
 *
 * Adapted from the public domain murmur3 hash implementation at:
 * https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
 *
 * This adaptation has no seed parameter: the hash state is seeded with `len`.
 *
 * The function is `inline` because it is defined in a header; without it,
 * including this header from more than one translation unit produces
 * multiple-definition link errors.
 *
 * @param key Pointer to the bytes to hash. Must reference at least `len`
 *            readable bytes (it is not dereferenced when `len` is 0).
 * @param len Number of bytes to hash. Must not be negative.
 * @return The 32-bit hash of the buffer. An empty buffer hashes to 0.
 */
inline uint32_t murmurhash3_x86_32(const void* key, int len)
{
    const auto* data = static_cast<const uint8_t*>(key);
    const int nblocks = len / 4;

    uint32_t h1 = static_cast<uint32_t>(len);

    constexpr uint32_t c1 = 0xcc9e2d51;
    constexpr uint32_t c2 = 0x1b873593;

    //----------
    // body

    // Blocks are read through the byte pointer with memcpy so that no
    // misaligned uint32_t pointer is ever formed.
    for (int i = 0; i < nblocks; ++i)
    {
        uint32_t k1;
        std::memcpy(&k1, data + i * 4, sizeof(k1));

        k1 *= c1;
        // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
        k1 = (k1 << 15) | (k1 >> (32 - 15));
        k1 *= c2;

        h1 ^= k1;
        // Rotate the accumulated state (not the current block): rotating k1
        // here would discard every previously mixed block, so that only the
        // last block would influence the result.
        // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
        h1 = (h1 << 13) | (h1 >> (32 - 13));
        // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
        h1 = h1 * 5 + 0xe6546b64;
    }

    //----------
    // tail

    // The 0-3 bytes that remain after the last full 4-byte block.
    const uint8_t* tail = data + nblocks * 4;

    uint32_t k1 = 0;

    switch (len & 3)
    {
    case 3:
        // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
        k1 ^= static_cast<uint32_t>(tail[2]) << 16;
        [[fallthrough]];
    case 2:
        // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
        k1 ^= static_cast<uint32_t>(tail[1]) << 8;
        [[fallthrough]];
    case 1:
        k1 ^= tail[0];
        k1 *= c1;
        // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
        k1 = (k1 << 15) | (k1 >> (32 - 15));
        k1 *= c2;
        h1 ^= k1;
        break;
    default:
        // No tail bytes to mix in.
        break;
    }

    //----------
    // finalization

    h1 ^= static_cast<uint32_t>(len);

    // Final avalanche: force all bits of the state to affect the result.
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
    h1 ^= h1 >> 16;
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
    h1 *= 0x85ebca6b;
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
    h1 ^= h1 >> 13;
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
    h1 *= 0xc2b2ae35;
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
    h1 ^= h1 >> 16;

    return h1;
}
2 changes: 1 addition & 1 deletion production/sdk/tests/test_sdk_no_init_rules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ TEST(sdk_test_no_init_rules, app_check)
employee_writer w;
w.name_first = "Did_not";
w.name_last = "Provide_initialize_rules";
w.insert_row();
// Don't write to the db because catalog is not properly populated.
// Don't change the state of the db at all (no commit).
}
gaia::system::shutdown();
Expand Down
35 changes: 26 additions & 9 deletions production/system/tests/test_recovery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,15 @@ class recovery_test : public ::testing::Test
begin_session();
type_id_mapping_t::instance().clear();
schema_loader_t::instance().load_schema("addr_book.ddl");

gaia_id_t doctor_table_id = gaia::catalog::create_table("doctor", {});
gaia_id_t patient_table_id = gaia::catalog::create_table("patient", {});

begin_transaction();
doctor_table_type = gaia::catalog::gaia_table_t::get(doctor_table_id).type();
patient_table_type = gaia::catalog::gaia_table_t::get(patient_table_id).type();
commit_transaction();

end_session();
s_server.stop();
}
Expand All @@ -128,12 +137,18 @@ class recovery_test : public ::testing::Test
s_server.delete_data_dir();
}

static gaia_type_t doctor_table_type;
static gaia_type_t patient_table_type;

private:
// Map of employees for which the server has returned a successful commit.
// We maintain this map in memory & will use it to validate recovered shared memory post crash.
static inline std::map<gaia_id_t, employee_copy_t> s_employee_map{};
};

gaia_type_t recovery_test::doctor_table_type = c_invalid_gaia_type;
gaia_type_t recovery_test::patient_table_type = c_invalid_gaia_type;

void recovery_test::validate_data()
{
size_t count = 0;
Expand Down Expand Up @@ -531,20 +546,22 @@ TEST_F(recovery_test, reference_create_delete_test_new)
{
auto_transaction_t txn;

txn.commit();

// Create the relationship.
relationship_builder_t::one_to_many()
.parent(c_doctor_type)
.child(c_patient_type)
.parent(doctor_table_type)
.child(patient_table_type)
.create_relationship();

// Create the parent.
gaia_ptr_t parent = create_object(c_doctor_type, "Dr. House");
gaia_ptr_t parent = create_object(doctor_table_type, "Dr. House");
parent_id = parent.id();

// Create the children.
for (int i = 0; i < c_num_children; i++)
{
gaia_ptr_t child = create_object(c_patient_type, "John Doe " + std::to_string(i));
gaia_ptr_t child = create_object(patient_table_type, "John Doe " + std::to_string(i));

// Add half references from the parent and half from the children.
// (semantically same operation)
Expand Down Expand Up @@ -636,16 +653,16 @@ TEST_F(recovery_test, reference_update_test_new)

// Create the relationship.
relationship_builder_t::one_to_many()
.parent(c_doctor_type)
.child(c_patient_type)
.parent(doctor_table_type)
.child(patient_table_type)
.create_relationship();

// Create the parent.
gaia_ptr_t parent = create_object(c_doctor_type, "Dr. House");
gaia_ptr_t parent = create_object(doctor_table_type, "Dr. House");
parent_id = parent.id();

// Create child.
gaia_ptr_t child = create_object(c_patient_type, "John Doe ");
gaia_ptr_t child = create_object(patient_table_type, "John Doe ");
child_id = child.id();

parent.add_child_reference(child_id, c_first_patient_offset);
Expand All @@ -661,7 +678,7 @@ TEST_F(recovery_test, reference_update_test_new)
auto_transaction_t txn;

// Create the new parent.
gaia_ptr_t new_parent = create_object(c_doctor_type, "Dr. House");
gaia_ptr_t new_parent = create_object(doctor_table_type, "Dr. House");
new_parent_id = new_parent.id();

// Get the child
Expand Down

0 comments on commit e92afdc

Please sign in to comment.