From b27749ab4306c0260b1840254ded45916985ff0a Mon Sep 17 00:00:00 2001 From: Robin Linden Date: Sat, 12 Oct 2024 02:45:12 +0200 Subject: [PATCH] wasm: Simplify the parsed instruction format This reduces the memory usage of the instructions by a lot and simplifies the code that works with them. --- wasm/byte_code_parser.cpp | 28 ++++----- wasm/byte_code_parser_test.cpp | 16 ++--- wasm/instructions.cpp | 22 ------- wasm/instructions.h | 7 +-- wasm/instructions_test.cpp | 106 +++++++++++++++++---------------- wasm/serialize.cpp | 56 ++++++++--------- wasm/serialize.h | 2 + wasm/serialize_test.cpp | 58 +++++++++++------- wasm/validation.cpp | 24 +------- wasm/validation_test.cpp | 35 ++++++----- 10 files changed, 168 insertions(+), 186 deletions(-) delete mode 100644 wasm/instructions.cpp diff --git a/wasm/byte_code_parser.cpp b/wasm/byte_code_parser.cpp index 48ab37472..3175062d2 100644 --- a/wasm/byte_code_parser.cpp +++ b/wasm/byte_code_parser.cpp @@ -661,11 +661,13 @@ tl::expected ByteCodeParser::parse_module(std::istream return module; } -// NOLINTNEXTLINE(misc-no-recursion) std::optional> ByteCodeParser::parse_instructions(std::istream &is) { using namespace instructions; std::vector instructions{}; + // If an End-opcode is encountered when nesting == 0, we're done. + int nesting = 0; + while (true) { std::uint8_t opcode{}; if (!is.read(reinterpret_cast(&opcode), sizeof(opcode))) { @@ -679,12 +681,8 @@ std::optional> ByteCodeParser::parse_inst return std::nullopt; } - auto block_instructions = parse_instructions(is); - if (!block_instructions) { - return std::nullopt; - } - - instructions.emplace_back(Block{*std::move(type), *std::move(block_instructions)}); + instructions.emplace_back(Block{*std::move(type)}); + ++nesting; break; } case Loop::kOpcode: { @@ -693,12 +691,8 @@ std::optional> ByteCodeParser::parse_inst return std::nullopt; } - auto block_instructions = parse_instructions(is); - if (!block_instructions) { - return std::nullopt; - } - - instructions.emplace_back(Loop{*std::move(type), *std::move(block_instructions)}); + instructions.emplace_back(Loop{*std::move(type)}); + ++nesting; break; } case Branch::kOpcode: { @@ -721,7 +715,13 @@ std::optional> ByteCodeParser::parse_inst instructions.emplace_back(Return{}); break; case End::kOpcode: - return instructions; + instructions.emplace_back(End{}); + if (nesting == 0) { + return instructions; + } + + --nesting; + break; case I32Const::kOpcode: { auto value = wasm::Leb128::decode_from(is); if (!value) { diff --git a/wasm/byte_code_parser_test.cpp b/wasm/byte_code_parser_test.cpp index 670c55dc5..05e61896c 100644 --- a/wasm/byte_code_parser_test.cpp +++ b/wasm/byte_code_parser_test.cpp @@ -411,7 +411,7 @@ void global_section_tests(etest::Suite &s) { a.expect_eq(module.global_section, wasm::GlobalSection{.globals{{ .type{wasm::ValueType::Int32, wasm::GlobalType::Mutability::Const}, - .init{wasm::instructions::I32Const{42}}, + .init{wasm::instructions::I32Const{42}, wasm::instructions::End{}}, }}}); }); @@ -421,7 +421,7 @@ void global_section_tests(etest::Suite &s) { a.expect_eq(module.global_section, wasm::GlobalSection{.globals{{ .type{wasm::ValueType::Int32, wasm::GlobalType::Mutability::Var}, - .init{wasm::instructions::I32Const{42}}, + .init{wasm::instructions::I32Const{42}, wasm::instructions::End{}}, }}}); }); @@ -433,11 +433,11 @@ void global_section_tests(etest::Suite &s) { wasm::GlobalSection{.globals{ { .type{wasm::ValueType::Int32, wasm::GlobalType::Mutability::Var}, - .init{wasm::instructions::I32Const{42}}, + .init{wasm::instructions::I32Const{42}, wasm::instructions::End{}}, }, { .type{wasm::ValueType::Int32, wasm::GlobalType::Mutability::Const}, - .init{wasm::instructions::I32Const{42}}, + .init{wasm::instructions::I32Const{42}, wasm::instructions::End{}}, }, }}); }); @@ -633,7 +633,9 @@ void code_section_tests(etest::Suite &s) { wasm::CodeSection expected{.entries{ wasm::CodeEntry{ - .code{wasm::instructions::I32Const{0b11}, wasm::instructions::I32PopulationCount{}}, + .code{wasm::instructions::I32Const{0b11}, + wasm::instructions::I32PopulationCount{}, + wasm::instructions::End{}}, .locals{{1, wasm::ValueType::Int32}}, }, }}; @@ -647,11 +649,11 @@ void code_section_tests(etest::Suite &s) { wasm::CodeSection expected{.entries{ wasm::CodeEntry{ - .code{wasm::instructions::I32Const{42}}, + .code{wasm::instructions::I32Const{42}, wasm::instructions::End{}}, .locals{{1, wasm::ValueType::Int32}}, }, wasm::CodeEntry{ - .code{}, + .code{wasm::instructions::End{}}, .locals{{5, wasm::ValueType::Int64}, {6, wasm::ValueType::Float32}}, }, }}; diff --git a/wasm/instructions.cpp b/wasm/instructions.cpp deleted file mode 100644 index 114a589e8..000000000 --- a/wasm/instructions.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// SPDX-FileCopyrightText: 2023-2024 Robin Lindén -// SPDX-FileCopyrightText: 2024 David Zero -// -// SPDX-License-Identifier: BSD-2-Clause - -#include "wasm/instructions.h" - -namespace wasm::instructions { - -// clangd (16) crashes if this is = default even though though it's allowed and -// clang has alledegly implemented it starting with Clang 14: -// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2085r0.html -// https://clang.llvm.org/cxx_status.html -bool Block::operator==(Block const &b) const { - return b.type == type && b.instructions == instructions; -} - -bool Loop::operator==(Loop const &l) const { - return l.type == type && l.instructions == instructions; -} - -} // namespace wasm::instructions diff --git a/wasm/instructions.h b/wasm/instructions.h index d27910957..60532bfdd 100644 --- a/wasm/instructions.h +++ b/wasm/instructions.h @@ -11,7 +11,6 @@ #include #include #include -#include namespace wasm::instructions { @@ -140,16 +139,14 @@ struct Block { static constexpr std::uint8_t kOpcode = 0x02; static constexpr std::string_view kMnemonic = "block"; BlockType type{}; - std::vector instructions; - [[nodiscard]] bool operator==(Block const &) const; + [[nodiscard]] bool operator==(Block const &) const = default; }; struct Loop { static constexpr std::uint8_t kOpcode = 0x03; static constexpr std::string_view kMnemonic = "loop"; BlockType type{}; - std::vector instructions; - [[nodiscard]] bool operator==(Loop const &) const; + [[nodiscard]] bool operator==(Loop const &) const = default; }; struct Branch { diff --git a/wasm/instructions_test.cpp b/wasm/instructions_test.cpp index 1d043a4d0..319830fb6 100644 --- a/wasm/instructions_test.cpp +++ b/wasm/instructions_test.cpp @@ -33,11 +33,11 @@ int main() { s.add_test("block", [](etest::IActions &a) { // No instructions, empty function prototype. - a.expect_eq(parse("\x02\x40\x0b\x0b"), InsnVec{Block{.type{BlockType::Empty{}}}}); + a.expect_eq(parse("\x02\x40\x0b\x0b"), InsnVec{Block{.type{BlockType::Empty{}}}, End{}, End{}}); // No instructions, function returning an f32. - a.expect_eq(parse("\x02\x7d\x0b\x0b"), InsnVec{Block{.type{wasm::ValueType::Float32}}}); + a.expect_eq(parse("\x02\x7d\x0b\x0b"), InsnVec{Block{.type{wasm::ValueType::Float32}}, End{}, End{}}); // Return, empty function prototype. - a.expect_eq(parse("\x02\x40\x0f\x0b\x0b"), InsnVec{Block{.type{BlockType::Empty{}}, .instructions{Return{}}}}); + a.expect_eq(parse("\x02\x40\x0f\x0b\x0b"), InsnVec{Block{.type{BlockType::Empty{}}}, Return{}, End{}, End{}}); // Unexpected eof. a.expect_eq(parse("\x02"), std::nullopt); @@ -48,11 +48,11 @@ int main() { s.add_test("loop", [](etest::IActions &a) { // No instructions, empty function prototype. - a.expect_eq(parse("\x03\x40\x0b\x0b"), InsnVec{Loop{.type{BlockType::Empty{}}}}); + a.expect_eq(parse("\x03\x40\x0b\x0b"), InsnVec{Loop{.type{BlockType::Empty{}}}, End{}, End{}}); // No instructions, function returning an f32. - a.expect_eq(parse("\x03\x7d\x0b\x0b"), InsnVec{Loop{.type{wasm::ValueType::Float32}}}); + a.expect_eq(parse("\x03\x7d\x0b\x0b"), InsnVec{Loop{.type{wasm::ValueType::Float32}}, End{}, End{}}); // Return, empty function prototype. - a.expect_eq(parse("\x03\x40\x0f\x0b\x0b"), InsnVec{Loop{.type{BlockType::Empty{}}, .instructions{Return{}}}}); + a.expect_eq(parse("\x03\x40\x0f\x0b\x0b"), InsnVec{Loop{.type{BlockType::Empty{}}}, Return{}, End{}, End{}}); // Unexpected eof. a.expect_eq(parse("\x03"), std::nullopt); @@ -63,7 +63,7 @@ int main() { s.add_test("branch", [](etest::IActions &a) { // Valid label index. - a.expect_eq(parse("\x0c\x09\x0b"), InsnVec{Branch{.label_idx = 0x09}}); + a.expect_eq(parse("\x0c\x09\x0b"), InsnVec{Branch{.label_idx = 0x09}, End{}}); // Unexpected eof. a.expect_eq(parse("\x0c"), std::nullopt); @@ -73,7 +73,7 @@ int main() { s.add_test("branch_if", [](etest::IActions &a) { // Valid label index. - a.expect_eq(parse("\x0d\x09\x0b"), InsnVec{BranchIf{.label_idx = 0x09}}); + a.expect_eq(parse("\x0d\x09\x0b"), InsnVec{BranchIf{.label_idx = 0x09}, End{}}); // Unexpected eof. a.expect_eq(parse("\x0d"), std::nullopt); @@ -83,7 +83,7 @@ int main() { s.add_test("i32_const", [](etest::IActions &a) { // Valid value. - a.expect_eq(parse("\x41\x20\x0b"), InsnVec{I32Const{.value = 0x20}}); + a.expect_eq(parse("\x41\x20\x0b"), InsnVec{I32Const{.value = 0x20}, End{}}); // Unexpected eof. a.expect_eq(parse("\x41"), std::nullopt); @@ -91,151 +91,157 @@ int main() { a.expect_eq(parse("\x41\x80\x0b"), std::nullopt); }); - s.add_test("i32_eqz", [](etest::IActions &a) { a.expect_eq(parse("\x45\x0b"), InsnVec{I32EqualZero{}}); }); + s.add_test("i32_eqz", [](etest::IActions &a) { + a.expect_eq(parse("\x45\x0b"), InsnVec{I32EqualZero{}, End{}}); // + }); - s.add_test("i32_eq", [](etest::IActions &a) { a.expect_eq(parse("\x46\x0b"), InsnVec{I32Equal{}}); }); + s.add_test("i32_eq", [](etest::IActions &a) { + a.expect_eq(parse("\x46\x0b"), InsnVec{I32Equal{}, End{}}); // + }); - s.add_test("i32_ne", [](etest::IActions &a) { a.expect_eq(parse("\x47\x0b"), InsnVec{I32NotEqual{}}); }); + s.add_test("i32_ne", [](etest::IActions &a) { + a.expect_eq(parse("\x47\x0b"), InsnVec{I32NotEqual{}, End{}}); // + }); s.add_test("i32_less_than_signed", [](etest::IActions &a) { - a.expect_eq(parse("\x48\x0b"), InsnVec{I32LessThanSigned{}}); // + a.expect_eq(parse("\x48\x0b"), InsnVec{I32LessThanSigned{}, End{}}); // }); s.add_test("i32_less_than_unsigned", [](etest::IActions &a) { - a.expect_eq(parse("\x49\x0b"), InsnVec{I32LessThanUnsigned{}}); // + a.expect_eq(parse("\x49\x0b"), InsnVec{I32LessThanUnsigned{}, End{}}); // }); s.add_test("i32_greater_than_signed", [](etest::IActions &a) { - a.expect_eq(parse("\x4a\x0b"), InsnVec{I32GreaterThanSigned{}}); // + a.expect_eq(parse("\x4a\x0b"), InsnVec{I32GreaterThanSigned{}, End{}}); // }); s.add_test("i32_greater_than_unsigned", [](etest::IActions &a) { - a.expect_eq(parse("\x4b\x0b"), InsnVec{I32GreaterThanUnsigned{}}); // + a.expect_eq(parse("\x4b\x0b"), InsnVec{I32GreaterThanUnsigned{}, End{}}); // }); s.add_test("i32_less_than_equal_signed", [](etest::IActions &a) { - a.expect_eq(parse("\x4c\x0b"), InsnVec{I32LessThanEqualSigned{}}); // + a.expect_eq(parse("\x4c\x0b"), InsnVec{I32LessThanEqualSigned{}, End{}}); // }); s.add_test("i32_less_than_equal_unsigned", [](etest::IActions &a) { - a.expect_eq(parse("\x4d\x0b"), InsnVec{I32LessThanEqualUnsigned{}}); // + a.expect_eq(parse("\x4d\x0b"), InsnVec{I32LessThanEqualUnsigned{}, End{}}); // }); s.add_test("i32_greater_than_equal_signed", [](etest::IActions &a) { - a.expect_eq(parse("\x4e\x0b"), InsnVec{I32GreaterThanEqualSigned{}}); // + a.expect_eq(parse("\x4e\x0b"), InsnVec{I32GreaterThanEqualSigned{}, End{}}); // }); s.add_test("i32_greater_than_equal_unsigned", [](etest::IActions &a) { - a.expect_eq(parse("\x4f\x0b"), InsnVec{I32GreaterThanEqualUnsigned{}}); // + a.expect_eq(parse("\x4f\x0b"), InsnVec{I32GreaterThanEqualUnsigned{}, End{}}); // }); s.add_test("i32_count_leading_zeros", [](etest::IActions &a) { - a.expect_eq(parse("\x67\x0b"), InsnVec{I32CountLeadingZeros{}}); // + a.expect_eq(parse("\x67\x0b"), InsnVec{I32CountLeadingZeros{}, End{}}); // }); s.add_test("i32_count_trailing_zeros", [](etest::IActions &a) { - a.expect_eq(parse("\x68\x0b"), InsnVec{I32CountTrailingZeros{}}); // + a.expect_eq(parse("\x68\x0b"), InsnVec{I32CountTrailingZeros{}, End{}}); // }); s.add_test("i32_population_count", [](etest::IActions &a) { - a.expect_eq(parse("\x69\x0b"), InsnVec{I32PopulationCount{}}); // + a.expect_eq(parse("\x69\x0b"), InsnVec{I32PopulationCount{}, End{}}); // }); s.add_test("i32_add", [](etest::IActions &a) { - a.expect_eq(parse("\x6a\x0b"), InsnVec{I32Add{}}); // + a.expect_eq(parse("\x6a\x0b"), InsnVec{I32Add{}, End{}}); // }); s.add_test("i32_subtract", [](etest::IActions &a) { - a.expect_eq(parse("\x6b\x0b"), InsnVec{I32Subtract{}}); // + a.expect_eq(parse("\x6b\x0b"), InsnVec{I32Subtract{}, End{}}); // }); s.add_test("i32_multiply", [](etest::IActions &a) { - a.expect_eq(parse("\x6c\x0b"), InsnVec{I32Multiply{}}); // + a.expect_eq(parse("\x6c\x0b"), InsnVec{I32Multiply{}, End{}}); // }); s.add_test("i32_divide_signed", [](etest::IActions &a) { - a.expect_eq(parse("\x6d\x0b"), InsnVec{I32DivideSigned{}}); // + a.expect_eq(parse("\x6d\x0b"), InsnVec{I32DivideSigned{}, End{}}); // }); s.add_test("i32_divide_unsigned", [](etest::IActions &a) { - a.expect_eq(parse("\x6e\x0b"), InsnVec{I32DivideUnsigned{}}); // + a.expect_eq(parse("\x6e\x0b"), InsnVec{I32DivideUnsigned{}, End{}}); // }); s.add_test("i32_remainder_signed", [](etest::IActions &a) { - a.expect_eq(parse("\x6f\x0b"), InsnVec{I32RemainderSigned{}}); // + a.expect_eq(parse("\x6f\x0b"), InsnVec{I32RemainderSigned{}, End{}}); // }); s.add_test("i32_remainder_unsigned", [](etest::IActions &a) { - a.expect_eq(parse("\x70\x0b"), InsnVec{I32RemainderUnsigned{}}); // + a.expect_eq(parse("\x70\x0b"), InsnVec{I32RemainderUnsigned{}, End{}}); // }); s.add_test("i32_and", [](etest::IActions &a) { - a.expect_eq(parse("\x71\x0b"), InsnVec{I32And{}}); // + a.expect_eq(parse("\x71\x0b"), InsnVec{I32And{}, End{}}); // }); s.add_test("i32_or", [](etest::IActions &a) { - a.expect_eq(parse("\x72\x0b"), InsnVec{I32Or{}}); // + a.expect_eq(parse("\x72\x0b"), InsnVec{I32Or{}, End{}}); // }); s.add_test("i32_exclusive_or", [](etest::IActions &a) { - a.expect_eq(parse("\x73\x0b"), InsnVec{I32ExclusiveOr{}}); // + a.expect_eq(parse("\x73\x0b"), InsnVec{I32ExclusiveOr{}, End{}}); // }); s.add_test("i32_shift_left", [](etest::IActions &a) { - a.expect_eq(parse("\x74\x0b"), InsnVec{I32ShiftLeft{}}); // + a.expect_eq(parse("\x74\x0b"), InsnVec{I32ShiftLeft{}, End{}}); // }); s.add_test("i32_shift_right_signed", [](etest::IActions &a) { - a.expect_eq(parse("\x75\x0b"), InsnVec{I32ShiftRightSigned{}}); // + a.expect_eq(parse("\x75\x0b"), InsnVec{I32ShiftRightSigned{}, End{}}); // }); s.add_test("i32_shift_right_unsigned", [](etest::IActions &a) { - a.expect_eq(parse("\x76\x0b"), InsnVec{I32ShiftRightUnsigned{}}); // + a.expect_eq(parse("\x76\x0b"), InsnVec{I32ShiftRightUnsigned{}, End{}}); // }); s.add_test("i32_rotate_left", [](etest::IActions &a) { - a.expect_eq(parse("\x77\x0b"), InsnVec{I32RotateLeft{}}); // + a.expect_eq(parse("\x77\x0b"), InsnVec{I32RotateLeft{}, End{}}); // }); s.add_test("i32_rotate_right", [](etest::IActions &a) { - a.expect_eq(parse("\x78\x0b"), InsnVec{I32RotateRight{}}); // + a.expect_eq(parse("\x78\x0b"), InsnVec{I32RotateRight{}, End{}}); // }); s.add_test("i32_wrap_i64", [](etest::IActions &a) { - a.expect_eq(parse("\xa7\x0b"), InsnVec{I32WrapI64{}}); // + a.expect_eq(parse("\xa7\x0b"), InsnVec{I32WrapI64{}, End{}}); // }); s.add_test("i32_truncate_f32_signed", [](etest::IActions &a) { - a.expect_eq(parse("\xa8\x0b"), InsnVec{I32TruncateF32Signed{}}); // + a.expect_eq(parse("\xa8\x0b"), InsnVec{I32TruncateF32Signed{}, End{}}); // }); s.add_test("i32_truncate_f32_unsigned", [](etest::IActions &a) { - a.expect_eq(parse("\xa9\x0b"), InsnVec{I32TruncateF32Unsigned{}}); // + a.expect_eq(parse("\xa9\x0b"), InsnVec{I32TruncateF32Unsigned{}, End{}}); // }); s.add_test("i32_truncate_f64_signed", [](etest::IActions &a) { - a.expect_eq(parse("\xaa\x0b"), InsnVec{I32TruncateF64Signed{}}); // + a.expect_eq(parse("\xaa\x0b"), InsnVec{I32TruncateF64Signed{}, End{}}); // }); s.add_test("i32_truncate_f64_unsigned", [](etest::IActions &a) { - a.expect_eq(parse("\xab\x0b"), InsnVec{I32TruncateF64Unsigned{}}); // + a.expect_eq(parse("\xab\x0b"), InsnVec{I32TruncateF64Unsigned{}, End{}}); // }); s.add_test("i32_reinterpret_f32", [](etest::IActions &a) { - a.expect_eq(parse("\xbc\x0b"), InsnVec{I32ReinterpretF32{}}); // + a.expect_eq(parse("\xbc\x0b"), InsnVec{I32ReinterpretF32{}, End{}}); // }); s.add_test("i32_extend8_signed", [](etest::IActions &a) { - a.expect_eq(parse("\xc0\x0b"), InsnVec{I32Extend8Signed{}}); // + a.expect_eq(parse("\xc0\x0b"), InsnVec{I32Extend8Signed{}, End{}}); // }); s.add_test("i32_extend16_signed", [](etest::IActions &a) { - a.expect_eq(parse("\xc1\x0b"), InsnVec{I32Extend16Signed{}}); // + a.expect_eq(parse("\xc1\x0b"), InsnVec{I32Extend16Signed{}, End{}}); // }); s.add_test("local_get", [](etest::IActions &a) { // Valid index. - a.expect_eq(parse("\x20\x09\x0b"), InsnVec{LocalGet{.idx = 0x09}}); + a.expect_eq(parse("\x20\x09\x0b"), InsnVec{LocalGet{.idx = 0x09}, End{}}); // Unexpected eof. a.expect_eq(parse("\x20"), std::nullopt); @@ -245,7 +251,7 @@ int main() { s.add_test("local_set", [](etest::IActions &a) { // Valid index. - a.expect_eq(parse("\x21\x09\x0b"), InsnVec{LocalSet{.idx = 0x09}}); + a.expect_eq(parse("\x21\x09\x0b"), InsnVec{LocalSet{.idx = 0x09}, End{}}); // Unexpected eof. a.expect_eq(parse("\x21"), std::nullopt); @@ -255,7 +261,7 @@ int main() { s.add_test("local_tee", [](etest::IActions &a) { // Valid index. - a.expect_eq(parse("\x22\x09\x0b"), InsnVec{LocalTee{.idx = 0x09}}); + a.expect_eq(parse("\x22\x09\x0b"), InsnVec{LocalTee{.idx = 0x09}, End{}}); // Unexpected eof. a.expect_eq(parse("\x22"), std::nullopt); @@ -265,7 +271,7 @@ int main() { s.add_test("i32_load", [](etest::IActions &a) { // Valid memarg. - a.expect_eq(parse("\x28\x0a\x0c\x0b"), InsnVec{I32Load{MemArg{.align = 0x0a, .offset = 0x0c}}}); + a.expect_eq(parse("\x28\x0a\x0c\x0b"), InsnVec{I32Load{MemArg{.align = 0x0a, .offset = 0x0c}}, End{}}); // Unexpected eof. a.expect_eq(parse("\x28"), std::nullopt); diff --git a/wasm/serialize.cpp b/wasm/serialize.cpp index fa22dee37..09023804e 100644 --- a/wasm/serialize.cpp +++ b/wasm/serialize.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,7 @@ struct InstructionStringifyVisitor { void operator()(Branch const &t); void operator()(BranchIf const &t); void operator()(Return const &); + void operator()(End const &); void operator()(I32Const const &t); void operator()(LocalGet const &t); void operator()(LocalSet const &t); @@ -47,39 +49,13 @@ void InstructionStringifyVisitor::apply_indent() { } void InstructionStringifyVisitor::operator()(Block const &t) { - out << Block::kMnemonic << " " << to_string(t.type) << " "; - + out << Block::kMnemonic << " " << to_string(t.type); indent++; - - for (Instruction const &i : t.instructions) { - out << "\n"; - apply_indent(); - std::visit(*this, i); - } - - indent--; - - out << "\n"; - apply_indent(); - out << "end"; } void InstructionStringifyVisitor::operator()(Loop const &t) { - out << Loop::kMnemonic << " " << to_string(t.type) << " "; - + out << Loop::kMnemonic << " " << to_string(t.type); indent++; - - for (Instruction const &i : t.instructions) { - out << "\n"; - apply_indent(); - std::visit(*this, i); - } - - indent--; - - out << "\n"; - apply_indent(); - out << "end"; } void InstructionStringifyVisitor::operator()(Branch const &t) { @@ -94,6 +70,10 @@ void InstructionStringifyVisitor::operator()(Return const &) { out << Return::kMnemonic; } +void InstructionStringifyVisitor::operator()(End const &) { + out << End::kMnemonic; +} + void InstructionStringifyVisitor::operator()(I32Const const &t) { out << I32Const::kMnemonic << " " << std::to_string(t.value); } @@ -134,4 +114,24 @@ std::string to_string(Instruction const &inst) { return std::move(v.out).str(); } +// TODO(robinlinden): Nicer handling of indentation. End should dedent and +// block/loop should indent. +std::string to_string(std::span insns) { + InstructionStringifyVisitor v; + for (std::size_t i = 0; i < insns.size(); ++i) { + auto const &insn = insns[i]; + if (std::holds_alternative(insn)) { + v.indent--; + } + + v.apply_indent(); + std::visit(v, insn); + + if (i != insns.size() - 1) { + v.out << '\n'; + } + } + return std::move(v.out).str(); +} + } // namespace wasm::instructions diff --git a/wasm/serialize.h b/wasm/serialize.h index cb1f794b6..0f5395599 100644 --- a/wasm/serialize.h +++ b/wasm/serialize.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +87,7 @@ constexpr std::string to_string(MemArg const &ma, std::optional n } std::string to_string(Instruction const &); +std::string to_string(std::span); } // namespace wasm::instructions diff --git a/wasm/serialize_test.cpp b/wasm/serialize_test.cpp index 893e674d7..0d32d1924 100644 --- a/wasm/serialize_test.cpp +++ b/wasm/serialize_test.cpp @@ -1,43 +1,59 @@ // SPDX-FileCopyrightText: 2024 David Zero +// SPDX-FileCopyrightText: 2024-2025 Robin Lindén // // SPDX-License-Identifier: BSD-2-Clause -#include "wasm/instructions.h" #include "wasm/serialize.h" + +#include "wasm/instructions.h" #include "wasm/types.h" #include "etest/etest2.h" +#include + int main() { etest::Suite s{"wasm module serialization"}; using namespace wasm::instructions; + using Insns = std::vector; s.add_test("block", [](etest::IActions &a) { - a.expect_eq(to_string(Block{.type{wasm::ValueType::Int32}, .instructions{I32Const{2}, I32Const{2}, I32Add{}}}), - "block (result i32) \n\ti32.const 2\n\ti32.const 2\n\ti32.add\nend"); - a.expect_eq(to_string(Block{.type{wasm::TypeIdx{7}}, .instructions{I32Const{2}, I32Const{2}, I32Add{}}}), - "block (type 7) \n\ti32.const 2\n\ti32.const 2\n\ti32.add\nend"); - a.expect_eq(to_string(Block{.type{wasm::ValueType::Int32}, - .instructions{Block{.type{wasm::ValueType::Int32}, .instructions{I32Const{8}}}, - I32Const{2}, - I32Const{2}, - I32Add{}}}), - "block (result i32) \n\tblock (result i32) \n\t\ti32.const 8\n\tend\n\ti32.const 2\n\ti32.const " + a.expect_eq(to_string(Insns{Block{.type{wasm::ValueType::Int32}}, I32Const{2}, I32Const{2}, I32Add{}, End{}}), + "block (result i32)\n\ti32.const 2\n\ti32.const 2\n\ti32.add\nend"); + + a.expect_eq(to_string(Insns{Block{.type{wasm::TypeIdx{7}}}, I32Const{2}, I32Const{2}, I32Add{}, End{}}), + "block (type 7)\n\ti32.const 2\n\ti32.const 2\n\ti32.add\nend"); + + a.expect_eq(to_string(Insns{Block{.type{wasm::ValueType::Int32}}, + Block{.type{wasm::ValueType::Int32}}, + I32Const{8}, + End{}, + I32Const{2}, + I32Const{2}, + I32Add{}, + End{}}), + + "block (result i32)\n\tblock (result i32)\n\t\ti32.const 8\n\tend\n\ti32.const 2\n\ti32.const " "2\n\ti32.add\nend"); }); s.add_test("loop", [](etest::IActions &a) { - a.expect_eq(to_string(Loop{.type{wasm::ValueType::Int32}, .instructions{I32Const{2}, I32Const{2}, I32Add{}}}), - "loop (result i32) \n\ti32.const 2\n\ti32.const 2\n\ti32.add\nend"); - a.expect_eq(to_string(Loop{.type{wasm::TypeIdx{7}}, .instructions{I32Const{2}, I32Const{2}, I32Add{}}}), - "loop (type 7) \n\ti32.const 2\n\ti32.const 2\n\ti32.add\nend"); - a.expect_eq(to_string(Loop{.type{wasm::ValueType::Int32}, - .instructions{Loop{.type{wasm::ValueType::Int32}, .instructions{I32Const{8}}}, - I32Const{2}, - I32Const{2}, - I32Add{}}}), - "loop (result i32) \n\tloop (result i32) \n\t\ti32.const 8\n\tend\n\ti32.const 2\n\ti32.const " + a.expect_eq(to_string(Insns{Loop{.type{wasm::ValueType::Int32}}, I32Const{2}, I32Const{2}, I32Add{}, End{}}), + "loop (result i32)\n\ti32.const 2\n\ti32.const 2\n\ti32.add\nend"); + + a.expect_eq(to_string(Insns{Loop{.type{wasm::TypeIdx{7}}}, I32Const{2}, I32Const{2}, I32Add{}, End{}}), + "loop (type 7)\n\ti32.const 2\n\ti32.const 2\n\ti32.add\nend"); + + a.expect_eq(to_string(Insns{Loop{.type{wasm::ValueType::Int32}}, + Loop{.type{wasm::ValueType::Int32}}, + I32Const{8}, + End{}, + I32Const{2}, + I32Const{2}, + I32Add{}, + End{}}), + "loop (result i32)\n\tloop (result i32)\n\t\ti32.const 8\n\tend\n\ti32.const 2\n\ti32.const " "2\n\ti32.add\nend"); }); diff --git a/wasm/validation.cpp b/wasm/validation.cpp index 6b304170f..437f213fd 100644 --- a/wasm/validation.cpp +++ b/wasm/validation.cpp @@ -1,5 +1,5 @@ // SPDX-FileCopyrightText: 2024 David Zero -// SPDX-FileCopyrightText: 2024 Robin Lindén +// SPDX-FileCopyrightText: 2024-2025 Robin Lindén // // SPDX-License-Identifier: BSD-2-Clause @@ -257,15 +257,9 @@ tl::expected validate_function(std::uint32_t func_idx, v.push_ctrl(Block{}, func_type.parameters, func_type.results); - std::vector code = func_code.code; - - for (std::size_t i = 0; i < code.size(); i++) { + for (auto inst : func_code.code) { assert(!v.control_stack.empty()); - // This can't be a reference, because we modify 'code' as we iterate - // over it. This is also why we're not using a range-based for loop. - Instruction const inst = code[i]; - // https://webassembly.github.io/spec/core/valid/instructions.html#numeric-instructions // constant if (std::holds_alternative(inst)) { @@ -403,12 +397,6 @@ tl::expected validate_function(std::uint32_t func_idx, return tl::unexpected{ValidationError::BlockTypeInvalid}; } - std::vector block_code = block->instructions; - - block_code.emplace_back(End{}); - - code.insert(code.begin() + i + 1, block_code.begin(), block_code.end()); - std::vector params; std::vector results; @@ -427,12 +415,6 @@ tl::expected validate_function(std::uint32_t func_idx, return tl::unexpected{ValidationError::BlockTypeInvalid}; } - std::vector loop_code = loop->instructions; - - loop_code.emplace_back(End{}); - - code.insert(code.begin() + i + 1, loop_code.begin(), loop_code.end()); - std::vector params; std::vector results; @@ -502,7 +484,7 @@ tl::expected validate_function(std::uint32_t func_idx, // Check function return values, but only if we didn't just execute a // return. This only happens if a "return" was the last instruction in the // sequence. - if (!std::holds_alternative(func_code.code.back())) { + if (!std::holds_alternative(func_code.code.back()) && !v.control_stack.empty()) { tl::expected maybe_vals = v.pop_vals(v.label_types(v.control_stack[0])); if (!maybe_vals.has_value()) { diff --git a/wasm/validation_test.cpp b/wasm/validation_test.cpp index 78a283c77..2afab6e02 100644 --- a/wasm/validation_test.cpp +++ b/wasm/validation_test.cpp @@ -44,76 +44,75 @@ int main() { s.add_test("Function: block with valid body", [=](etest::IActions &a) mutable { m.code_section->entries[0].code = { - Block{.type = {ValueType::Int32}, .instructions = {I32Const{42}, I32Const{42}, I32Add{}}}}; + Block{.type = {ValueType::Int32}}, I32Const{42}, I32Const{42}, I32Add{}, End{}}; a.expect(validate(m).has_value()); }); s.add_test("Function: loop with valid body", [=](etest::IActions &a) mutable { m.code_section->entries[0].code = { - Loop{.type = {ValueType::Int32}, .instructions = {I32Const{42}, I32Const{42}, I32Add{}}}}; + Loop{.type = {ValueType::Int32}}, I32Const{42}, I32Const{42}, I32Add{}, End{}}; a.expect(validate(m).has_value()); }); s.add_test("Function: block with invalid body", [=](etest::IActions &a) mutable { - m.code_section->entries[0].code = {Block{.type = {ValueType::Int32}, .instructions = {I32Const{42}, I32Add{}}}}; + m.code_section->entries[0].code = {Block{.type = {ValueType::Int32}}, I32Const{42}, I32Add{}, End{}}; a.expect_eq(validate(m), tl::unexpected{ValidationError::ValueStackUnderflow}); }); s.add_test("Function: block returning with unclean stack", [=](etest::IActions &a) mutable { - m.code_section->entries[0].code = {Block{ - .type = {ValueType::Int32}, .instructions = {I32Const{42}, I32Const{42}, I32Const{42}, I32Add{}}}}; + m.code_section->entries[0].code = { + Block{.type = {ValueType::Int32}}, I32Const{42}, I32Const{42}, I32Const{42}, I32Add{}, End{}}; a.expect_eq(validate(m), tl::unexpected{ValidationError::ValueStackHeightMismatch}); }); s.add_test("Function: block with valid body and invalid return value", [=](etest::IActions &a) mutable { m.code_section->entries[0].code = { - Block{.type = {ValueType::Int64}, .instructions = {I32Const{42}, I32Const{42}, I32Add{}}}}; + Block{.type = {ValueType::Int64}}, I32Const{42}, I32Const{42}, I32Add{}, End{}}; a.expect_eq(validate(m), tl::unexpected{ValidationError::ValueStackUnexpected}); }); s.add_test("Function: block ending with branch", [=](etest::IActions &a) mutable { m.code_section->entries[0].code = { - Block{.type = {ValueType::Int32}, .instructions = {I32Const{42}, Branch{.label_idx = 0}}}}; + Block{.type = {ValueType::Int32}}, I32Const{42}, Branch{.label_idx = 0}, End{}}; a.expect(validate(m).has_value()); }); s.add_test("Function: loop with conditional branch", [=](etest::IActions &a) mutable { m.code_section->entries[0].code = { - Loop{.type = {BlockType::Empty{}}, .instructions = {I32Const{1}, BranchIf{.label_idx = 0}}}, - I32Const{1}}; + Loop{.type = {BlockType::Empty{}}}, I32Const{1}, BranchIf{.label_idx = 0}, End{}, I32Const{1}, End{}}; a.expect(validate(m).has_value()); }); s.add_test("Function: loop with conditional branch, invalid label", [=](etest::IActions &a) mutable { m.code_section->entries[0].code = { - Loop{.type = {BlockType::Empty{}}, .instructions = {I32Const{1}, BranchIf{.label_idx = 4}}}}; + Loop{.type = {BlockType::Empty{}}}, I32Const{1}, BranchIf{.label_idx = 4}, End{}}; a.expect_eq(validate(m), tl::unexpected{ValidationError::LabelInvalid}); }); s.add_test("Function: block with branch, dead code", [=](etest::IActions &a) mutable { - m.code_section->entries[0].code = {Block{.type = {ValueType::Int32}, - .instructions = {I32Const{42}, I32Const{42}, Branch{.label_idx = 0}, I32Add{}}}}; + m.code_section->entries[0].code = { + Block{.type = {ValueType::Int32}}, I32Const{42}, I32Const{42}, Branch{.label_idx = 0}, I32Add{}, End{}}; a.expect(validate(m).has_value()); }); s.add_test("Function: block with branch, incorrect return value", [=](etest::IActions &a) mutable { m.code_section->entries[0].code = { - Block{.type = {ValueType::Int64}, .instructions = {I32Const{42}, Branch{.label_idx = 0}}}}; + Block{.type = {ValueType::Int64}}, I32Const{42}, Branch{.label_idx = 0}, End{}}; a.expect_eq(validate(m), tl::unexpected{ValidationError::ValueStackUnexpected}); }); s.add_test("Function: block with branch, invalid label", [=](etest::IActions &a) mutable { - m.code_section->entries[0].code = {Block{.type = {ValueType::Int32}, .instructions = {Branch{.label_idx = 4}}}}; + m.code_section->entries[0].code = {Block{.type = {ValueType::Int32}}, Branch{.label_idx = 4}, End{}}; a.expect_eq(validate(m), tl::unexpected{ValidationError::LabelInvalid}); }); @@ -125,27 +124,27 @@ int main() { }); s.add_test("Function: getting undefined local", [=](etest::IActions &a) mutable { - m.code_section->entries[0].code = {Block{.type = {ValueType::Int32}, .instructions = {LocalGet{0}}}}; + m.code_section->entries[0].code = {Block{.type = {ValueType::Int32}}, LocalGet{0}, End{}}; a.expect_eq(validate(m), tl::unexpected{ValidationError::LocalUndefined}); }); s.add_test("Function: valid return", [=](etest::IActions &a) mutable { - m.code_section->entries[0].code = {Block{.type = {ValueType::Int32}, .instructions = {I32Const{42}, Return{}}}}; + m.code_section->entries[0].code = {Block{.type = {ValueType::Int32}}, I32Const{42}, Return{}, End{}}; a.expect(validate(m).has_value()); }); s.add_test("Function: invalid return, implicit", [=](etest::IActions &a) mutable { m.code_section->entries[0].code = { - Loop{.type = {BlockType::Empty{}}, .instructions = {I32Const{1}, BranchIf{.label_idx = 0}}}}; + Loop{.type = {BlockType::Empty{}}}, I32Const{1}, BranchIf{.label_idx = 0}, End{}}; a.expect_eq(validate(m), tl::unexpected{ValidationError::ValueStackUnderflow}); }); s.add_test("Function: invalid return, explicit", [=](etest::IActions &a) mutable { m.code_section->entries[0].code = { - Loop{.type = {BlockType::Empty{}}, .instructions = {I32Const{1}, BranchIf{.label_idx = 0}}}, Return{}}; + Loop{.type = {BlockType::Empty{}}}, I32Const{1}, BranchIf{.label_idx = 0}, End{}, Return{}, End{}}; a.expect_eq(validate(m), tl::unexpected{ValidationError::ValueStackUnderflow}); });