From 8375b6716f592ee1936af8cba14b74af1554d31e Mon Sep 17 00:00:00 2001 From: Robin Linden Date: Tue, 21 Mar 2023 01:14:44 +0100 Subject: [PATCH] wasm: Add initial instruction parsing --- wasm/instructions.cpp | 168 +++++++++++++++++++++++++++++++++++++ wasm/instructions.h | 140 +++++++++++++++++++++++++++++++ wasm/instructions_test.cpp | 135 +++++++++++++++++++++++++++++ wasm/wasm_example.cpp | 11 +++ 4 files changed, 454 insertions(+) create mode 100644 wasm/instructions.cpp create mode 100644 wasm/instructions.h create mode 100644 wasm/instructions_test.cpp diff --git a/wasm/instructions.cpp b/wasm/instructions.cpp new file mode 100644 index 00000000..df9858e1 --- /dev/null +++ b/wasm/instructions.cpp @@ -0,0 +1,168 @@ +// SPDX-FileCopyrightText: 2023 Robin Lindén +// +// SPDX-License-Identifier: BSD-2-Clause + +#include "wasm/instructions.h" + +#include "wasm/leb128.h" + +#include +#include +#include +#include + +namespace wasm::instructions { + +// clangd (16) crashes if this is = default even though it's allowed and +// clang has allegedly implemented it starting with Clang 14: +// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2085r0.html +// https://clang.llvm.org/cxx_status.html +bool Block::operator==(Block const &b) const { + return b.type == type && b.instructions == instructions; +} + +bool Loop::operator==(Loop const &l) const { + return l.type == type && l.instructions == instructions; +} + +std::optional BlockType::parse(std::istream &is) { + std::uint8_t type{}; + if (!is.read(reinterpret_cast(&type), sizeof(type))) { + return std::nullopt; + } + + constexpr std::uint8_t kEmptyTag = 0x40; + if (type == kEmptyTag) { + return BlockType{{BlockType::Empty{}}}; + } + + std::stringstream ss{std::string{static_cast(type)}}; + auto value_type = ValueType::parse(ss); + if (value_type) { + return BlockType{{*std::move(value_type)}}; + } + + std::cerr << "Unhandled BlockType\n"; + return std::nullopt; +} + +std::optional 
MemArg::parse(std::istream &is) { + auto a = wasm::Leb128::decode_from(is); + if (!a) { + return std::nullopt; + } + + auto o = wasm::Leb128::decode_from(is); + if (!o) { + return std::nullopt; + } + + return MemArg{.align = *std::move(a), .offset = *std::move(o)}; +} + +std::optional> parse(std::istream &is) { + std::vector instructions{}; + + while (true) { + std::uint8_t opcode{}; + if (!is.read(reinterpret_cast(&opcode), sizeof(opcode))) { + return std::nullopt; + } + + switch (opcode) { + case Block::kOpcode: { + auto type = BlockType::parse(is); + if (!type) { + return std::nullopt; + } + + auto block_instructions = parse(is); + if (!block_instructions) { + return std::nullopt; + } + + instructions.emplace_back(Block{*std::move(type), *std::move(block_instructions)}); + break; + } + case Loop::kOpcode: { + auto type = BlockType::parse(is); + if (!type) { + return std::nullopt; + } + + auto block_instructions = parse(is); + if (!block_instructions) { + return std::nullopt; + } + + instructions.emplace_back(Loop{*std::move(type), *std::move(block_instructions)}); + break; + } + case BreakIf::kOpcode: { + auto value = wasm::Leb128::decode_from(is); + if (!value) { + return std::nullopt; + } + instructions.emplace_back(BreakIf{*value}); + break; + } + case Return::kOpcode: + instructions.emplace_back(Return{}); + break; + case End::kOpcode: + return instructions; + case I32Const::kOpcode: { + auto value = wasm::Leb128::decode_from(is); + if (!value) { + return std::nullopt; + } + instructions.emplace_back(I32Const{*value}); + break; + } + case I32LessThanSigned::kOpcode: + instructions.emplace_back(I32LessThanSigned{}); + break; + case I32Add::kOpcode: + instructions.emplace_back(I32Add{}); + break; + case LocalGet::kOpcode: { + auto value = wasm::Leb128::decode_from(is); + if (!value) { + return std::nullopt; + } + instructions.emplace_back(LocalGet{*value}); + break; + } + case LocalSet::kOpcode: { + auto value = wasm::Leb128::decode_from(is); + if (!value) { + 
return std::nullopt; + } + instructions.emplace_back(LocalSet{*value}); + break; + } + case LocalTee::kOpcode: { + auto value = wasm::Leb128::decode_from(is); + if (!value) { + return std::nullopt; + } + instructions.emplace_back(LocalTee{*value}); + break; + } + case I32Load::kOpcode: { + auto arg = MemArg::parse(is); + if (!arg) { + return std::nullopt; + } + + instructions.emplace_back(I32Load{*std::move(arg)}); + break; + } + default: + std::cerr << "Unhandled opcode 0x" << std::setw(2) << std::setfill('0') << std::hex << +opcode; + return std::nullopt; + } + } +} + +} // namespace wasm::instructions diff --git a/wasm/instructions.h b/wasm/instructions.h new file mode 100644 index 00000000..53e06929 --- /dev/null +++ b/wasm/instructions.h @@ -0,0 +1,140 @@ +// SPDX-FileCopyrightText: 2023 Robin Lindén +// +// SPDX-License-Identifier: BSD-2-Clause + +#ifndef WASM_INSTRUCTIONS_H_ +#define WASM_INSTRUCTIONS_H_ + +#include "wasm/wasm.h" + +#include +#include +#include +#include +#include + +namespace wasm::instructions { + +struct BlockType { + static std::optional parse(std::istream &); + + struct Empty { + [[nodiscard]] bool operator==(Empty const &) const = default; + }; + std::variant value; + [[nodiscard]] bool operator==(BlockType const &) const = default; +}; + +struct MemArg { + static std::optional parse(std::istream &); + + std::uint32_t align{}; + std::uint32_t offset{}; + [[nodiscard]] bool operator==(MemArg const &) const = default; +}; + +struct Block; +struct Loop; +struct BreakIf; +struct Return; + +struct I32Const; +struct I32LessThanSigned; +struct I32Add; + +struct LocalGet; +struct LocalSet; +struct LocalTee; + +struct I32Load; + +using Instruction = std::variant; + +// https://webassembly.github.io/spec/core/binary/instructions.html#control-instructions +struct Block { + static constexpr std::uint8_t kOpcode = 0x02; + BlockType type{}; + std::vector instructions; + [[nodiscard]] bool operator==(Block const &) const; +}; + +struct Loop { + 
static constexpr std::uint8_t kOpcode = 0x03; + BlockType type{}; + std::vector instructions; + [[nodiscard]] bool operator==(Loop const &) const; +}; + +struct BreakIf { + static constexpr std::uint8_t kOpcode = 0x0d; + std::uint32_t label_idx{}; + [[nodiscard]] bool operator==(BreakIf const &) const = default; +}; + +struct Return { + static constexpr std::uint8_t kOpcode = 0x0f; + [[nodiscard]] bool operator==(Return const &) const = default; +}; + +struct End { + static constexpr std::uint8_t kOpcode = 0x0b; + [[nodiscard]] bool operator==(End const &) const = default; +}; + +// https://webassembly.github.io/spec/core/binary/instructions.html#numeric-instructions +struct I32Const { + static constexpr std::uint8_t kOpcode = 0x41; + std::int32_t value{}; + [[nodiscard]] bool operator==(I32Const const &) const = default; +}; + +struct I32LessThanSigned { + static constexpr std::uint8_t kOpcode = 0x48; + [[nodiscard]] bool operator==(I32LessThanSigned const &) const = default; +}; + +struct I32Add { + static constexpr std::uint8_t kOpcode = 0x6a; + [[nodiscard]] bool operator==(I32Add const &) const = default; +}; + +// https://webassembly.github.io/spec/core/binary/instructions.html#variable-instructions +struct LocalGet { + static constexpr std::uint8_t kOpcode = 0x20; + std::uint32_t idx{}; + [[nodiscard]] bool operator==(LocalGet const &) const = default; +}; + +struct LocalSet { + static constexpr std::uint8_t kOpcode = 0x21; + std::uint32_t idx{}; + [[nodiscard]] bool operator==(LocalSet const &) const = default; +}; + +struct LocalTee { + static constexpr std::uint8_t kOpcode = 0x22; + std::uint32_t idx{}; + [[nodiscard]] bool operator==(LocalTee const &) const = default; +}; + +// https://webassembly.github.io/spec/core/binary/instructions.html#memory-instructions +struct I32Load { + static constexpr std::uint8_t kOpcode = 0x28; + MemArg arg{}; + [[nodiscard]] bool operator==(I32Load const &) const = default; +}; +std::optional> parse(std::istream &); + +} 
// namespace wasm::instructions + +#endif diff --git a/wasm/instructions_test.cpp b/wasm/instructions_test.cpp new file mode 100644 index 00000000..34df4993 --- /dev/null +++ b/wasm/instructions_test.cpp @@ -0,0 +1,135 @@ +// SPDX-FileCopyrightText: 2023 Robin Lindén +// +// SPDX-License-Identifier: BSD-2-Clause + +#include "wasm/instructions.h" + +#include "etest/etest2.h" + +#include +#include +#include +#include +#include + +using InsnVec = std::vector; + +namespace { +std::optional parse(std::string s) { + std::stringstream ss{std::move(s)}; + return wasm::instructions::parse(ss); +} +} // namespace + +int main() { + // NOLINTBEGIN(modernize-raw-string-literal): This is hex data, not 'A'. + etest::Suite s{"wasm::instructions"}; + using namespace wasm::instructions; + + s.add_test("block", [](etest::IActions &a) { + // No instructions, empty function prototype. + a.expect_eq(parse("\x02\x40\x0b\x0b"), InsnVec{Block{.type{BlockType::Empty{}}}}); + // No instructions, function returning an f32. + a.expect_eq(parse("\x02\x7d\x0b\x0b"), InsnVec{Block{.type{wasm::ValueType{wasm::ValueType::Kind::Float32}}}}); + // Return, empty function prototype. + a.expect_eq(parse("\x02\x40\x0f\x0b\x0b"), InsnVec{Block{.type{BlockType::Empty{}}, .instructions{Return{}}}}); + + // Unexpected eof. + a.expect_eq(parse("\x02"), std::nullopt); + a.expect_eq(parse("\x02\x40"), std::nullopt); + // Unhandled block type. + a.expect_eq(parse("\x02\x0a\x0b\x0b"), std::nullopt); + }); + + s.add_test("loop", [](etest::IActions &a) { + // No instructions, empty function prototype. + a.expect_eq(parse("\x03\x40\x0b\x0b"), InsnVec{Loop{.type{BlockType::Empty{}}}}); + // No instructions, function returning an f32. + a.expect_eq(parse("\x03\x7d\x0b\x0b"), InsnVec{Loop{.type{wasm::ValueType{wasm::ValueType::Kind::Float32}}}}); + // Return, empty function prototype. + a.expect_eq(parse("\x03\x40\x0f\x0b\x0b"), InsnVec{Loop{.type{BlockType::Empty{}}, .instructions{Return{}}}}); + + // Unexpected eof. 
+ a.expect_eq(parse("\x03"), std::nullopt); + a.expect_eq(parse("\x03\x40"), std::nullopt); + // Unhandled block type. + a.expect_eq(parse("\x03\x0a\x0b\x0b"), std::nullopt); + }); + + s.add_test("break_if", [](etest::IActions &a) { + // Valid label index. + a.expect_eq(parse("\x0d\x09\x0b"), InsnVec{BreakIf{.label_idx = 0x09}}); + + // Unexpected eof. + a.expect_eq(parse("\x0d"), std::nullopt); + // Invalid label index. + a.expect_eq(parse("\x0d\x80\x0b"), std::nullopt); + }); + + s.add_test("i32_const", [](etest::IActions &a) { + // Valid value. + a.expect_eq(parse("\x41\x20\x0b"), InsnVec{I32Const{.value = 0x20}}); + + // Unexpected eof. + a.expect_eq(parse("\x41"), std::nullopt); + // Invalid value. + a.expect_eq(parse("\x41\x80\x0b"), std::nullopt); + }); + + s.add_test("i32_less_than_signed", [](etest::IActions &a) { + a.expect_eq(parse("\x48\x0b"), InsnVec{I32LessThanSigned{}}); // + }); + + s.add_test("i32_add", [](etest::IActions &a) { + a.expect_eq(parse("\x6a\x0b"), InsnVec{I32Add{}}); // + }); + + s.add_test("local_get", [](etest::IActions &a) { + // Valid index. + a.expect_eq(parse("\x20\x09\x0b"), InsnVec{LocalGet{.idx = 0x09}}); + + // Unexpected eof. + a.expect_eq(parse("\x20"), std::nullopt); + // Invalid index. + a.expect_eq(parse("\x20\x80\x0b"), std::nullopt); + }); + + s.add_test("local_set", [](etest::IActions &a) { + // Valid index. + a.expect_eq(parse("\x21\x09\x0b"), InsnVec{LocalSet{.idx = 0x09}}); + + // Unexpected eof. + a.expect_eq(parse("\x21"), std::nullopt); + // Invalid index. + a.expect_eq(parse("\x21\x80\x0b"), std::nullopt); + }); + + s.add_test("local_tee", [](etest::IActions &a) { + // Valid index. + a.expect_eq(parse("\x22\x09\x0b"), InsnVec{LocalTee{.idx = 0x09}}); + + // Unexpected eof. + a.expect_eq(parse("\x22"), std::nullopt); + // Invalid index. + a.expect_eq(parse("\x22\x80\x0b"), std::nullopt); + }); + + s.add_test("i32_load", [](etest::IActions &a) { + // Valid memarg. 
+ a.expect_eq(parse("\x28\x0a\x0c\x0b"), InsnVec{I32Load{MemArg{.align = 0x0a, .offset = 0x0c}}}); + + // Unexpected eof. + a.expect_eq(parse("\x28"), std::nullopt); + a.expect_eq(parse("\x28\x0a"), std::nullopt); + // Invalid memarg. + a.expect_eq(parse("\x28\x80\x0a\x0b"), std::nullopt); + a.expect_eq(parse("\x28\x0a\x80\x0b"), std::nullopt); + }); + + s.add_test("unhandled opcode", [](etest::IActions &a) { + a.expect_eq(parse("\xff"), std::nullopt); // + }); + + // NOLINTEND(modernize-raw-string-literal) + return s.run(); +} diff --git a/wasm/wasm_example.cpp b/wasm/wasm_example.cpp index bf041a45..e30b68f6 100644 --- a/wasm/wasm_example.cpp +++ b/wasm/wasm_example.cpp @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: BSD-2-Clause +#include "wasm/instructions.h" #include "wasm/wasm.h" #include @@ -10,6 +11,8 @@ #include #include +#include + namespace wasm { std::ostream &operator<<(std::ostream &, wasm::ValueType); std::ostream &operator<<(std::ostream &os, wasm::ValueType type) { @@ -112,6 +115,14 @@ int main(int argc, char **argv) { std::cout << " (" << local.type << ": " << local.count << ')'; } std::cout << '\n'; + + std::stringstream ss{std::string{reinterpret_cast(e.code.data()), e.code.size()}}; + auto instructions = wasm::instructions::parse(ss); + if (!instructions) { + std::cout << "failure!\n"; + } else { + std::cout << "success: " << instructions->size(); + } } } }