diff --git a/Cargo.lock b/Cargo.lock index 8467538..1889a7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,39 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "anyhow" version = "1.0.79" @@ -44,14 +77,25 @@ dependencies = [ "anyhow", "bitfield-struct", "derive_more", + "hashbrown", "log", + "miniz_oxide", + "num-bigint", + "num_enum", "paste", + "regex", "spin", "strum", "strum_macros", "uefi", ] +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "convert_case" version = "0.4.0" @@ -71,6 +115,16 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "heck" version = "0.5.0" @@ -93,6 +147,75 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_enum" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02339744ee7253741199f897151b38e72257d13802d4ee837285cc2990a90845" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = 
"num_enum_derive" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "681030a937600a36906c185595136d26abfebb4aa9c65701cefcaf8578bb982b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + [[package]] name = "paste" version = "1.0.15" @@ -137,6 +260,35 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "rustc_version" version = "0.4.0" @@ -271,3 +423,29 @@ name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] diff --git a/Cargo.toml b/Cargo.toml index 8871f20..62ae306 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,11 @@ version = "0.1.0" edition = "2021" [dependencies] +num_enum = { version = "0.7.2", default-features = false } +miniz_oxide = "0.7.4" +# bimap = { version = "0.6.3", default-features = false } +num-bigint = { version = "0.4", default-features = false } +hashbrown = "0.14" strum = { version = "0.26", default-features = false } strum_macros = { version = "0.26", default-features = false } spin = "0.9.8" @@ -16,5 +21,11 @@ anyhow = { version = "1.0.79", default-features = false } derive_more = "0.99.17" paste = "1.0.15" +[build-dependencies] +regex = "1.10" + [features] trace-pmm = [] +trace-malloc = [] +trace-beam = [] +trace-messages = [] diff --git a/Makefile b/Makefile index e505807..534c6e4 100644 --- a/Makefile +++ b/Makefile @@ -8,11 +8,11 @@ clean: mkdir build # Emulator (the EFI executable) -PROFILE=release -PROFILE_DIR=release -CARGOFLAGS=--target x86_64-unknown-uefi-debug.json -Zbuild-std=core,compiler_builtins,alloc -Zbuild-std-features=compiler-builtins-mem -MAGIC_SECTION_OFFSET=0x140800000 -RELOC_SECTION_OFFSET=0x140801000 +PROFILE=dev +PROFILE_DIR=debug +CARGOFLAGS=--target x86_64-unknown-uefi-debug.json 
-Zbuild-std=core,compiler_builtins,alloc -Zbuild-std-features=compiler-builtins-mem --features=trace-messages +MAGIC_SECTION_OFFSET=0x141000000 +RELOC_SECTION_OFFSET=0x141001000 emu: cargo build $(CARGOFLAGS) --profile $(PROFILE) cargo clippy $(CARGOFLAGS) --profile $(PROFILE) @@ -32,11 +32,13 @@ ERL_OUT=build/ebin ebin: $(ERL_SOURCES) mkdir -p $(ERL_OUT) erlc $(ERLC_FLAGS) -o $(ERL_OUT) $(ERL_SOURCES) + src/etfify base/base.app.src $(ERL_OUT)/base.app # BOSS Base Image, a collection of base BEAM files bosbaima: ebin date > build/date - tar cf build/bosbaima.tar build/ebin/ build/date + cp base/emu.cfg build/emu.cfg + tar cf build/bosbaima.tar -C build/ ebin/ date emu.cfg esp: emu bosbaima mkdir -p build/esp/{EFI/BOOT,BOSS} diff --git a/README.md b/README.md index 1451c71..b212619 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # BOSS -BOSS (BEAM-based Operating System with Security) is a **proof-of-concept** -operating system heavily inspired by the BEAM virtual machine and Erlang/OTP. -Its distinctive features include: +BOSS (BEAM-based Operating System with Security) is a proof-of-concept operating +system written in Erlang together with Rust. Its distinctive features include: - not having a clear kernel-userspace divide; - capability-based security from the ground up. @@ -11,11 +10,19 @@ Its distinctive features include: - implement the dumbest fucking idea that came to my head at 3 in the morning. This project is purely for my education and enjoyment. I am not claiming that my -code is safe, fast, secure nor correct. In fact, it is FULL of over-engineering -and otherwise "things that could have been done easier", but I'm going to learn -Rust faster the way that this project is now. +code is safe, fast, secure nor correct. + +### Urrr-lang? +Yes. Erlang is a very cool functional (at a low level) / object-oriented (at a +high level) concurrent language. Erlang makes it easy to write code that scales +almost effortlessly across multiple cores, processors or even machines. Existing +implementations of Erlang (most notably, the official BEAM virtual machine) are +very performant in both single-core and multi-core operations. This +implementation is not. Like, not at all. But it works and it's a starting point! ## Structure + +### Emulator The basis of this project is the **emulator**, a multiprocessing-aware Erlang VM that runs on bare hardware. It only implements the bare minimum it has to implement in order to reach this objective; for example, it does parse some ACPI @@ -26,13 +33,120 @@ from approximately 2010 onwards. It could be argued that the emulator is a microkernel, since it implements the things a uK would (scheduling and IPC), but it's my project and I prefer not to name it that. -On startup, the emulator loads the **base image** (`BOSBAIMA.TAR`) that contains -initial emulator configuration and just enough BEAM modules to bootstrap a -functional OS. +Even though the emulator is _a_ BEAM, it's not _the_ BEAM. It is compatible with +Erlang's standard binary module format and its bytecode instructions, but that's +about where the similarities end. This incompatibility is a conscious design +decision that was made to introduce isolation and capability-based security into +the BEAM. Here's an incomplete list of the differences: + - In OTP, an application is a code-level abstraction; it does not exist in the + virtual machine. 
In OTP, only one version of an application may be loaded at + the same time, and only one instance of that application may be running at + the same time. In BOSS, applications (but not app instances) exist at the + emulator level. + - In OTP, modules are global. In BOSS, modules are namespaced to a particular + application, and isolation between them is enforced by the emulator. + External calls to modules in other applications are termed "far calls"; they are + not always allowed and demand a special module name: + ```erl + % this calls the function `baz' from the module `bar' in the application `foo' + % this will only be allowed if `foo' "exports" the module `bar' + 'foo:bar':baz(). % far call + % this calls the function `baz' from the module `bar' in the current app + bar:baz(). % external call + % this calls the function `baz' from the current module in the current app + baz(). % local call + ``` + - In OTP, messages are sent to ports and processes as-is. In BOSS, the + sender's id (whatever it is, a pid or a port) is transparently prepended to + the message: + ```erl + Self = self(), + Self ! hello, + receive + {Self, hello} -> yay; + hello -> nay + end. + ``` + - In OTP, every module and every function is trusted to do anything. In BOSS, + every at least slightly privileged operation requires an access token, thus + breaking compatibility with many BIFs. Access tokens cannot be forged, but + can be shared and subdivided into a finer-grained permission set. + +The current implementation is not the fastest, but it's a starting point to get +things working. +### Base image +On startup, the emulator loads the **BOSS base image** (`BOSBAIMA.TAR`) that +contains initial emulator configuration and just enough BEAM modules to +bootstrap a functional OS. It's akin to OTP's `kernel` app. + +### Everything else As was previously said, there is no clear kernel-userspace divide; instead, BOSS -goes for an Erlang/OTP-like supervision tree model. +goes for the supervision tree model that forms the basis of Erlang/OTP. + +## Checklist +Emulator: + - [x] Hello, World! + - [x] Logging + - [x] Physical memory management + - [x] Virtual memory management + - [x] Relocation + - [x] Interrupt handling + - [x] Heap + - [x] BEAM bytecode parsing + - [x] BEAM code execution + - [x] Basic ports + - [ ] Code cleanup before public release + - [ ] SMP + - [ ] Advanced ports + - [ ] Performance enhancements + - [ ] Secure NIFs in ring 3 + - [ ] Compatibility sandbox for OTP applications + +Base image: + - [x] Hello, World! + - [ ] Standard library + - [ ] Basic drivers: + - [ ] UEFI GOP + - [ ] PS/2 + - [ ] AHCI + - [ ] FAT32 + - [ ] Logging + - [ ] Code management + - [ ] Application support + +"Userland": + - [ ] Hello, World! + - [ ] A GUI, probably? + - [ ] Other things + +## I wanna run it!!!! +I see that I'm not the only one fueled by bad decisions. Anyways, you will need: + - Git + - A nightly Rust toolchain. I don't know the MSRV yet; your best bet is to + just get the latest version. + - Make + - Erlang + - QEMU + +Run this project in QEMU with: +```shell +$ git clone https://github.com/portasynthinca3/boss +$ cd boss +$ make qemu +``` + +Currently, the OS does not display anything on the screen. Refer to serial +output that is relayed to the terminal instead. + +### No, I wanna run it on bare hardware +Please don't. Run `make` and make a FAT32 disk image from `boot/esp` yourself. +Note, again, that the OS currently only outputs to the serial port, thus you +won't be able to see anything on the screen. 
+ +### I need an ISO +The OS is not anywhere near the state where I'm comfortable with releasing a +ready-to-run ISO to the general public. ## Credits -A huge thank you to @thecaralice and @polina4096 for helping me with the goal of -understanding Rust. +A huge thank you to @thecaralice and @polina4096 for helping me understand Rust. diff --git a/base/base.app.src b/base/base.app.src new file mode 100644 index 0000000..8e15358 --- /dev/null +++ b/base/base.app.src @@ -0,0 +1,4 @@ +{application, base, + [{description, "Modules needed to bootstrap a functional BOSS system"}, + {vsn, "0.1"}, + {modules, [main]}]}. diff --git a/base/emu.cfg b/base/emu.cfg new file mode 100644 index 0000000..930a839 --- /dev/null +++ b/base/emu.cfg @@ -0,0 +1 @@ +# hi! whatcha lookin' at? diff --git a/base/main.erl b/base/main.erl index d806b68..4811942 100644 --- a/base/main.erl +++ b/base/main.erl @@ -1,15 +1,36 @@ %%% This is the BEAM entry point. So far, BOSS (its Rust part) has: -%%% - set up a JIT-ed BEAM-like VM; +%%% - set up a BEAM-like VM; %%% - loaded base OS modules (incl. this one); %%% - spawned several ports for low-level access to the outside world; -%%% - spawned a process running main:main/1. +%%% - spawned a process running 'base:main':main/2. %%% We truly are in a barebones environment, huh. We don't even have access to %%% the filesystem - it must be implemented by us. In Erlang. %%% Sometimes i really do have the dumbest ideas come to me. -module(main). --vsn(1). --export([main/1]). +-export([main/2]). -main(_Ports) -> - ok. +main(Config, Ports) -> + % assert platform, get log port + #{platform := 'x86_64-uefi'} = Config, + #{log_port := LogPort} = Ports, + + % acquire access token (this can only be done once) + % capability-based security from the ground up! + ConversationId = erlang:make_ref(), + LogPort ! {ConversationId, mint_token, [], []}, + % notice how we don't send our pid explicitly: the emulator does that for us + AccessToken = receive + {LogPort, {ConversationId, {ok, Token}}} -> Token + end, + + % finally write hello world + % a new conversation id may be generated, but that's not required here + LogPort ! {ConversationId, write, AccessToken, [<<"Hello, World!">>]}, + receive + {LogPort, {ConversationId, ok}} -> ok; + {LogPort, {ConversationId, {error, Err}}} -> + % welp! + % i dunno, let's raise a badmatch exception to crash the emulator + ok = {error, Err} + end. 
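The hello-world in `main.erl` above follows one request/reply convention throughout: mint a fresh reference as a conversation id, send `{ConversationId, Request, Token, Args}` to the port, and wait for `{Port, {ConversationId, Reply}}` (the sender's id is prepended by the emulator). A minimal sketch of that pattern factored into a helper, not part of this patch; the module and function names are made up for illustration:

```erl
%% Hypothetical helper (illustration only, not in the patch): wraps the
%% conversation-id request/reply exchange that main:main/2 has with LogPort.
-module(port_rpc).
-export([call/4]).

%% Send `{ConversationId, Request, Token, Args}' to Port and wait for the
%% matching reply. The emulator prepends the sender's id, so the reply
%% arrives as `{Port, {ConversationId, Reply}}'.
call(Port, Request, Token, Args) ->
    ConversationId = erlang:make_ref(),
    Port ! {ConversationId, Request, Token, Args},
    receive
        {Port, {ConversationId, Reply}} -> Reply
    end.
```

With such a helper, the body of `main/2` reduces to `{ok, Token} = port_rpc:call(LogPort, mint_token, [], [])` followed by `ok = port_rpc:call(LogPort, write, Token, [<<"Hello, World!">>])`.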
diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..356c575 --- /dev/null +++ b/build.rs @@ -0,0 +1,63 @@ +use std::{env, path::Path, fs}; +use regex::Regex; + +const ENUM_START: &str = "/// BEAM \"generic opcodes\" generated at compile time +#[derive(Clone, Copy, PartialEq, Eq, FromRepr, Debug)] +pub enum Opcode { +"; +const IMPL_START: &str = "impl Opcode { + pub const fn arity(self) -> usize { + match self { +"; + +fn snake_to_pascal(snake: &str) -> String { + snake.split('_') + .map(|p| { + let (first, rest) = p.split_at(1); + first.to_uppercase() + rest + }) + .collect::<Vec<_>>() + .join("") } + +fn main() { + // generate genop.rs from genop.tab at compile time + let out_dir = env::var_os("OUT_DIR").unwrap(); + let dest_path = Path::new(&out_dir).join("genop.rs"); + + // parse input file + let insn_regex = Regex::new(r"^([0-9]+):\s*([a-z0-9_]+)/([0-9]+)").unwrap(); + let source = fs::read_to_string(Path::new("src/vm/genop.tab")).unwrap(); + let specifications: Vec<[&str; 3]> = source + .split('\n') + .filter(|l| !l.starts_with('#')) + .map(|l| insn_regex.captures_iter(l).map(|c| c.extract()).next()) + .filter_map(|opt| opt.map(|t| t.1)) + .collect(); + + let mut enum_str = ENUM_START.to_owned(); + let mut impl_str = IMPL_START.to_owned(); + + // add enum variants and impl match arms + for [opcode, name, arity] in specifications { + let opcode = opcode.parse::<usize>().unwrap(); + let name = snake_to_pascal(name); + let arity = arity.parse::<usize>().unwrap(); + enum_str.push_str(format!(" {name} = {opcode},\n").as_str()); + impl_str.push_str(format!(" Self::{name} => {arity},\n").as_str()); + } + + enum_str.push_str("}"); + impl_str.push_str(" }\n }\n}"); + + fs::write(dest_path, format!( +"use strum_macros::FromRepr; + +{enum_str} + +{impl_str} +")).unwrap(); + + println!("cargo::rerun-if-changed=src/vm/genop.tab"); + println!("cargo::rerun-if-changed=build.rs"); +} diff --git a/src/bosbaima.rs b/src/bosbaima.rs index 69db1e9..89bb373 100644 --- a/src/bosbaima.rs +++ b/src/bosbaima.rs @@ -21,12 +21,13 @@ use uefi::{ prelude::*, table::boot::MemoryType, } }, }, }; -use crate::util::byte_size::ByteSize; +use spin::Once; + +use crate::{reloc::Relocatable, util::{byte_size::ByteSize, tar::TarFile}}; use crate::{checkpoint, checkpoint::Checkpoint}; -use spin::RwLock; -/// Holds the binary image -static BINARY: RwLock<Option<&'static [u8]>> = RwLock::new(None); +/// Holds the base image +static IMAGE: Once<TarFile<'static>> = Once::new(); /// Loads the BBI into memory and returns a static slice. 
/// @@ -71,14 +72,20 @@ pub fn load(executable_handle: Handle, system_table: &SystemTable) -> uefi let bbi_size = bbi_info.file_size() as usize; log::info!("base image size: {}", ByteSize(bbi_size)); - // allocate memory for file - let file_ptr = system_table.boot_services().allocate_pool(MemoryType::LOADER_DATA, bbi_size)?; + // allocate memory for and read file + let mut file_ptr = system_table.boot_services().allocate_pool(MemoryType::LOADER_DATA, bbi_size)?; let file_buf = unsafe { core::slice::from_raw_parts_mut(file_ptr, bbi_size) }; - - // read file bosbaima.read(file_buf)?; - *BINARY.write() = Some(file_buf); + // relocate and assign file buffer + file_ptr.relocate(); + let file_buf = unsafe { core::slice::from_raw_parts(file_ptr, bbi_size) }; + IMAGE.call_once(|| TarFile::new(file_buf)); + checkpoint::advance(Checkpoint::BaseImageLoaded).unwrap(); Ok(()) } + +pub fn get() -> &'static TarFile<'static> { + IMAGE.get().unwrap() +} diff --git a/src/checkpoint.rs b/src/checkpoint.rs index 0825b6a..d170caf 100644 --- a/src/checkpoint.rs +++ b/src/checkpoint.rs @@ -25,12 +25,10 @@ pub enum Checkpoint { /// Heap allocation available Heap, /// All CPUs online - SmpOnline, + // SmpOnline, /// BEAM structures initialized BeamInitd, - /// Initial argument constructed - BeamInitArg, - /// Created process running `main:main/1` from the base image + /// Created process running `main:main/2` from the base image Running, } diff --git a/src/etfify b/src/etfify new file mode 100755 index 0000000..b32edb0 --- /dev/null +++ b/src/etfify @@ -0,0 +1,21 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%% Converts a sequence of Erlang terms in a file into binary ETF (External Term +%% Format). + +main([Input, Output]) when is_list(Input) and is_list(Output) -> + {ok, Terms} = file:consult(Input), + {ok, OutFile} = file:open(Output, [write]), + Etf = [term_to_binary(Term) || Term <- Terms], + save(OutFile, Etf); + +main(_) -> + io:format("usage: etfify "), + halt(1). + +save(Out, [Etf | Others]) -> + ok = file:write(Out, Etf), + save(Out, Others); + +save(Out, []) -> + ok = file:close(Out). diff --git a/src/hal/io_port.rs b/src/hal/io_port.rs index 1f0ee05..c2d4a4f 100644 --- a/src/hal/io_port.rs +++ b/src/hal/io_port.rs @@ -60,15 +60,24 @@ pub struct Port { } impl Port { - pub fn new(number: u16) -> Port { + /// Instantiates a [Port] + /// + /// # Safety + /// Care must be taken not to instantiate multiple instances of [Port] that + /// use the same I/O port, otherwise race conditions may occur. + pub unsafe fn new(number: u16) -> Port { Port { number, phantom: PhantomData } } pub fn read(&self) -> T { + // SAFETY: accessing an I/O port is memory safe, but may lead to race + // conditions. It is the responsibility of the API user to make sure + // that this doesn't happen. 
See [Port::new] unsafe { T::read(self.number) } } pub fn write(&self, value: T) { + // SAFETY: see [Port::read] unsafe { T::write(self.number, value) } } } diff --git a/src/hal/mod.rs b/src/hal/mod.rs index c282e0b..024b474 100644 --- a/src/hal/mod.rs +++ b/src/hal/mod.rs @@ -1,2 +1,3 @@ pub mod io_port; pub mod serial; +// pub mod wall_clock; diff --git a/src/hal/serial.rs b/src/hal/serial.rs index dd50c39..d71733d 100644 --- a/src/hal/serial.rs +++ b/src/hal/serial.rs @@ -37,7 +37,7 @@ impl SerialPort { pub fn new(number: usize) -> SerialPort { // create array of IO ports let base = PORT_BASES[number]; - let io_ports: [Port; 8] = core::array::from_fn(|i| Port::new(base + i as u16)); + let io_ports: [Port; 8] = core::array::from_fn(|i| unsafe { Port::new(base + i as u16) }); // create port and apply default configuration let mut port = SerialPort { io_ports }; diff --git a/src/hal/wall_clock.rs b/src/hal/wall_clock.rs new file mode 100644 index 0000000..a8fdb1c --- /dev/null +++ b/src/hal/wall_clock.rs @@ -0,0 +1,48 @@ +//! Reads nanoseconds since boot. +//! +//! Calibrates the Time-Stamp Counter (Volume 3B, Section 18.17 of the Intel +//! SDM) using the Programmable Interval Timer (Intel 8254 datasheet). + +use core::arch::asm; + +use super::io_port::Port; + +/// How many ticks there are in a microsecond +static mut TICKS_IN_US: Option = None; + +/// Reads the time stamp counter without any serialization +fn rdtsc() -> u64 { + let rax: u64; + let rdx: u64; + // SAFETY: the RDTSC instruction cannot cause any memory errors + unsafe { asm!("rdtsc", out("rax") rax, out("rdx") rdx); } + (rax & 0xffff_ffff) | (rdx << 32) +} + +/// Reads the time stamp counter with serializing instructions in place, meaning +/// that all loads and stores that happen before and after this function is +/// called will be globally visible. +fn rdtsc_serializing() -> u64 { + let rax: u64; + let rdx: u64; + // SAFETY: the MFENCE, LFENCE and RDTSC instructions cannot cause any memory + // errors + unsafe { + asm!( + "mfence", "lfence", + "rdtsc", + "lfence", + out("rax") rax, out("rdx") rdx + ); + } + (rax & 0xffff_ffff) | (rdx << 32) +} + +pub fn calibrate() { + // SAFETY: this code may cause race conditions. However, it is assumed that + // the containing codebase does not use the keyboard controller and the PIT + // while this function + let kbd_port_b = unsafe { Port::new(0x61) }; + let pit_mc = unsafe { Port::new(0x43) }; + let pit_ch2 = unsafe { Port::new(0x42) }; +} diff --git a/src/interrupt.rs b/src/interrupt.rs index b23aa79..f8b6177 100644 --- a/src/interrupt.rs +++ b/src/interrupt.rs @@ -567,7 +567,7 @@ impl Manager { /// Sets the function that will be called when an interrupt occurs. This /// function is provided with a mutable reference to the state that the CPU - /// was is when it was interrupted. The handler is free to modify it; any + /// was in when it was interrupted. The handler is free to modify it; any /// changes that it makes will be applied once it returns. /// /// If the vector is for an internal interrupt (i.e. an exception), then the diff --git a/src/ll/mod.rs b/src/ll/mod.rs new file mode 100644 index 0000000..631df30 --- /dev/null +++ b/src/ll/mod.rs @@ -0,0 +1 @@ +pub mod msr; diff --git a/src/ll/msr.rs b/src/ll/msr.rs new file mode 100644 index 0000000..c1263c5 --- /dev/null +++ b/src/ll/msr.rs @@ -0,0 +1,58 @@ +//! Model-Specific Registers +//! +//! These registers are mostly used for CPU configuration and thus are +//! inherently unsafe. 
+ +use core::arch::asm; +use num_enum::IntoPrimitive; + +/// Trait for convenience enums which represent MSRs +pub trait Msr: Sized + Into<u32> { + /// Writes a 64-bit value into an MSR + /// # Safety + /// Safety depends on what register is being written to. Some registers are + /// harmless and will not cause undefined behavior when accessed (such as + /// `IA32_TSC_ADJUST`), others fundamentally change the configuration of the + /// CPU and thus are very hazardous (e.g. `IA32_EFER`), and others still + /// BOSS uses to store its state (e.g. `IA32_GS_BASE`). + unsafe fn write(self, value: u64) { + let address: u32 = self.into(); + // SAFETY: read contract in docstring + asm!( + "wrmsr", + in("ecx") address, + in("eax") value & 0xffff_ffff, + in("edx") (value >> 32) & 0xffff_ffff, + ); + } + + /// Reads a 64-bit value from an MSR + fn read(self) -> u64 { + let address: u32 = self.into(); + let value_lo: u64; + let value_hi: u64; + // SAFETY: reading an MSR does not cause any side effects and thus + // cannot cause memory safety errors + unsafe { + asm!( + "rdmsr", + in("ecx") address, + out("rax") value_lo, + out("rdx") value_hi, + ); + } + (value_hi << 32) | value_lo + } +} + +/// The so-called "architectural" "model-specific" registers. They're not really +/// model-specific, they're present on every CPU that this code can execute on. +#[repr(u32)] +#[derive(Clone, Copy, PartialEq, IntoPrimitive)] +pub enum IA32Msr { + /// `IA32_FS_BASE`: FS segment base address + FsBase = 0xC000_0100, + /// `IA32_GS_BASE`: GS segment base address + GsBase = 0xC000_0101, +} +impl Msr for IA32Msr { } diff --git a/src/main.rs b/src/main.rs index d636742..1ff14cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,10 @@ slice_ptr_get, non_null_convenience, alloc_layout_extra, + generic_arg_infer, + generic_const_exprs, + never_type, + slice_as_chunks, )] #![allow(dead_code)] @@ -21,9 +25,8 @@ use core::{arch::asm, panic::PanicInfo}; use uefi::{ prelude::*, table::boot::MemoryType, - proto::loaded_image::LoadedImage, + proto::loaded_image::LoadedImage, }; -use alloc::{boxed::Box, string::String, vec}; use hal::serial::SerialLogger; use mem_manager::*; @@ -36,6 +39,8 @@ mod bosbaima; mod util; mod interrupt; mod segment; +mod vm; +mod ll; #[cfg(not(test))] #[panic_handler] @@ -107,16 +112,20 @@ extern "C" fn after_reloc(_data: &()) -> ! 
{ checkpoint::advance(Checkpoint::Interrupts).unwrap(); // unmap lower half - { - let mut guard = addr_space.modify(); - guard.unmap_range(VirtAddr::from_usize(0)..=VirtAddr::from_usize(0x0000_7fff_ffff_ffff)).unwrap(); - } + addr_space.modify().unmap_range(VirtAddr::from_usize(0)..=VirtAddr::from_usize(0x0000_7fff_ffff_ffff)).unwrap(); checkpoint::advance(Checkpoint::RelocDone).unwrap(); // initialize global allocator unsafe { malloc::initialize_default(addr_space) }; checkpoint::advance(Checkpoint::Heap).unwrap(); + // start the VM + vm::init(bosbaima::get()).unwrap(); + // unsafe { vm::executor::Pls::init() }; + // let date = core::str::from_utf8(bosbaima::get().read_file("date").unwrap()).unwrap().trim(); + // log::info!("base image built on {date}"); + // let main_mod = vm::module::Module::new(bosbaima::get().read_file("ebin/main.beam").unwrap()).unwrap(); + loop { unsafe { asm!("hlt"); } } diff --git a/src/mem_manager/malloc.rs b/src/mem_manager/malloc.rs index 8e08c44..4953e4f 100644 --- a/src/mem_manager/malloc.rs +++ b/src/mem_manager/malloc.rs @@ -10,52 +10,76 @@ use super::{MemMgrError, virt::AddressSpace, VirtAddr, phys, PAGE_SIZE}; pub type Result = core::result::Result; -struct Block { +// Two kinds of padding are referred to in this module: "pre" and "post". +// "Pre" padding is inserted between the block header and the allocated data. +// "Post" padding is inserted between the allocated data and the next block. +// Here's an illustration: +// +// -----+----------+-------------+----------------+--------------+----------+---- +// ... | BlockHdr | pre_padding | allocated data | post_padding | BlockHdr | ... +// -----+----------+-------------+----------------+--------------+----------+---- + +/// Block header. Present at the start of both free and non-free blocks. +struct BlockHdr { used: bool, + /// Total block size, including this header and all paddings size: usize, - next: Option<&'static mut Block>, + next: Option<&'static mut BlockHdr>, } -impl Block { +impl BlockHdr { + /// [Layout] of [BlockHdr] fn layout() -> Layout { Layout::from_size_align(size_of::(), align_of::()).unwrap() } + /// Gets the virtual address of a block header + fn addr(&self) -> VirtAddr { + self.into() + } + + /// Allocates a specific layout, possibly inserting a new free block after + /// the one this method was called on. 
fn allocate_possibly_split(&mut self, layout: Layout) -> Option<NonNull<[u8]>> { - let padding = Self::layout().padding_needed_for(layout.align()); - let total_layout = Layout::from_size_align(Self::layout().size() + padding + layout.size(), Self::layout().align()).unwrap().pad_to_align(); - if self.used || self.size < total_layout.size() { + let pre_padding = Self::layout().padding_needed_for(layout.align()); + // includes all sizes and paddings + let block = Layout::from_size_align(Self::layout().size() + pre_padding + layout.size(), Self::layout().align()).unwrap().pad_to_align(); + if self.used || self.size < block.size() { return None; } self.used = true; // get address of data to return let blk_addr = self as *mut _ as usize; - let data_addr = blk_addr + Self::layout().size() + padding; + let data_addr = blk_addr + Self::layout().size() + pre_padding; let data = ptr::slice_from_raw_parts_mut(data_addr as *mut u8, layout.size()); - // log::trace!("{total_layout:?}"); - if self.size >= total_layout.size() { - // insert new free block + // insert new free block if there's space + let block_and_next_hdr = Layout::from_size_align(block.size() + Self::layout().size(), Self::layout().align()).unwrap().pad_to_align(); + if self.size > block_and_next_hdr.size() { let next = self.next.take(); let new_blk = unsafe { - let new_blk = (blk_addr as *mut Block).byte_add(total_layout.size()); - *new_blk = Block { + // SAFETY: the value is properly aligned thanks to the various + // paddings that we added + let new_blk = (blk_addr as *mut BlockHdr).byte_add(block.size()); + // SAFETY: valid for writes, properly aligned + *new_blk = BlockHdr { used: false, next, - size: self.size - total_layout.size() - Self::layout().size(), + size: self.size - block.size(), }; + // SAFETY: just initialized the value new_blk.as_uninit_mut().unwrap().assume_init_mut() }; self.next = Some(new_blk); - self.size = total_layout.size(); + self.size = block.size(); } NonNull::new(data) } } -impl core::fmt::Debug for Block { +impl core::fmt::Debug for BlockHdr { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let addr: VirtAddr = self.into(); f.debug_struct("Block") @@ -67,38 +91,36 @@ } pub struct LinkedListAllocator { - first_block: Mutex<Block>, - address_space: Mutex<AddressSpace>, // FIXME: we shouldn't be able to control the entire address space + first_block: Mutex<BlockHdr>, + address_space: Mutex<AddressSpace>, // FIXME: design: we shouldn't be able to control the entire address space } impl LinkedListAllocator { /// Creates a new allocator. /// /// # Safety - /// The function is unsafe in case there are other allocators managing the - /// memory region defined by `bottom` and `max_size`. It is also unsafe if - /// there are any active references to that memory region, or, more - /// generally, any other active objects. + /// The function is unsafe in case there are other objects at any address + /// above `bottom`. 
pub unsafe fn new<'a>(bottom: VirtAddr, mut address_space: AddressSpace) -> Result { - // place first block + // place first block header let pages = phys::allocate(1); if pages.len() < 1 { return Err(MemMgrError::OutOfMemory); } let first_block: VirtAddr = bottom; address_space.modify().map_range(first_block, pages[0], 1, Default::default(), false).unwrap(); - let first_block = first_block.0 as *mut Block; - *first_block = Block { + let first_block = first_block.0 as *mut BlockHdr; + *first_block = BlockHdr { used: false, next: None, - size: PAGE_SIZE - size_of::(), + size: PAGE_SIZE, }; let first_block = first_block.as_uninit_mut().unwrap().assume_init_mut(); - let top = VirtAddr(bottom.0 + PAGE_SIZE); + #[cfg(feature = "trace-malloc")] log::trace!("new LinkedListAllocator: bottom={bottom:?}, address_space={address_space:?}"); // return allocator Ok(LinkedListAllocator { - first_block: Mutex::new(Block { used: true, size: 0, next: Some(first_block) }), + first_block: Mutex::new(BlockHdr { used: true, size: 0, next: Some(first_block) }), address_space: Mutex::new(address_space), }) } @@ -106,25 +128,38 @@ impl LinkedListAllocator { impl core::fmt::Debug for LinkedListAllocator { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "LinkedListAllocator")?; + writeln!(f, "LinkedListAllocator:")?; let guard = self.first_block.lock(); let mut previous = &*guard; - let mut used = ByteSize(0); - let mut capacity = ByteSize(0); - let mut list = f.debug_list(); + let mut display_used = ByteSize(0); + let mut display_capacity = ByteSize(0); while let Some(ref current) = previous.next { - list.entry(current); - let block_size = ByteSize(current.size + size_of::()); - capacity += block_size; - if current.used { used += block_size }; + let display_size = ByteSize(current.size); + let block_addr = current.addr(); + write!(f, "{display_size} @ {block_addr:?} ")?; + for _ in 0 .. BlockHdr::layout().size() / 8 { + write!(f, "h")?; + } + for _ in 0 .. 
(current.size / 8).min(64) { + if current.used { + write!(f, "x")?; + } else { + write!(f, "-")?; + } + } + if current.size > 64 * 8 { + write!(f, "...")?; + } + writeln!(f, "")?; + display_capacity += display_size; + if current.used { display_used += display_size }; previous = previous.next.as_ref().unwrap(); } - list.finish()?; - f.debug_struct("") - .field("used", &used) - .field("capacity", &capacity) + f.debug_struct("Stats") + .field("used", &display_used) + .field("capacity", &display_capacity) .finish() } } @@ -136,39 +171,97 @@ unsafe impl Allocator for LinkedListAllocator { let mut previous = &mut *guard; while let Some(ref mut current) = previous.next { if let Some(ptr) = current.allocate_possibly_split(layout) { - log::trace!("allocate({layout:?}) = {ptr:?}"); + #[cfg(feature = "trace-malloc")] + { + drop(guard); + log::trace!("allocate({layout:?}) = {ptr:?}"); + log::trace!("{self:#?}"); + } return Ok(ptr); } else { + // no sufficiently big free blocks found previous = previous.next.as_mut().unwrap(); } } - // no sufficiently big free blocks found + // at this point we need to either extend the last block (if it's free) + // or allocate a new one + + // start of extension + let new_start = unsafe { (previous as *mut BlockHdr).byte_add(previous.size) }; + let new_start: VirtAddr = new_start.into(); + assert!(new_start.0 % PAGE_SIZE == 0, "last block not ended on page boundary"); + const { assert!(align_of::() <= PAGE_SIZE); } + + let pre_padding = BlockHdr::layout().padding_needed_for(layout.align()); + let post_padding = layout.padding_needed_for(BlockHdr::layout().align()); + let previous = if previous.used { // last block is used: need to allocate a new one - todo!(); + let mut page_cnt = (BlockHdr::layout().size() + pre_padding + layout.size()).div_ceil(PAGE_SIZE); + #[cfg(feature = "trace-malloc")] + log::trace!("adding new free block of {} at {previous:?} {new_start:?}", ByteSize(page_cnt * PAGE_SIZE)); + + // the physical allocator may be unable to handle all of the pages once + // TODO: AddressSpaceGuard::allocate_range or sumn + let mut v_addr = new_start; + while page_cnt > 0 { + let mut guard = self.address_space.lock(); + let mut modifier = guard.modify(); + let pages = phys::allocate(page_cnt); + for p_addr in pages.iter() { + modifier.map_range(v_addr, p_addr, 1, Default::default(), false).unwrap(); + #[cfg(feature = "trace-malloc")] + log::trace!("map {v_addr:?} -> {p_addr:?}"); + page_cnt -= 1; + v_addr = VirtAddr(v_addr.0 + PAGE_SIZE); + } + if pages.len() == 0 { + #[cfg(feature = "trace-malloc")] + log::trace!("allocate({layout:?}) = no memory!"); + return Err(AllocError) + } + } + + // place block header + let blk_ptr: *mut BlockHdr = new_start.into(); + let blk_ref = unsafe { + // SAFETY: valid for write, aligned + *blk_ptr = BlockHdr { + used: false, + size: v_addr - new_start, + next: None, + }; + // SAFETY: just initialized the value + blk_ptr.as_uninit_mut().unwrap().assume_init_mut() + }; + + previous.next = Some(blk_ref); + (&mut previous).next.as_mut().unwrap() } else { // last block is free but small: need to extend it - let padding = Block::layout().padding_needed_for(layout.align()); - let target_size = layout.size() + padding; + let target_size = pre_padding + layout.size() + post_padding; let increase_by = target_size - previous.size; let mut increase_by_pages = increase_by.div_ceil(PAGE_SIZE); - let mut extension_start: VirtAddr = previous.into(); - extension_start = VirtAddr(extension_start.0 + PAGE_SIZE - (extension_start.0 % PAGE_SIZE)); + 
#[cfg(feature = "trace-malloc")] log::trace!("extending last block by {} pages to {}", increase_by_pages, ByteSize(target_size)); // the physical allocator may be unable to handle all of them at once + let mut v_addr = new_start; while increase_by_pages > 0 { let mut guard = self.address_space.lock(); let mut modifier = guard.modify(); let pages = phys::allocate(increase_by_pages); for p_addr in pages.iter() { - modifier.map_range(extension_start, p_addr, 1, Default::default(), false).unwrap(); + modifier.map_range(v_addr, p_addr, 1, Default::default(), false).unwrap(); + #[cfg(feature = "trace-malloc")] + log::trace!("map {v_addr:?} -> {p_addr:?}"); increase_by_pages -= 1; previous.size += PAGE_SIZE; - extension_start = VirtAddr(extension_start.0 + PAGE_SIZE); + v_addr = VirtAddr(v_addr.0 + PAGE_SIZE); } if pages.len() == 0 { + #[cfg(feature = "trace-malloc")] log::trace!("allocate({layout:?}) = no memory!"); return Err(AllocError) } @@ -177,18 +270,23 @@ unsafe impl Allocator for LinkedListAllocator { previous }; - // try allocating again - Ok(previous.allocate_possibly_split(layout).unwrap()) + // try allocating again now that there's definitely enough space + let result = previous.allocate_possibly_split(layout); + #[cfg(feature = "trace-malloc")] + { + drop(guard); + log::trace!("{self:#?}"); + } + Ok(result.unwrap()) } unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + #[cfg(feature = "trace-malloc")] log::trace!("deallocate({ptr:?}, {layout:?})"); // obtain block reference - let blk_layout = Block::layout(); + let blk_layout = BlockHdr::layout(); let down_offs = (layout.align() as isize - blk_layout.align() as isize).max(blk_layout.size() as isize); - // log::trace!("{ptr:?} {down_offs}"); - let block = ptr.byte_sub(down_offs as usize).cast::().as_uninit_mut().assume_init_mut(); - // log::trace!("{block:?}"); + let block = ptr.byte_sub(down_offs as usize).cast::().as_uninit_mut().assume_init_mut(); // free block block.used = false; @@ -203,6 +301,9 @@ unsafe impl Allocator for LinkedListAllocator { } }, } + + #[cfg(feature = "trace-malloc")] + log::trace!("{self:#?}"); } } @@ -235,8 +336,3 @@ pub unsafe fn initialize_default(addr_space: AddressSpace) { .expect("failed to create global allocator"); ALLOCATOR = LateAllocator(Some(allocator)); } - -pub fn dump_default() { - let allocator = unsafe { &ALLOCATOR }.0.as_ref().unwrap(); - log::trace!("{allocator:#?}"); -} diff --git a/src/mem_manager/mod.rs b/src/mem_manager/mod.rs index b4a4897..51d6336 100644 --- a/src/mem_manager/mod.rs +++ b/src/mem_manager/mod.rs @@ -1,11 +1,11 @@ use core::{fmt::{self, Debug, Formatter}, ops::Sub}; use derive_more::{Add, Sub, AddAssign, SubAssign, Into}; -use crate::checkpoint::{self, Checkpoint}; pub mod phys; pub mod virt; pub mod reloc; pub mod malloc; +pub mod pls; // The following constants outline the memory map: // - `0` - `0xffff_8000_0000_0000`: NIF memory, identity mapped physical diff --git a/src/mem_manager/phys.rs b/src/mem_manager/phys.rs index 28e0072..880c9b8 100644 --- a/src/mem_manager/phys.rs +++ b/src/mem_manager/phys.rs @@ -90,7 +90,7 @@ impl PageRangeHeader { let header_ptr = desc.phys_start as *mut PageRangeHeader; unsafe { *header_ptr = header; - &mut *header_ptr + header_ptr.as_uninit_mut().unwrap().assume_init_mut() } } @@ -250,6 +250,10 @@ pub fn init(mem_map: &MemoryMap) { let size = entry.page_count as usize * PAGE_SIZE; log::debug!("uefi: {:#018x} to {:#018x} {:?}", entry.phys_start, entry.phys_start as usize + size, entry.ty); + + // skip ranges starting at 
address zero + // (i know, i know :c) + if entry.phys_start == 0 { continue; } // check whether the range is usable match entry.ty { diff --git a/src/mem_manager/pls.rs b/src/mem_manager/pls.rs new file mode 100644 index 0000000..c26d25c --- /dev/null +++ b/src/mem_manager/pls.rs @@ -0,0 +1,39 @@ +//! Processor-local storage +//! +//! Like many other kernels, the emulator stores a pointer to the per-processor +//! structure in the `IA32_GS_BASE` MSR. Other kernels also typically use the GS +//! segment register override prefix to fetch data from the struct located at +//! the address contained in the MSR, but that is not done here. + +use alloc::boxed::Box; +use crate::ll::msr::{IA32Msr, Msr}; + +/// Processor-local storage +pub struct Pls { + +} + +impl Pls { + /// Initializes the processor-local storage + /// # Safety + /// This method must be called **exactly once** before [Self::get] is + /// called. Calling [Self::get] before this method is called is unsafe. + /// Calling [Self::init] again after it has already been called is unsafe. + unsafe fn init() { + let pls: &'static Pls = Box::leak(Box::new(Pls { })); + let pointer = pls as *const Pls; + // SAFETY: as long as this register is only being used by us (here and + // in [Self::get]), overwriting this register is safe. + IA32Msr::GsBase.write(pointer as u64); + } + + /// Gets a shared reference to the processor-local storage + fn get() -> &'static Self { + let pointer = IA32Msr::GsBase.read() as *const Pls; + unsafe { + // SAFETY: the pointer that we fetched is valid if [Self::init] + // initialized it. If it didn't, tough luck \(-_-)/ + pointer.as_uninit_ref().unwrap().assume_init_ref() + } + } +} diff --git a/src/mem_manager/virt.rs b/src/mem_manager/virt.rs index 4364ab5..129c001 100644 --- a/src/mem_manager/virt.rs +++ b/src/mem_manager/virt.rs @@ -715,7 +715,8 @@ impl<'guard, 'r> AddressSpaceGuard<'guard, 'r> { /// /// The callback is expected to apply modifications to the table as it /// desires. It must return a boolean that signals whether any changes were - /// made. + /// made. Returning `false` when a change has been made is a logic error + /// that may leave the memory map in an inconsistent state. fn iterate_over_range_mut( &mut self, addresses: RangeInclusive<VirtAddr>, diff --git a/src/util/byte_size.rs b/src/util/byte_size.rs index 787bdb4..be68e9d 100644 --- a/src/util/byte_size.rs +++ b/src/util/byte_size.rs @@ -1,7 +1,7 @@ use core::fmt::{Debug, Display, Formatter, self}; use derive_more::{Add, Sub, AddAssign, SubAssign}; -/// Display-friendly byte size type +/// [Display]-friendly byte size type #[derive(Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] #[derive(Add, Sub, AddAssign, SubAssign)] pub struct ByteSize(pub usize); @@ -9,11 +9,11 @@ pub struct ByteSize(pub usize); impl Display for ByteSize { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { if self.0 >= 1024 * 1024 { - write!(f, "{} MiB", self.0 / 1024 / 1024) + write!(f, "{}MiB", self.0 / 1024 / 1024) } else if self.0 >= 1024 { - write!(f, "{} KiB", self.0 / 1024) + write!(f, "{}KiB", self.0 / 1024) } else { - write!(f, "{} bytes", self.0) + write!(f, "{}B", self.0) } } } diff --git a/src/util/cursor.rs b/src/util/cursor.rs new file mode 100644 index 0000000..b057a4e --- /dev/null +++ b/src/util/cursor.rs @@ -0,0 +1,65 @@ +//! 
Convenience functions for parsing data in immutable u8 slices + +use core::str::{from_utf8, Utf8Error}; + +/// Convenience functions for reading a buffer as a stream +#[derive(Clone)] +pub struct Cursor<'d> { + data: &'d [u8], + pub position: usize, +} + +// Convenience functions for parsing data in immutable u8 slices +impl<'d> Cursor<'d> { + pub fn new(data: &'d [u8]) -> Cursor<'d> { + Cursor { data, position: 0 } + } + + pub fn read_u8(&mut self) -> u8 { + let data = self.data[self.position]; + self.position += 1; + data + } + + pub fn read_u16_be(&mut self) -> u16 { + let slice: [u8; 2] = self.data[self.position .. self.position + 2].try_into().unwrap(); + self.position += 2; + u16::from_be_bytes(slice) + } + + pub fn read_u32_be(&mut self) -> u32 { + let slice: [u8; 4] = self.data[self.position .. self.position + 4].try_into().unwrap(); + self.position += 4; + u32::from_be_bytes(slice) + } + + pub fn read_u64_be(&mut self) -> u64 { + let slice: [u8; 8] = self.data[self.position .. self.position + 8].try_into().unwrap(); + self.position += 8; + u64::from_be_bytes(slice) + } + + pub fn read_slice(&mut self, size: usize) -> &'d [u8] { + let slice = &self.data[self.position .. self.position + size]; + self.position += size; + slice + } + + pub fn read_utf8(&mut self, size: usize) -> Result<&'d str, Utf8Error> { + let slice = &self.data[self.position .. self.position + size]; + self.position += size; + from_utf8(slice) + } + + pub fn skip(&mut self, count: usize) { + self.position += count; + } + + pub fn reached_end(&self) -> bool { + self.position >= self.data.len() + } + + pub fn remaining(&self) -> usize { + self.data.len() - self.position + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index cb50f2d..01a0d68 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,2 +1,16 @@ +//! Various utility functions + +use core::str::{from_utf8, Utf8Error}; + pub mod dyn_arr; pub mod byte_size; +pub mod tar; +pub mod cursor; + +/// Converts a null-terminated string representation into a string slice +fn from_null_term(source: &[u8]) -> Result<&str, Utf8Error> { + let end = source.iter() + .position(|&c| c == 0) + .unwrap_or(source.len()); + from_utf8(&source[0..end]) +} diff --git a/src/util/tar.rs b/src/util/tar.rs new file mode 100644 index 0000000..8f3035b --- /dev/null +++ b/src/util/tar.rs @@ -0,0 +1,45 @@ +//! Read-only TAR file format parser. Based on this specification: +//! https://www.gnu.org/software/tar/manual/html_node/Standard.html + +use super::from_null_term; + +const BLOCK_SIZE: usize = 512; + +/// Read-only TAR file format parser +#[derive(Clone, Debug)] +pub struct TarFile<'d> { + data: &'d [u8], +} + +impl<'d> TarFile<'d> { + pub fn new(data: &'d [u8]) -> TarFile<'d> { + TarFile { data } + } + + /// Returns a slice containing the contents of the file. If no file with the + /// specified name is found, `None` is returned. + pub fn read_file(&self, name: &str) -> Option<&'d [u8]> { + let mut position = 0; + + loop { + // read block + let block: [u8; BLOCK_SIZE] = self.data[position .. position+BLOCK_SIZE].try_into().unwrap(); + let magic = from_null_term(&block[257..263]).unwrap(); + if !magic.starts_with("ustar") { + // reached end of archive + return None; + } + + let block_name = from_null_term(&block[0..100]).unwrap(); + let size = usize::from_str_radix(from_null_term(&block[124..136]).unwrap(), 8).unwrap(); + + if block_name == name { + let file = &self.data[position+BLOCK_SIZE .. 
position+BLOCK_SIZE+size]; + return Some(file); + } + + let skip_blocks = 1 + size.div_ceil(BLOCK_SIZE); + position += skip_blocks * BLOCK_SIZE; + } + } +} diff --git a/src/vm/app.rs b/src/vm/app.rs new file mode 100644 index 0000000..7416f83 --- /dev/null +++ b/src/vm/app.rs @@ -0,0 +1,50 @@ +//! Unlike in normal Erlang/OTP, apps exist at the emulator level in this +//! implementation. This is used to strengthen security through emulator-level +//! isolation of applications. This behavior is unwarranted and unexpected in +//! traditional OTP code, and thus a compatibility mode is needed. + +use alloc::{rc::Rc, boxed::Box}; +use hashbrown::HashMap; + +use super::{module::Module, state::{LocalAtomRef, LocalContext}, term::{LocalTerm, TermError}}; + +/// Named collection of related modules +/// +/// In OTP, there can only be one instance of an app. In BOSS, multiple versions +/// of an app could be loaded at the same time, and multiple instances if each +/// of those versions could be running at the same time. +#[derive(Debug)] +pub struct Application { + /// Name of application as an atom + pub name: LocalAtomRef, + /// Version of application + pub version: Box, + /// Modules belonging to this version of the application. A module may not + /// yet be loaded. + pub modules: HashMap>>, +} + +impl Application { + /// Parses a binary `.app` specification + pub fn new(data: &[u8], context: &mut LocalContext) -> Result { + // parse ETF + let term = match LocalTerm::from_etf(data, context) { + Ok(term) => term, + Err(e) => return Err(e), + }; + + // deconstruct term + let [name, properties] = term.get_tagged_tuple("application", context)?; + let name = name.get_atom()?; + let module_names = properties.get_proplist_value("modules", context)?.get_list()?; + let version = properties.get_proplist_value("vsn", context)?.get_charlist()?; + + let mut modules = HashMap::with_capacity(module_names.len()); + for name in module_names { + let name = name.get_atom()?; + modules.insert(name, None); + } + + Ok(Application { name, version, modules }) + } +} diff --git a/src/vm/genop.tab b/src/vm/genop.tab new file mode 100644 index 0000000..83aa741 --- /dev/null +++ b/src/vm/genop.tab @@ -0,0 +1,700 @@ +# This file is taken from Erlang source code: +# https://github.com/erlang/otp/blob/master/lib/compiler/src/genop.tab + +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1998-2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# %CopyrightEnd% +# +BEAM_FORMAT_NUMBER=0 + +# +# Generic instructions, generated by the compiler. If any of them change number, +# arity or semantics, the format number above must be bumped. +# + +## @spec label Lbl +## @doc Specify a module local label. +## Label gives this code address a name (Lbl) and marks the start of +## a basic block. +1: label/1 + +## @spec func_info M F A +## @doc Define a function M:F/A +2: func_info/3 + +3: int_code_end/0 + +# +# Function and BIF calls. +# + +## @spec call Arity Label +## @doc Call the function at Label. 
+## Save the next instruction as the return address in the CP register. +4: call/2 + +## @spec call_last Arity Label Deallocate +## @doc Deallocate and do a tail recursive call to the function at Label. +## Do not update the CP register. +## Before the call deallocate Deallocate words of stack. +5: call_last/3 + +## @spec call_only Arity Label +## @doc Do a tail recursive call to the function at Label. +## Do not update the CP register. +6: call_only/2 + +## @spec call_ext Arity Destination +## @doc Call the function of arity Arity pointed to by Destination. +## Save the next instruction as the return address in the CP register. +7: call_ext/2 + +## @spec call_ext_last Arity Destination Deallocate +## @doc Deallocate and do a tail call to function of arity Arity +## pointed to by Destination. +## Do not update the CP register. +## Deallocate Deallocate words from the stack before the call. +8: call_ext_last/3 + +## @spec bif0 Bif Reg +## @doc Call the bif Bif and store the result in Reg. +9: bif0/2 + +## @spec bif1 Lbl Bif Arg Reg +## @doc Call the bif Bif with the argument Arg, and store the result in Reg. +## On failure jump to Lbl. +10: bif1/4 + +## @spec bif2 Lbl Bif Arg1 Arg2 Reg +## @doc Call the bif Bif with the arguments Arg1 and Arg2, +## and store the result in Reg. +## On failure jump to Lbl. +11: bif2/5 + +# +# Allocating, deallocating and returning. +# + +## @spec allocate StackNeed Live +## @doc Allocate space for StackNeed words on the stack. If a GC is needed +## during allocation there are Live number of live X registers. +## Also save the continuation pointer (CP) on the stack. +12: allocate/2 + +## @spec allocate_heap StackNeed HeapNeed Live +## @doc Allocate space for StackNeed words on the stack and ensure there is +## space for HeapNeed words on the heap. If a GC is needed +## save Live number of X registers. +## Also save the continuation pointer (CP) on the stack. +13: allocate_heap/3 + +## @spec allocate_zero StackNeed Live +## @doc Allocate space for StackNeed words on the stack. If a GC is needed +## during allocation there are Live number of live X registers. +## Clear the new stack words. (By writing NIL.) +## Also save the continuation pointer (CP) on the stack. +## +## OTP 24: This instruction has been superseded by allocate/2 followed +## by init_yregs/1. +14: -allocate_zero/2 + +## @spec allocate_heap_zero StackNeed HeapNeed Live +## @doc Allocate space for StackNeed words on the stack and HeapNeed words +## on the heap. If a GC is needed +## during allocation there are Live number of live X registers. +## Clear the new stack words. (By writing NIL.) +## Also save the continuation pointer (CP) on the stack. +## +## OTP 24: This instruction has been superseded by allocate_heap/2 +## followed by init_yregs/1. +15: -allocate_heap_zero/3 + +## @spec test_heap HeapNeed Live +## @doc Ensure there is space for HeapNeed words on the heap. If a GC is needed +## save Live number of X registers. +16: test_heap/2 + +## @spec init N +## @doc Clear the Nth stack word. (By writing NIL.) +## +## OTP 24: This instruction has been superseded by init_yregs/1. +17: -init/1 + +## @spec deallocate N +## @doc Restore the continuation pointer (CP) from the stack and deallocate +## N+1 words from the stack (the + 1 is for the CP). +18: deallocate/1 + +## @spec return +## @doc Return to the address in the continuation pointer (CP). +19: return/0 + +# +# Sending & receiving. +# +## @spec send +## @doc Send argument in x(1) as a message to the destination process in x(0). 
+## The message in x(1) ends up as the result of the send in x(0). +20: send/0 + +## @spec remove_message +## @doc Unlink the current message from the message queue. Remove any timeout. +21: remove_message/0 + +## @spec timeout +## @doc Reset the save point of the mailbox and clear the timeout flag. +22: timeout/0 + +## @spec loop_rec Label Source +## @doc Loop over the message queue, if it is empty jump to Label. +23: loop_rec/2 + +## @spec loop_rec_end Label +## @doc Advance the save pointer to the next message and jump back to Label. +24: loop_rec_end/1 + +## @spec wait Label +## @doc Suspend the processes and set the entry point to the beginning of the +## receive loop at Label. +25: wait/1 + +## @spec wait_timeout Label Time +## @doc Sets up a timeout of Time milliseconds and saves the address of the +## following instruction as the entry point if the timeout triggers. +26: wait_timeout/2 + +# +# Arithmetic opcodes. +# +27: -m_plus/4 +28: -m_minus/4 +29: -m_times/4 +30: -m_div/4 +31: -int_div/4 +32: -int_rem/4 +33: -int_band/4 +34: -int_bor/4 +35: -int_bxor/4 +36: -int_bsl/4 +37: -int_bsr/4 +38: -int_bnot/3 + +# +# Comparison operators. +# + +## @spec is_lt Lbl Arg1 Arg2 +## @doc Compare two terms and jump to Lbl if Arg1 is not less than Arg2. +39: is_lt/3 + +## @spec is_ge Lbl Arg1 Arg2 +## @doc Compare two terms and jump to Lbl if Arg1 is less than Arg2. +40: is_ge/3 + +## @spec is_eq Lbl Arg1 Arg2 +## @doc Compare two terms and jump to Lbl if Arg1 is not (numerically) equal to Arg2. +41: is_eq/3 + +## @spec is_ne Lbl Arg1 Arg2 +## @doc Compare two terms and jump to Lbl if Arg1 is (numerically) equal to Arg2. +42: is_ne/3 + +## @spec is_eq_exact Lbl Arg1 Arg2 +## @doc Compare two terms and jump to Lbl if Arg1 is not exactly equal to Arg2. +43: is_eq_exact/3 + +## @spec is_ne_exact Lbl Arg1 Arg2 +## @doc Compare two terms and jump to Lbl if Arg1 is exactly equal to Arg2. +44: is_ne_exact/3 + +# +# Type tests. +# + +## @spec is_integer Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not an integer. +45: is_integer/2 + +## @spec is_float Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a float. +46: is_float/2 + +## @spec is_number Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a number. +47: is_number/2 + +## @spec is_atom Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not an atom. +48: is_atom/2 + +## @spec is_pid Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a pid. +49: is_pid/2 + +## @spec is_reference Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a reference. +50: is_reference/2 + +## @spec is_port Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a port. +51: is_port/2 + +## @spec is_nil Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not nil. +52: is_nil/2 + +## @spec is_binary Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a binary. +53: is_binary/2 + +54: -is_constant/2 + +## @spec is_list Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a cons or nil. +55: is_list/2 + +## @spec is_nonempty_list Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a cons. +56: is_nonempty_list/2 + +## @spec is_tuple Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a tuple. +57: is_tuple/2 + +## @spec test_arity Lbl Arg1 Arity +## @doc Test the arity of (the tuple in) Arg1 and jump +## to Lbl if it is not equal to Arity. 
+58: test_arity/3 + +# +# Indexing & jumping. +# + +## @spec select_val Arg FailLabel Destinations +## @doc Jump to the destination label corresponding to Arg +## in the Destinations list, if no arity matches, jump to FailLabel. +59: select_val/3 + +## @spec select_tuple_arity Tuple FailLabel Destinations +## @doc Check the arity of the tuple Tuple and jump to the corresponding +## destination label, if no arity matches, jump to FailLabel. +60: select_tuple_arity/3 + +## @spec jump Label +## @doc Jump to Label. +61: jump/1 + +# +# Catch. +# +62: catch/2 +63: catch_end/1 + +# +# Moving, extracting, modifying. +# + +## @spec move Source Destination +## @doc Move the source Source (a literal or a register) to +## the destination register Destination. +64: move/2 + +## @spec get_list Source Head Tail +## @doc Get the head and tail (or car and cdr) parts of a list +## (a cons cell) from Source and put them into the registers +## Head and Tail. +65: get_list/3 + +## @spec get_tuple_element Source Element Destination +## @doc Get element number Element from the tuple in Source and put +## it in the destination register Destination. +66: get_tuple_element/3 + +## @spec set_tuple_element NewElement Tuple Position +## @doc Update the element at position Position of the tuple Tuple +## with the new element NewElement. +67: set_tuple_element/3 + +# +# Building terms. +# +68: -put_string/3 +69: put_list/3 +70: -put_tuple/2 +71: -put/1 + +# +# Raising errors. +# +72: badmatch/1 +73: if_end/0 +74: case_end/1 + +# +# 'fun' support. +# +## @spec call_fun Arity +## @doc Call a fun of arity Arity. Assume arguments in +## registers x(0) to x(Arity-1) and that the fun is in x(Arity). +## Save the next instruction as the return address in the CP register. +75: call_fun/1 + +76: -make_fun/3 + +## @spec is_function Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a +## function (i.e. fun or closure). +77: is_function/2 + +# +# Late additions to R5. +# + +## @spec call_ext_only Arity Label +## Do a tail recursive call to the function at Label. +## Do not update the CP register. +78: call_ext_only/2 + +# +# Binary matching (R7). +# +79: -bs_start_match/2 +80: -bs_get_integer/5 +81: -bs_get_float/5 +82: -bs_get_binary/5 +83: -bs_skip_bits/4 +84: -bs_test_tail/2 +85: -bs_save/1 +86: -bs_restore/1 + +# +# Binary construction (R7A). +# +87: -bs_init/2 +88: -bs_final/2 +89: -bs_put_integer/5 +90: -bs_put_binary/5 +91: -bs_put_float/5 +92: -bs_put_string/2 + +# +# Binary construction (R7B). +# +93: -bs_need_buf/1 + +# +# Floating point arithmetic (R8). +# +# The fclearerror and fcheckerror instructions are not used in OTP 24 +# and later. +# +94: -fclearerror/0 +95: -fcheckerror/1 +96: fmove/2 +97: fconv/2 +98: fadd/4 +99: fsub/4 +100: fmul/4 +101: fdiv/4 +102: fnegate/3 + +# New fun construction (R8). +103: -make_fun2/1 + +# Try/catch/raise (R10B). +104: try/2 +105: try_end/1 +106: try_case/1 +107: try_case_end/1 +108: raise/2 + +# New instructions in R10B. +109: -bs_init2/6 +110: -bs_bits_to_bytes/3 +111: -bs_add/5 +112: apply/1 +113: apply_last/2 +## @spec is_boolean Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a Boolean. +114: is_boolean/2 + +# New instructions in R10B-6. +## @spec is_function2 Lbl Arg1 Arity +## @doc Test the type of Arg1 and jump to Lbl if it is not a +## function of arity Arity. +115: is_function2/3 + +# New bit syntax matching in R11B. 
+ +116: -bs_start_match2/5 +117: bs_get_integer2/7 +118: bs_get_float2/7 +119: bs_get_binary2/7 +120: bs_skip_bits2/5 +121: bs_test_tail2/3 +122: -bs_save2/2 +123: -bs_restore2/2 + +# New GC bifs introduced in R11B. + +## @spec gc_bif1 Lbl Live Bif Arg Reg +## @doc Call the bif Bif with the argument Arg, and store the result in Reg. +## On failure jump to Lbl. +## Do a garbage collection if necessary to allocate space on the heap +## for the result (saving Live number of X registers). +124: gc_bif1/5 + +## @spec gc_bif2 Lbl Live Bif Arg1 Arg2 Reg +## @doc Call the bif Bif with the arguments Arg1 and Arg2, +## and store the result in Reg. +## On failure jump to Lbl. +## Do a garbage collection if necessary to allocate space on the heap +## for the result (saving Live number of X registers). +125: gc_bif2/6 + +# Experimental new bit_level bifs introduced in R11B. +# NOT used in R12B. +126: -bs_final2/2 +127: -bs_bits_to_bytes2/2 + +# R11B-4 +128: -put_literal/2 + +# R11B-5 +## @spec is_bitstr Lbl Arg1 +## @doc Test the type of Arg1 and jump to Lbl if it is not a bit string. +129: is_bitstr/2 + +# R12B +130: -bs_context_to_binary/1 +131: bs_test_unit/3 +132: bs_match_string/4 +133: bs_init_writable/0 +134: -bs_append/8 +135: -bs_private_append/6 + +## @spec trim N Remaining +## @doc Reduce the stack usage by N words, +## keeping the CP on the top of the stack. +136: trim/2 + +137: -bs_init_bits/6 + +# R12B-5 +138: bs_get_utf8/5 +139: bs_skip_utf8/4 + +140: bs_get_utf16/5 +141: bs_skip_utf16/4 + +142: bs_get_utf32/5 +143: bs_skip_utf32/4 + +144: -bs_utf8_size/3 +145: -bs_put_utf8/3 + +146: -bs_utf16_size/3 +147: -bs_put_utf16/3 + +148: -bs_put_utf32/3 + +# R13B03 + +149: on_load/0 + +# R14A + +150: -recv_mark/1 +151: -recv_set/1 + +## @spec gc_bif3 Lbl Live Bif Arg1 Arg2 Arg3 Reg +## @doc Call the bif Bif with the arguments Arg1, Arg2 and Arg3, +## and store the result in Reg. +## On failure jump to Lbl. +## Do a garbage collection if necessary to allocate space on the heap +## for the result (saving Live number of X registers). +152: gc_bif3/7 + +# R15A + +153: line/1 + +# R17 + +154: put_map_assoc/5 +155: put_map_exact/5 +156: is_map/2 +157: has_map_fields/3 +158: get_map_elements/3 + +# OTP 20 + +## @spec is_tagged_tuple Lbl Reg N Atom +## @doc Test the type of Reg and jumps to Lbl if it is not a tuple. +## Test the arity of Reg and jumps to Lbl if it is not N. +## Test the first element of the tuple and jumps to Lbl if it is not Atom. +159: is_tagged_tuple/4 + +# OTP 21 + +## @spec build_stacktrace +## @doc Given the raw stacktrace in x(0), build a cooked stacktrace suitable +## for human consumption. Store it in x(0). Destroys all other registers. +## Do a garbage collection if necessary to allocate space on the heap +## for the result. +160: build_stacktrace/0 + +## @spec raw_raise +## @doc This instruction works like the erlang:raise/3 BIF, except that the +## stacktrace in x(2) must be a raw stacktrace. +## x(0) is the class of the exception (error, exit, or throw), +## x(1) is the exception term, and x(2) is the raw stackframe. +## If x(0) is not a valid class, the instruction will not throw an +## exception, but store the atom 'badarg' in x(0) and execute the +## next instruction. +161: raw_raise/0 + +## @spec get_hd Source Head +## @doc Get the head (or car) part of a list (a cons cell) from Source and +## put it into the register Head. 
+162: get_hd/2 + +## @spec get_tl Source Tail +## @doc Get the tail (or cdr) part of a list (a cons cell) from Source and +## put it into the register Tail. +163: get_tl/2 + +# OTP 22 + +## @spec put_tuple2 Destination Elements +## @doc Build a tuple with the elements in the list Elements and put it +## into register Destination. +164: put_tuple2/2 + +## @spec bs_get_tail Ctx Dst Live +## @doc Sets Dst to the tail of Ctx at the current position +165: bs_get_tail/3 + +## @spec bs_start_match3 Fail Bin Live Dst +## @doc Starts a binary match sequence +166: bs_start_match3/4 + +## @spec bs_get_position Ctx Dst Live +## @doc Sets Dst to the current position of Ctx +167: bs_get_position/3 + +## @spec bs_set_positon Ctx Pos +## @doc Sets the current position of Ctx to Pos +168: bs_set_position/2 + +# OTP 23 + +## @spec swap Register1 Register2 +## @doc Swaps the contents of two registers. +169: swap/2 + +## @spec bs_start_match4 Fail Bin Live Dst +## @doc As bs_start_match3, but the fail label can be 'no_fail' when we know +## it will never fail at runtime, or 'resume' when we know the input is +## a match context. +170: bs_start_match4/4 + +# OTP 24 + +## @spec make_fun3 OldIndex Dst EnvTerms +## @doc Build a fun with the environment in the list EnvTerms and put it +## into register Dst. +171: make_fun3/3 + +## @spec init_yregs ListOfYRegs +## @doc Initialize the Y registers in the list. +172: init_yregs/1 + +## @spec recv_marker_bind Marker Reference +## @doc Associates Reference with a previously reserved marker. +173: recv_marker_bind/2 + +## @spec recv_marker_clear Reference +## @doc Clears the receive marker associated with the given Reference. +174: recv_marker_clear/1 + +## @spec recv_marker_reserve Marker +## @doc Creates a receive marker which can be later bound to a reference. +175: recv_marker_reserve/1 + +## @spec recv_marker_use Reference +## @doc Sets the current receive cursor to the marker associated with +## the given Reference. +176: recv_marker_use/1 + +# OTP 25 + +## @spec bs_create_bin Fail Alloc Live Unit Dst OpList +## @doc Builda a new binary using the binary syntax. +177: bs_create_bin/6 + +## @spec call_fun2 Tag Arity Func +## @doc Calls the fun Func with arity Arity. Assume arguments in registers x(0) +## to x(Arity-1). Tag can be one of: +## +## * FunIndex - `Func` is always a local fun identified by `FunIndex` +## * {atom,safe} - `Func` is known to be a fun of correct arity. +## * {atom,unsafe} - Nothing is known about `Func`. +178: call_fun2/3 + +## @spec nif_start +## @doc No-op at start of each function declared in -nifs(). +179: nif_start/0 + +## @spec badrecord Value +## @doc Raises a {badrecord,Value} error exception. +180: badrecord/1 + +# OTP 26 + +## @spec update_record Hint Size Src Dst Updates=[Index, Value] +## @doc Sets Dst to a copy of Src with the update list applied. Hint can be +## one of: +## +## * {atom,copy} - The result will always differ from Src, so +## don't bother checking if it can be reused. +## * {atom,reuse} - Reuse Src if a runtime check deduces that it's +## equal to the result. +## +## Note that these are just hints and the implementation is free to +## ignore them. More hints may be added in the future. +181: update_record/5 + +## @spec bs_match Fail Ctx {commands,Commands} +## @doc Match one or more binary segments of fixed size. 
Commands
+## can be one of the following:
+##
+## * {ensure_at_least,Stride,Unit}
+## * {ensure_exactly,Stride}
+## * {binary,Live,Flags,Size,Unit,Dst}
+## * {integer,Live,Flags,Size,Unit,Dst}
+## * {skip,Stride}
+## * {get_tail,Live,Unit,Dst}
+## * {'=:=',Live,Size,Value}.
+182: bs_match/3
+
+# OTP 27
+
+## @spec executable_line Location Index
+## @doc Provide location for an executable line.
+183: executable_line/2
diff --git a/src/vm/interpreter.rs b/src/vm/interpreter.rs
new file mode 100644
index 0000000..23d2b38
--- /dev/null
+++ b/src/vm/interpreter.rs
@@ -0,0 +1,444 @@
+//! Basic BEAM interpreter
+
+use core::iter;
+
+use alloc::vec;
+use alloc::{borrow::ToOwned, boxed::Box, rc::Rc, vec::Vec};
+
+use crate::vm::scheduler::{ExecuteStatus, TransferAgent};
+
+use super::{module::{Instruction, Module, Opcode, Operand}, scheduler::{CommonState, Execute, ExecuteMake}, state::{LocalAtomRef, LocalContext}, term::LocalTerm};
+
+#[derive(Clone, Debug)]
+pub struct InstructionPtr {
+    module: Rc<Module>,
+    instruction: usize,
+}
+impl InstructionPtr {
+    /// Dumps the instruction and the instructions around it
+    pub fn log_context(&self) {
+        let low = self.instruction.saturating_sub(2);
+        let high = (self.instruction + 3).min(self.module.instructions.len());
+        for i in low..high {
+            if i == self.instruction {
+                log::error!("\x1b[31m >>> \x1b[38;5;238m{:#?}", self.module.instructions[i]);
+            } else {
+                log::error!("\x1b[38;5;238m{: >4}: {:#?}", i, self.module.instructions[i]);
+            }
+        }
+    }
+}
+
+/// Values stored on the stack (in the Y "registers")
+#[derive(Clone, Debug)]
+pub enum YRegister {
+    /// Register storing a term
+    Term(LocalTerm),
+    /// Register storing a stack frame (continuation pointer and top of stack)
+    StkFrame(Option<InstructionPtr>, usize),
+}
+
+/// Architectural state of the BEAM virtual machine
+#[derive(Clone, Debug)]
+struct BeamState {
+    /// X registers whose values are not preserved across function calls
+    x: Vec<LocalTerm>,
+    /// Y "registers" form a stack divided into stack frames. Values in these
+    /// registers are preserved across function calls
+    y: Vec<YRegister>,
+    /// The top of the stack, i.e. the lowest Y register that is visible to the
+    /// current function
+    stop: usize,
+    /// Message counter, used in a receive loop
+    loop_rec_ctr: usize,
+    /// Instruction pointer
+    ip: InstructionPtr,
+    /// Continuation (return) pointer
+    cp: Option<InstructionPtr>,
+}
+
+/// Instruction interpretation should stop and something else should be done
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum Terminate {
+    /// Match error
+    Badmatch,
+    /// Bad instruction
+    BadInsn,
+    /// Enter wait state, wait until a message is received
+    EnterWait,
+    /// Normal termination of process
+    Normal,
+}
+
+impl BeamState {
+    /// Creates a new state that starts execution at the specified entry point
+    fn new<'a>(entry: InstructionPtr, args: &'a [LocalTerm]) -> BeamState {
+        BeamState {
+            x: Vec::from(args),
+            y: Vec::new(),
+            stop: 0,
+            loop_rec_ctr: 0,
+            ip: entry,
+            cp: None,
+        }
+    }
+
+    /// Takes the value of an [Operand]. An operand may depend on the state of
+    /// the virtual machine, e.g. if it's a register.
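+    /// Operands that do not carry a value in this position (labels,
+    /// allocation lists) and out-of-range registers are reported as
+    /// [Terminate::BadInsn].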
+ fn get_operand(&self, operand: &Operand) -> Result { + match operand { + Operand::Nil => Ok(LocalTerm::List(Box::new([]), None)), + Operand::Atom(atom) => Ok(LocalTerm::Atom(atom.clone())), + Operand::Literal(lit) => Ok(lit.clone()), + Operand::Number(num) => Ok(LocalTerm::Integer(num.clone())), + Operand::XReg(x) => { + match self.x.get(*x) { + Some(term) => Ok(term.clone()), + None => Err(Terminate::BadInsn), + } + }, + Operand::YReg(y) => { + match self.y.get(self.stop + *y) { + Some(YRegister::Term(term)) => Ok(term.clone()), + _ => Err(Terminate::BadInsn), + } + }, + _ => Err(Terminate::BadInsn), + } + } + + /// Assigns `value` to the supplied [Operand]. + fn assign_to_operand(&mut self, operand: &Operand, value: LocalTerm) -> Result<(), Terminate> { + match operand { + Operand::XReg(x) => { + let x = *x; + if x >= self.x.len() { + let extend_by = x - self.x.len() + 1; + self.x.extend(iter::repeat(LocalTerm::nil()).take(extend_by)); + } + self.x[x] = value; + Ok(()) + }, + Operand::YReg(y) => { + let y = *y + self.stop; + if y >= self.y.len() { + return Err(Terminate::BadInsn); + } + self.y[y] = YRegister::Term(value); + Ok(()) + }, + _ => Err(Terminate::BadInsn), + } + } + + /// Sets up a stack frame + fn setup_frame(&mut self, y_regs: usize, cp: Option) { + self.y.push(YRegister::StkFrame(cp, self.stop)); + self.y.extend(iter::repeat(YRegister::Term(LocalTerm::nil())).take(y_regs)); + self.stop += 1; + } +} + +/// Interprets instructions one by one without any optimizations +pub struct BeamInterpreter { + common: CommonState, + state: BeamState, +} + +// convenience macros +macro_rules! jump { + ($self:ident, $label:expr) => { + { + $self.state.ip.instruction = $self.state.ip.module.labels[$label]; + return Ok(()) + } + }; +} +macro_rules! bad_insn { + () => { + return Err(Terminate::BadInsn) + }; +} +macro_rules! atom_match { + ($a:ident, $ctx:expr, $at:expr) => { + $ctx.atom_table.get_or_make_atom($at) == $a + } +} + +impl BeamInterpreter { + /// Runs a single instruction, transforming the state in the process. + fn run_insn(&mut self, context: &mut LocalContext, insn: &Instruction) -> Result<(), Terminate> { + let erlang_atom = context.atom_table.get_or_make_atom("erlang"); + + match (insn.opcode, &insn.operands) { + (Opcode::Label | Opcode::Line, _) => Ok(()), + + (Opcode::CallExt, [Some(Operand::Number(arity)), Some(Operand::Number(dest)), ..]) => { + let arity: usize = arity.try_into().map_err(|_| Terminate::BadInsn)?; + let dest: usize = dest.try_into().map_err(|_| Terminate::BadInsn)?; + let (module, fun, import_arity) = self.state.ip.module.imports[dest].clone(); + if import_arity != arity { bad_insn!(); } + if module != erlang_atom { bad_insn!(); } // TODO: actual calls :^) + self.state.x.truncate(arity); + self.bif(fun, context) + }, + + (Opcode::Allocate, [Some(Operand::Number(stack)), Some(Operand::Number(live)), ..]) => { + // deallocate all terms past X[live] and make a stack frame + let stack = stack.try_into().map_err(|_| Terminate::BadInsn)?; + let live = live.try_into().map_err(|_| Terminate::BadInsn)?; + self.state.x.truncate(live); + self.state.setup_frame(stack, self.state.cp.clone()); + Ok(()) + }, + + (Opcode::TestHeap, [Some(_), Some(Operand::Number(live)), ..]) => { + // The first register specifies the number in words that need to + // be allocated. This hint is meaningless to us because the way + // that we lay out data differs from how BEAM does so. 
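+                // Only the Live operand is acted on: BEAM guarantees that X
+                // registers above Live are dead at this point, so they can
+                // simply be dropped.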
+ let live = live.try_into().map_err(|_| Terminate::BadInsn)?; + self.state.x.truncate(live); + Ok(()) + }, + + (Opcode::Deallocate, [Some(Operand::Number(_)), ..]) => { + let YRegister::StkFrame(ref cp, stop) = self.state.y[self.state.stop - 1] else { + panic!("corrupted stack frame"); + }; + self.state.cp = cp.clone(); + self.state.stop = stop; + Ok(()) + }, + + (Opcode::Return, [..]) => { + if let Some(ref cp) = self.state.cp { + self.state.ip = cp.clone(); + Ok(()) + } else { + Err(Terminate::Normal) + } + }, + + (Opcode::Send, [..]) => { + if self.state.x.len() < 2 { bad_insn!(); } + let (LocalTerm::Pid(receiver) | LocalTerm::Port(receiver)) = self.state.x[0] else { bad_insn!() }; + let complete_msg = LocalTerm::Tuple(vec![LocalTerm::Pid(self.common.id), self.state.x[1].clone()]); + context.messenger.as_mut() + .expect("transfer agent not initialized") + .route_message(receiver, complete_msg, 16); + Ok(()) + }, + + (Opcode::RemoveMessage, [..]) => { + self.common.mailbox.remove(self.state.loop_rec_ctr); + self.state.loop_rec_ctr = 0; + Ok(()) + }, + + (Opcode::LoopRec, [Some(Operand::Label(fail)), Some(dest), ..]) => { + let message = self.common.mailbox.get(self.state.loop_rec_ctr); + let Some(message) = message else { jump!(self, *fail) }; + self.state.assign_to_operand(dest, message.clone())?; + Ok(()) + }, + + (Opcode::LoopRecEnd, [Some(Operand::Label(next)), ..]) => { + self.state.loop_rec_ctr += 1; + jump!(self, *next); + }, + + (Opcode::Wait, [Some(Operand::Label(cont)), ..]) => { + self.state.loop_rec_ctr = 0; + self.state.ip.instruction = self.state.ip.module.labels[*cont]; + Err(Terminate::EnterWait) + }, + + (Opcode::IsEqExact, [Some(Operand::Label(fail)), Some(left), Some(right), ..]) => { + let left = self.state.get_operand(left)?; + let right = self.state.get_operand(right)?; + if left != right { + jump!(self, *fail); + } + Ok(()) + }, + + (Opcode::IsTuple, [Some(Operand::Label(fail)), Some(val), ..]) => { + let val = self.state.get_operand(val)?; + let LocalTerm::Tuple(_) = val else { jump!(self, *fail) }; + Ok(()) + }, + + (Opcode::TestArity, [Some(Operand::Label(fail)), Some(val), Some(Operand::Number(arity)), ..]) => { + let arity: usize = arity.try_into().map_err(|_| Terminate::BadInsn)?; + let val = self.state.get_operand(val)?; + let LocalTerm::Tuple(tuple) = val else { bad_insn!() }; + if tuple.len() != arity { + jump!(self, *fail); + } + Ok(()) + }, + + (Opcode::Move, [Some(src), Some(dest), ..]) => { + let src = self.state.get_operand(src)?; + self.state.assign_to_operand(dest, src)?; + Ok(()) + }, + + (Opcode::GetTupleElement, [Some(src), Some(Operand::Number(index)), Some(dest), ..]) => { + let index: usize = index.try_into().map_err(|_| Terminate::BadInsn)?; + let LocalTerm::Tuple(src) = self.state.get_operand(src)? 
else { bad_insn!() }; + let element = src.get(index).ok_or(Terminate::BadInsn)?; + self.state.assign_to_operand(dest, element.clone())?; + Ok(()) + }, + + (Opcode::IsMap, [Some(Operand::Label(fail)), Some(arg), ..]) => { + let arg = self.state.get_operand(arg); + match arg { + Ok(LocalTerm::Map(_)) => Ok(()), + _ => jump!(self, *fail), + } + }, + + (Opcode::GetMapElements, [Some(Operand::Label(fail)), Some(src), Some(Operand::List(spec)), ..]) => { + let src = self.state.get_operand(src)?; + if let LocalTerm::Map(src) = src { + // `spec` is encoded as a sequence of KV pairs + // the value corresponding to K is fetched and put into V + let (chunks, []) = spec.as_chunks::<2>() else { bad_insn!() }; + for [left, right] in chunks { + let ref left = self.state.get_operand(left)?; + let Some(value) = src.0.get(left) else { jump!(self, *fail) }; + self.state.assign_to_operand(right, value.clone())?; + } + Ok(()) + } else { jump!(self, *fail) } + }, + + (Opcode::IsTaggedTuple, [Some(Operand::Label(fail)), Some(src), Some(Operand::Number(arity)), Some(tag), ..]) => { + let src = self.state.get_operand(src)?; + let tag = self.state.get_operand(tag)?; + let arity: usize = arity.try_into().map_err(|_| Terminate::BadInsn)?; + let LocalTerm::Tuple(src) = src else { jump!(self, *fail) }; + if src.len() != arity { jump!(self, *fail) }; + if src.get(0) != Some(&tag) { jump!(self, *fail) }; + Ok(()) + }, + + (Opcode::Badmatch, [Some(arg), ..]) => { // TODO: not ignore arg + Err(Terminate::Badmatch) + }, + + (Opcode::PutTuple2, [Some(dest), Some(Operand::List(tuple)), ..]) => { + let mut out = Vec::with_capacity(tuple.len()); + for element in tuple.iter() { + out.push(self.state.get_operand(element)?); + } + self.state.assign_to_operand(dest, LocalTerm::Tuple(out))?; + Ok(()) + }, + + // receive markers enhance performance if implemented, but not required + (Opcode::RecvMarkerBind | Opcode::RecvMarkerClear + | Opcode::RecvMarkerReserve | Opcode::RecvMarkerUse, _) => Ok(()), + + (_, _) => { + Err(Terminate::BadInsn) + }, + } + } + + /// Executes a Built-In Function + fn bif(&mut self, fun: LocalAtomRef, context: &mut LocalContext) -> Result<(), Terminate> { + match fun { + a if atom_match!(a, context, "make_ref") => { + self.state.x.push(context.make_ref()); + Ok(()) + }, + _ => Err(Terminate::BadInsn), + } + } +} + +/// The errors of [BeamInterpreter::new] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BeamInterpreterMakeError { + /// Application not found in local context + NoApp, + /// Module not found in application + NoMod, + /// Function/arity not found in module + NoFun, +} + +impl Execute for BeamInterpreter { + fn get_common_state(&self) -> &CommonState { &self.common } + fn get_common_state_mut(&mut self) -> &mut CommonState { &mut self.common } + + fn run_for(&mut self, context: &mut LocalContext, mut reductions: isize) { + loop { + // fetch instruction + #[cfg(feature = "trace-beam")] + let ip = self.state.ip.clone(); + let instruction = &self.state.ip.module.instructions[self.state.ip.instruction].clone(); + #[cfg(feature = "trace-beam")] + log::trace!("{instruction:#?}"); + self.state.ip.instruction += 1; + + // execute instruction + let result = self.run_insn(context, instruction); + match result { + Ok(()) => (), + Err(Terminate::Normal) => { + #[cfg(feature = "trace-beam")] + log::trace!("process {:?} exited normally", self.common.id); + self.common.status = ExecuteStatus::Exited; + return; + }, + Err(Terminate::EnterWait) => { + self.common.status = ExecuteStatus::Waiting; + return; 
+ }, + Err(Terminate::Badmatch) => { + self.common.status = ExecuteStatus::Exited; + return; + }, + Err(Terminate::BadInsn) => { + #[cfg(feature = "trace-beam")] + { + log::error!("invalid combination of opcode and operands or opcode not implemented:"); + ip.log_context(); + } + self.common.status = ExecuteStatus::Exited; + return; + }, + } + + // determine if we're ought to finish executing + reductions -= 1; + if reductions <= 0 { + break; + } + } + } +} + +impl<'i> ExecuteMake<'i> for BeamInterpreter { + const IS_PORT: bool = false; + type Init = (LocalAtomRef, LocalAtomRef, LocalAtomRef, &'i [LocalTerm]); + type Error = BeamInterpreterMakeError; + fn new(common: CommonState, init: &'i Self::Init, context: &LocalContext) -> Result { + use BeamInterpreterMakeError::*; + let app = context.applications.get(&init.0).ok_or(NoApp)?; + let module = Rc::clone(app.modules.get(&init.1).ok_or(NoMod)?.as_ref().ok_or(NoMod)?); + let fun_label = *module.exports.get(&(init.2.clone(), init.3.len())).ok_or(NoFun)?; + let fun_instruction = module.labels[fun_label]; + Ok(Self { + common, + state: BeamState::new(InstructionPtr { + module, + instruction: fun_instruction + }, init.3), + }) + } +} diff --git a/src/vm/mod.rs b/src/vm/mod.rs new file mode 100644 index 0000000..01b9a6f --- /dev/null +++ b/src/vm/mod.rs @@ -0,0 +1,97 @@ +//! The Erlang Virtual Machine. The thing that this entire project (which was +//! 3000 lines long at the time of creation of this module) strives to +//! implement. Everything else around this module and its descendants is more or +//! less just a generic microkernel. + +use alloc::{format, boxed::Box, rc::Rc}; + +use app::Application; +use hashbrown::HashMap; +use port::LogPort; +use interpreter::{BeamInterpreter, BeamInterpreterMakeError}; +use module::{Module, LoadError}; +use scheduler::{PrimitiveScheduler, Schedule}; +use state::LocalContext; +use term::{LocalTerm, MapTerm, TermError}; +use crate::util::tar::TarFile; + +pub const CURRENT_OPCODE_MAX: usize = 178; + +pub mod term; +pub mod module; +pub mod scheduler; +pub mod interpreter; +pub mod app; +pub mod state; +pub mod port; + +/// Virtual machine initialization error +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum InitError { + /// No `ebin/base.app` file in base image + NoBaseApp, + /// No `ebin/{module}.beam` file as requested by the app descriptor + NoBaseModule, + /// Bad data in `ebin/base.app` + BadSpec(TermError), + /// Bad data in `ebin/{module}.beam` + BadModule(LoadError), + /// Interpreter initialization error + Interpreter(BeamInterpreterMakeError) +} +impl From for InitError { + fn from(value: TermError) -> Self { + Self::BadSpec(value) + } +} +impl From for InitError { + fn from(value: LoadError) -> Self { + Self::BadModule(value) + } +} +impl From for InitError { + fn from(value: BeamInterpreterMakeError) -> Self { + Self::Interpreter(value) + } +} + +/// Initializes an Erlang virtual machine given a base image +pub fn init(base_image: &TarFile) -> Result { + // create context + let mut context: LocalContext = Default::default(); + let base_atom = context.atom_table.get_or_make_atom("base"); + let platform_atom = context.atom_table.get_or_make_atom("platform"); + let log_port_atom = context.atom_table.get_or_make_atom("log_port"); + let x86_64_uefi_atom = context.atom_table.get_or_make_atom("x86_64-uefi"); + let main_atom = context.atom_table.get_or_make_atom("main"); + + // load base application and modules + let base_app = base_image.read_file("ebin/base.app").ok_or(InitError::NoBaseApp)?; + 
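+    // The .app file is an Erlang application resource file, conventionally a
+    // term of the shape {application, base, [{modules, [...]}, ...]};
+    // Application::new presumably records the declared modules as empty
+    // placeholders, which the loop below fills in with the loaded code.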
let mut base_app = Application::new(base_app, &mut context)?; + for (name, module) in base_app.modules.iter_mut() { + let path = format!("ebin/{name}.beam"); + let module_data = base_image.read_file(path.as_str()).ok_or(InitError::NoBaseModule)?; + *module = Some(Rc::new(Module::new(module_data, &mut context)?)); + } + context.applications.insert(base_atom.clone(), base_app); + + // create a scheduler and initial ports + let mut scheduler = PrimitiveScheduler::new(context, 0); + let log_port = scheduler.add::(&(), 0).unwrap(); + + // construct initial arguments to 'base:main':main/2 + let config = LocalTerm::Map(MapTerm(HashMap::from([ + (platform_atom.into(), x86_64_uefi_atom.into()) + ]))); + let ports = LocalTerm::Map(MapTerm(HashMap::from([ + (log_port_atom.into(), log_port.into()) + ]))); + + // Run 'base:main':main/2 + let arguments: &[LocalTerm] = &[config, ports]; + let interpreter_init = (base_atom.clone(), main_atom.clone(), main_atom.clone(), arguments); + scheduler.add::(&interpreter_init, 0)?; + + // here we go! + scheduler.run(); +} diff --git a/src/vm/module.rs b/src/vm/module.rs new file mode 100644 index 0000000..9f00dc5 --- /dev/null +++ b/src/vm/module.rs @@ -0,0 +1,407 @@ +//! Parses BEAM modules from their binary representation in memory. Based on +//! Chapter 6 of the BEAM Book, the BEAM source code and just general googling. + +use alloc::{borrow::ToOwned, boxed::Box, vec::Vec, vec}; + +use hashbrown::HashMap; +use miniz_oxide::inflate; +use num_bigint::{BigInt, Sign}; + +include!(concat!(env!("OUT_DIR"), "/genop.rs")); +use super::{state::{LocalAtomRef, LocalContext}, term::LocalTerm, CURRENT_OPCODE_MAX}; +use crate::util::cursor::Cursor; + +/// Maximum number of operands for an opcode +pub const MAX_OPERANDS: usize = 8; + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum LoadError { + /// Invalid BEAM file signature + NotBEAMFile, + /// A feature is not implemented + NotImplemented, + /// Invalid data inside BEAM file + FormatError, + /// The BEAM file is too new for this VM + UnsupportedMaxOpcode(usize), + /// An undefined opcode was encountered + UndefinedOpcode(u8), +} + +/// Operands for bytecode instructions +#[derive(Clone)] +pub enum Operand { + Number(BigInt), + Atom(LocalAtomRef), + XReg(usize), + YReg(usize), + Label(usize), + FpReg(usize), + AllocList(usize), + Literal(LocalTerm), + List(Box<[Operand]>), + Nil, +} + +impl Operand { + pub fn read(cursor: &mut Cursor<'_>, module_atoms: &[LocalAtomRef], literal_map: &[LocalTerm]) -> Result { + // depending on the operand's value, it can be encoded in 4 different ways + let tag = cursor.read_u8(); + if tag & 0b111 == 0b111 { + // "extended" value + match (tag >> 4) & 0xf { + 1 => { + // list + let Operand::Number(length) = Self::read(cursor, module_atoms, literal_map)? else { + return Err(LoadError::FormatError); + }; + let length: usize = length.try_into().map_err(|_| LoadError::FormatError)?; + let mut elements = Vec::with_capacity(length); + for _ in 0..length { + let operand = Self::read(cursor, module_atoms, literal_map)?; + elements.push(operand); + } + Ok(Operand::List(elements.into_boxed_slice())) + }, + 4 => { + // literal + if let Operand::Number(index) = Self::read(cursor, module_atoms, literal_map)? 
{ + let index: usize = index.try_into().map_err(|_| LoadError::FormatError)?; + Ok(Operand::Literal(literal_map[index].clone())) + } else { + Err(LoadError::FormatError) + } + }, + 5 => { + // typed register + let operand = Self::read(cursor, module_atoms, literal_map)?; + let _ = Self::read(cursor, module_atoms, literal_map)?; // type hint, ignore + Ok(operand) + }, + val => todo!("compact term format extended tag {val}"), + } + } else { + // not an "extended value" + // size depends on middle two bits + let size_tag = (tag >> 3) & 0b11; + let value: BigInt = match size_tag { + 0b00|0b10 => { + // value smaller than 16, contained in the upper 4 bits + ((tag >> 4) & 0xf).into() + }, + 0b01 => { + // value smaller than 2048, contained in the next byte and + // in the upper 3 bits of the current byte + let next = cursor.read_u8() as usize; + (next | ((tag as usize >> 5) & 0b111)).into() + }, + 0b11 => { + // large or very large value + let size = ((tag >> 3) & 0b11111) as usize; + let size = if size == 0x1f { + // very large value (>= 9 bytes) + // size is contained in nested unsigned operand + if let Operand::Number(size) = Self::read(cursor, module_atoms, literal_map)? { + size + BigInt::from(9) + } else { + return Err(LoadError::FormatError); + } + } else { + // large value (2 to 8 bytes) + (size + 2).into() + }; + let bytes = cursor.read_slice(size.try_into().map_err(|_| LoadError::FormatError)?); + BigInt::from_bytes_be(Sign::NoSign, bytes) + }, + _ => unreachable!(), + }; + + // depending on the tag, the value represents different things + // but we don't really care about that now + Ok(match tag & 0b111 { + 0b000|0b001|0b110 => Operand::Number(value), + 0b010 if value == 0.into() => Operand::Nil, + 0b010 => { + let atom_idx: usize = value.try_into().map_err(|_| LoadError::FormatError)?; + Operand::Atom(module_atoms[atom_idx - 1].clone()) + }, + 0b011 => Operand::XReg(value.try_into().map_err(|_| LoadError::FormatError)?), + 0b100 => Operand::YReg(value.try_into().map_err(|_| LoadError::FormatError)?), + 0b101 => Operand::Label(value.try_into().map_err(|_| LoadError::FormatError)?), + _ => unreachable!(), + }) + } + } +} + +impl core::fmt::Debug for Operand { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Operand::Atom(reference) => { + let atom: alloc::rc::Rc = reference.into(); + write!(f, "{atom}") + }, + Operand::Label(index) => write!(f, "L{index}"), + Operand::Literal(lit) => write!(f, "{lit:?}"), + Operand::Nil=> write!(f, "[]"), + Operand::Number(n) => write!(f, "{n}"), + Operand::XReg(reg) => write!(f, "X{reg}"), + Operand::YReg(reg) => write!(f, "Y{reg}"), + Operand::List(list) => { + write!(f, "[")?; + for (i, element) in list.iter().enumerate() { + write!(f, "{element:?}")?; + if i != list.len() - 1 { + write!(f, ", ")?; + } + } + write!(f, "]") + }, + _ => todo!(), + } + } +} + +#[derive(Clone)] +/// Complete instruction representation +pub struct Instruction { + source: Option>, + pub opcode: Opcode, + pub operands: [Option; MAX_OPERANDS], +} + +impl Instruction { + pub fn read(cursor: &mut Cursor<'_>, module_atoms: &[LocalAtomRef], literal_map: &[LocalTerm]) -> Result { + #[cfg(feature = "trace-beam")] + let mut start_cur = cursor.clone(); + + // read opcode + let opcode = cursor.read_u8() as usize; + if opcode > CURRENT_OPCODE_MAX { return Err(LoadError::FormatError); } + let opcode = Opcode::from_repr(opcode).ok_or(LoadError::UndefinedOpcode(opcode as u8))?; + + // read operands + let mut insn = Instruction { opcode, operands: 
[const { None }; MAX_OPERANDS], source: None }; + for i in 0..opcode.arity() { + let operand = Operand::read(cursor, module_atoms, literal_map)?; + insn.operands[i] = Some(operand); + } + + // remember entire source of instruction for debugging + #[cfg(feature = "trace-beam")] + { + let insn_len = cursor.position - start_cur.position; + insn.source = Some(Box::from(start_cur.read_slice(insn_len))); + } + + Ok(insn) + } +} + +impl core::fmt::Debug for Instruction { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + const BYTECODE_WIDTH: isize = 30; + + // source bytecode + if f.alternate() { + if let Some(ref source) = self.source { + let mut length = 0; + for byte in source.iter() { + write!(f, "{byte:02x} ")?; + length += 3; + } + // padding + for _ in 0..(BYTECODE_WIDTH - length).max(0) { + write!(f, " ")?; + } + } + } + + // special case: label + if self.opcode == Opcode::Label { + return write!(f, "\x1b[33mL{:?}\x1b[0;0m:", self.operands[0].as_ref().expect("instruction operand within arity is None")) + } + + // opcode + if f.alternate() { + write!(f, "\x1b[34m{:?}\x1b[0;0m ", self.opcode)?; + } else { + write!(f, "{:?} ", self.opcode)?; + } + + // operands + for i in 0..self.opcode.arity() { + write!(f, "{:?}", self.operands[i].as_ref().expect("instruction operand within arity is None"))?; + let last = i == self.opcode.arity() - 1; + if !last { + if f.alternate() { write!(f, "\x1b[36m,\x1b[0;0m ") } else { write!(f, ", ") }?; + } + } + + Ok(()) + } +} + +/// Named collection of related functions +pub struct Module { + /// Name of the module + name: LocalAtomRef, + /// List of imported external and far functions (module, function and arity) + pub imports: Box<[(LocalAtomRef, LocalAtomRef, usize)]>, + /// Map of functions (name and arity) to the entry label + pub exports: HashMap<(LocalAtomRef, usize), usize>, + /// Defined literal terms + literals: Box<[LocalTerm]>, + /// Instruction pointers of labels + pub labels: Box<[usize]>, + /// Instruction stream + pub instructions: Box<[Instruction]>, +} + +impl Module { + /// Parses a BEAM module from its raw binary representation + pub fn new(data: &[u8], context: &mut LocalContext) -> Result { + // verify signatures + if data[0..4] != [b'F', b'O', b'R', b'1'] + || data[8..12] != [b'B', b'E', b'A', b'M'] { + return Err(LoadError::NotBEAMFile); + } + + // load chunks + let mut chunks = HashMap::new(); + let mut cursor = Cursor::new(data); + cursor.skip(12); + loop { + if cursor.reached_end() { break; } + let name = cursor.read_utf8(4).map_err(|_| LoadError::FormatError)?; + let size = cursor.read_u32_be() as usize; + let chunk_data = cursor.read_slice(size); + let size_w_padding = size.div_ceil(4) * 4; + cursor.skip(size_w_padding - size); + chunks.insert(name, chunk_data); + } + let chunks = chunks; + + // parse atom table (AtU8 chunk, Atom not supported) + let mut cursor = Cursor::new(chunks.get("AtU8").ok_or(LoadError::NotImplemented)?.to_owned()); // probably only contains legacy Atom table + let atom_cnt = cursor.read_u32_be() as usize; + let mut module_atoms = Vec::with_capacity(atom_cnt); + loop { + if cursor.reached_end() { break; } + let atom_size = cursor.read_u8() as usize; + let atom_name = cursor.read_utf8(atom_size).map_err(|_| LoadError::FormatError)?; + let atom = context.atom_table.get_or_make_atom(atom_name); + module_atoms.push(atom); + } + let module_atoms = module_atoms.into_boxed_slice(); + + // parse export table (ExpT chunk) + let mut cursor = 
Cursor::new(chunks.get("ExpT").ok_or(LoadError::FormatError)?.to_owned()); + let expt_cnt = cursor.read_u32_be() as usize; + let mut exports = HashMap::with_capacity(expt_cnt); + loop { + if cursor.reached_end() { break; } + let name = cursor.read_u32_be() as usize; + let arity = cursor.read_u32_be() as usize; + let label = cursor.read_u32_be() as usize; + exports.insert((module_atoms[name - 1].to_owned(), arity), label); + } + let exports = exports; + + // parse import table (ImpT chunk) + let mut cursor = Cursor::new(chunks.get("ImpT").ok_or(LoadError::FormatError)?.to_owned()); + let import_cnt = cursor.read_u32_be() as usize; + let mut imports = Vec::with_capacity(import_cnt); + loop { + if cursor.reached_end() { break; } + let module = module_atoms[cursor.read_u32_be() as usize - 1].to_owned(); + let function = module_atoms[cursor.read_u32_be() as usize - 1].to_owned(); + let arity = cursor.read_u32_be() as usize; + imports.push((module, function, arity)); + } + let imports = imports.into_boxed_slice(); + + // parse Code chunk + let mut cursor = Cursor::new(chunks.get("Code").ok_or(LoadError::FormatError)?.to_owned()); + let info_size = cursor.read_u32_be() as usize; + let instruction_set = cursor.read_u32_be() as usize; + let opcode_max = cursor.read_u32_be() as usize; + let label_cnt = cursor.read_u32_be() as usize; + let function_cnt = cursor.read_u32_be() as usize; + cursor.skip(info_size - 16); + let code = cursor.read_slice(cursor.remaining()); + if instruction_set != 0 || opcode_max > CURRENT_OPCODE_MAX { + return Err(LoadError::UnsupportedMaxOpcode(opcode_max)); + } + + // parse literals (LitT chunk) + let mut cursor = Cursor::new(chunks.get("LitT").ok_or(LoadError::FormatError)?.to_owned()); + let decompressed_sz = cursor.read_u32_be() as usize; + let compressed = cursor.read_slice(cursor.remaining()); + let mut decompressed = vec![0; decompressed_sz]; + inflate::decompress_slice_iter_to_slice(decompressed.as_mut_slice(), core::iter::once(compressed), true, false).map_err(|_| LoadError::FormatError)?; + let mut cursor = Cursor::new(decompressed.as_slice()); + let literal_cnt = cursor.read_u32_be() as usize; + let mut literals = Vec::with_capacity(literal_cnt); + loop { + if cursor.reached_end() { break; } + let literal_size = cursor.read_u32_be() as usize; + let literal_etf = cursor.read_slice(literal_size); + let literal = LocalTerm::from_etf(literal_etf, context).map_err(|_| LoadError::FormatError)?; + literals.push(literal); + } + let literals = literals.into_boxed_slice(); + + // parse instruction stream within Code chunk + let mut instructions = Vec::new(); + let mut labels = Vec::with_capacity(label_cnt); + labels.push(0); + let mut cursor = Cursor::new(code); + let mut instruction_ctr = 0; + // #[cfg(feature = "trace-beam")] + // log::trace!("module disassembly:"); + loop { + // read next instruction + if cursor.reached_end() { break; } + let instruction = Instruction::read(&mut cursor, &module_atoms, &literals)?; + // #[cfg(feature = "trace-beam")] + // log::trace!("\x1b[38;5;238m{instruction_ctr: >4}: {instruction:#?}"); + + // remember labels + if instruction.opcode == Opcode::Label { + let Some(Operand::Number(ref index)) = instruction.operands[0] else { + return Err(LoadError::FormatError); + }; + let index: usize = index.try_into().unwrap(); + if index != labels.len() { + return Err(LoadError::FormatError); + } + labels.push(instruction_ctr); + } + + instruction_ctr += 1; + instructions.push(instruction); + } + let instructions = instructions.into_boxed_slice(); 
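+        // `labels[l]` now maps label `l` to an index into `instructions`
+        // (slot 0 is a placeholder, since BEAM labels are numbered from 1);
+        // the interpreter resolves jumps and exported entry points through it.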
+ let labels = labels.into_boxed_slice(); + + log::trace!("loaded '{}' fns={function_cnt}, labels={label_cnt}, isa={instruction_set}, max_opcode={opcode_max}", module_atoms[0]); + Ok(Module { + name: module_atoms[0].clone(), + imports, + exports, + literals, + instructions, + labels + }) + } +} + +impl core::fmt::Debug for Module { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("Module") + .field("name", &self.name) + .field("exports", &self.exports.keys()) + .finish() + } +} diff --git a/src/vm/port.rs b/src/vm/port.rs new file mode 100644 index 0000000..2fdadb1 --- /dev/null +++ b/src/vm/port.rs @@ -0,0 +1,91 @@ +use alloc::vec; + +use super::{scheduler::{CommonState, Execute, ExecuteMake, ExecuteStatus, TransferAgent}, state::LocalContext, term::LocalTerm}; + +pub struct LogPort { + common: CommonState, + token: Option, +} + +impl Execute for LogPort { + fn get_common_state(&self) -> &CommonState { &self.common } + fn get_common_state_mut(&mut self) -> &mut CommonState { &mut self.common } + fn get_status(&self) -> ExecuteStatus { self.common.status } + + fn run_for(&mut self, context: &mut LocalContext, mut reductions: isize) { + let ok_atom: LocalTerm = context.atom_table.get_or_make_atom("ok").into(); + let error_atom: LocalTerm = context.atom_table.get_or_make_atom("error").into(); + let instantiated_atom: LocalTerm = context.atom_table.get_or_make_atom("instantiated").into(); + let badarg_atom: LocalTerm = context.atom_table.get_or_make_atom("badarg").into(); + let token_atom: LocalTerm = context.atom_table.get_or_make_atom("token").into(); + let mint_token_atom: LocalTerm = context.atom_table.get_or_make_atom("mint_token").into(); + let write_atom: LocalTerm = context.atom_table.get_or_make_atom("write").into(); + + loop { + // receive message + let message = self.common.mailbox.pop_front(); + let Some(message) = message else { + self.common.status = ExecuteStatus::Waiting; + return; + }; + + // deconstruct message + let LocalTerm::Tuple(message) = message else { continue }; + let Some(LocalTerm::Pid(sender)) = message.get(0) else { continue }; + let Some(message) = message.get(1) else { continue }; + let LocalTerm::Tuple(message) = message else { continue }; + let Some(conversation) = message.get(0) else { continue }; + let LocalTerm::Reference(_) = conversation else { continue }; + let Some(request) = message.get(1) else { continue }; + let Some(token) = message.get(2) else { continue }; + let Some(LocalTerm::List(args, None)) = message.get(3) else { continue }; + + // process request + let reply = match request { + r if *r == mint_token_atom && self.token.is_none() => { + let token = context.make_ref(); + self.token = Some(token.clone()); + LocalTerm::Tuple(vec![ok_atom.clone(), token]) + }, + r if *r == mint_token_atom => { + LocalTerm::Tuple(vec![error_atom.clone(), instantiated_atom.clone()]) + }, + r if *r == write_atom && Some(token) == self.token.as_ref() => { + if let Some(LocalTerm::BitString(_, message)) = args.get(0) + && let Ok(message) = core::str::from_utf8(message) { + log::info!("process {sender:?} says: {message}"); + ok_atom.clone() + } else { + LocalTerm::Tuple(vec![error_atom.clone(), badarg_atom.clone()]) + } + }, + r if *r == write_atom => { + LocalTerm::Tuple(vec![error_atom.clone(), token_atom.clone()]) + }, + _ => { + LocalTerm::Tuple(vec![error_atom.clone(), badarg_atom.clone()]) + }, + }; + + let self_pid = LocalTerm::Port(self.common.id); + let message = LocalTerm::Tuple(vec![conversation.clone(), reply]); + 
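+            // Replies use the same envelope as incoming requests: the sender
+            // receives {Port, {Conversation, Reply}}, mirroring the
+            // {Sender, {Conversation, Request, Token, Args}} tuples decoded above.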
let message = LocalTerm::Tuple(vec![self_pid, message]); + context.messenger.as_mut().unwrap().route_message(*sender, message, 16); + + reductions -= 1; + if reductions <= 0 { + return; + } + } + } +} + +impl<'i> ExecuteMake<'i> for LogPort { + const IS_PORT: bool = true; + type Init = (); + type Error = !; + fn new(common: CommonState, _init: &'i Self::Init, _ctx: &LocalContext) -> Result { + Ok(LogPort { common, token: None }) + } +} + diff --git a/src/vm/scheduler.rs b/src/vm/scheduler.rs new file mode 100644 index 0000000..2edf2eb --- /dev/null +++ b/src/vm/scheduler.rs @@ -0,0 +1,271 @@ +//! Execution and scheduling + +use alloc::{boxed::Box, collections::VecDeque, rc::Rc, vec::Vec}; +use core::cell::RefCell; + +use hashbrown::{HashMap, HashSet}; + +use super::{state::LocalContext, term::LocalTerm}; + +/// The starting sequence number for port identifiers +pub const PORT_START: usize = 0x8000_0000_0000_0000; + +/// Status of an [Execute] object +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum ExecuteStatus { + /// Waiting for a message + Waiting, + /// Ready to run + Ready, + /// Currently running + Running, + /// Marked for destruction, will never run + Exited, +} + +/// Executable identifier. Refers to an executable of any kind. +/// +/// Internally represented as +/// - ID of the scheduler it was originally created on +/// - Sequence number within the scheduler it was originally created on +/// +/// Executables might get transferred between schedulers, in which case +/// their ID does not change. If the MSB of the sequence number is set, the +/// executable should match `is_port`. If that bit is reset, it should match +/// `is_pid`. This distinction only matters to those guard functions. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)] +pub struct Eid(pub usize, pub usize); + +/// State common shared between all executables +pub struct CommonState { + pub id: Eid, + pub status: ExecuteStatus, + /// Messages that had been sent to this executable + pub mailbox: VecDeque, + /// Priority (lower numeric value = higher priority) + pub priority: u8, +} + +/// Trait for things that can receive messages, be scheduled and be executed. +/// Such objects are referred to as "executables". +pub trait Execute: 'static { + /// Gets a shared reference to the common state + fn get_common_state(&self) -> &CommonState; + /// Gets a mutable reference to the common state + fn get_common_state_mut(&mut self) -> &mut CommonState; + /// Runs the executable for a specified number of reductions + fn run_for(&mut self, context: &mut LocalContext, reductions: isize); + + /// Returns the status of an executable + fn get_status(&self) -> ExecuteStatus { + self.get_common_state().status + } + + /// Pushes a message into the queue + fn send_message(&mut self, message: LocalTerm) { + self.get_common_state_mut().mailbox.push_back(message); + } +} + +/// Allows executables to be created by the scheduler directly using an +/// initialization argument provided from the outside. +pub trait ExecuteMake<'i>: Sized + Execute { + /// Whether the executable is a port or not. This only matters to the + /// `is_port` and `is_pid` BIFs. + const IS_PORT: bool; + /// Initialization argument needed to construct the executable + type Init: Sized; + /// Executable creation error + type Error: Sized; + /// Creates an executable given the common state (generated by a scheduler) + /// and an initialization argument specific to the executable type. 
The + /// point of this indirection is to satisfy scheduler invariants. + fn new(common: CommonState, init: &'i Self::Init, ctx: &LocalContext) -> Result; +} + +/// Trait for things that run executables. +pub trait Schedule { + /// Returns the scheduler ID + fn get_id(&self) -> usize; + /// Runs exactly zero or one executables up to an implementation-defined + /// reduction limit. Returns `true` if an executable got ran, `false` if the + /// run queue is empty. + fn step(&mut self) -> bool; + /// Constructs an executable and adds it to the scheduler + fn add<'i, E: ExecuteMake<'i>>(&mut self, init: &'i E::Init, priority: u8) -> Result; + /// Removes an executable from the scheduler + fn remove(&mut self, id: Eid); + + /// Runs the scheduler in an infinite loop + fn run(&mut self) -> ! { + loop { + self.step(); + } + } +} + +/// Trait for things that route messages. +pub trait TransferAgent { + /// Forwards a message to the next node in the path. That might be an + /// executable in a scheduler itself, or another scheduler, or a scheduler + /// on another machine. + fn route_message(&mut self, receiver: Eid, message: LocalTerm, ttl: usize); +} + +/// A primitive transfer agent that collects messages while the scheduler is +/// busy, and processes them all at once once a process runs out of reductions. +#[derive(Default)] +pub struct LocalTransferAgent { + local_executables: HashSet, + mailbox: VecDeque<(Eid, LocalTerm)>, +} + +impl TransferAgent for LocalTransferAgent { + fn route_message(&mut self, receiver: Eid, message: LocalTerm, mut ttl: usize) { + #[cfg(feature = "trace-messages")] + log::trace!("send {:?} to {:?} (ttl {ttl})", message, receiver); + if self.local_executables.contains(&receiver) { + self.mailbox.push_back((receiver, message)); + } else { + // TODO: figure out who to send the message to + ttl -= 1; + if ttl > 0 { + todo!(); + } + } + } +} + +/// A primitive round robin scheduler with priorities +pub struct PrimitiveScheduler { + /// Scheduler ID + id: usize, + /// Next "process" sequence number + next_proc: usize, + /// Next "port" sequence number + next_port: usize, + /// Mapping of executable IDs to executables + executables: HashMap>, + /// Queue of executables and priorities that are [ExecuteStatus::Ready] + run_queue: VecDeque<(Eid, u8)>, + /// Execution context + context: LocalContext, +} + +impl PrimitiveScheduler { + pub fn new(mut context: LocalContext, id: usize) -> PrimitiveScheduler { + context.messenger = Some(::default()); + PrimitiveScheduler { + id, + next_proc: 0, + next_port: PORT_START, + executables: HashMap::new(), + run_queue: VecDeque::new(), + context, + } + } + + /// Generates a new [Eid] + fn new_eid(&mut self, for_port: bool) -> Eid { + if for_port { + let res = Eid(self.id, self.next_port); + self.next_port += 1; + res + } else { + let res = Eid(self.id, self.next_proc); + self.next_proc += 1; + res + } + } +} + +impl Schedule for PrimitiveScheduler { + fn get_id(&self) -> usize { + self.id + } + + fn step(&mut self) -> bool { + const REDUCTIONS: isize = 2000; + + // find executable with lowest numerical priority + let (to_run_idx, _) = self.run_queue + .iter() + .enumerate() + .fold((-1, 256usize), |acc @ (_, hi_prio), (idx, (_, priority))| { + let priority = *priority as usize; + if priority < hi_prio { + (idx as isize, priority) + } else { + acc + } + }); + if to_run_idx < 0 { + // empty run queue + return false; + } + + // get executable + let exec_id = self.run_queue.remove(to_run_idx as usize).unwrap().0; + let exec = 
self.executables.get_mut(&exec_id); + let Some(exec) = exec else { + panic!("inconsistent scheduler state: Eid {exec_id:?} in run queue but not in list of executables"); + }; + + // run executable + exec.get_common_state_mut().status = ExecuteStatus::Running; + exec.run_for(&mut self.context, REDUCTIONS); + + // update state depending on new status + let status = exec.get_common_state().status; + let prio = exec.get_common_state().priority; + match status { + ExecuteStatus::Exited => { + self.context.messenger.as_mut().unwrap().local_executables.remove(&exec_id); + self.executables.remove(&exec_id); () + }, + ExecuteStatus::Running => { + exec.get_common_state_mut().status = ExecuteStatus::Ready; + self.run_queue.push_back((exec_id, prio)); + }, + ExecuteStatus::Ready => { + self.run_queue.push_back((exec_id, prio)); + }, + ExecuteStatus::Waiting => (), + } + + // process incoming messages + loop { + let message = self.context.messenger.as_mut().unwrap().mailbox.pop_front(); + let Some((receiver, message)) = message else { break }; + let Some(receiver) = self.executables.get_mut(&receiver) else { break }; + let state = receiver.get_common_state_mut(); + state.mailbox.push_back(message); + if state.status == ExecuteStatus::Waiting { + state.status = ExecuteStatus::Ready; + self.run_queue.push_back((state.id, state.priority)); + } + } + + // an executable got ran + true + } + + fn add<'i, E: ExecuteMake<'i>>(&mut self, init: &'i E::Init, priority: u8) -> Result { + let id = self.new_eid(E::IS_PORT); + let common = CommonState { + id, + priority, + status: ExecuteStatus::Waiting, + mailbox: Default::default(), + }; + let executable = E::new(common, init, &self.context)?; + self.executables.insert(id, Box::new(executable)); + self.run_queue.push_back((id, priority)); + self.context.messenger.as_mut().unwrap().local_executables.insert(id); + Ok(id) + } + + fn remove(&mut self, id: Eid) { + todo!(); + } +} diff --git a/src/vm/state.rs b/src/vm/state.rs new file mode 100644 index 0000000..0a72fc1 --- /dev/null +++ b/src/vm/state.rs @@ -0,0 +1,156 @@ +//! Common VM state structures + +use core::{fmt::Debug, hash::Hash}; +use alloc::{boxed::Box, rc::{Rc, Weak}, vec::Vec}; + +use hashbrown::HashMap; + +use super::{app::Application, module::Module, scheduler::{Eid, LocalTransferAgent, TransferAgent}, term::LocalTerm}; + +/// Opaque reference to an entry in the atom table. 
For more information, refer +/// to [LocalTerm::Atom] +#[derive(Clone)] +pub struct LocalAtomRef(usize, Weak); + +impl LocalAtomRef { + fn get_str(&self) -> Rc { + self.1.upgrade().expect("inconsistent atom table state: string freed before weak reference was dropped") + } +} + +impl From<&LocalAtomRef> for Rc { + fn from(value: &LocalAtomRef) -> Self { + value.get_str() + } +} + +// explicit implementation because `Weak` is apparently not `PartialEq`, and we +// don't need to compare it anyways +impl PartialEq for LocalAtomRef { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + fn ne(&self, other: &Self) -> bool { + self.0 != other.0 + } +} + +impl Eq for LocalAtomRef { } + +impl Hash for LocalAtomRef { + fn hash(&self, state: &mut H) { + state.write_usize(self.0); + } +} + +impl core::fmt::Display for LocalAtomRef { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let string: Rc = self.into(); + write!(f, "{}", string) + } +} + +impl core::fmt::Debug for LocalAtomRef { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "LocalAtomRef({})", self) + } +} + +impl PartialOrd for LocalAtomRef { + fn partial_cmp(&self, other: &Self) -> Option { + use core::cmp::Ordering; + if self == other { return Some(Ordering::Equal) }; + if self.get_str() > other.get_str() { return Some(Ordering::Greater) }; + Some(Ordering::Less) + } +} + +impl Ord for LocalAtomRef { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.partial_cmp(other).unwrap() + } +} + +/// Two-way garbage collected mapping between local atom identifiers and their +/// values. For more information about atoms, go to [LocalTerm::Atom]. +/// +/// In OTP, atoms are not garbage collected, but they are GC-ed in this +/// implementation due to the expected dynamic nature of constant code loads and +/// unloads that an operating system faces. 
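+///
+/// The strong string handles live in the table itself; a [LocalAtomRef] only
+/// carries a weak reference plus a numeric id, so [AtomTable::gc] can free any
+/// atom whose weak count has dropped to zero.
+///
+/// A minimal usage sketch (not compiled as a doctest; assumes the API below):
+///
+/// ```ignore
+/// let mut table = AtomTable::default();
+/// let a = table.get_or_make_atom("ok");
+/// let b = table.get_or_make_atom("ok");
+/// assert_eq!(a, b);                 // atoms compare by id
+/// drop((a, b));
+/// table.gc();                       // no refs left: "ok" is collected
+/// assert!(table.get_existing_atom("ok").is_none());
+/// ```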
+#[derive(Default)] +pub struct AtomTable { + forward_map: HashMap>, + backward_map: HashMap, usize>, + next_id: usize, +} + +impl AtomTable { + /// Gets a [LocalAtomRef] corresponding to its value + pub fn get_existing_atom(&self, name: &str) -> Option { + let id = self.backward_map.get(name).copied()?; + let counter = self.forward_map.get(&id) + .expect("inconsistent atom table state: backward mapping exists, but no forward mapping exists"); + Some(LocalAtomRef(id, Rc::downgrade(counter))) + } + + /// Gets a [LocalAtomRef], creating a new entry in the atom table if it + /// doesn't exist + pub fn get_or_make_atom(&mut self, name: &str) -> LocalAtomRef { + if let Some(existing) = self.get_existing_atom(name) { + return existing; + } + + let id = self.next_id; + self.next_id += 1; + let counter = Rc::from(name); + let weak = Rc::downgrade(&counter); + self.forward_map.insert(id, counter); + self.backward_map.insert(name.into(), id); + LocalAtomRef(id, weak) + } + + /// Performs garbage collection + pub fn gc(&mut self) { + let mut freed = 0; + self.forward_map.retain(|_, counter| { + if Rc::weak_count(counter) == 0 { + self.backward_map.remove(&(**counter)); + freed += 1; + false + } else { + true + } + }); + self.forward_map.shrink_to_fit(); + self.backward_map.shrink_to_fit(); + log::trace!("atom_gc: freed {freed} atoms"); + } +} + +impl Debug for AtomTable { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut pretty = f.debug_map(); + for (id, name) in self.forward_map.iter() { + pretty.entry(id, &(name, Rc::weak_count(name))); + } + pretty.finish() + } +} + +/// Context local to a particular scheduler +#[derive(Default)] +pub struct LocalContext { + pub atom_table: AtomTable, + pub applications: HashMap, + pub next_ref: usize, + pub messenger: Option, +} + +impl LocalContext { + pub fn make_ref(&mut self) -> LocalTerm { + // TODO: random number as last element + let r = LocalTerm::Reference([0, self.next_ref, 0]); + self.next_ref += 1; + r + } +} diff --git a/src/vm/term.rs b/src/vm/term.rs new file mode 100644 index 0000000..9aee4f7 --- /dev/null +++ b/src/vm/term.rs @@ -0,0 +1,515 @@ +//! Erlang Term implementation + +use core::{array::TryFromSliceError, hash::{Hash, Hasher, SipHasher}, ops::Deref}; +use alloc::{boxed::Box, string::String, vec::Vec}; + +use hashbrown::HashMap; +use num_bigint::BigInt; +use strum_macros::FromRepr; + +use super::{scheduler::{Eid, PORT_START}, state::{LocalAtomRef, LocalContext}}; +use crate::util::cursor::Cursor; + +/// An `f64` that implements `Eq`, `Ord` and `Hash`. Yes, I'm aware of the +/// dangers. 
+/// A map between local terms
+#[derive(Clone, PartialEq, Eq)]
+pub struct MapTerm(pub HashMap<LocalTerm, LocalTerm>);
+
+impl Hash for MapTerm {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        // hash maps are unordered; sort the entries so equal maps hash equally
+        let mut vec = self.0.iter().collect::<Vec<_>>();
+        vec.sort();
+        for (k, v) in vec {
+            k.hash(state);
+            v.hash(state);
+        }
+    }
+}
+
+impl core::fmt::Debug for MapTerm {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let mut pretty = f.debug_map();
+        for (k, v) in self.0.iter() {
+            pretty.entry(k, v);
+        }
+        pretty.finish()
+    }
+}
+
+impl PartialOrd for MapTerm {
+    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+        todo!()
+    }
+}
+
+impl Ord for MapTerm {
+    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+        todo!()
+    }
+}
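+
+// The `todo!()`s above still need the Erlang term order for maps. A sketch of
+// that order (an assumption based on the Erlang reference manual, not code
+// from this commit): maps compare by size first, then by keys in ascending
+// term order, then by the values of the sorted keys.
+//
+// impl Ord for MapTerm {
+//     fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+//         self.0.len().cmp(&other.0.len()).then_with(|| {
+//             // keys are unique, so sorting the (key, value) pairs sorts by key
+//             let mut a: Vec<_> = self.0.iter().collect();
+//             let mut b: Vec<_> = other.0.iter().collect();
+//             a.sort();
+//             b.sort();
+//             a.iter().map(|(k, _)| k).cmp(b.iter().map(|(k, _)| k))
+//                 .then_with(|| a.iter().map(|(_, v)| v).cmp(b.iter().map(|(_, v)| v)))
+//         })
+//     }
+// }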
+
+/// An Erlang term that is local to a particular logical CPU. It assumes some
+/// context which needs to be communicated explicitly when a term is shared with
+/// another logical CPU or over a network.
+#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub enum LocalTerm {
+    /// Integer. All integers support long arithmetic.
+    Integer(BigInt),
+    /// Double-precision floating point number.
+    // Float(FloatTerm),
+    /// Usually short immutable string that has two main purposes in Erlang: as
+    /// a flag used for signaling something (`ok` and `error` are popular) and
+    /// as names for functions and modules. Comparing two atoms should be as
+    /// fast as possible, so they're stored as small unique identifiers instead
+    /// of full strings.
+    Atom(LocalAtomRef),
+    /// A term that is unique among all processors and nodes in the system. It
+    /// is opaque and unforgeable - in Erlang code there is no way to get the
+    /// constituent parts of a reference or create one from its components.
+    /// This property makes references useful as tokens for capability-based
+    /// security.
+    ///
+    /// The three constituent parts in this implementation are:
+    /// - Node ID
+    /// - Scheduler-local sequence number
+    /// - Random number
+    Reference([usize; 3]),
+    // TODO: fun
+    /// Process identifier.
+    Pid(Eid),
+    /// Port identifier. A port is like a process in the sense that one can send
+    /// and receive messages to/from a port. The difference is that a process
+    /// runs Erlang code and a port is an abstraction for talking to the outside
+    /// world implemented using native code.
+    Port(Eid),
+    /// Tuple.
+    Tuple(Vec<LocalTerm>),
+    /// List. Normally represented as a cons list, in this implementation
+    /// represented as a slice of all the elements and an improper tail. Erlang
+    /// allows placing terms that are not a list in the tail of a list,
+    /// producing an abomination called an "improper list".
+    List(Box<[LocalTerm]>, Option<Box<LocalTerm>>),
+    /// Mapping between arbitrary terms
+    Map(MapTerm),
+    /// A binary array that has a bit length that is not necessarily a multiple
+    /// of 8.
+    BitString(usize, Vec<u8>),
+}
+
+impl core::fmt::Debug for LocalTerm {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        use LocalTerm::*;
+        match *self {
+            Atom(ref reference) => {
+                let str: alloc::rc::Rc<str> = reference.into();
+                write!(f, "{}", str)
+            },
+            Integer(ref int) => write!(f, "{int}"),
+            // Float(float) => write!(f, "{float}"),
+            BitString(len, ref data) => {
+                // try to print it as a UTF-8 string first
+                if len % 8 == 0 && let Ok(utf8) = core::str::from_utf8(data) {
+                    write!(f, "<<\"{utf8}\">>")
+                } else {
+                    write!(f, "<<")?;
+                    for byte in &data[0 .. data.len() - 1] {
+                        write!(f, "{byte}, ")?;
+                    }
+                    let last_bits = len % 8;
+                    let last_byte = data[data.len() - 1];
+                    if last_bits == 0 {
+                        write!(f, "{last_byte}")?;
+                    } else {
+                        write!(f, "{last_byte}:{last_bits}")?;
+                    }
+                    write!(f, ">>")
+                }
+            }
+            Reference(parts) => {
+                let mut hash = SipHasher::default();
+                parts.hash(&mut hash);
+                write!(f, "#Ref<{:016x}>", hash.finish())
+            },
+            Pid(Eid(sched, seq)) => write!(f, "<{sched}.{seq}>"),
+            Port(Eid(sched, seq)) => write!(f, "#Port<{sched}.{seq}>"),
+            Tuple(ref tuple) => {
+                write!(f, "{{")?;
+                for (idx, elem) in tuple.iter().enumerate() {
+                    write!(f, "{elem:?}")?;
+                    if idx != tuple.len() - 1 {
+                        write!(f, ", ")?;
+                    }
+                }
+                write!(f, "}}")
+            },
+            List(ref elements, ref improper_tail) => {
+                // try to print it as a string first (nil still prints as [])
+                let is_probable_charlist = !elements.is_empty() && elements.iter().all(|elem| {
+                    let LocalTerm::Integer(int) = elem else { return false };
+                    let Ok(int): Result<u32, _> = int.try_into() else { return false };
+                    int >= 32 && int <= 127
+                });
+
+                if is_probable_charlist {
+                    write!(f, "\"")?;
+                    for char in elements.iter() {
+                        // validity of this data was ensured beforehand
+                        let LocalTerm::Integer(char) = char else { panic!("invariant violated") };
+                        let Ok(char): Result<u32, _> = char.try_into() else { panic!("invariant violated") };
+                        let char = core::char::from_u32(char).expect("invariant violated");
+                        write!(f, "{char}")?;
+                    }
+                    write!(f, "\"")
+                } else {
+                    // normal (could be improper) list
+                    write!(f, "[")?;
+                    for (idx, elem) in elements.iter().enumerate() {
+                        write!(f, "{elem:?}")?;
+                        if idx != elements.len() - 1 {
+                            write!(f, ", ")?;
+                        }
+                    }
+                    if let Some(tail) = improper_tail {
+                        write!(f, " | {tail:?}")?;
+                    }
+                    write!(f, "]")
+                }
+            },
+            LocalTerm::Map(ref map) => write!(f, "{map:?}"),
+        }
+    }
+}
+
+impl LocalTerm {
+    pub fn nil() -> LocalTerm {
+        Self::List(Box::new([]), None)
+    }
+}
+
+impl From<BigInt> for LocalTerm {
+    fn from(value: BigInt) -> Self {
+        Self::Integer(value)
+    }
+}
+impl From<isize> for LocalTerm {
+    fn from(value: isize) -> Self {
+        Self::Integer(value.into())
+    }
+}
+
+impl From<LocalAtomRef> for LocalTerm {
+    fn from(value: LocalAtomRef) -> Self {
+        Self::Atom(value)
+    }
+}
+
+impl From<Eid> for LocalTerm {
+    fn from(value: Eid) -> Self {
+        if value.1 >= PORT_START {
+            Self::Port(value)
+        } else {
+            Self::Pid(value)
+        }
+    }
+}
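+
+// Illustrative conversions enabled by the impls above (a sketch; it assumes
+// `Eid(0, 1)` can be constructed directly and that 1 is below `PORT_START`):
+//
+//     let n: LocalTerm = 42isize.into();                // Integer(42)
+//     let list = LocalTerm::List(Box::new([n]), None);  // the proper list [42]
+//     let pid: LocalTerm = Eid(0, 1).into();            // Pid(..), not Port(..)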
+
+#[derive(Clone, Copy, PartialEq, Eq, FromRepr, Debug)]
+pub enum EtfTag {
+    SmallInteger = 97,
+    Integer = 98,
+    Float = 99,
+    Port = 102,
+    NewPort = 89,
+    V4Port = 120,
+    Pid = 103,
+    NewPid = 88,
+    SmallTuple = 104,
+    LargeTuple = 105,
+    Map = 116,
+    Nil = 106,
+    String = 107,
+    List = 108,
+    Binary = 109,
+    SmallBigint = 110,
+    LargeBigint = 111,
+    NewReference = 114,
+    NewerReference = 90,
+    Fun = 117,
+    NewFun = 112,
+    Export = 113,
+    BitBinary = 77,
+    NewFloat = 70,
+    AtomUtf8 = 118,
+    SmallAtomUtf8 = 119,
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum TermError {
+    /// A feature is not implemented
+    NotImplemented,
+    /// An ETF opcode is not implemented
+    EtfNotImplemented(EtfTag),
+    /// Invalid data in ETF encoding
+    FormatError,
+    /// Term was not of expected type when deconstructing
+    TypeError,
+    /// Term had an unexpected length when deconstructing
+    LengthError,
+    /// Tuple had unexpected tag when deconstructing
+    TagError,
+    /// No value associated with given key in proplist
+    KeyError,
+}
+
+/// Convenience functions for deconstructing terms
+impl LocalTerm {
+    /// Returns a slice representing a tuple
+    /// ```
+    /// let term = make_term!({1, 2});
+    /// let [one, two] = term.get_tuple()?;
+    /// assert_eq!(*one, LocalTerm::Integer(1.into()));
+    /// assert_eq!(*two, LocalTerm::Integer(2.into()));
+    /// ```
+    pub fn get_tuple<const S: usize>(&self) -> Result<&[LocalTerm; S], TermError> {
+        if let Self::Tuple(vec) = self {
+            vec.as_slice().try_into().map_err(|_: TryFromSliceError| TermError::LengthError)
+        } else {
+            Err(TermError::TypeError)
+        }
+    }
+
+    /// Returns a slice representing the fields of a tagged tuple
+    /// ```
+    /// let term = make_term!({ok, 1});
+    /// let [one] = term.get_tagged_tuple("ok", &context)?;
+    /// assert_eq!(*one, LocalTerm::Integer(1.into()));
+    /// ```
+    pub fn get_tagged_tuple<const S: usize>(&self, tag: &str, context: &LocalContext) -> Result<&[LocalTerm; S], TermError>
+        where [(); S + 1]: Sized
+    {
+        let tuple: &[LocalTerm; S + 1] = match self.get_tuple() {
+            Ok(t) => t,
+            Err(e) => return Err(e),
+        };
+
+        let expected_tag = context.atom_table.get_existing_atom(tag)
+            .ok_or(TermError::TagError)?; // if the tag that we're looking for is not in the atom table,
+                                          // it's not in any possible term (including the tag of our tuple)
+        let tag = tuple[0].get_atom()?;
+        if tag != expected_tag {
+            return Err(TermError::TagError);
+        }
+
+        return Ok((&tuple[1 .. tuple.len()]).try_into().unwrap());
+    }
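+
+    // Illustrative behaviour of the two accessors above (a sketch; assumes a
+    // `LocalContext` named `ctx` whose atom table already contains `ok`):
+    //
+    //     make_term!({ok, 1}).get_tuple::<3>()                     // Err(LengthError)
+    //     make_term!(5isize).get_tuple::<1>()                      // Err(TypeError)
+    //     make_term!({error, 1}).get_tagged_tuple::<1>("ok", &ctx) // Err(TagError)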
+
+    /// Returns an atom reference if the term is an atom
+    pub fn get_atom(&self) -> Result<LocalAtomRef, TermError> {
+        if let Self::Atom(atom) = self {
+            Ok(atom.clone())
+        } else {
+            Err(TermError::TypeError)
+        }
+    }
+
+    /// Returns a slice representing the elements of a proper list
+    pub fn get_list(&self) -> Result<&[LocalTerm], TermError> {
+        if let Self::List(elements, None) = self {
+            Ok(&**elements)
+        } else {
+            Err(TermError::TypeError)
+        }
+    }
+
+    /// Returns the value associated with a key in a proplist
+    /// ```
+    /// let term = make_term!([{key1, 1}, {key2, 2}]);
+    /// let two = term.get_proplist_value("key2", &context)?;
+    /// assert_eq!(*two, LocalTerm::Integer(2.into()));
+    /// ```
+    pub fn get_proplist_value(&self, key: &str, context: &LocalContext) -> Result<&LocalTerm, TermError> {
+        let list = self.get_list()?;
+        let tag = context.atom_table.get_existing_atom(key)
+            .ok_or(TermError::KeyError)?; // if the key that we're looking for is not in the atom table,
+                                          // it's not in any possible term (including the one we're searching through)
+        for element in list.iter() {
+            let [key, value] = element.get_tuple()?;
+            let key = key.get_atom()?;
+            if key == tag {
+                return Ok(value);
+            }
+        }
+        Err(TermError::KeyError)
+    }
+
+    /// Returns the string value that a charlist represents
+    pub fn get_charlist(&self) -> Result<Box<str>, TermError> {
+        let chars = self.get_list()?;
+        let mut string = String::new();
+        for element in chars {
+            let LocalTerm::Integer(int) = element else { return Err(TermError::TypeError) };
+            let int = int.try_into().map_err(|_| TermError::TypeError)?;
+            let char = core::char::from_u32(int).ok_or(TermError::TypeError)?;
+            string.push(char);
+        }
+        Ok(string.into_boxed_str())
+    }
+}
+
+/// Constructs a term using the Erlang syntax
+/// ```
+/// let first_term = make_term!({hello, world});
+/// let term = make_term!({some_atom, 1, (@first_term)});
+/// assert_eq!(term, LocalTerm::Tuple(vec![
+///     LocalTerm::atom("some_atom"),
+///     LocalTerm::Integer(1.into()),
+///     LocalTerm::Tuple(vec![
+///         LocalTerm::atom("hello"),
+///         LocalTerm::atom("world"),
+///     ]),
+/// ]));
+/// ```
+#[macro_export]
+macro_rules! make_term {
+    ((@$sub_term:expr)) => { $sub_term };
+    ($atom:ident) => {
+        { LocalTerm::atom(stringify!($atom)) }
+    };
+    ($literal:literal) => {
+        {
+            let term: LocalTerm = $literal.into();
+            term
+        }
+    };
+    ({$element:tt}) => {
+        {
+            use alloc::vec;
+            LocalTerm::Tuple(vec![make_term!($element)])
+        }
+    };
+    ({$first:tt, $($rest:tt),+}) => {
+        {
+            use alloc::vec;
+            let mut vector = vec![make_term!($first)];
+            let LocalTerm::Tuple(mut rest) = make_term!({$($rest),+}) else {
+                panic!("internal error: make_term!({{...}}) returned something other than LocalTerm::Tuple")
+            };
+            vector.append(&mut rest);
+            LocalTerm::Tuple(vector)
+        }
+    };
+}
+
+/// ETF (External Term Format, the standard binary serialization format in the
+/// BEAM world) implementation
+impl LocalTerm {
+    /// Creates a [LocalTerm] from its ETF (External Term Format)
+    /// representation. Data is passed as a slice of bytes.
+    pub fn from_etf(etf: &[u8], context: &mut LocalContext) -> Result<LocalTerm, TermError> {
+        let mut cursor = Cursor::new(etf);
+        Self::from_etf_cursor(&mut cursor, context)
+    }
+
+    /// Creates a [LocalTerm] from its ETF (External Term Format)
+    /// representation. Data is passed as a cursor.
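+    ///
+    /// A rough usage sketch: the bytes below are the version tag (131)
+    /// followed by a `SMALL_ATOM_UTF8_EXT` payload, and decode to the atom
+    /// `ok`:
+    /// ```
+    /// let mut ctx = LocalContext::default();
+    /// let mut cursor = Cursor::new(&[131, 119, 2, b'o', b'k']);
+    /// let term = LocalTerm::from_etf_cursor(&mut cursor, &mut ctx)?;
+    /// assert_eq!(term.get_atom()?, ctx.atom_table.get_or_make_atom("ok"));
+    /// ```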
+    pub fn from_etf_cursor(cursor: &mut Cursor<'_>, context: &mut LocalContext) -> Result<LocalTerm, TermError> {
+        // specification: https://www.erlang.org/doc/apps/erts/erl_ext_dist.html
+
+        // read tag, skip optional header
+        let mut tag = cursor.read_u8();
+        if tag == 131 {
+            tag = cursor.read_u8();
+        }
+
+        match EtfTag::from_repr(tag as usize) {
+            Some(EtfTag::SmallTuple) => {
+                let arity = cursor.read_u8() as usize;
+                let mut elements = Vec::with_capacity(arity);
+                for _ in 0..arity {
+                    elements.push(Self::from_etf_cursor(cursor, context)?);
+                }
+                Ok(LocalTerm::Tuple(elements))
+            },
+
+            Some(EtfTag::Nil) => {
+                Ok(LocalTerm::List(Box::new([]), None))
+            },
+
+            Some(EtfTag::String) => {
+                let length = cursor.read_u16_be() as usize;
+                let mut characters = Vec::with_capacity(length);
+                for _ in 0..length {
+                    characters.push(LocalTerm::Integer(cursor.read_u8().into()));
+                }
+                Ok(LocalTerm::List(characters.into_boxed_slice(), None))
+            },
+
+            Some(EtfTag::List) => {
+                let length = cursor.read_u32_be() as usize;
+                let mut elements = Vec::with_capacity(length);
+                for _ in 0..length {
+                    elements.push(Self::from_etf_cursor(cursor, context)?);
+                }
+                let improper_tail = Self::from_etf_cursor(cursor, context)?;
+                let improper_tail = match improper_tail {
+                    LocalTerm::List(elements, None) if elements.is_empty() => None,
+                    term => Some(Box::new(term)),
+                };
+                Ok(LocalTerm::List(elements.into_boxed_slice(), improper_tail))
+            },
+
+            Some(EtfTag::Binary) => {
+                let length = cursor.read_u32_be() as usize;
+                let data = cursor.read_slice(length);
+                Ok(LocalTerm::BitString(length * 8, data.to_vec()))
+            },
+
+            Some(EtfTag::SmallAtomUtf8) => {
+                let length = cursor.read_u8() as usize;
+                let string = cursor.read_utf8(length).map_err(|_| TermError::FormatError)?;
+                Ok(LocalTerm::Atom(context.atom_table.get_or_make_atom(string)))
+            },
+
+            None => Err(TermError::FormatError),
+            Some(tag) => Err(TermError::EtfNotImplemented(tag)),
+        }
+    }
+}
+
+// TODO: NodeTerm
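+
+// The integer opcodes currently fall through to `EtfNotImplemented`. A sketch
+// of how the match above could handle them, following the external term format
+// spec (an assumption, not part of this commit):
+//
+//     Some(EtfTag::SmallInteger) => Ok(LocalTerm::Integer(cursor.read_u8().into())),
+//     Some(EtfTag::Integer) => Ok(LocalTerm::Integer((cursor.read_u32_be() as i32).into())),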