diff --git a/Cargo.toml b/Cargo.toml
index bc21ff68..cf02c73c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,7 @@ members = [
     "crates/ir",
     "crates/codegen",
     "crates/object",
-    "crates/parser2",
+    "crates/parser",
     "crates/filecheck",
     "crates/triple",
     "crates/interpreter",
diff --git a/crates/filecheck/Cargo.toml b/crates/filecheck/Cargo.toml
index d2e398d4..b4c751b9 100644
--- a/crates/filecheck/Cargo.toml
+++ b/crates/filecheck/Cargo.toml
@@ -12,6 +12,6 @@ publish = false
 filecheck = "0.5.0" # { path = "/Users/sean/src/filecheck" }
 sonatina-ir = { path = "../ir" }
 sonatina-codegen = { path = "../codegen" }
-sonatina-parser2 = { path = "../parser2" }
+sonatina-parser = { path = "../parser" }
 termcolor = "1.1.2"
 walkdir = "2"
diff --git a/crates/filecheck/src/lib.rs b/crates/filecheck/src/lib.rs
index 29aac49c..c18a31ee 100644
--- a/crates/filecheck/src/lib.rs
+++ b/crates/filecheck/src/lib.rs
@@ -13,7 +13,7 @@ use std::{
 
 use sonatina_ir::{ir_writer::FuncWriter, module::FuncRef, Function};
 
-use sonatina_parser2::{parse_module, ParsedModule};
+use sonatina_parser::{parse_module, ParsedModule};
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
 use walkdir::WalkDir;
diff --git a/crates/interpreter/Cargo.toml b/crates/interpreter/Cargo.toml
index 62254c3f..f679d9ab 100644
--- a/crates/interpreter/Cargo.toml
+++ b/crates/interpreter/Cargo.toml
@@ -19,4 +19,4 @@ cranelift-entity = "0.104"
 sonatina-ir = { path = "../ir", version = "0.0.3-alpha" }
 
 [dev-dependencies]
-sonatina-parser2 = { path = "../parser2" }
+sonatina-parser = { path = "../parser" }
diff --git a/crates/interpreter/src/state.rs b/crates/interpreter/src/state.rs
index 4541b640..63d36034 100644
--- a/crates/interpreter/src/state.rs
+++ b/crates/interpreter/src/state.rs
@@ -273,7 +273,7 @@ mod test {
     use super::*;
 
     fn parse_module(input: &str) -> Module {
-        match sonatina_parser2::parse_module(input) {
+        match sonatina_parser::parse_module(input) {
             Ok(pm) => pm.module,
             Err(errs) => {
                 for err in errs {
diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml
index 73758117..fa9954c1 100644
--- a/crates/parser/Cargo.toml
+++ b/crates/parser/Cargo.toml
@@ -11,10 +11,22 @@ description = "Parser for sonatina-ir text format"
 categories = ["compilers", "parser", "wasm"]
 keywords = ["compiler", "evm", "wasm", "smart-contract"]
 
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
 [dependencies]
-sonatina-ir = { path = "../ir", version = "0.0.3-alpha" }
+ir = { package = "sonatina-ir", path = "../ir", version = "0.0.3-alpha" }
 sonatina-triple = { path = "../triple", version = "0.0.3-alpha" }
 smallvec = "1.7.0"
 cranelift-entity = "0.104"
+pest = "2.7.10"
+pest_derive = "2.7.10"
+pest-ast = "0.3.4"
+from-pest = "0.3.2"
+smol_str = "0.2.2"
+hex = "0.4.3"
+num-traits = { version = "0.2.19", default-features = false }
+either = { version = "1.12.0", default-features = false }
+annotate-snippets = "0.11.4"
+
+[dev-dependencies]
+dir-test = { git = "https://github.com/sbillig/dir-test", rev = "c4115dd" }
+insta = { version = "1.38.0" }
+indenter = "0.3.3"
diff --git a/crates/parser2/src/ast.rs b/crates/parser/src/ast.rs
similarity index 98%
rename from crates/parser2/src/ast.rs
rename to crates/parser/src/ast.rs
index e1ee2216..27d71956 100644
--- a/crates/parser2/src/ast.rs
+++ b/crates/parser/src/ast.rs
@@ -1,4 +1,4 @@
-use super::syntax::Node;
+use super::{syntax::Node, Error};
 use crate::syntax::{FromSyntax, Parser, Rule};
 use annotate_snippets::{Level, Renderer,
Snippet}; use either::Either; @@ -13,14 +13,6 @@ use smol_str::SmolStr; pub use sonatina_triple::{InvalidTriple, TargetTriple}; use std::{io, ops::Range, str::FromStr}; -#[derive(Debug)] -#[allow(clippy::large_enum_variant)] -pub enum Error { - NumberOutOfBounds(Range), - InvalidTarget(InvalidTriple, Range), - SyntaxError(pest::error::Error), -} - pub fn parse(input: &str) -> Result> { pest::set_error_detail(true); // xxx diff --git a/crates/parser/src/lexer.rs b/crates/parser/src/lexer.rs deleted file mode 100644 index e2f09d82..00000000 --- a/crates/parser/src/lexer.rs +++ /dev/null @@ -1,793 +0,0 @@ -use std::fmt; - -use sonatina_ir::{insn::DataLocationKind, Linkage, Type}; - -use super::{Error, ErrorKind, Result}; - -pub(super) struct Lexer<'a> { - input: &'a [u8], - peek: Option>>, - cur: usize, - line: u32, -} - -macro_rules! try_eat_variant { - ( - $self:ident, - ($first_name:expr, $first_code:expr), - $(($name:expr, $code:expr),)* - ) => { - if $self.eat_string_if($first_name).is_some() { - Some($first_code) - } $(else if $self.eat_string_if($name).is_some() { - Some($code) - })* else { - None - } - } -} - -impl<'a> Lexer<'a> { - pub(super) fn new(input: &'a str) -> Self { - debug_assert!(input.is_ascii()); - - Self { - input: input.as_bytes(), - peek: None, - cur: 0, - line: 1, - } - } - - pub(super) fn next_token(&mut self) -> Result>>> { - self.peek_token()?; - Ok(self.peek.take()) - } - - pub(super) fn peek_token(&mut self) -> Result>>> { - if self.peek.is_some() { - return Ok(self.peek.as_ref()); - } - - while let Some(c) = self.eat_char_if(|c| c.is_whitespace() || c.is_ascii_control()) { - if c == '\n' { - self.line += 1; - } - } - - if self.peek_char().is_none() { - return Ok(None); - } - - let token = if self.eat_char_if(|c| c == ':').is_some() { - Token::Colon - } else if self.eat_char_if(|c| c == ';').is_some() { - Token::SemiColon - } else if self.eat_char_if(|c| c == ',').is_some() { - Token::Comma - } else if self.eat_char_if(|c| c == '(').is_some() { - Token::LParen - } else if self.eat_char_if(|c| c == ')').is_some() { - Token::RParen - } else if self.eat_char_if(|c| c == '[').is_some() { - Token::LBracket - } else if self.eat_char_if(|c| c == ']').is_some() { - Token::RBracket - } else if self.eat_char_if(|c| c == '{').is_some() { - Token::LBrace - } else if self.eat_char_if(|c| c == '}').is_some() { - Token::RBrace - } else if self.eat_char_if(|c| c == '<').is_some() { - Token::LAngleBracket - } else if self.eat_char_if(|c| c == '>').is_some() { - Token::RAngleBracket - } else if self.eat_char_if(|c| c == '=').is_some() { - Token::Eq - } else if self.eat_char_if(|c| c == '.').is_some() { - Token::Dot - } else if self.eat_char_if(|c| c == '*').is_some() { - Token::Star - } else if self.eat_char_if(|c| c == '@').is_some() { - let loc = if self.eat_string_if(b"memory").is_some() { - DataLocationKind::Memory - } else if self.eat_string_if(b"storage").is_some() { - DataLocationKind::Storage - } else { - return Err(self.invalid_token()); - }; - Token::DataLocationKind(loc) - } else if self.eat_char_if(|c| c == '#').is_some() { - let is_module = self.eat_char_if(|c| c == '!').is_some(); - let start = self.cur; - while self.eat_char_if(|c| c != '\n').is_some() {} - let end = self.cur; - let comment = self.str_slice(start, end); - if is_module { - Token::ModuleComment(comment) - } else { - Token::FuncComment(comment) - } - } else if self.eat_char_if(|c| c == '%').is_some() { - if let Some(ident) = self.try_eat_ident() { - Token::Ident(ident) - } else { - return 
Err(self.invalid_token()); - } - } else if self.eat_char_if(|c| c == '"').is_some() { - self.eat_string_lit()? - } else if self.eat_string_if(b"target").is_some() { - Token::Target - } else if self.eat_string_if(b"func").is_some() { - Token::Func - } else if self.eat_string_if(b"gv").is_some() { - Token::Gv - } else if self.eat_string_if(b"const").is_some() { - Token::Const - } else if self.eat_string_if(b"declare").is_some() { - Token::Declare - } else if self.eat_string_if(b"public").is_some() { - Token::Linkage(Linkage::Public) - } else if self.eat_string_if(b"private").is_some() { - Token::Linkage(Linkage::Private) - } else if self.eat_string_if(b"external").is_some() { - Token::Linkage(Linkage::External) - } else if self.eat_string_if(b"undef").is_some() { - Token::Undef - } else if self.eat_string_if(b"type").is_some() { - Token::Type - } else if self.eat_string_if(b"->").is_some() { - Token::RArrow - } else if let Some(code) = self.try_eat_opcode() { - Token::OpCode(code) - } else if let Some(ty) = self.try_eat_base_ty() { - Token::BaseTy(ty) - } else if self.eat_string_if(b"block").is_some() { - if let Some(id) = self.try_eat_id() { - Token::Block(id) - } else { - return Err(self.invalid_token()); - } - } else if self.eat_string_if(b"v").is_some() { - if let Some(id) = self.try_eat_id() { - Token::Value(id) - } else { - return Err(self.invalid_token()); - } - } else if let Some(integer) = self.try_eat_integer() { - Token::Integer(integer) - } else { - return Err(self.invalid_token()); - }; - - self.peek = Some(WithLoc { - item: token, - line: self.line, - }); - Ok(self.peek.as_ref()) - } - - pub(super) fn line(&mut self) -> u32 { - self.line - } - - fn eat_char_if(&mut self, f: impl FnOnce(char) -> bool) -> Option { - match self.peek_char() { - Some(peek) if f(peek) => { - self.next_char(); - Some(peek) - } - _ => None, - } - } - - fn eat_string_if(&mut self, s: &[u8]) -> Option<&'a str> { - let start = self.cur; - let mut cur = self.cur; - for i in s { - if *i == self.input[cur] { - cur += 1; - } else { - return None; - } - } - - self.cur = cur; - Some(self.str_slice(start, cur)) - } - - fn eat_string_lit(&mut self) -> Result> { - let start = self.cur; - let mut cur = self.cur; - loop { - match self.input.get(cur) { - Some(c) => { - if *c == b'"' { - self.cur = cur + 1; - break; - } else { - cur += 1; - } - } - None => { - return Err(Error::new( - ErrorKind::SyntaxError("missing closing `\"`".into()), - self.line, - )) - } - } - } - - Ok(Token::String(self.str_slice(start, cur))) - } - - fn try_eat_opcode(&mut self) -> Option { - try_eat_variant! { - self, - (b"gep", Code::Gep), - (b"not", Code::Not), - (b"neg", Code::Neg), - (b"add", Code::Add), - (b"sub", Code::Sub), - (b"mul", Code::Mul), - (b"udiv", Code::Udiv), - (b"sdiv", Code::Sdiv), - (b"lt", Code::Lt), - (b"gt", Code::Gt), - (b"slt", Code::Slt), - (b"sgt", Code::Sgt), - (b"le", Code::Le), - (b"ge", Code::Ge), - (b"sle", Code::Sle), - (b"sge", Code::Sge), - (b"eq", Code::Eq), - (b"ne", Code::Ne), - (b"and", Code::And), - (b"or", Code::Or), - (b"xor", Code::Xor), - (b"sext", Code::Sext), - (b"zext", Code::Zext), - (b"bitcast", Code::BitCast), - (b"trunc", Code::Trunc), - (b"load", Code::Load), - (b"store", Code::Store), - (b"call", Code::Call), - (b"jump", Code::Jump), - (b"br_table", Code::BrTable), - (b"br", Code::Br), - (b"alloca", Code::Alloca), - (b"return", Code::Return), - (b"phi", Code::Phi), - } - } - - fn try_eat_base_ty(&mut self) -> Option { - try_eat_variant! 
{ - self, - (b"i8", Type::I8), - (b"i16", Type::I16), - (b"i32", Type::I32), - (b"i64", Type::I64), - (b"i128", Type::I128), - (b"i256", Type::I256), - (b"i1", Type::I1), - (b"void", Type::Void), - } - } - - fn try_eat_id(&mut self) -> Option { - let start = self.cur; - while self.eat_char_if(|c| c.is_ascii_digit()).is_some() {} - let end = self.cur; - self.str_slice(start, end).parse().ok() - } - - fn try_eat_ident(&mut self) -> Option<&'a str> { - let start = self.cur; - while self - .eat_char_if(|c| c.is_alphanumeric() || c == '_') - .is_some() - {} - let end = self.cur; - if start == end { - None - } else { - Some(self.str_slice(start, end)) - } - } - - fn try_eat_integer(&mut self) -> Option<&'a str> { - let start = self.cur; - self.eat_char_if(|c| c == '-'); - while self.eat_char_if(|c| c.is_ascii_digit()).is_some() {} - let end = self.cur; - - if start == end { - None - } else { - Some(self.str_slice(start, end)) - } - } - - fn next_char(&mut self) -> Option { - let next = self.peek_char(); - self.cur += 1; - next - } - - fn peek_char(&mut self) -> Option { - self.input.get(self.cur).map(|peek| *peek as char) - } - - fn str_slice(&self, start: usize, end: usize) -> &'a str { - unsafe { std::str::from_utf8_unchecked(&self.input[start..end]) } - } - - fn invalid_token(&mut self) -> Error { - let start = self.cur; - while self - .eat_char_if(|c| !c.is_whitespace() && !c.is_ascii_control()) - .is_some() - {} - let end = self.cur; - let invalid_token = self.str_slice(start, end); - Error::new( - ErrorKind::InvalidToken(invalid_token.to_string()), - self.line, - ) - } -} - -#[derive(Debug, Clone)] -pub(super) struct WithLoc { - pub(super) item: T, - pub(super) line: u32, -} - -#[derive(Debug, Clone)] -pub(super) enum Token<'a> { - Func, - Gv, - Const, - Declare, - Linkage(Linkage), - RArrow, - Colon, - SemiColon, - Comma, - LParen, - RParen, - LBracket, - RBracket, - LBrace, - RBrace, - LAngleBracket, - RAngleBracket, - Eq, - Dot, - Star, - Undef, - Type, - Target, - ModuleComment(&'a str), - FuncComment(&'a str), - Block(u32), - Value(u32), - Ident(&'a str), - String(&'a str), - DataLocationKind(DataLocationKind), - OpCode(Code), - BaseTy(Type), - Integer(&'a str), -} - -impl<'a> Token<'a> { - pub(super) fn id(&self) -> u32 { - match self { - Self::Block(id) | Self::Value(id) => *id, - _ => unreachable!(), - } - } - - pub(super) fn string(&self) -> &'a str { - match self { - Self::ModuleComment(s) - | Self::FuncComment(s) - | Self::Ident(s) - | Self::Integer(s) - | Self::String(s) => s, - _ => unreachable!(), - } - } - - pub(super) fn opcode(&self) -> Code { - if let Self::OpCode(code) = self { - *code - } else { - unreachable!() - } - } - - pub(super) fn ty(&self) -> Type { - if let Self::BaseTy(ty) = self { - *ty - } else { - unreachable!() - } - } -} - -impl<'a> fmt::Display for Token<'a> { - fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::Func => write!(w, "func"), - Self::Gv => write!(w, "gv"), - Self::Const => write!(w, "const"), - Self::Declare => write!(w, "declare"), - Self::Linkage(linkage) => write!(w, "{}", linkage), - Self::RArrow => write!(w, "=>"), - Self::Colon => write!(w, ":"), - Self::SemiColon => write!(w, ";"), - Self::Comma => write!(w, ","), - Self::LParen => write!(w, "("), - Self::RParen => write!(w, ")"), - Self::LBrace => write!(w, "{{"), - Self::RBrace => write!(w, "}}"), - Self::LBracket => write!(w, "["), - Self::RBracket => write!(w, "]"), - Self::LAngleBracket => write!(w, "<"), - Self::RAngleBracket => write!(w, ">"), - 
Self::Eq => write!(w, "="), - Self::DataLocationKind(loc) => { - write!(w, "@")?; - - match loc { - DataLocationKind::Memory => write!(w, "memory"), - DataLocationKind::Storage => write!(w, "storage"), - } - } - Self::Dot => write!(w, "."), - Self::Star => write!(w, "*"), - Self::Undef => write!(w, "undef"), - Self::Type => write!(w, "type"), - Self::Target => write!(w, "target"), - Self::String(s) => write!(w, "{}", s), - Self::ModuleComment(comment) => write!(w, "#!{}", comment), - Self::FuncComment(comment) => write!(w, "#{}", comment), - Self::Block(id) => write!(w, "block{}", id), - Self::Value(id) => write!(w, "v{}", id), - Self::Ident(ident) => write!(w, "%{}", ident), - Self::OpCode(code) => write!(w, "{}", code), - Self::BaseTy(_) => write!(w, "type"), - Self::Integer(num) => w.write_str(num), - } - } -} - -#[derive(Debug, Clone, Copy)] -pub(super) enum Code { - // Unary ops. - Not, - Neg, - - // Binary ops. - Add, - Sub, - Mul, - Udiv, - Sdiv, - Lt, - Gt, - Slt, - Sgt, - Le, - Ge, - Sle, - Sge, - Eq, - Ne, - And, - Or, - Xor, - - // Cast ops. - Sext, - Zext, - Trunc, - BitCast, - - Load, - Store, - - // Function Call ops. - Call, - - // Jump ops. - Jump, - - // Branch ops. - Br, - BrTable, - - Gep, - - Alloca, - - Return, - - Phi, -} - -impl fmt::Display for Code { - fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { - use Code::*; - - let s = match self { - Not => "not", - Neg => "neg", - Add => "add", - Sub => "sub", - Mul => "mul", - Udiv => "udiv", - Sdiv => "sdiv", - BitCast => "bitcast", - Lt => "lt", - Gt => "gt", - Slt => "slt", - Sgt => "sgt", - Le => "le", - Ge => "ge", - Sle => "sle", - Sge => "sge", - Eq => "eq", - Ne => "ne", - And => "and", - Or => "or", - Xor => "xor", - Sext => "sext", - Zext => "zext", - Trunc => "trunc", - Load => "load", - Store => "store", - Call => "call", - Jump => "jump", - Gep => "gep", - Alloca => "alloca", - Br => "br", - BrTable => "br_table", - Return => "return", - Phi => "phi", - }; - - w.write_str(s) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn lexer_with_return() { - let input = "func private %test_func() -> i32, i64: - block0: - return 311.i32 -120.i64;"; - let mut lexer = Lexer::new(input); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Func - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Linkage(Linkage::Private), - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Ident("test_func") - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::LParen - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::RParen - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::RArrow - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I32) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Comma - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Colon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Block(0) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Colon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::OpCode(Code::Return) - )); - - assert!(matches!( - 
lexer.next_token().ok().flatten().unwrap().item, - Token::Integer("311") - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Dot - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I32) - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Integer("-120") - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Dot - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::SemiColon - )); - - assert!(lexer.next_token().unwrap().is_none()); - } - - #[test] - fn lexer_with_arg() { - let input = "func public %test_func(i32, i64): - block0: - v2.i64 = sext v0; - v3.i64 = mul v2 v1; - return; -"; - let mut lexer = Lexer::new(input); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Func - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Linkage(Linkage::Public) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Ident("test_func") - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::LParen - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I32) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Comma - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::RParen - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Colon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Block(0) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Colon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(2) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Dot - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Eq - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::OpCode(Code::Sext) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(0) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::SemiColon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(3) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Dot - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::BaseTy(Type::I64) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Eq - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::OpCode(Code::Mul) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(2) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::Value(1) - )); - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::SemiColon - )); - - assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::OpCode(Code::Return) - )); - 
assert!(matches!( - lexer.next_token().ok().flatten().unwrap().item, - Token::SemiColon - )); - - assert!(lexer.next_token().unwrap().is_none()); - } -} diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 2edd94fc..ba13ba2d 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -1,27 +1,256 @@ -//! This crate provides a parser for sonatina-IR text format. -//! The text format is mainly used for debugging and testing. +use std::ops::Range; -pub mod parser; +use ast::ValueDeclaration; +use cranelift_entity::SecondaryMap; +use ir::{ + self, + builder::{FunctionBuilder, ModuleBuilder}, + func_cursor::{CursorLocation, FuncCursor, InsnInserter}, + isa::IsaBuilder, + module::{FuncRef, ModuleCtx}, + Module, Signature, +}; +use sonatina_triple::InvalidTriple; +use syntax::Rule; -mod lexer; - -pub type Result = std::result::Result; +pub mod ast; +pub mod syntax; #[derive(Debug)] -pub struct Error { - pub kind: ErrorKind, - pub line: u32, +#[allow(clippy::large_enum_variant)] +pub enum Error { + NumberOutOfBounds(Range), + InvalidTarget(InvalidTriple, Range), + SyntaxError(pest::error::Error), } -impl Error { - pub fn new(kind: ErrorKind, line: u32) -> Self { - Self { kind, line } +pub fn parse_module(input: &str) -> Result> { + let ast = ast::parse(input)?; + + let isa = IsaBuilder::new(ast.target.unwrap()).build(); // xxx + let ctx = ModuleCtx::new(isa); + let mut builder = ModuleBuilder::new(ctx); + + for st in ast.struct_types { + let fields = st + .fields + .iter() + .map(|t| build_type(&mut builder, t)) + .collect::>(); + builder.declare_struct_type(&st.name.0, &fields, false); + } + + for func in ast.declared_functions { + let params = func + .params + .iter() + .map(|t| build_type(&mut builder, t)) + .collect::>(); + let ret_ty = func + .ret_type + .as_ref() + .map(|t| build_type(&mut builder, t)) + .unwrap_or(ir::Type::Void); + + let sig = Signature::new(&func.name.0, func.linkage, ¶ms, ret_ty); + builder.declare_function(sig); + } + + for func in ast.functions.iter() { + let sig = &func.signature; + let args = sig + .params + .iter() + .map(|decl| build_type(&mut builder, &decl.1)) + .collect::>(); + + let ret_ty = sig + .ret_type + .as_ref() + .map(|t| build_type(&mut builder, t)) + .unwrap_or(ir::Type::Void); + let sig = Signature::new(&sig.name.0, sig.linkage, &args, ret_ty); + + builder.declare_function(sig); + } + + let mut func_comments = SecondaryMap::default(); + + for func in ast.functions { + let id = builder.get_func_ref(&func.signature.name.0).unwrap(); + let mut fb = builder.build_function(id); + build_func(&mut fb, &func); + fb.seal_all(); + builder = fb.finish(); + + func_comments[id] = func.comments; } + + let module = builder.build(); + Ok(ParsedModule { + module, + module_comments: ast.comments, + func_comments, + }) } -#[derive(Debug)] -pub enum ErrorKind { - InvalidToken(String), - SyntaxError(String), - SemanticError(String), +pub struct ParsedModule { + pub module: Module, + pub module_comments: Vec, + pub func_comments: SecondaryMap>, +} + +fn build_func(builder: &mut FunctionBuilder, func: &ast::Func) { + for (i, ValueDeclaration(name, _ty)) in func.signature.params.iter().enumerate() { + builder.name_value(builder.func.arg_values[i], &name.0); + } + + // "forward declare" all block ids + if let Some(max_block_id) = func.blocks.iter().map(|b| b.id.0.unwrap()).max() { + while builder.func.dfg.blocks.len() <= max_block_id as usize { + builder.cursor.make_block(&mut builder.func); + } + } + + for block in &func.blocks { + let block_id 
= ir::Block(block.id.0.unwrap()); + builder.cursor.append_block(&mut builder.func, block_id); + builder + .cursor + .set_location(CursorLocation::BlockTop(block_id)); + + for stmt in &block.stmts { + match &stmt.kind { + ast::StmtKind::Define(ValueDeclaration(val, ty), expr) => { + let ty = build_type(&mut builder.module_builder, ty); + + let result_val = match expr { + ast::Expr::Binary(op, lhs, rhs) => { + let lhs = build_value(builder, lhs); + let rhs = build_value(builder, rhs); + builder.binary_op(*op, lhs, rhs) + } + ast::Expr::Unary(op, val) => { + let val = build_value(builder, val); + builder.unary_op(*op, val) + } + ast::Expr::Cast(op, val) => { + let val = build_value(builder, val); + builder.cast_op(*op, val, ty) + } + ast::Expr::Load(location, addr) => { + let addr = build_value(builder, addr); + match location { + ir::DataLocationKind::Memory => builder.memory_load(addr), + ir::DataLocationKind::Storage => builder.storage_load(addr), + } + } + ast::Expr::Alloca(ty) => { + let ty = build_type(&mut builder.module_builder, ty); + builder.alloca(ty) + } + ast::Expr::Call(ast::Call(name, args)) => { + let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); + let args = args + .iter() + .map(|val| build_value(builder, val)) + .collect::>(); + builder.call(func_ref, &args).unwrap() + } + ast::Expr::Gep(vals) => { + let vals = vals + .iter() + .map(|val| build_value(builder, val)) + .collect::>(); + builder.gep(&vals).unwrap() + } + ast::Expr::Phi(vals) => { + let args = vals + .iter() + .map(|(val, block)| { + // xxx declare block + let b = ir::Block(block.0.unwrap()); + let v = build_value(builder, val); + (v, b) + }) + .collect::>(); + builder.phi(ty, &args) + } + }; + builder.name_value(result_val, &val.0) + } + ast::StmtKind::Store(loc, addr, val) => { + let addr = build_value(builder, addr); + let val = build_value(builder, val); + + match loc { + ir::DataLocationKind::Memory => builder.memory_store(addr, val), + ir::DataLocationKind::Storage => builder.storage_store(addr, val), + } + } + ast::StmtKind::Return(val) => { + let val = val.as_ref().map(|v| build_value(builder, v)); + builder.ret(val); + } + ast::StmtKind::Jump(block_id) => { + let block_id = ir::Block(block_id.0.unwrap()); + builder.jump(block_id); + } + ast::StmtKind::Branch(cond, true_block, false_block) => { + let cond = build_value(builder, cond); + let true_block = ir::Block(true_block.0.unwrap()); + let false_block = ir::Block(false_block.0.unwrap()); + builder.br(cond, true_block, false_block); + } + ast::StmtKind::BranchTable(index, default_block, table) => { + let index = build_value(builder, index); + let default_block = default_block.as_ref().map(|b| ir::Block(b.0.unwrap())); + let table = table + .iter() + .map(|(val, block)| { + (build_value(builder, val), ir::Block(block.0.unwrap())) + }) + .collect::>(); + builder.br_table(index, default_block, &table); + } + ast::StmtKind::Call(ast::Call(name, args)) => { + let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap(); + let args = args + .iter() + .map(|val| build_value(builder, val)) + .collect::>(); + builder.call(func_ref, &args).unwrap(); + } + } + } + } +} + +fn build_value(builder: &mut FunctionBuilder, val: &ast::Value) -> ir::Value { + match val { + ast::Value::Immediate(imm) => builder.make_imm_value(*imm), + ast::Value::Named(v) => builder.get_named_value(&v.0), + ast::Value::Error => unreachable!(), + } +} + +fn build_type(builder: &mut ModuleBuilder, t: &ast::Type) -> ir::Type { + match t { + ast::Type::Int(i) => 
(*i).into(), + ast::Type::Ptr(t) => { + let t = build_type(builder, t); + builder.ptr_type(t) + } + ast::Type::Array(t, n) => { + let elem = build_type(builder, t); + builder.declare_array_type(elem, *n) + } + ast::Type::Void => ir::Type::Void, + ast::Type::Struct(name) => builder.get_struct_type(name).unwrap_or_else(|| { + // xxx error on undeclared struct + eprintln!("struct type not found: {name}"); + ir::Type::Void + }), + ast::Type::Error => todo!(), + } } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs deleted file mode 100644 index 6365ee63..00000000 --- a/crates/parser/src/parser.rs +++ /dev/null @@ -1,1138 +0,0 @@ -// TODO: Refactor and refactor and refactor!!! -use std::collections::HashSet; - -use cranelift_entity::SecondaryMap; -use smallvec::smallvec; - -use sonatina_ir::{ - builder::ModuleBuilder, - func_cursor::{CursorLocation, FuncCursor}, - global_variable::ConstantValue, - insn::{BinaryOp, CastOp, DataLocationKind, UnaryOp}, - isa::IsaBuilder, - module::{FuncRef, ModuleCtx}, - Block, BlockData, Function, GlobalVariableData, Immediate, Insn, InsnData, Linkage, Module, - Signature, Type, Value, ValueData, I256, U256, -}; -use sonatina_triple::TargetTriple; - -use super::{ - lexer::{Code, Lexer, Token, WithLoc}, - Error, ErrorKind, Result, -}; - -#[derive(Default)] -pub struct Parser {} - -macro_rules! eat_token { - ($lexer:expr, $token:pat) => { - if matches!($lexer.peek_token()?, Some(WithLoc { item: $token, .. })) { - Ok(Some($lexer.next_token()?.unwrap().item)) - } else { - Ok(None) - } - }; -} - -macro_rules! expect_token { - ($lexer:expr, $token:pat, $expected:expr) => { - if let Some(tok) = eat_token!($lexer, $token)? { - Ok(tok) - } else { - let (tok, line) = match $lexer.next_token()? { - Some(tok) => ((tok.item.to_string(), tok.line)), - None => (("EOF".to_string(), $lexer.line())), - }; - Err(Error::new( - ErrorKind::SyntaxError(format!("expected `{}`, but got `{}`", $expected, tok)), - line, - )) - } - }; -} - -impl Parser { - pub fn parse(self, input: &str) -> Result { - let mut lexer = Lexer::new(input); - - // Parse comments. - let mut module_comments = Vec::new(); - while let Some(WithLoc { - item: Token::ModuleComment(comment), - .. - }) = lexer.peek_token()? - { - module_comments.push(comment.to_string()); - lexer.next_token()?; - } - - // Parse target triple. - let triple = self.parse_target_triple(&mut lexer)?; - let isa = IsaBuilder::new(triple).build(); - let ctx = ModuleCtx::new(isa); - - let mut module_builder = ModuleBuilder::new(ctx); - - // Parse declared struct types. - while eat_token!(lexer, Token::Type)?.is_some() { - let name = expect_token!(lexer, Token::Ident(_), "type name")?.string(); - expect_token!(lexer, Token::Eq, "=")?; - let packed = eat_token!(lexer, Token::LAngleBracket)?.is_some(); - expect_token!(lexer, Token::LBrace, "{")?; - - let mut fields = vec![]; - if eat_token!(lexer, Token::RBrace)?.is_none() { - loop { - let ty = expect_ty(&module_builder.ctx, &mut lexer)?; - fields.push(ty); - if eat_token!(lexer, Token::RBrace)?.is_some() { - break; - } - expect_token!(lexer, Token::Comma, ",")?; - } - } - if packed { - expect_token!(lexer, Token::RAngleBracket, ">")?; - } - expect_token!(lexer, Token::SemiColon, ";")?; - - module_builder.declare_struct_type(name, &fields, packed); - } - - // Parse global variables. 
- while eat_token!(lexer, Token::Gv)?.is_some() { - let linkage = expect_linkage(&mut lexer)?; - let is_const = eat_token!(lexer, Token::Const)?.is_some(); - let symbol = expect_token!(lexer, Token::Ident(_), "global variable name")?.string(); - expect_token!(lexer, Token::Colon, ":")?; - let ty = expect_ty(&module_builder.ctx, &mut lexer)?; - - let init = eat_token!(lexer, Token::Eq)? - .map(|_| { - let init = expect_constant(&module_builder.ctx, &mut lexer, ty)?; - Ok(init) - }) - .transpose()?; - - expect_token!(lexer, Token::SemiColon, ";")?; - let gv_data = GlobalVariableData::new(symbol.to_string(), ty, linkage, is_const, init); - module_builder.make_global(gv_data); - } - - // Parse declared functions. - while eat_token!(lexer, Token::Declare)?.is_some() { - let sig = self.parse_declared_func_sig(&module_builder.ctx, &mut lexer)?; - expect_token!(lexer, Token::SemiColon, ";")?; - module_builder.declare_function(sig); - } - - // Parse functions. - let mut func_comments = SecondaryMap::default(); - while let Some(parsed_func) = FuncParser::new(&mut lexer, &mut module_builder).parse()? { - let func_ref = parsed_func.func_ref; - func_comments[func_ref] = parsed_func.comments; - } - - Ok(ParsedModule { - module: module_builder.build(), - module_comments, - func_comments, - }) - } - - fn parse_target_triple(&self, lexer: &mut Lexer) -> Result { - expect_token!(lexer, Token::Target, "target")?; - expect_token!(lexer, Token::Eq, "=")?; - let triple = expect_token!(lexer, Token::String(..), "target triple")?.string(); - - TargetTriple::parse(triple) - .map_err(|e| Error::new(ErrorKind::SemanticError(format!("{}", e)), lexer.line())) - } - - fn parse_declared_func_sig(&self, ctx: &ModuleCtx, lexer: &mut Lexer) -> Result { - let linkage = expect_linkage(lexer)?; - let name = expect_token!(lexer, Token::Ident(..), "func name")?.string(); - - // Parse argument types. - expect_token!(lexer, Token::LParen, "(")?; - let mut args = vec![]; - if eat_token!(lexer, Token::RParen)?.is_none() { - let ty = expect_ty(ctx, lexer)?; - args.push(ty); - while eat_token!(lexer, Token::RParen)?.is_none() { - expect_token!(lexer, Token::Comma, ",")?; - let ty = expect_ty(ctx, lexer)?; - args.push(ty); - } - } - - // Parse return type. - expect_token!(lexer, Token::RArrow, "->")?; - let ret_ty = expect_ty(ctx, lexer)?; - - Ok(Signature::new(name, linkage, &args, ret_ty)) - } -} - -pub struct ParsedModule { - pub module: Module, - pub module_comments: Vec, - pub func_comments: SecondaryMap>, -} - -struct ParsedFunction { - func_ref: FuncRef, - comments: Vec, -} - -struct FuncParser<'a, 'b> { - lexer: &'b mut Lexer<'a>, - module_builder: &'b mut ModuleBuilder, -} - -impl<'a, 'b> FuncParser<'a, 'b> { - fn new(lexer: &'b mut Lexer<'a>, module_builder: &'b mut ModuleBuilder) -> Self { - Self { - lexer, - module_builder, - } - } - - fn parse(&mut self) -> Result> { - if self.lexer.peek_token()?.is_none() { - return Ok(None); - } - - let comments = self.parse_comment()?; - expect_token!(self.lexer, Token::Func, "func")?; - let linkage = expect_linkage(self.lexer)?; - - let fn_name = expect_token!(self.lexer, Token::Ident(..), "func name")?.string(); - - expect_token!(self.lexer, Token::LParen, "(")?; - // Use `Void` for dummy return type. - let sig = Signature::new(fn_name, linkage, &[], Type::Void); - let mut func = Function::new(&self.module_builder.ctx, sig); - let mut inserter = InsnInserter::at_location(CursorLocation::NoWhere); - - if let Some(value) = eat_token!(self.lexer, Token::Value(..))? 
{ - let value = Value(value.id()); - inserter.def_value(&mut func, value, self.lexer.line())?; - expect_token!(self.lexer, Token::Dot, "dot")?; - let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - inserter.append_arg_value(&mut func, value, ty); - - while eat_token!(self.lexer, Token::Comma)?.is_some() { - let value = Value(expect_token!(self.lexer, Token::Value(..), "value")?.id()); - inserter.def_value(&mut func, value, self.lexer.line())?; - expect_token!(self.lexer, Token::Dot, "dot")?; - let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - inserter.append_arg_value(&mut func, value, ty); - } - } - expect_token!(self.lexer, Token::RParen, ")")?; - - // Parse return type. - expect_token!(self.lexer, Token::RArrow, "->")?; - let ret_ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - func.sig.set_ret_ty(ret_ty); - expect_token!(self.lexer, Token::Colon, ":")?; - - let signature_line = self.lexer.line(); - self.parse_body(&mut func, &mut inserter)?; - - let func_ref = match self.module_builder.get_func_ref(func.sig.name()) { - Some(declared) if self.module_builder.sig(declared) == &func.sig => declared, - Some(_) => { - return Err(Error::new( - ErrorKind::SemanticError( - "signature mismatch with the corresponding declared function".to_string(), - ), - signature_line, - )) - } - None => self.module_builder.declare_function(func.sig.clone()), - }; - - std::mem::swap(&mut self.module_builder.funcs[func_ref], &mut func); - Ok(Some(ParsedFunction { func_ref, comments })) - } - - fn parse_body(&mut self, func: &mut Function, inserter: &mut InsnInserter) -> Result<()> { - while let Some(id) = eat_token!(self.lexer, Token::Block(..))? { - expect_token!(self.lexer, Token::Colon, ":")?; - self.parse_block_body(func, inserter, Block(id.id()))?; - } - - Ok(()) - } - - fn parse_block_body( - &mut self, - func: &mut Function, - inserter: &mut InsnInserter, - block: Block, - ) -> Result<()> { - inserter.def_block(func, block, self.lexer.line(), BlockData::default())?; - inserter.append_block(func, block); - inserter.set_location(CursorLocation::BlockTop(block)); - - loop { - if let Some(value) = eat_token!(self.lexer, Token::Value(..))? { - expect_token!(self.lexer, Token::Dot, ".")?; - let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - expect_token!(self.lexer, Token::Eq, "=")?; - let opcode = expect_token!(self.lexer, Token::OpCode(..), "opcode")?.opcode(); - let insn = opcode.make_insn(self, func, inserter, Some(ty))?; - let value = Value(value.id()); - inserter.def_value(func, value, self.lexer.line())?; - let result = func.dfg.make_result(insn).unwrap(); - func.dfg.values[value] = result; - func.dfg.attach_result(insn, value); - } else if let Some(opcode) = eat_token!(self.lexer, Token::OpCode(..))? { - opcode.opcode().make_insn(self, func, inserter, None)?; - } else { - break; - } - } - - Ok(()) - } - - fn expect_insn_arg( - &mut self, - func: &mut Function, - inserter: &mut InsnInserter, - idx: usize, - undefs: &mut Vec, - ) -> Result { - if let Some(value) = eat_token!(self.lexer, Token::Value(..))? { - let value = Value(value.id()); - if !inserter.defined_values.contains(&value) { - undefs.push(idx); - } - Ok(value) - } else if let Some(ident) = eat_token!(self.lexer, Token::Ident(..))? 
{ - let gv = func - .dfg - .ctx - .with_gv_store(|s| s.gv_by_symbol(ident.string())) - .unwrap(); - Ok(func.dfg.make_global_value(gv)) - } else { - let number = - expect_token!(self.lexer, Token::Integer(..), "immediate or value")?.string(); - expect_token!(self.lexer, Token::Dot, "type annotation for immediate")?; - let ty = expect_ty(&self.module_builder.ctx, self.lexer)?; - let imm = build_imm_data(number, &ty, self.lexer.line())?; - Ok(inserter.def_imm(func, imm)) - } - } - - fn expect_block(&mut self) -> Result { - let id = expect_token!(self.lexer, Token::Block(..), "block")?.id(); - Ok(Block(id)) - } - - fn expect_data_loc_kind(&mut self) -> Result { - let token = expect_token!(self.lexer, Token::DataLocationKind(..), "data location")?; - - match token { - Token::DataLocationKind(loc) => Ok(loc), - _ => unreachable!(), - } - } - - fn parse_comment(&mut self) -> Result> { - let mut comments = Vec::new(); - while let Some(line) = eat_token!(self.lexer, Token::FuncComment(..))? { - comments.push(line.string().to_string()); - } - Ok(comments) - } -} - -fn expect_ty(ctx: &ModuleCtx, lexer: &mut Lexer) -> Result { - if let Some(ty) = eat_token!(lexer, Token::BaseTy(..))?.map(|tok| tok.ty()) { - return Ok(ty); - }; - - if eat_token!(lexer, Token::LBracket)?.is_some() { - // Try parse array element type. - let elem_ty = expect_ty(ctx, lexer)?; - expect_token!(lexer, Token::SemiColon, ";")?; - // Try parse array length. - let len = expect_token!(lexer, Token::Integer(..), " or value")? - .string() - .parse() - .map_err(|err| Error::new(ErrorKind::SyntaxError(format!("{}", err)), lexer.line()))?; - expect_token!(lexer, Token::RBracket, "]")?; - Ok(ctx.with_ty_store_mut(|s| s.make_array(elem_ty, len))) - } else if eat_token!(lexer, Token::Star)?.is_some() { - // Try parse ptr base type. - let elem_ty = expect_ty(ctx, lexer)?; - Ok(ctx.with_ty_store_mut(|s| s.make_ptr(elem_ty))) - } else if let Some(tok) = eat_token!(lexer, Token::Ident(..))? { - let name = tok.string(); - ctx.with_ty_store(|s| s.struct_type_by_name(name)) - .ok_or_else(|| { - Error::new( - ErrorKind::SemanticError(format!("type `{name}` is not declared")), - lexer.line(), - ) - }) - } else { - Err(Error::new( - ErrorKind::SyntaxError("invalid type".into()), - lexer.line(), - )) - } -} - -fn expect_linkage(lexer: &mut Lexer) -> Result { - let token = expect_token!(lexer, Token::Linkage { .. }, "linkage")?; - match token { - Token::Linkage(linkage) => Ok(linkage), - _ => unreachable!(), - } -} - -fn expect_constant(ctx: &ModuleCtx, lexer: &mut Lexer, ty: Type) -> Result { - if let Some(number) = eat_token!(lexer, Token::Integer(..))? 
{ - if !ty.is_integral() { - return Err(Error::new( - ErrorKind::SemanticError("expected integral type".to_string()), - lexer.line(), - )); - } - - let data = build_imm_data(number.string(), &ty, lexer.line())?; - Ok(ConstantValue::Immediate(data)) - } else if eat_token!(lexer, Token::LBracket)?.is_some() { - let (elem_ty, mut len) = ctx.with_ty_store(|s| s.array_def(ty)).ok_or_else(|| { - Error::new( - ErrorKind::SemanticError("expcted array type".into()), - lexer.line(), - ) - })?; - - let mut data = Vec::with_capacity(len); - while len > 0 { - let elem = expect_constant(ctx, lexer, elem_ty)?; - data.push(elem); - if len > 1 { - expect_token!(lexer, Token::Comma, ",")?; - } - len -= 1; - } - - expect_token!(lexer, Token::RBracket, "]")?; - Ok(ConstantValue::Array(data)) - } else if eat_token!(lexer, Token::LBrace)?.is_some() { - let fields = ctx - .with_ty_store(|s| s.struct_def(ty).map(|def| def.fields.clone())) - .ok_or_else(|| { - Error::new( - ErrorKind::SemanticError("expected struct type".into()), - lexer.line(), - ) - })?; - - let mut data = Vec::with_capacity(fields.len()); - let field_len = fields.len(); - for (i, field_ty) in fields.into_iter().enumerate() { - let field = expect_constant(ctx, lexer, field_ty)?; - data.push(field); - if i < field_len - 1 { - expect_token!(lexer, Token::Comma, ",")?; - } - } - expect_token!(lexer, Token::RBrace, "}")?; - Ok(ConstantValue::Struct(data)) - } else { - Err(Error::new( - ErrorKind::SyntaxError("invalid constant".into()), - lexer.line(), - )) - } -} - -#[derive(Default)] -pub struct InsnInserter { - loc: CursorLocation, - defined_values: HashSet, - defined_blocks: HashSet, - defined_imms: HashSet, - undefs: HashSet<(Insn, usize)>, -} - -impl InsnInserter { - pub fn def_value(&mut self, func: &mut Function, value: Value, line: u32) -> Result<()> { - if self.defined_values.contains(&value) { - return Err(Error::new( - ErrorKind::SemanticError(format!("v{} is already defined", value.0)), - line, - )); - } - self.defined_values.insert(value); - - let value_len = func.dfg.values.len(); - let value_id = value.0 as usize; - - if value_len <= value_id { - func.dfg.values.reserve(value_id); - for _ in 0..(value_id - value_len + 1) { - // Make dummy value. 
- func.dfg.values.push(ValueData::Arg { - ty: Type::I8, - idx: usize::MAX, - }); - } - } - - if self.defined_imms.contains(&value) { - let imm_data = func.dfg.value_data(value).clone(); - let new_imm_value = func.dfg.make_value(imm_data); - let mut must_replace = vec![]; - for &user in func.dfg.users(value) { - for (idx, &arg) in func.dfg.insn_args(user).iter().enumerate() { - if arg == value && !self.undefs.contains(&(user, idx)) { - must_replace.push((user, idx)); - } - } - } - - for (insn, idx) in must_replace { - func.dfg.replace_insn_arg(insn, new_imm_value, idx); - } - - let imm = func.dfg.value_imm(new_imm_value).unwrap(); - func.dfg.immediates.insert(imm, new_imm_value); - self.defined_imms.remove(&value); - self.defined_imms.insert(new_imm_value); - } - - Ok(()) - } - - fn def_imm(&mut self, func: &mut Function, imm: Immediate) -> Value { - let value = func.dfg.make_imm_value(imm); - self.defined_imms.insert(value); - value - } - - pub fn def_block( - &mut self, - func: &mut Function, - block: Block, - line: u32, - block_data: BlockData, - ) -> Result<()> { - if self.defined_blocks.contains(&block) { - return Err(Error::new( - ErrorKind::SemanticError(format!("block{} is already defined", block.0)), - line, - )); - } - self.defined_blocks.insert(block); - - let block_id = block.0 as usize; - let block_len = func.dfg.blocks.len(); - - if block_len <= block_id { - func.dfg.blocks.reserve(block_id); - for _ in 0..(block_id - block_len + 1) { - // Make dummy block. - func.dfg.blocks.push(BlockData::default()); - } - } - - func.dfg.blocks[block] = block_data; - Ok(()) - } - - fn insert_insn_data(&mut self, func: &mut Function, insn_data: InsnData) -> Insn { - let insn = func.dfg.make_insn(insn_data); - self.insert_insn(func, insn); - self.set_location(CursorLocation::At(insn)); - insn - } - - fn append_arg_value(&mut self, func: &mut Function, value: Value, ty: Type) { - let idx = func.arg_values.len(); - - let value_data = func.dfg.make_arg_value(ty, idx); - func.sig.append_arg(ty); - func.dfg.values[value] = value_data; - func.arg_values.push(value); - } -} - -impl FuncCursor for InsnInserter { - fn at_location(loc: CursorLocation) -> Self { - Self { - loc, - ..Default::default() - } - } - - fn set_location(&mut self, loc: CursorLocation) { - self.loc = loc; - } - - fn loc(&self) -> CursorLocation { - self.loc - } -} - -macro_rules! make_unary { - ($parser:ident, $func:ident, $inserter:ident, $code:path, $undefs:expr) => {{ - let lhs = $parser.expect_insn_arg($func, $inserter, 0, $undefs)?; - expect_token!($parser.lexer, Token::SemiColon, ";")?; - InsnData::Unary { - code: $code, - args: [lhs], - } - }}; -} - -macro_rules! make_binary { - ($parser:ident, $func:ident, $inserter:ident, $code:path, $undefs:expr) => {{ - let lhs = $parser.expect_insn_arg($func, $inserter, 0, $undefs)?; - let rhs = $parser.expect_insn_arg($func, $inserter, 1, $undefs)?; - expect_token!($parser.lexer, Token::SemiColon, ";")?; - InsnData::Binary { - code: $code, - args: [lhs, rhs], - } - }}; -} - -macro_rules! make_cast { - ($parser:ident, $func:ident, $inserter:ident, $cast_to:expr, $code:path, $undefs:expr) => {{ - let arg = $parser.expect_insn_arg($func, $inserter, 0, $undefs)?; - expect_token!($parser.lexer, Token::SemiColon, ";")?; - InsnData::Cast { - code: $code, - args: [arg], - ty: $cast_to, - } - }}; -} - -macro_rules! 
make_jump { - ($parser:ident) => {{ - let dest = $parser.expect_block()?; - expect_token!($parser.lexer, Token::SemiColon, ";")?; - InsnData::Jump { dests: [dest] } - }}; -} - -impl Code { - /// Read args and create insn data. - fn make_insn( - self, - parser: &mut FuncParser, - func: &mut Function, - inserter: &mut InsnInserter, - ret_ty: Option, - ) -> Result { - let mut undefs = vec![]; - let insn_data = match self { - Self::Not => make_unary!(parser, func, inserter, UnaryOp::Not, &mut undefs), - Self::Neg => make_unary!(parser, func, inserter, UnaryOp::Neg, &mut undefs), - Self::Add => make_binary!(parser, func, inserter, BinaryOp::Add, &mut undefs), - Self::Sub => make_binary!(parser, func, inserter, BinaryOp::Sub, &mut undefs), - Self::Mul => make_binary!(parser, func, inserter, BinaryOp::Mul, &mut undefs), - Self::Udiv => make_binary!(parser, func, inserter, BinaryOp::Udiv, &mut undefs), - Self::Sdiv => make_binary!(parser, func, inserter, BinaryOp::Sdiv, &mut undefs), - Self::Lt => make_binary!(parser, func, inserter, BinaryOp::Lt, &mut undefs), - Self::Gt => make_binary!(parser, func, inserter, BinaryOp::Gt, &mut undefs), - Self::Slt => make_binary!(parser, func, inserter, BinaryOp::Slt, &mut undefs), - Self::Sgt => make_binary!(parser, func, inserter, BinaryOp::Sgt, &mut undefs), - Self::Le => make_binary!(parser, func, inserter, BinaryOp::Le, &mut undefs), - Self::Ge => make_binary!(parser, func, inserter, BinaryOp::Ge, &mut undefs), - Self::Sle => make_binary!(parser, func, inserter, BinaryOp::Sle, &mut undefs), - Self::Sge => make_binary!(parser, func, inserter, BinaryOp::Sge, &mut undefs), - Self::Eq => make_binary!(parser, func, inserter, BinaryOp::Eq, &mut undefs), - Self::Ne => make_binary!(parser, func, inserter, BinaryOp::Ne, &mut undefs), - Self::And => make_binary!(parser, func, inserter, BinaryOp::And, &mut undefs), - Self::Or => make_binary!(parser, func, inserter, BinaryOp::Or, &mut undefs), - Self::Xor => make_binary!(parser, func, inserter, BinaryOp::Xor, &mut undefs), - Self::Sext => make_cast!( - parser, - func, - inserter, - ret_ty.unwrap(), - CastOp::Sext, - &mut undefs - ), - Self::Zext => make_cast!( - parser, - func, - inserter, - ret_ty.unwrap(), - CastOp::Zext, - &mut undefs - ), - Self::BitCast => make_cast!( - parser, - func, - inserter, - ret_ty.unwrap(), - CastOp::BitCast, - &mut undefs - ), - Self::Trunc => make_cast!( - parser, - func, - inserter, - ret_ty.unwrap(), - CastOp::Trunc, - &mut undefs - ), - - Self::Load => { - let loc = parser.expect_data_loc_kind()?; - let arg = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Load { args: [arg], loc } - } - Self::Store => { - let loc = parser.expect_data_loc_kind()?; - let lhs = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; - let rhs = parser.expect_insn_arg(func, inserter, 1, &mut undefs)?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Store { - args: [lhs, rhs], - loc, - } - } - - Self::Call => { - let func_name = - expect_token!(parser.lexer, Token::Ident(..), "func name")?.string(); - let mut args = smallvec![]; - let mut idx = 0; - while eat_token!(parser.lexer, Token::SemiColon)?.is_none() { - let arg = parser.expect_insn_arg(func, inserter, idx, &mut undefs)?; - args.push(arg); - idx += 1; - } - - let callee = parser - .module_builder - .get_func_ref(func_name) - .ok_or_else(|| { - Error::new( - ErrorKind::SemanticError(format!("%{} is not declared", func_name)), - parser.lexer.line(), - ) 
- })?; - let sig = parser.module_builder.get_sig(callee).clone(); - let ret_ty = sig.ret_ty(); - func.callees.insert(callee, sig); - InsnData::Call { - func: callee, - args, - ret_ty, - } - } - - Self::Jump => make_jump!(parser), - - Self::Br => { - let cond = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; - let then = parser.expect_block()?; - let else_ = parser.expect_block()?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Branch { - args: [cond], - dests: [then, else_], - } - } - Self::BrTable => { - let mut arg_idx = 0; - let mut args = smallvec![]; - let cond = parser.expect_insn_arg(func, inserter, arg_idx, &mut undefs)?; - args.push(cond); - arg_idx += 1; - - let default = if eat_token!(parser.lexer, Token::Undef)?.is_some() { - None - } else { - Some(parser.expect_block()?) - }; - - let mut table = smallvec![]; - while eat_token!(parser.lexer, Token::LParen)?.is_some() { - let value = parser.expect_insn_arg(func, inserter, arg_idx, &mut undefs)?; - args.push(value); - let block = parser.expect_block()?; - table.push(block); - expect_token!(parser.lexer, Token::RParen, ")")?; - arg_idx += 1; - } - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::BrTable { - args, - default, - table, - } - } - - Self::Gep => { - let mut args = smallvec![]; - let mut idx = 0; - while eat_token!(parser.lexer, Token::SemiColon)?.is_none() { - let arg = parser.expect_insn_arg(func, inserter, idx, &mut undefs)?; - args.push(arg); - idx += 1; - } - - InsnData::Gep { args } - } - - Self::Alloca => { - let ty = expect_ty(&parser.module_builder.ctx, parser.lexer)?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Alloca { ty } - } - - Self::Return => { - if eat_token!(parser.lexer, Token::SemiColon)?.is_some() { - InsnData::Return { args: None } - } else { - let value = parser.expect_insn_arg(func, inserter, 0, &mut undefs)?; - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Return { args: Some(value) } - } - } - - Self::Phi => { - let mut values = smallvec![]; - let mut blocks = smallvec![]; - let mut idx = 0; - while eat_token!(parser.lexer, Token::LParen)?.is_some() { - let value = parser.expect_insn_arg(func, inserter, idx, &mut undefs)?; - values.push(value); - let block = parser.expect_block()?; - blocks.push(block); - expect_token!(parser.lexer, Token::RParen, ")")?; - idx += 1; - } - expect_token!(parser.lexer, Token::SemiColon, ";")?; - InsnData::Phi { - values, - blocks, - ty: ret_ty.unwrap(), - } - } - }; - - let insn = inserter.insert_insn_data(func, insn_data); - for undef in undefs { - inserter.undefs.insert((insn, undef)); - } - - Ok(insn) - } -} - -fn build_imm_data(number: &str, ty: &Type, line: u32) -> Result { - match ty { - Type::I1 => number - .parse::() - .map(|val| Immediate::I1(val != 0)) - .map_err(|err| parse_imm_error(err, line)), - - Type::I8 => number - .parse::() - .or_else(|_| number.parse::().map(|v| v as i8)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I16 => number - .parse::() - .or_else(|_| number.parse::().map(|v| v as i16)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I32 => number - .parse::() - .or_else(|_| number.parse::().map(|v| v as i32)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I64 => number - .parse::() - .or_else(|_| number.parse::().map(|v| v as i64)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I128 => number - .parse::() - .or_else(|_| 
number.parse::().map(|v| v as i128)) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line)), - - Type::I256 => { - let number = number.to_string(); - let is_negative = number.as_bytes()[0] as char == '-'; - let number = if is_negative { &number[1..] } else { &number }; - let mut i256: I256 = U256::from_str_radix(number, 10) - .map(Into::into) - .map_err(|err| parse_imm_error(err, line))?; - - if is_negative { - i256 = I256::zero().overflowing_sub(i256).0; - } - - Ok(Immediate::I256(i256)) - } - - _ => Err(Error::new( - ErrorKind::SemanticError("can't use non integral types for immediates".into()), - line, - )), - } -} - -fn parse_imm_error(err: impl std::fmt::Display, line: u32) -> Error { - Error::new( - ErrorKind::SemanticError(format!("failed to parse immediate: {}", err)), - line, - ) -} - -#[cfg(test)] -mod tests { - use super::*; - - use sonatina_ir::ir_writer::FuncWriter; - - fn test_func_parser(input: &str) -> bool { - let mut lexer = Lexer::new(input); - let triple = TargetTriple::parse("evm-ethereum-london").unwrap(); - let isa = IsaBuilder::new(triple).build(); - let mut module_builder = ModuleBuilder::new(ModuleCtx::new(isa)); - let parsed_func = FuncParser::new(&mut lexer, &mut module_builder) - .parse() - .unwrap() - .unwrap(); - let module = module_builder.build(); - let mut writer = FuncWriter::new(&module.funcs[parsed_func.func_ref]); - - input.trim() == writer.dump_string().unwrap().trim() - } - - #[test] - fn parser_with_return() { - assert!(test_func_parser( - "func private %test_func() -> i32: - block0: - return 311.i32;" - )); - } - - #[test] - fn test_with_arg() { - assert!(test_func_parser( - "func public %test_func(v0.i32, v1.i64) -> void: - block0: - v2.i64 = sext v0; - v3.i64 = mul v2 v1; - return; -" - )); - } - - #[test] - fn parser_with_non_continuous_value() { - assert!(test_func_parser( - "func private %test_func() -> i32: - block64: - jump block1; - - block1: - return 311.i32;" - )); - } - - #[test] - fn test_gep() { - assert!(test_func_parser( - "func public %test(v0.*i32, v1.*[*i64; 10]) -> *i32: - block0: - v2.*i32 = gep v0 10.i32; - v3.**i64 = gep v1 10.i32; - return v1;" - )); - } - - #[test] - fn parser_with_phi() { - assert!(test_func_parser( - "func private %test_func() -> void: - block0: - jump block1; - - block1: - v4.i32 = phi (1.i32 block0) (v5 block5); - br 1.i32 block6 block2; - - block2: - br 1.i32 block4 block3; - - block3: - jump block5; - - block4: - jump block5; - - block5: - v5.i32 = phi (2.i32 block3) (v4 block4); - jump block1; - - block6: - v3.i32 = add v4 v4; - return; - " - )); - } - - #[test] - fn parser_with_immediate() { - assert!(test_func_parser( - "func private %test_func() -> i8: - block64: - v0.i8 = add -1.i8 127.i8; - v1.i8 = add v0 3.i8; - jump block1; - - block1: - v2.i16 = zext -128.i8; - return v1;" - )); - } - - #[test] - fn test_with_module_comment() { - let input = " - #! Module comment 1 - #! 
Module comment 2 - - target = \"evm-ethereum-london\" - - # f1 start 1 - # f1 start 2 - func private %f1() -> i32: - block0: - return 311.i32; - - # f2 start 1 - # f2 start 2 - func public %f2() -> i32: - block0: - return 311.i32; - "; - - let parser = Parser::default(); - let parsed_module = parser.parse(input).unwrap(); - let module_comments = parsed_module.module_comments; - assert_eq!(module_comments[0], " Module comment 1"); - assert_eq!(module_comments[1], " Module comment 2"); - - let module = parsed_module.module; - let mut funcs = module.iter_functions(); - let func1 = funcs.next().unwrap(); - let func1_comment = &parsed_module.func_comments[func1]; - assert_eq!(func1_comment[0], " f1 start 1"); - assert_eq!(func1_comment[1], " f1 start 2"); - - let func2 = funcs.next().unwrap(); - let func2_comment = &parsed_module.func_comments[func2]; - assert_eq!(func2_comment[0], " f2 start 1"); - assert_eq!(func2_comment[1], " f2 start 2"); - } - - #[test] - fn test_with_struct_type() { - let input = " - target = \"evm-ethereum-london\" - - type %s1 = {i32, i64}; - type %s2_packed = <{i32, i64, *%s1}>; - - func public %test(v0.*%s1, v1.*%s2_packed) -> i32: - block0: - return 311.i32; - "; - - let parser = Parser::default(); - let module = parser.parse(input).unwrap().module; - - module.ctx.with_ty_store(|s| { - let ty = s.struct_type_by_name("s1").unwrap(); - let def = s.struct_def(ty).unwrap(); - assert_eq!(def.fields.len(), 2); - assert_eq!(def.fields[0], Type::I32); - assert_eq!(def.fields[1], Type::I64); - assert!(!def.packed); - }); - - let s1_ptr_ty = module.ctx.with_ty_store_mut(|s| { - let ty = s.struct_type_by_name("s1").unwrap(); - s.make_ptr(ty) - }); - module.ctx.with_ty_store(|s| { - let ty = s.struct_type_by_name("s2_packed").unwrap(); - let def = s.struct_def(ty).unwrap(); - assert_eq!(def.fields.len(), 3); - assert_eq!(def.fields[0], Type::I32); - assert_eq!(def.fields[1], Type::I64); - assert_eq!(def.fields[2], s1_ptr_ty); - assert!(def.packed); - }); - } - - #[test] - fn test_with_gv() { - let input = " - target = \"evm-ethereum-london\" - - gv public const %CONST_PUBLIC: i32 = 1; - gv external %GLOBAL_EXTERNAL: i32; - - func public %test() -> i32: - block0: - v2.i32 = add %CONST_PUBLIC %GLOBAL_EXTERNAL; - return v2; - "; - - let parser = Parser::default(); - let module = parser.parse(input).unwrap().module; - - module.ctx.with_gv_store(|s| { - let symbol = "CONST_PUBLIC"; - let gv = s.gv_by_symbol(symbol).unwrap(); - let data = s.gv_data(gv); - assert_eq!(data.symbol, symbol); - assert_eq!(data.ty, Type::I32); - assert_eq!(data.linkage, Linkage::Public); - assert!(data.is_const); - assert_eq!(data.data, Some(ConstantValue::make_imm(1i32))); - }); - - module.ctx.with_gv_store(|s| { - let symbol = "GLOBAL_EXTERNAL"; - let gv = s.gv_by_symbol(symbol).unwrap(); - let data = s.gv_data(gv); - assert_eq!(data.symbol, symbol); - assert_eq!(data.ty, Type::I32); - assert_eq!(data.linkage, Linkage::External); - assert!(!data.is_const); - assert_eq!(data.data, None) - }); - } -} diff --git a/crates/parser2/src/sonatina.pest b/crates/parser/src/sonatina.pest similarity index 100% rename from crates/parser2/src/sonatina.pest rename to crates/parser/src/sonatina.pest diff --git a/crates/parser2/src/syntax.rs b/crates/parser/src/syntax.rs similarity index 100% rename from crates/parser2/src/syntax.rs rename to crates/parser/src/syntax.rs diff --git a/crates/parser2/test_files/syntax/func/empty.snap b/crates/parser/test_files/syntax/func/empty.snap similarity index 100% rename from 
rename from crates/parser2/test_files/syntax/func/empty.snap
rename to crates/parser/test_files/syntax/func/empty.snap
diff --git a/crates/parser2/test_files/syntax/func/empty.sntn b/crates/parser/test_files/syntax/func/empty.sntn
similarity index 100%
rename from crates/parser2/test_files/syntax/func/empty.sntn
rename to crates/parser/test_files/syntax/func/empty.sntn
diff --git a/crates/parser2/test_files/syntax/func/simple.snap b/crates/parser/test_files/syntax/func/simple.snap
similarity index 100%
rename from crates/parser2/test_files/syntax/func/simple.snap
rename to crates/parser/test_files/syntax/func/simple.snap
diff --git a/crates/parser2/test_files/syntax/func/simple.sntn b/crates/parser/test_files/syntax/func/simple.sntn
similarity index 100%
rename from crates/parser2/test_files/syntax/func/simple.sntn
rename to crates/parser/test_files/syntax/func/simple.sntn
diff --git a/crates/parser2/test_files/syntax/module/simple.ast.snap b/crates/parser/test_files/syntax/module/simple.ast.snap
similarity index 100%
rename from crates/parser2/test_files/syntax/module/simple.ast.snap
rename to crates/parser/test_files/syntax/module/simple.ast.snap
diff --git a/crates/parser2/test_files/syntax/module/simple.ir.snap b/crates/parser/test_files/syntax/module/simple.ir.snap
similarity index 100%
rename from crates/parser2/test_files/syntax/module/simple.ir.snap
rename to crates/parser/test_files/syntax/module/simple.ir.snap
diff --git a/crates/parser2/test_files/syntax/module/simple.snap b/crates/parser/test_files/syntax/module/simple.snap
similarity index 100%
rename from crates/parser2/test_files/syntax/module/simple.snap
rename to crates/parser/test_files/syntax/module/simple.snap
diff --git a/crates/parser2/test_files/syntax/module/simple.sntn b/crates/parser/test_files/syntax/module/simple.sntn
similarity index 100%
rename from crates/parser2/test_files/syntax/module/simple.sntn
rename to crates/parser/test_files/syntax/module/simple.sntn
diff --git a/crates/parser2/test_files/syntax/stmts/bin_op.snap b/crates/parser/test_files/syntax/stmts/bin_op.snap
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/bin_op.snap
rename to crates/parser/test_files/syntax/stmts/bin_op.snap
diff --git a/crates/parser2/test_files/syntax/stmts/bin_op.sntn b/crates/parser/test_files/syntax/stmts/bin_op.sntn
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/bin_op.sntn
rename to crates/parser/test_files/syntax/stmts/bin_op.sntn
diff --git a/crates/parser2/test_files/syntax/stmts/cast.snap b/crates/parser/test_files/syntax/stmts/cast.snap
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/cast.snap
rename to crates/parser/test_files/syntax/stmts/cast.snap
diff --git a/crates/parser2/test_files/syntax/stmts/cast.sntn b/crates/parser/test_files/syntax/stmts/cast.sntn
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/cast.sntn
rename to crates/parser/test_files/syntax/stmts/cast.sntn
diff --git a/crates/parser2/test_files/syntax/stmts/control_flow.snap b/crates/parser/test_files/syntax/stmts/control_flow.snap
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/control_flow.snap
rename to crates/parser/test_files/syntax/stmts/control_flow.snap
diff --git a/crates/parser2/test_files/syntax/stmts/control_flow.sntn b/crates/parser/test_files/syntax/stmts/control_flow.sntn
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/control_flow.sntn
rename to crates/parser/test_files/syntax/stmts/control_flow.sntn
diff --git a/crates/parser2/test_files/syntax/stmts/stmts.snap b/crates/parser/test_files/syntax/stmts/stmts.snap
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/stmts.snap
rename to crates/parser/test_files/syntax/stmts/stmts.snap
diff --git a/crates/parser2/test_files/syntax/stmts/stmts.sntn b/crates/parser/test_files/syntax/stmts/stmts.sntn
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/stmts.sntn
rename to crates/parser/test_files/syntax/stmts/stmts.sntn
diff --git a/crates/parser2/test_files/syntax/stmts/unary_op.snap b/crates/parser/test_files/syntax/stmts/unary_op.snap
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/unary_op.snap
rename to crates/parser/test_files/syntax/stmts/unary_op.snap
diff --git a/crates/parser2/test_files/syntax/stmts/unary_op.sntn b/crates/parser/test_files/syntax/stmts/unary_op.sntn
similarity index 100%
rename from crates/parser2/test_files/syntax/stmts/unary_op.sntn
rename to crates/parser/test_files/syntax/stmts/unary_op.sntn
diff --git a/crates/parser2/tests/syntax.rs b/crates/parser/tests/syntax.rs
similarity index 80%
rename from crates/parser2/tests/syntax.rs
rename to crates/parser/tests/syntax.rs
index 5af88b2d..d1d6f3b9 100644
--- a/crates/parser2/tests/syntax.rs
+++ b/crates/parser/tests/syntax.rs
@@ -1,17 +1,13 @@
-use ariadne::{Label, Report, ReportKind, Source};
 use dir_test::{dir_test, Fixture};
 use indenter::indented;
 use ir::ir_writer::ModuleWriter;
-use pest::{error::InputLocation, iterators::Pairs, Parser as _};
-use sonatina_parser2::{
+use pest::{iterators::Pairs, Parser as _};
+use sonatina_parser::{
     ast, parse_module,
     syntax::{Parser, Rule},
+    Error,
 };
-
-use std::{
-    fmt::{self, Write},
-    ops::Range,
-};
+use std::fmt::{self, Write};
 
 #[dir_test(
     dir: "$CARGO_MANIFEST_DIR/test_files/syntax/stmts",
@@ -69,28 +65,9 @@ fn test_rule(rule: Rule, fixture: Fixture<&str>) {
     }
 }
 
-fn location_range(loc: InputLocation) -> Range<usize> {
-    match loc {
-        InputLocation::Pos(pos) => pos..pos,
-        InputLocation::Span((s, e)) => s..e,
-    }
-}
-
 fn report_error(err: pest::error::Error<Rule>, fixture: &Fixture<&str>) {
-    let mut s = Vec::new();
-
-    Report::build(ReportKind::Error, fixture.path(), 12)
-        .with_code(3)
-        .with_message("parse error".to_string())
-        .with_label(
-            Label::new((fixture.path(), location_range(err.location)))
-                .with_message(format!("{}", err.variant.message())),
-        )
-        .finish()
-        .write_for_stdout((fixture.path(), Source::from(fixture.content())), &mut s)
-        .unwrap();
-
-    eprintln!("{}", std::str::from_utf8(&s).unwrap());
+    let s = Error::SyntaxError(err).print_to_string(fixture.path(), fixture.content());
+    eprintln!("{s}");
 }
 
 struct PairsWrapper<'i>(Pairs<'i, Rule>);
diff --git a/crates/parser2/Cargo.toml b/crates/parser2/Cargo.toml
deleted file mode 100644
index ef642cc9..00000000
--- a/crates/parser2/Cargo.toml
+++ /dev/null
@@ -1,33 +0,0 @@
-[package]
-name = "sonatina-parser2"
-version = "0.0.3-alpha"
-edition = "2021"
-authors = ["Sonatina Developers"]
-license = "Apache-2.0"
-readme = "../../README.md"
-homepage = "https://github.com/fe-lang/sonatina/tree/main/crates/parser"
-repository = "https://github.com/fe-lang/sonatina"
-description = "Parser for sonatina-ir text format"
-categories = ["compilers", "parser", "wasm"]
-keywords = ["compiler", "evm", "wasm", "smart-contract"]
-
-[dependencies]
-ir = { package = "sonatina-ir", path = "../ir", version = "0.0.3-alpha" }
-sonatina-triple = { path = "../triple", version = "0.0.3-alpha" }
-smallvec = "1.7.0"
-cranelift-entity = "0.104"
-pest = "2.7.10"
-pest_derive = "2.7.10"
-pest-ast = "0.3.4"
-from-pest = "0.3.2"
-smol_str = "0.2.2"
-hex = "0.4.3"
-num-traits = { version = "0.2.19", default-features = false }
-either = { version = "1.12.0", default-features = false }
-annotate-snippets = "0.11.4"
-
-[dev-dependencies]
-dir-test = { git = "https://github.com/sbillig/dir-test", rev = "c4115dd" }
-insta = { version = "1.38.0" }
-indenter = "0.3.3"
-ariadne = { version = "0.4.1", features = ["auto-color"] }
diff --git a/crates/parser2/src/lib.rs b/crates/parser2/src/lib.rs
deleted file mode 100644
index 2246f8f6..00000000
--- a/crates/parser2/src/lib.rs
+++ /dev/null
@@ -1,244 +0,0 @@
-use ast::{Error, ValueDeclaration};
-use cranelift_entity::SecondaryMap;
-use ir::{
-    self,
-    builder::{FunctionBuilder, ModuleBuilder},
-    func_cursor::{CursorLocation, FuncCursor, InsnInserter},
-    isa::IsaBuilder,
-    module::{FuncRef, ModuleCtx},
-    Module, Signature,
-};
-
-pub mod ast;
-pub mod syntax;
-
-pub fn parse_module(input: &str) -> Result<ParsedModule, Vec<Error>> {
-    let ast = ast::parse(input)?;
-
-    let isa = IsaBuilder::new(ast.target.unwrap()).build(); // xxx
-    let ctx = ModuleCtx::new(isa);
-    let mut builder = ModuleBuilder::new(ctx);
-
-    for st in ast.struct_types {
-        let fields = st
-            .fields
-            .iter()
-            .map(|t| build_type(&mut builder, t))
-            .collect::<Vec<_>>();
-        builder.declare_struct_type(&st.name.0, &fields, false);
-    }
-
-    for func in ast.declared_functions {
-        let params = func
-            .params
-            .iter()
-            .map(|t| build_type(&mut builder, t))
-            .collect::<Vec<_>>();
-        let ret_ty = func
-            .ret_type
-            .as_ref()
-            .map(|t| build_type(&mut builder, t))
-            .unwrap_or(ir::Type::Void);
-
-        let sig = Signature::new(&func.name.0, func.linkage, &params, ret_ty);
-        builder.declare_function(sig);
-    }
-
-    for func in ast.functions.iter() {
-        let sig = &func.signature;
-        let args = sig
-            .params
-            .iter()
-            .map(|decl| build_type(&mut builder, &decl.1))
-            .collect::<Vec<_>>();
-
-        let ret_ty = sig
-            .ret_type
-            .as_ref()
-            .map(|t| build_type(&mut builder, t))
-            .unwrap_or(ir::Type::Void);
-        let sig = Signature::new(&sig.name.0, sig.linkage, &args, ret_ty);
-
-        builder.declare_function(sig);
-    }
-
-    let mut func_comments = SecondaryMap::default();
-
-    for func in ast.functions {
-        let id = builder.get_func_ref(&func.signature.name.0).unwrap();
-        let mut fb = builder.build_function(id);
-        build_func(&mut fb, &func);
-        fb.seal_all();
-        builder = fb.finish();
-
-        func_comments[id] = func.comments;
-    }
-
-    let module = builder.build();
-    Ok(ParsedModule {
-        module,
-        module_comments: ast.comments,
-        func_comments,
-    })
-}
-
-pub struct ParsedModule {
-    pub module: Module,
-    pub module_comments: Vec<String>,
-    pub func_comments: SecondaryMap<FuncRef, Vec<String>>,
-}
-
-fn build_func(builder: &mut FunctionBuilder, func: &ast::Func) {
-    for (i, ValueDeclaration(name, _ty)) in func.signature.params.iter().enumerate() {
-        builder.name_value(builder.func.arg_values[i], &name.0);
-    }
-
-    // "forward declare" all block ids
-    if let Some(max_block_id) = func.blocks.iter().map(|b| b.id.0.unwrap()).max() {
-        while builder.func.dfg.blocks.len() <= max_block_id as usize {
-            builder.cursor.make_block(&mut builder.func);
-        }
-    }
-
-    for block in &func.blocks {
-        let block_id = ir::Block(block.id.0.unwrap());
-        builder.cursor.append_block(&mut builder.func, block_id);
-        builder
-            .cursor
-            .set_location(CursorLocation::BlockTop(block_id));
-
-        for stmt in &block.stmts {
-            match &stmt.kind {
-                ast::StmtKind::Define(ValueDeclaration(val, ty), expr) => {
-                    let ty = build_type(&mut builder.module_builder, ty);
-
-                    let result_val = match expr {
-                        ast::Expr::Binary(op, lhs, rhs) => {
-                            let lhs = build_value(builder, lhs);
-                            let rhs = build_value(builder, rhs);
-                            builder.binary_op(*op, lhs, rhs)
-                        }
-                        ast::Expr::Unary(op, val) => {
-                            let val = build_value(builder, val);
-                            builder.unary_op(*op, val)
-                        }
-                        ast::Expr::Cast(op, val) => {
-                            let val = build_value(builder, val);
-                            builder.cast_op(*op, val, ty)
-                        }
-                        ast::Expr::Load(location, addr) => {
-                            let addr = build_value(builder, addr);
-                            match location {
-                                ir::DataLocationKind::Memory => builder.memory_load(addr),
-                                ir::DataLocationKind::Storage => builder.storage_load(addr),
-                            }
-                        }
-                        ast::Expr::Alloca(ty) => {
-                            let ty = build_type(&mut builder.module_builder, ty);
-                            builder.alloca(ty)
-                        }
-                        ast::Expr::Call(ast::Call(name, args)) => {
-                            let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap();
-                            let args = args
-                                .iter()
-                                .map(|val| build_value(builder, val))
-                                .collect::<Vec<_>>();
-                            builder.call(func_ref, &args).unwrap()
-                        }
-                        ast::Expr::Gep(vals) => {
-                            let vals = vals
-                                .iter()
-                                .map(|val| build_value(builder, val))
-                                .collect::<Vec<_>>();
-                            builder.gep(&vals).unwrap()
-                        }
-                        ast::Expr::Phi(vals) => {
-                            let args = vals
-                                .iter()
-                                .map(|(val, block)| {
-                                    // xxx declare block
-                                    let b = ir::Block(block.0.unwrap());
-                                    let v = build_value(builder, val);
-                                    (v, b)
-                                })
-                                .collect::<Vec<_>>();
-                            builder.phi(ty, &args)
-                        }
-                    };
-                    builder.name_value(result_val, &val.0)
-                }
-                ast::StmtKind::Store(loc, addr, val) => {
-                    let addr = build_value(builder, addr);
-                    let val = build_value(builder, val);
-
-                    match loc {
-                        ir::DataLocationKind::Memory => builder.memory_store(addr, val),
-                        ir::DataLocationKind::Storage => builder.storage_store(addr, val),
-                    }
-                }
-                ast::StmtKind::Return(val) => {
-                    let val = val.as_ref().map(|v| build_value(builder, v));
-                    builder.ret(val);
-                }
-                ast::StmtKind::Jump(block_id) => {
-                    let block_id = ir::Block(block_id.0.unwrap());
-                    builder.jump(block_id);
-                }
-                ast::StmtKind::Branch(cond, true_block, false_block) => {
-                    let cond = build_value(builder, cond);
-                    let true_block = ir::Block(true_block.0.unwrap());
-                    let false_block = ir::Block(false_block.0.unwrap());
-                    builder.br(cond, true_block, false_block);
-                }
-                ast::StmtKind::BranchTable(index, default_block, table) => {
-                    let index = build_value(builder, index);
-                    let default_block = default_block.as_ref().map(|b| ir::Block(b.0.unwrap()));
-                    let table = table
-                        .iter()
-                        .map(|(val, block)| {
-                            (build_value(builder, val), ir::Block(block.0.unwrap()))
-                        })
-                        .collect::<Vec<_>>();
-                    builder.br_table(index, default_block, &table);
-                }
-                ast::StmtKind::Call(ast::Call(name, args)) => {
-                    let func_ref = builder.module_builder.get_func_ref(&name.0).unwrap();
-                    let args = args
-                        .iter()
-                        .map(|val| build_value(builder, val))
-                        .collect::<Vec<_>>();
-                    builder.call(func_ref, &args).unwrap();
-                }
-            }
-        }
-    }
-}
-
-fn build_value(builder: &mut FunctionBuilder, val: &ast::Value) -> ir::Value {
-    match val {
-        ast::Value::Immediate(imm) => builder.make_imm_value(*imm),
-        ast::Value::Named(v) => builder.get_named_value(&v.0),
-        ast::Value::Error => unreachable!(),
-    }
-}
-
-fn build_type(builder: &mut ModuleBuilder, t: &ast::Type) -> ir::Type {
-    match t {
-        ast::Type::Int(i) => (*i).into(),
-        ast::Type::Ptr(t) => {
-            let t = build_type(builder, t);
-            builder.ptr_type(t)
-        }
-        ast::Type::Array(t, n) => {
-            let elem = build_type(builder, t);
-            builder.declare_array_type(elem, *n)
-        }
-        ast::Type::Void => ir::Type::Void,
-        ast::Type::Struct(name) => builder.get_struct_type(name).unwrap_or_else(|| {
-            // xxx error on undeclared struct
-            eprintln!("struct type not found: {name}");
-            ir::Type::Void
-        }),
-        ast::Type::Error => todo!(),
-    }
-}
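
Note on the deleted immediate parsing above: negative i256 literals were handled by parsing the magnitude as an unsigned 256-bit integer and then negating it with a wrapping subtraction from zero, which is two's-complement negation. Below is a minimal standalone sketch of the same trick at 128 bits, using only the standard library; the helper name and the omitted range checks are illustrative, not sonatina code.

// Sketch of the negation trick used for negative immediates, shown at 128 bits
// so it runs with std alone. `parse_i128_imm` is a hypothetical helper.
fn parse_i128_imm(text: &str) -> Result<i128, std::num::ParseIntError> {
    let is_negative = text.starts_with('-');
    let magnitude = if is_negative { &text[1..] } else { text };

    // Parse the magnitude as unsigned, then negate by wrapping subtraction
    // from zero, which is exactly two's-complement negation.
    let mag: u128 = magnitude.parse()?;
    let bits = if is_negative { 0u128.wrapping_sub(mag) } else { mag };
    Ok(bits as i128)
}

fn main() {
    assert_eq!(parse_i128_imm("42").unwrap(), 42);
    assert_eq!(parse_i128_imm("-42").unwrap(), -42);
    // The most negative value has no positive counterpart, but the wrapping
    // negation still yields the correct bit pattern.
    assert_eq!(
        parse_i128_imm("-170141183460469231731687303715884105728").unwrap(),
        i128::MIN
    );
}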
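
Note on build_func in the deleted lib.rs above: block ids in the text format may be sparse (the old tests use block64 before block1), so the builder pre-creates blocks up to the highest id mentioned before lowering any statements, which guarantees every ir::Block(n) index is valid when a jump or branch references it. A self-contained sketch of that idea with stand-in types follows; Block and Dfg here are placeholders, not sonatina's entities.

// Stand-in types: `Block(u32)` mimics an entity index; `Dfg` mimics an arena
// that can only create blocks densely, in order.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Block(u32);

#[derive(Default)]
struct Dfg {
    blocks: Vec<Block>,
}

impl Dfg {
    fn make_block(&mut self) -> Block {
        let b = Block(self.blocks.len() as u32);
        self.blocks.push(b);
        b
    }
}

/// Ensure every id mentioned in the source text refers to an existing entity
/// before any instruction that targets it is built.
fn forward_declare_blocks(dfg: &mut Dfg, mentioned_ids: &[u32]) {
    if let Some(max_id) = mentioned_ids.iter().copied().max() {
        while dfg.blocks.len() <= max_id as usize {
            dfg.make_block();
        }
    }
}

fn main() {
    let mut dfg = Dfg::default();
    // Non-contiguous ids are fine, e.g. `block64` appearing before `block1`.
    forward_declare_blocks(&mut dfg, &[64, 1]);
    assert_eq!(dfg.blocks.len(), 65);
    assert_eq!(dfg.blocks[64], Block(64));
}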
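
Note on the renamed crate: downstream code now imports sonatina_parser instead of sonatina_parser2, and the ariadne-based reporting in tests/syntax.rs is replaced by Error::print_to_string, as the hunk above shows. The following is a hypothetical usage sketch only; it assumes parse_module returns Result<ParsedModule, Vec<Error>> and that print_to_string takes the file path plus source text and returns a String, neither of which is confirmed by this patch.

use sonatina_parser::{parse_module, Error, ParsedModule};

// Parse a module and render every error with the parser's own Error printer.
// Both the Vec<Error> return type and the print_to_string signature are
// assumptions for illustration.
fn load_module(path: &str, source: &str) -> Option<ParsedModule> {
    match parse_module(source) {
        Ok(parsed) => Some(parsed),
        Err(errors) => {
            for err in errors {
                eprintln!("{}", err.print_to_string(path, source));
            }
            None
        }
    }
}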