From 0d016f594dca5a13b263a5d0ca4a9b97c58cb308 Mon Sep 17 00:00:00 2001 From: Lennart Van Hirtum Date: Sun, 10 Mar 2024 17:55:12 +0100 Subject: [PATCH] Start trying to integrate tree sitter --- README.md | 4 ++-- multiply_add.sus | 20 +++++++++++--------- resetNormalizer.sus | 5 +++++ src/file_position.rs | 13 ++++++++----- src/flattening/mod.rs | 22 ++++++++++++++-------- src/linker.rs | 32 ++++++++++++++++++++++++++++++-- src/parser.rs | 30 +++++++++++++++++++++++++++--- src/tokenizer.rs | 4 ++-- tree-sitter-sus | 2 +- 9 files changed, 100 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index c8cae16..2da08e6 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ The main goals of the language are roughly listed below: - [x] Can Parse Blur2 filter - [x] If Statements - [x] Latency Specifiers +- [ ] Access module inputs / outputs through field names - [ ] Array Slices - [ ] Bound Specifiers - [ ] Structs @@ -99,7 +100,6 @@ The main goals of the language are roughly listed below: ### Code Generation - [x] Expression Flattening -- [ ] State Machine Generation - [x] Can Generate Verilog for Multiply-Add pipeline - [x] Can Generate Verilog for Blur2 filter - [ ] Can Generate Verilog for FIFO @@ -135,7 +135,7 @@ pipeline multiply_add : i32 a, i32 b, i32 c -> i32 result { reg result = tmp2 + a; } ``` -Pipeline stages are denoted by adding the 'reg' keyword to statements. Either at the statement level, or to add registers within expressions. This example would then compile to the following Verilog code: +Pipeline stages are denoted by adding the 'reg' keyword to statements. Either at the statement level, or to add registers within expressions. This example could[^1] compile to the following Verilog code: ```Verilog module multiply_add( input[31:0] a, diff --git a/multiply_add.sus b/multiply_add.sus index 708ff4a..a231a34 100644 --- a/multiply_add.sus +++ b/multiply_add.sus @@ -217,16 +217,14 @@ module multiply_add : reg total = tmp + c; } -module fibonnaci : bool next -> int num { +module fibonnaci : -> int num { state int current = 1; state int current_prev = 0; - if next { - num = current + current_prev; - current_prev = current; - current = num; - reg int delay_current = current; - } + num = current + current_prev; + current_prev = current; + current = num; + reg int delay_current = current; } //timeline (v, true -> /) .. (v, false -> v)* @@ -345,8 +343,12 @@ module generative : int i -> int o, int o2 { } module add_stuff_to_indices : int[10] values -> int[10] added_values { + int[5] arr; for int i in 0..10 { - added_values[i] = values[i] + i; + int t = values[i]; + added_values[i] = t + i; + + int tt = arr[i] + values[0]; } } @@ -479,7 +481,7 @@ module determinable_because_no_input_output_ports : int a -> int x { } // This module is a copy of ::undeteriminable_input_latency, but it doesn't have an error, because we just assume the latency of the inner nodes to be the earliest possible. -module conflicting_latency_declarations : int a'1 -> int x'2 { +module conflicting_latency_declarations : int a'0 -> int x'1 { reg int nio = a; reg x = nio; } diff --git a/resetNormalizer.sus b/resetNormalizer.sus index 3a4146d..fadb826 100644 --- a/resetNormalizer.sus +++ b/resetNormalizer.sus @@ -1,4 +1,9 @@ module hello_from_the_other_side : int[3] a -> int result { result = a[2]; + int x = a[0] + a[1] + a[2]; +} + +module m { + } diff --git a/src/file_position.rs b/src/file_position.rs index 62055cb..b35c3e6 100644 --- a/src/file_position.rs +++ b/src/file_position.rs @@ -4,16 +4,19 @@ use std::{fmt::Display, ops::{Index, Range}}; #[derive(Clone,Copy,Debug,PartialEq,Eq,Hash)] pub struct Span(usize, usize); +impl From> for Span { + #[track_caller] + fn from(value: Range) -> Self { + assert!(value.end >= value.start); + Span(value.start, value.end) + } +} + impl Span { /// Only really used for having a span with the maximum size. pub const MAX_POSSIBLE_SPAN : Span = Span(0, usize::MAX); pub const INVALID_SPAN : Span = Span(usize::MAX, usize::MAX); - #[track_caller] - pub fn new_from_byte_range(rng : Range) -> Span { - assert!(rng.end >= rng.start); - Span(rng.start, rng.end) - } pub fn into_range(&self) -> Range { self.0..self.1 } diff --git a/src/flattening/mod.rs b/src/flattening/mod.rs index a7fee18..77cc5bf 100644 --- a/src/flattening/mod.rs +++ b/src/flattening/mod.rs @@ -4,13 +4,7 @@ pub mod name_context; use std::{ops::Deref, iter::zip}; use crate::{ - arena_alloc::{ArenaAllocator, FlatAlloc, UUIDMarker, UUIDRange, UUID}, - ast::{AssignableExpressionModifiers, CodeBlock, Expression, Identifier, IdentifierType, InterfacePorts, LeftExpression, Module, Operator, SignalDeclaration, SpanExpression, SpanTypeExpression, Statement, TypeExpression}, - errors::{error_info, ErrorCollector, ErrorInfo}, - file_position::{BracketSpan, Span}, - linker::{ConstantUUID, FileUUID, GlobalResolver, Linker, ModuleUUID, NameElem, NamedConstant, NamedType, ResolvedGlobals, ResolvedNameElem, TypeUUIDMarker}, - typing::{get_binary_operator_types, typecheck, typecheck_is_array_indexer, typecheck_unary_operator, Type, WrittenType, BOOL_TYPE, INT_TYPE}, - value::Value + arena_alloc::{ArenaAllocator, FlatAlloc, UUIDMarker, UUIDRange, UUID}, ast::{AssignableExpressionModifiers, CodeBlock, Expression, Identifier, IdentifierType, InterfacePorts, LeftExpression, Module, Operator, SignalDeclaration, SpanExpression, SpanTypeExpression, Statement, TypeExpression}, errors::{error_info, ErrorCollector, ErrorInfo}, file_position::{BracketSpan, Span}, linker::{ConstantUUID, FileData, FileUUID, GlobalResolver, Linker, ModuleUUID, NameElem, NamedConstant, NamedType, ResolvedGlobals, ResolvedNameElem, TypeUUIDMarker}, parser::SusTreeSitterSingleton, typing::{get_binary_operator_types, typecheck, typecheck_is_array_indexer, typecheck_unary_operator, Type, WrittenType, BOOL_TYPE, INT_TYPE}, value::Value }; use self::name_context::LocalVariableContext; @@ -216,6 +210,8 @@ struct FlatteningContext<'prev, 'inst, 'l, 'runtime> { linker : &'runtime GlobalResolver<'l>, pub type_list_for_naming : &'l ArenaAllocator, module : &'l Module, + + sus : SusTreeSitterSingleton, } impl<'prev, 'inst, 'l, 'runtime> FlatteningContext<'prev, 'inst, 'l, 'runtime> { @@ -314,6 +310,7 @@ impl<'prev, 'inst, 'l, 'runtime> FlatteningContext<'prev, 'inst, 'l, 'runtime> { type_list_for_naming: self.type_list_for_naming, local_variable_context : LocalVariableContext::new_initial(), module, + sus : SusTreeSitterSingleton::new(), }; let interface_ports = nested_context.initialize_interface::(); @@ -492,7 +489,8 @@ impl<'prev, 'inst, 'l, 'runtime> FlatteningContext<'prev, 'inst, 'l, 'runtime> { local_variable_context: self.local_variable_context.extend(), linker: self.linker, type_list_for_naming: self.type_list_for_naming, - module: self.module + module: self.module, + sus : SusTreeSitterSingleton::new(), }; inner_context.flatten_code_keep_context(code); } @@ -583,6 +581,10 @@ impl<'prev, 'inst, 'l, 'runtime> FlatteningContext<'prev, 'inst, 'l, 'runtime> { } } + fn flatten_tree_sitter(&mut self, module_node : &tree_sitter::Node<'_>) { + + } + /* ==== Typechecking ==== */ @@ -913,10 +915,14 @@ impl FlattenedModule { local_variable_context : LocalVariableContext::new_initial(), type_list_for_naming : &linker.types, module, + sus : SusTreeSitterSingleton::new(), }; let interface_ports = context.initialize_interface::(); + + + //context.flatten_tree_sitter() context.flatten_code(&module.code); context.typecheck(); context.generative_check(); diff --git a/src/linker.rs b/src/linker.rs index 2a2014f..04b23b6 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -1,5 +1,7 @@ use std::{collections::{HashMap, HashSet}, rc::Rc, cell::RefCell}; +use tree_sitter::TreeCursor; + use crate::{arena_alloc::{ArenaAllocator, UUIDMarker, UUID}, ast::{LinkInfo, Module}, errors::{error_info, ErrorCollector}, file_position::{FileText, Span}, flattening::{FlatID, FlattenedModule, Instruction, WireInstance, WireSource}, instantiation::InstantiatedModule, parser::{FullParseResult, TokenTreeNode}, tokenizer::TokenTypeIdx, typing::{Type, WrittenType}, util::{const_str_position, const_str_position_in_tuples}, value::Value}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -122,7 +124,8 @@ pub struct FileData { pub tokens : Vec, pub token_hierarchy : Vec, pub parsing_errors : ErrorCollector, - pub associated_values : Vec + pub associated_values : Vec, + pub tree : tree_sitter::Tree } #[derive(Debug,Clone,Copy,PartialEq,Eq,Hash)] @@ -330,13 +333,38 @@ impl Linker { pub fn add_reserved_file(&mut self, file : FileUUID, parse_result : FullParseResult) { let mut associated_values = Vec::new(); + + let sus = crate::parser::SusTreeSitterSingleton::new(); + + { + let root_node = parse_result.tree.root_node(); + + let mut tmp_cursor = root_node.walk(); + for node in root_node.children(&mut tmp_cursor) { + if node.kind_id() == sus.module_node { + let name_child = node.child_by_field_id(sus.module_name_field).unwrap(); + println!("MODULE DECL: {}", &parse_result.file_text.file_text[name_child.byte_range()]) + } else { + parse_result.ast.errors.error_basic(Span::from(node.byte_range()), "Only module declarations are allowed at the top level of a file!"); + continue; + } + } + } + for md in parse_result.ast.modules { let module_name = md.link_info.name.clone(); let new_module_uuid = NameElem::Module(self.modules.alloc(md)); associated_values.push(new_module_uuid); self.add_name(module_name, new_module_uuid); } - self.files.alloc_reservation(file, FileData { file_text : parse_result.file_text, tokens: parse_result.tokens, token_hierarchy: parse_result.token_hierarchy, parsing_errors : parse_result.ast.errors, associated_values}); + self.files.alloc_reservation(file, FileData{ + file_text : parse_result.file_text, + tree: parse_result.tree, + tokens: parse_result.tokens, + token_hierarchy: parse_result.token_hierarchy, + parsing_errors : parse_result.ast.errors, + associated_values + }); } pub fn relink(&mut self, file : FileUUID, parse_result : FullParseResult) { diff --git a/src/parser.rs b/src/parser.rs index 222a656..b4e3a6c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,7 +3,7 @@ use num::BigInt; use crate::{ast::*, errors::*, file_position::{BracketSpan, FileText, SingleCharSpan, Span}, flattening::FlattenedModule, instantiation::InstantiationList, linker::FileUUID, tokenizer::*, value::Value}; -use std::{iter::Peekable, str::FromStr}; +use std::{cell::Cell, iter::Peekable, str::FromStr}; use core::slice::Iter; pub enum TokenTreeNode { @@ -715,10 +715,11 @@ pub struct FullParseResult { pub file_text : FileText, pub tokens : Vec, pub token_hierarchy : Vec, - pub ast : ASTRoot + pub ast : ASTRoot, + pub tree : tree_sitter::Tree } -pub fn perform_full_semantic_parse<'txt>(file_text : String, file : FileUUID) -> FullParseResult { +pub fn perform_full_semantic_parse(file_text : String, file : FileUUID) -> FullParseResult { let errors = ErrorCollector::new(file); let (tokens, token_spans) = tokenize(&file_text, &errors); @@ -729,10 +730,33 @@ pub fn perform_full_semantic_parse<'txt>(file_text : String, file : FileUUID) -> let ast = parse(&token_hierarchy, &file_text, errors); + let mut parser = tree_sitter::Parser::new(); + parser.set_language(tree_sitter_sus::language()).unwrap(); + FullParseResult{ + tree : parser.parse(&file_text.file_text, None).unwrap(), file_text, tokens, token_hierarchy, ast, } } + +pub struct SusTreeSitterSingleton { + pub language : tree_sitter::Language, + pub module_node : u16, + pub module_name_field : u16, +} + +impl SusTreeSitterSingleton { + pub fn new() -> Self { + let language = tree_sitter_sus::language(); + SusTreeSitterSingleton { + module_node : language.id_for_node_kind("module", true), + module_name_field : language.field_id_for_name("name").unwrap(), + language, + } + } +} + +//pub static TREE_SITTER_SUS : Cell> = Cell::new(None); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 21b2003..f5a363f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -211,10 +211,10 @@ impl TokenGatherer { // Result can be used for error reporting fn push(&mut self, typ : TokenTypeIdx, rng : Range) { self.token_types.push(typ); - self.token_spans.push(Span::new_from_byte_range(rng)); + self.token_spans.push(Span::from(rng)); } fn push_invalid>(&mut self, rng : Range, errors : &ErrorCollector, motivation : S) { - errors.error_basic(Span::new_from_byte_range(rng), motivation); + errors.error_basic(Span::from(rng), motivation); } } diff --git a/tree-sitter-sus b/tree-sitter-sus index afe311a..22f4b70 160000 --- a/tree-sitter-sus +++ b/tree-sitter-sus @@ -1 +1 @@ -Subproject commit afe311acc419da3ba793eb232fecb2f65fc55ee3 +Subproject commit 22f4b7082cf7b2be01912cd5e023c52a4676f25d