From 79d13d614ab276b7382ecd9d470ed172001e0953 Mon Sep 17 00:00:00 2001
From: Lennart Van Hirtum
Date: Mon, 5 Feb 2024 23:09:59 +0100
Subject: [PATCH] Compute lines and cols at tokenization time

---
 src/dev_aid/lsp.rs | 97 ++++++++++++++++------------------------------
 src/dev_aid/mod.rs |  1 -
 src/flattening.rs  | 18 ++++-----
 src/linker.rs      | 77 +++++++++++++++++++++++++++++++++++-
 src/tokenizer.rs   | 48 +++++++++++++++++++++--
 src/typing.rs      | 39 ++++++++++++++-----
 6 files changed, 192 insertions(+), 88 deletions(-)

diff --git a/src/dev_aid/lsp.rs b/src/dev_aid/lsp.rs
index fa01c3d..692f857 100644
--- a/src/dev_aid/lsp.rs
+++ b/src/dev_aid/lsp.rs
@@ -6,7 +6,7 @@
 use lsp_server::{Response, Message, Connection};
 
 use lsp_types::notification::Notification;
 
-use crate::{parser::perform_full_semantic_parse, dev_aid::syntax_highlighting::create_token_ide_info, ast::{IdentifierType, Span}, errors::{ErrorCollector, CompileError, ErrorLevel}, linker::{FileUUIDMarker, Linker, FileUUID, FileData}, arena_alloc::ArenaVector};
+use crate::{arena_alloc::ArenaVector, ast::{IdentifierType, Span}, dev_aid::syntax_highlighting::create_token_ide_info, errors::{ErrorCollector, CompileError, ErrorLevel}, linker::{FileUUIDMarker, Linker, FileUUID, FileData}, parser::perform_full_semantic_parse, tokenizer::{CharLine, TokenizeResult}};
 
 use super::syntax_highlighting::{IDETokenType, IDEIdentifierType, IDEToken};
 
@@ -69,13 +69,13 @@ pub fn lsp_main(port : u16) -> Result<(), Box<dyn Error + Sync + Send>> {
     // Run the server and wait for the two threads to end (typically by trigger LSP Exit event).
     let server_capabilities = serde_json::to_value(&ServerCapabilities {
         definition_provider: Some(OneOf::Left(true)),
-        /*document_highlight_provider: Some(OneOf::Right(
+        document_highlight_provider: Some(OneOf::Right(
             DocumentHighlightOptions{
                 work_done_progress_options: WorkDoneProgressOptions{
-                    work_done_progress: Some(true)
+                    work_done_progress: Some(false)
                 }
             }
-        )),*/
+        )),
         semantic_tokens_provider: Some(SemanticTokensServerCapabilities::SemanticTokensOptions(SemanticTokensOptions{
             work_done_progress_options: WorkDoneProgressOptions {
                 work_done_progress: Some(false)
@@ -170,8 +170,7 @@ impl SemanticTokensDeltaAccumulator {
         }
 
         let delta_col = position.character - self.prev.character;
-        self.prev.character = position.character;
-        self.prev.line = position.line;
+        self.prev = position;
 
         self.semantic_tokens.push(SemanticToken{
             delta_line: delta_line,
@@ -183,80 +182,42 @@
     }
 }
 
-fn do_syntax_highlight(file_data : &FileData, linker : &Linker) -> (SemanticTokensResult, Vec<std::ops::Range<Position>>) {
-    let file_text = &file_data.file_text;
+fn do_syntax_highlight(file_data : &FileData, linker : &Linker) -> SemanticTokensResult {
     let ide_tokens = create_token_ide_info(&file_data, linker);
 
     let mut semantic_tokens_acc = SemanticTokensDeltaAccumulator{prev : Position {line : 0, character : 0}, semantic_tokens : Vec::new()};
     semantic_tokens_acc.semantic_tokens.reserve(file_data.tokens.len());
-    let mut positions : Vec<std::ops::Range<Position>> = Vec::new();
-    positions.reserve(file_data.tokens.len());
-    let mut cur_whitespace_start = 0;
-    let mut cur_position = Position{line : 0, character : 0};
 
     for (tok_idx, ide_tok) in ide_tokens.iter().enumerate() {
         let typ = get_semantic_token_type_from_ide_token(ide_tok);
         let mod_bits = get_modifiers_for_token(ide_tok);
 
-        let tok_range = file_data.tokens.get_token_range(tok_idx);
-        let whitespace_text = &file_text[cur_whitespace_start..tok_range.start];
-        cur_whitespace_start = tok_range.end;
-        let token_text = &file_text[tok_range];
-
-        // skip through whitespace
-        for c in whitespace_text.chars() {
-            if c == '\n' {
-                cur_position.line += 1;
-                cur_position.character = 0;
-            } else {
-                cur_position.character += 1;
-            }
-        }
-        let real_token_start_position = cur_position;
-        let mut part_start_position = cur_position;
-        for c in token_text.chars() {
-            if c == '\n' {
-                semantic_tokens_acc.push(part_start_position, cur_position.character - part_start_position.character, typ, mod_bits);
-                cur_position.line += 1;
-                cur_position.character = 0;
-                part_start_position = cur_position;
-            } else {
-                cur_position.character += 1;
-            }
-        }
-        semantic_tokens_acc.push(part_start_position, cur_position.character - part_start_position.character, typ, mod_bits);
-        positions.push(real_token_start_position..cur_position);
-    }
-    let eof_start = cur_position.clone();
-    for c in file_text[cur_whitespace_start..].chars() {
-        if c == '\n' {
-            cur_position.line += 1;
-            cur_position.character = 0;
-        } else {
-            cur_position.character += 1;
-        }
+        let tok_range = file_data.tokens.get_token_linechar_range(tok_idx);
+        let start_pos = Position{line : tok_range.start.line as u32, character : tok_range.start.character as u32};
+        let end_pos = Position{line : tok_range.end.line as u32, character : tok_range.end.character as u32};
+        semantic_tokens_acc.push(start_pos, end_pos.character - start_pos.character, typ, mod_bits)
     }
-    positions.push(eof_start..cur_position);
-    (SemanticTokensResult::Tokens(lsp_types::SemanticTokens {
+
+    SemanticTokensResult::Tokens(lsp_types::SemanticTokens {
         result_id: None,
         data: semantic_tokens_acc.semantic_tokens
-    }), positions)
+    })
 }
 
 use lsp_types::Diagnostic;
 
-fn cvt_span_to_lsp_range(ch_sp : Span, token_positions : &[std::ops::Range<Position>]) -> lsp_types::Range {
+fn cvt_span_to_lsp_range(ch_sp : Span, tokens : &TokenizeResult) -> lsp_types::Range {
+    let rng = tokens.get_span_linechar_range(ch_sp);
     Range {
-        start: token_positions[ch_sp.0].start,
-        end: token_positions[ch_sp.1].end
+        start: Position{character : rng.start.character as u32, line : rng.start.line as u32},
+        end: Position{character : rng.end.character as u32, line : rng.end.line as u32}
     }
 }
 
 // Requires that token_positions.len() == tokens.len() + 1 to include EOF token
-fn convert_diagnostic(err : CompileError, token_positions : &[std::ops::Range<Position>], uris : &ArenaVector<Url, FileUUIDMarker>) -> Diagnostic {
-    let error_pos = cvt_span_to_lsp_range(err.position, token_positions);
+fn convert_diagnostic(err : CompileError, tokens : &TokenizeResult, uris : &ArenaVector<Url, FileUUIDMarker>) -> Diagnostic {
+    let error_pos = cvt_span_to_lsp_range(err.position, tokens);
 
     let severity = match err.level {
         ErrorLevel::Error => DiagnosticSeverity::ERROR,
@@ -264,7 +225,7 @@ fn convert_diagnostic(err : CompileError, token_positions : &[std::ops::Range
-fn send_errors_warnings(connection: &Connection, errors : ErrorCollector, token_positions : &[std::ops::Range<Position>], uris : &ArenaVector<Url, FileUUIDMarker>) -> Result<(), Box<dyn Error + Sync + Send>> {
+fn send_errors_warnings(connection: &Connection, errors : ErrorCollector, token_boundaries : &TokenizeResult, uris : &ArenaVector<Url, FileUUIDMarker>) -> Result<(), Box<dyn Error + Sync + Send>> {
     let mut diag_vec : Vec<Diagnostic> = Vec::new();
     let (err_vec, file) = errors.get();
     for err in err_vec {
-        diag_vec.push(convert_diagnostic(err, token_positions, uris));
+        diag_vec.push(convert_diagnostic(err, token_boundaries, uris));
     }
 
     let params = &PublishDiagnosticsParams{
@@ -316,6 +277,14 @@ fn main_loop(
                     let params : GotoDefinitionParams = serde_json::from_value(req.params).expect("JSON Encoding Error while parsing params");
                     println!("got gotoDefinition request: {params:?}");
 
+                    let pos = &params.text_document_position_params.position;
+                    let text_document = &params.text_document_position_params.text_document;
+
+                    let uuid = file_cache.ensure_contains_file(&text_document.uri);
+
+
+                    let file_data = &file_cache.linker.files[uuid];
+
                     let result = Some(GotoDefinitionResponse::Array(Vec::new()));
                     let result = serde_json::to_value(&result).unwrap();
                     let resp = Response { id: req.id, result: Some(result), error: None };
@@ -330,7 +299,7 @@ fn main_loop(
 
                     let file_data = &file_cache.linker.files[uuid];
 
-                    let (syntax_highlight, token_positions) = do_syntax_highlight(file_data, &file_cache.linker);
+                    let syntax_highlight = do_syntax_highlight(file_data, &file_cache.linker);
 
                     let result = serde_json::to_value(&syntax_highlight).unwrap();
                     connection.sender.send(Message::Response(Response{
@@ -340,11 +309,13 @@ fn main_loop(
                     // println!("Flattening...");
                     file_cache.linker.recompile_all();
 
-                    let mut errors = file_cache.linker.files[uuid].parsing_errors.clone();
+                    let file_data = &file_cache.linker.files[uuid]; // Have to grab it again because previous line mutates
+
+                    let mut errors = file_data.parsing_errors.clone();
                     file_cache.linker.get_all_errors_in_file(uuid, &mut errors);
 
                     // println!("Errors: {:?}", &errors);
-                    send_errors_warnings(&connection, errors, &token_positions, &file_cache.uris)?;
+                    send_errors_warnings(&connection, errors, &file_data.tokens, &file_cache.uris)?;
                 },
                 // TODO ...
                 req => {
diff --git a/src/dev_aid/mod.rs b/src/dev_aid/mod.rs
index f668210..4efeb69 100644
--- a/src/dev_aid/mod.rs
+++ b/src/dev_aid/mod.rs
@@ -3,4 +3,3 @@
 pub mod syntax_highlighting;
 #[cfg(feature = "lsp")]
 pub mod lsp;
-
diff --git a/src/flattening.rs b/src/flattening.rs
index e4a8ec8..7a359b0 100644
--- a/src/flattening.rs
+++ b/src/flattening.rs
@@ -3,7 +3,7 @@
 use std::{ops::Deref, iter::zip};
 
 use crate::{
     ast::{AssignableExpression, AssignableExpressionModifiers, CodeBlock, DeclID, DeclIDMarker, Expression, IdentifierType, InterfacePorts, LocalOrGlobal, Module, Operator, Span, SpanAssignableExpression, SpanExpression, SpanTypeExpression, Statement, TypeExpression},
     linker::{Linker, FileUUID, GlobalResolver, ResolvedGlobals, NamedConstant, ConstantUUID, ModuleUUID, NameElem, NamedType, TypeUUIDMarker},
-    errors::{ErrorCollector, error_info, ErrorInfo}, arena_alloc::{UUID, UUIDMarker, FlatAlloc, UUIDRange, ArenaAllocator}, typing::{get_binary_operator_types, typecheck, typecheck_is_array_indexer, typecheck_unary_operator, ResolvedTypeExpr, Type, BOOL_TYPE, INT_TYPE}, value::Value
+    errors::{ErrorCollector, error_info, ErrorInfo}, arena_alloc::{UUID, UUIDMarker, FlatAlloc, UUIDRange, ArenaAllocator}, typing::{get_binary_operator_types, typecheck, typecheck_is_array_indexer, typecheck_unary_operator, WrittenType, Type, BOOL_TYPE, INT_TYPE}, value::Value
 };
 
 #[derive(Debug,Clone,Copy,PartialEq,Eq,Hash)]
@@ -81,7 +81,7 @@ pub struct WireInstance {
 
 #[derive(Debug)]
 pub struct Declaration {
-    pub typ_expr : ResolvedTypeExpr,
+    pub typ_expr : WrittenType,
     pub typ : Type,
     pub is_declared_in_this_module : bool,
     pub name_token : usize,
@@ -185,20 +185,20 @@ struct FlatteningContext<'inst, 'l, 'm> {
 }
 
 impl<'inst, 'l, 'm> FlatteningContext<'inst, 'l, 'm> {
-    fn map_to_type(&mut self, type_expr : &SpanTypeExpression) -> ResolvedTypeExpr {
+    fn map_to_type(&mut self, type_expr : &SpanTypeExpression) -> WrittenType {
         match &type_expr.0 {
             TypeExpression::Named => {
                 if let Some(typ_id) = &self.linker.resolve_type(type_expr.1, &self.errors) {
-                    ResolvedTypeExpr::Named(type_expr.1, *typ_id)
+                    WrittenType::Named(type_expr.1, *typ_id)
                 } else {
-                    ResolvedTypeExpr::Error(type_expr.1)
+                    WrittenType::Error(type_expr.1)
                 }
             }
             TypeExpression::Array(b) => {
                 let (array_type_expr, array_size_expr) = b.deref();
                 let array_element_type = self.map_to_type(&array_type_expr);
                 let array_size_wire_id = self.flatten_expr(array_size_expr);
-                ResolvedTypeExpr::Array(type_expr.1, Box::new((array_element_type, array_size_wire_id)))
+                WrittenType::Array(type_expr.1, Box::new((array_element_type, array_size_wire_id)))
             }
         }
     }
@@ -212,14 +212,14 @@ impl<'inst, 'l, 'm> FlatteningContext<'inst, 'l, 'm> {
                     return self.alloc_module_interface(decl.name.clone(), md, id, decl.typ.1)
                 }
                 Some(NameElem::Type(id)) => {
-                    ResolvedTypeExpr::Named(decl.typ.1, id)
+                    WrittenType::Named(decl.typ.1, id)
                 }
                 Some(global_module_or_type) => {
                     let accepted = if ALLOW_MODULES {"Type or Module"} else {"Type"};
                     self.linker.make_bad_error_location_error(global_module_or_type, accepted, decl.typ.1, &self.errors);
-                    ResolvedTypeExpr::Error(decl.typ.1)
+                    WrittenType::Error(decl.typ.1)
                 }
-                None => ResolvedTypeExpr::Error(decl.typ.1)
+                None => WrittenType::Error(decl.typ.1)
             }
         } else {
             self.map_to_type(&decl.typ)
diff --git a/src/linker.rs b/src/linker.rs
index 0c28bb9..80de90a 100644
--- a/src/linker.rs
+++ b/src/linker.rs
@@ -1,6 +1,6 @@
 use std::{collections::{HashMap, HashSet}, rc::Rc, cell::RefCell};
 
-use crate::{arena_alloc::{ArenaAllocator, UUID, UUIDMarker}, ast::{Module, LinkInfo, Span}, errors::{ErrorCollector, error_info}, flattening::{FlatID, FlattenedModule, Instruction}, instantiation::InstantiatedModule, parser::{FullParseResult, TokenTreeNode}, tokenizer::TokenizeResult, typing::Type, util::{const_str_position, const_str_position_in_tuples}, value::Value};
+use crate::{arena_alloc::{ArenaAllocator, UUID, UUIDMarker}, ast::{Module, LinkInfo, Span}, errors::{ErrorCollector, error_info}, flattening::{ConnectionWrite, FlatID, FlattenedModule, Instruction, WireInstance}, instantiation::InstantiatedModule, parser::{FullParseResult, TokenTreeNode}, tokenizer::TokenizeResult, typing::{WrittenType, Type}, util::{const_str_position, const_str_position_in_tuples}, value::Value};
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub struct ModuleUUIDMarker;
@@ -372,6 +372,81 @@ impl Linker {
         md.instantiations.instantiate(&md.link_info.name, &md.flattened, self)
     }
+
+    pub fn get_info_about_source_location<'linker>(&'linker self, token_idx : usize, file : FileUUID) -> Option<(LocationInfo<'linker>, Span)> {
+        let mut location_builder = LocationInfoBuilder::new(token_idx);
+
+        for global in &self.files[file].associated_values {
+            match *global {
+                NameElem::Module(md_id) => {
+                    let md = &self.modules[md_id];
+                    if md.link_info.span.contains_token(token_idx) {
+                        for (_id, inst) in &md.flattened.instructions {
+                            match inst {
+                                Instruction::SubModule(sm) => {
+                                    location_builder.update(sm.module_name_span, LocationInfo::Global(NameElem::Module(sm.module_uuid)));
+                                }
+                                Instruction::Declaration(decl) => {
+                                    if let Some(typ) = decl.typ_expr.get_deepest_selected(token_idx) {
+                                        location_builder.update(typ.get_span(), LocationInfo::Type(typ));
+                                    }
+                                }
+                                Instruction::Wire(wire) => {
+                                    location_builder.update(wire.span, LocationInfo::Wire(md, wire));
+                                }
+                                Instruction::Write(write) => {
+                                    location_builder.update(Span::new_single_token(write.to.span.0), LocationInfo::WriteWire(md, &write.to));
+                                }
+                                Instruction::IfStatement(_) | Instruction::ForStatement(_) => {}
+                            };
+                        }
+                        break;
+                    }
+                }
+                NameElem::Type(_) => {
+                    todo!()
+                }
+                NameElem::Constant(_) => {
+                    todo!()
+                }
+            }
+        }
+        if let Some(instr) = location_builder.best_instruction {
+            Some((instr, location_builder.best_span))
+        } else {
+            None
+        }
+    }
+}
+
+pub enum LocationInfo<'linker> {
+    WriteWire(&'linker Module, &'linker ConnectionWrite),
+    Wire(&'linker Module, &'linker WireInstance),
+    Type(&'linker WrittenType),
+    Global(NameElem)
+}
+
+struct LocationInfoBuilder<'linker> {
+    best_instruction : Option<LocationInfo<'linker>>,
+    best_span : Span,
+    token_idx : usize
+}
+
+impl<'linker> LocationInfoBuilder<'linker> {
+    fn new(token_idx : usize) -> Self {
+        Self{
+            best_instruction : None,
+            best_span : Span(0, usize::MAX),
+            token_idx
+        }
+    }
+    fn update(&mut self, span : Span, info : LocationInfo<'linker>) {
+        if span.contains_token(self.token_idx) && span.size() <= self.best_span.size() {
+            assert!(span.size() < self.best_span.size());
+            self.best_span = span;
+            self.best_instruction = Some(info);
+        }
+    }
 }
 
 #[derive(Debug)]
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index c458f62..4b366c1 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -205,16 +205,23 @@ impl<'iter> Iterator for FileIter<'iter> {
     }
 }
 
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct CharLine {
+    pub line : usize,
+    pub character : usize
+}
+
 pub struct TokenizeResult {
     pub token_types : Vec<TokenTypeIdx>,
     // List of all boundaries. Starts with 0, in whitespace mode, and then alternatingly switch to being a token, switch to being whitespace, back and forth
     // The span of token i is given by token_boundaries[i*2+1..i*2+2]
     // Ends at the end of the file, with a final whitespace block
-    pub token_boundaries : Vec<usize>
+    pub token_boundaries : Vec<usize>,
+    pub token_boundaries_as_char_lines : Vec<CharLine>
 }
 impl TokenizeResult {
     fn new() -> Self {
-        TokenizeResult{token_types : Vec::new(), token_boundaries : vec![0]}
+        TokenizeResult{token_types : Vec::new(), token_boundaries : vec![0], token_boundaries_as_char_lines : Vec::new()}
     }
     // Result can be used for error reporting
     fn push(&mut self, typ : TokenTypeIdx, rng : Range<usize>) {
@@ -226,6 +233,22 @@ impl TokenizeResult {
         errors.error_basic(Span::new_single_token(self.token_types.len()), motivation);
         self.push(TOKEN_INVALID, rng);
     }
+    fn finalize(&mut self, file_text : &str) {
+        let mut cur_position = CharLine{line: 0, character: 0};
+        let mut start = 0;
+        self.token_boundaries_as_char_lines = self.token_boundaries.iter().map(|part_end| {
+            for c in file_text[start..*part_end].chars() {
+                if c == '\n' {
+                    cur_position.line += 1;
+                    cur_position.character = 0;
+                } else {
+                    cur_position.character += 1;
+                }
+            }
+            start = *part_end;
+            cur_position
+        }).collect();
+    }
 
     pub fn len(&self) -> usize {
         self.token_types.len()
@@ -233,6 +256,15 @@ impl TokenizeResult {
     pub fn get_token_range(&self, token_idx : usize) -> Range<usize> {
         self.token_boundaries[token_idx*2+1]..self.token_boundaries[token_idx*2+2]
     }
+    pub fn get_token_linechar_range(&self, token_idx : usize) -> Range<CharLine> {
+        self.token_boundaries_as_char_lines[token_idx*2+1]..self.token_boundaries_as_char_lines[token_idx*2+2]
+    }
+    pub fn get_span_range(&self, span : Span) -> Range<usize> {
+        self.token_boundaries[span.0*2+1]..self.token_boundaries[span.1*2+2]
+    }
+    pub fn get_span_linechar_range(&self, span : Span) -> Range<CharLine> {
+        self.token_boundaries_as_char_lines[span.0*2+1]..self.token_boundaries_as_char_lines[span.1*2+2]
+    }
 }
 
 pub fn tokenize<'txt>(file_text : &'txt str, errors : &ErrorCollector) -> TokenizeResult {
@@ -302,8 +334,15 @@ pub fn tokenize<'txt>(file_text : &'txt str, errors : &ErrorCollector) -> Tokeni
             } else {
                 file_text.len()
             };
-            let comment_span = file_pos..end_pos;
-            result.push(TOKEN_COMMENT, comment_span);
+            let mut part_start = file_pos;
+            for (idx, c) in file_text[file_pos..end_pos].char_indices() {
+                if c == '\n' {
+                    let real_idx = file_pos + idx;
+                    result.push(TOKEN_COMMENT, part_start..real_idx);
+                    part_start = real_idx + 1;
+                }
+            }
+            result.push(TOKEN_COMMENT, part_start..end_pos);
 
         } else if symbol_tok_id == kw("*/") {
             // Unexpected close comment
@@ -317,5 +356,6 @@ pub fn tokenize<'txt>(file_text : &'txt str, errors : &ErrorCollector) -> Tokeni
     }
 
     result.token_boundaries.push(file_text.len());
+    result.finalize(file_text);
     result
 }
diff --git a/src/typing.rs b/src/typing.rs
index 7b0804b..f38ff54 100644
--- a/src/typing.rs
+++ b/src/typing.rs
@@ -4,18 +4,18 @@ use crate::{ast::{Operator, Span}, linker::{get_builtin_type, TypeUUID, Linker,
 
 // These are
 #[derive(Debug, Clone)]
-pub enum ResolvedTypeExpr {
+pub enum WrittenType {
     Error(Span),
     Named(Span, TypeUUID),
-    Array(Span, Box<(ResolvedTypeExpr, FlatID)>)
+    Array(Span, Box<(WrittenType, FlatID)>)
 }
 
-impl ResolvedTypeExpr {
+impl WrittenType {
     pub fn for_each_located_type<F : FnMut(Option<TypeUUID>, Span)>(&self, f : &mut F) {
         match self {
-            ResolvedTypeExpr::Error(span) => {f(None, *span)}
-            ResolvedTypeExpr::Named(span, id) => {f(Some(*id), *span)}
-            ResolvedTypeExpr::Array(_span, arr_box) => {
+            WrittenType::Error(span) => {f(None, *span)}
+            WrittenType::Named(span, id) => {f(Some(*id), *span)}
+            WrittenType::Array(_span, arr_box) => {
                 let (arr, _idx) = arr_box.deref();
                 arr.for_each_located_type(f);
             }
@@ -24,15 +24,34 @@ impl ResolvedTypeExpr {
 
     pub fn get_span(&self) -> Span {
         match self {
-            ResolvedTypeExpr::Error(span) | ResolvedTypeExpr::Named(span, _) | ResolvedTypeExpr::Array(span, _) => *span
+            WrittenType::Error(span) | WrittenType::Named(span, _) | WrittenType::Array(span, _) => *span
+        }
+    }
+
+    pub fn get_deepest_selected(&self, token_idx : usize) -> Option<&WrittenType> {
+        let span = self.get_span();
+        if span.contains_token(token_idx) {
+            match self {
+                WrittenType::Error(_span) | WrittenType::Named(_span, _) => {}
+                WrittenType::Array(_span, arr_box) => {
+                    let (arr_typ, _idx) = arr_box.deref();
+                    let sub = arr_typ.get_deepest_selected(token_idx);
+                    if sub.is_some() {
+                        return sub;
+                    }
+                }
+            }
+            Some(self)
+        } else {
+            None
         }
     }
 
     pub fn to_type(&self) -> Type {
         match self {
-            ResolvedTypeExpr::Error(_) => Type::Error,
-            ResolvedTypeExpr::Named(_, id) => Type::Named(*id),
-            ResolvedTypeExpr::Array(_, arr_box) => {
+            WrittenType::Error(_) => Type::Error,
+            WrittenType::Named(_, id) => Type::Named(*id),
+            WrittenType::Array(_, arr_box) => {
                 let (elem_typ, arr_idx) = arr_box.deref();
                 Type::Array(Box::new((elem_typ.to_type(), *arr_idx)))
             }