diff --git a/core/src/bytecode/ast/builder.rs b/core/src/bytecode/ast/builder.rs index abcb9c07f6..9b4e4ff502 100644 --- a/core/src/bytecode/ast/builder.rs +++ b/core/src/bytecode/ast/builder.rs @@ -32,8 +32,6 @@ use super::{ *, }; -use std::rc::Rc; - type StaticPath = Vec; /// Typestate style tag for `Field`s that are not yet completely specified @@ -56,6 +54,12 @@ pub struct Field<'ast, State> { state: State, path: StaticPath, metadata: FieldMetadata<'ast>, + /// We need to store the documentation separately, because the metadata only accepts a string + /// that has been allocated in the AST allocator. We could require to thread the allocator for + /// calls to [Self::doc] or [Self::some_doc], but it's more ergonomic to keep the phases of + /// building a field and finalizing it separate. We thus store the documentation here + /// temporarily. + doc: Option, /// As we might build contract element by element, we can't rely on /// `metadata.annotation.contracts`, which is a fixed array. /// @@ -72,7 +76,7 @@ impl<'ast, A> Field<'ast, A> { /// Attach documentation metadata to the field, optionally pub fn some_doc(mut self, some_doc: Option>) -> Self { - self.metadata.doc = some_doc.map(|doc| Rc::from(doc.as_ref())); + self.doc = some_doc.map(|s| s.as_ref().to_string()); self } @@ -133,6 +137,7 @@ impl<'ast> Field<'ast, Incomplete> { state: Incomplete(), path: path.into_iter().map(|e| e.as_ref().into()).collect(), metadata: Default::default(), + doc: None, contracts: Vec::new(), } } @@ -148,6 +153,7 @@ impl<'ast> Field<'ast, Incomplete> { state: Complete(None), path: self.path, metadata: self.metadata, + doc: self.doc, contracts: self.contracts, } } @@ -158,6 +164,7 @@ impl<'ast> Field<'ast, Incomplete> { state: Complete(Some(value.into())), path: self.path, metadata: self.metadata, + doc: self.doc, contracts: self.contracts, } } @@ -172,6 +179,7 @@ impl<'ast> Field<'ast, Complete<'ast>> { state: record, path: self.path, metadata: self.metadata, + doc: self.doc, contracts: self.contracts, }; match value { @@ -185,6 +193,7 @@ impl<'ast> Field<'ast, Record<'ast>> { /// Finalize the [`Field`] without setting a value pub fn no_value(mut self, alloc: &'ast AstAlloc) -> Record<'ast> { self.finalize_contracts(alloc); + self.metadata.doc = self.doc.map(|s| alloc.alloc_str(&s)); self.state.field_defs.push(record::FieldDef { path: alloc.alloc_many( @@ -202,6 +211,7 @@ impl<'ast> Field<'ast, Record<'ast>> { /// Finalize the [`Field`] by setting its a value pub fn value(mut self, alloc: &'ast AstAlloc, value: impl Into>) -> Record<'ast> { self.finalize_contracts(alloc); + self.metadata.doc = self.doc.map(|s| alloc.alloc_str(&s)); self.state.field_defs.push(record::FieldDef { path: alloc.alloc_many( @@ -244,6 +254,7 @@ impl<'ast> Record<'ast> { state: self, path: vec![Ident::new(name)], metadata: Default::default(), + doc: None, contracts: Vec::new(), } } @@ -270,6 +281,7 @@ impl<'ast> Record<'ast> { state: self, path: path.into_iter().map(|e| Ident::new(e)).collect(), metadata: Default::default(), + doc: None, contracts: Vec::new(), } } @@ -511,7 +523,7 @@ mod tests { ( "foo", FieldMetadata { - doc: Some(Rc::from("foo")), + doc: Some(alloc.alloc_str("foo")), ..Default::default() }, None @@ -520,7 +532,7 @@ mod tests { ( "baz", FieldMetadata { - doc: Some(Rc::from("baz")), + doc: Some(alloc.alloc_str("baz")), ..Default::default() }, None, @@ -764,7 +776,7 @@ mod tests { iter::once(( "foo", FieldMetadata { - doc: Some(Rc::from("foo?")), + doc: Some(alloc.alloc_str("foo?")), opt: true, priority: MergePriority::Bottom, not_exported: true, diff --git a/core/src/bytecode/ast/compat.rs b/core/src/bytecode/ast/compat.rs index 03e461d017..cbba40ec04 100644 --- a/core/src/bytecode/ast/compat.rs +++ b/core/src/bytecode/ast/compat.rs @@ -186,7 +186,7 @@ impl<'ast> FromMainline<'ast, (LocIdent, term::record::Field)> for record::Field impl<'ast> FromMainline<'ast, term::record::FieldMetadata> for record::FieldMetadata<'ast> { fn from_mainline(alloc: &'ast AstAlloc, metadata: &term::record::FieldMetadata) -> Self { - let doc = metadata.doc.as_ref().map(|doc| rc::Rc::from(doc.as_str())); + let doc = metadata.doc.as_ref().map(|doc| alloc.alloc_str(doc)); record::FieldMetadata { doc, @@ -1019,6 +1019,15 @@ impl<'ast> FromAst> for MainlineEnumRowsUnr { } } +impl<'ast> FromAst> for mline_type::EnumRow { + fn from_ast(erow: &EnumRow<'ast>) -> Self { + mline_type::EnumRow { + id: erow.id, + typ: erow.typ.as_ref().map(|ty| Box::new((*ty).to_mainline())), + } + } +} + impl<'ast> FromAst> for MainlineRecordRowsUnr { fn from_ast(rrows: &RecordRowsUnr<'ast>) -> Self { rrows.clone().map( @@ -1028,9 +1037,22 @@ impl<'ast> FromAst> for MainlineRecordRowsUnr { } } +impl<'ast> FromAst> for mline_type::RecordRow { + fn from_ast(rrow: &RecordRow<'ast>) -> Self { + mline_type::RecordRowF { + id: rrow.id, + typ: Box::new(rrow.typ.to_mainline()), + } + } +} + impl<'ast> FromAst> for term::LabeledType { fn from_ast(typ: &Type<'ast>) -> Self { let typ: mline_type::Type = typ.to_mainline(); + //TODO:remove + if typ.pos.into_opt().is_none() { + panic!("Expected a position to be set for the type {typ:?}"); + } // We expect the new AST node to always have a position set. In fact we should // probably switch to `RawSpan` instead of `TermPos` everywhere; but let's do that // later diff --git a/core/src/bytecode/ast/mod.rs b/core/src/bytecode/ast/mod.rs index 3c584f4d87..03ee94e00c 100644 --- a/core/src/bytecode/ast/mod.rs +++ b/core/src/bytecode/ast/mod.rs @@ -11,8 +11,7 @@ use std::{ ffi::{OsStr, OsString}, - fmt::Debug, - iter, rc, + fmt, iter, }; use pattern::Pattern; @@ -165,7 +164,7 @@ pub struct LetBinding<'ast> { /// The metadata that can be attached to a let. It's a subset of [record::FieldMetadata]. #[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct LetMetadata<'ast> { - pub doc: Option>, + pub doc: Option<&'ast str>, pub annotation: Annotation<'ast>, } @@ -233,6 +232,15 @@ impl Ast<'_> { } } +impl Default for Ast<'_> { + fn default() -> Self { + Ast { + node: Node::Null, + pos: TermPos::None, + } + } +} + /// A branch of a match expression. #[derive(Debug, PartialEq, Eq, Clone)] pub struct MatchBranch<'ast> { @@ -263,18 +271,7 @@ pub struct Annotation<'ast> { pub contracts: &'ast [Type<'ast>], } -impl<'ast> Annotation<'ast> { - /// Returns the main annotation, which is either the type annotation if any, or the first - /// contract annotation. - pub fn first<'a>(&'a self) -> Option<&'a Type<'ast>> { - self.typ.as_ref().or(self.contracts.iter().next()) - } - - /// Iterates over the annotations, starting by the type and followed by the contracts. - pub fn iter<'a>(&'a self) -> impl Iterator> { - self.typ.iter().chain(self.contracts.iter()) - } - +impl Annotation<'_> { /// Returns a string representation of the contracts (without the static type annotation) as a /// comma-separated list. pub fn contracts_to_string(&self) -> Option { @@ -295,8 +292,8 @@ impl<'ast> Annotation<'ast> { } } -#[derive(Clone, Debug, PartialEq, Eq)] /// Specifies where something should be imported from. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Import<'ast> { Path { path: &'ast OsStr, @@ -442,8 +439,8 @@ impl<'ast> TraverseAlloc<'ast, Ast<'ast>> for Ast<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Ast<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Ast<'ast>, &S) -> TraverseControl, state: &S, ) -> Option { let child_state = match f(self, state) { @@ -558,12 +555,12 @@ impl<'ast> TraverseAlloc<'ast, Type<'ast>> for Ast<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Type<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Type<'ast>, &S) -> TraverseControl, state: &S, ) -> Option { self.traverse_ref( - &mut |ast: &Ast<'ast>, state: &S| match &ast.node { + &mut |ast: &'ast Ast<'ast>, state: &S| match &ast.node { Node::Type(typ) => typ.traverse_ref(f, state).into(), _ => TraverseControl::Continue, }, @@ -592,8 +589,8 @@ impl<'ast> TraverseAlloc<'ast, Ast<'ast>> for Annotation<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Ast<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Ast<'ast>, &S) -> TraverseControl, scope: &S, ) -> Option { self.typ @@ -630,8 +627,8 @@ impl<'ast> TraverseAlloc<'ast, Ast<'ast>> for LetBinding<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Ast<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Ast<'ast>, &S) -> TraverseControl, scope: &S, ) -> Option { self.metadata @@ -666,8 +663,8 @@ impl<'ast> TraverseAlloc<'ast, Ast<'ast>> for MatchBranch<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Ast<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Ast<'ast>, &S) -> TraverseControl, scope: &S, ) -> Option { self.pattern @@ -697,6 +694,7 @@ impl Allocable for MatchBranch<'_> {} impl Allocable for Record<'_> {} impl Allocable for record::FieldPathElem<'_> {} impl Allocable for FieldDef<'_> {} +impl Allocable for record::FieldMetadata<'_> {} impl Allocable for Pattern<'_> {} impl Allocable for EnumPattern<'_> {} @@ -738,6 +736,11 @@ impl AstAlloc { } } + /// Return the current number of allocated bytes. + pub fn allocated_bytes(&self) -> usize { + self.generic_arena.allocated_bytes() + self.number_arena.len() + self.error_arena.len() + } + /// Allocates an AST component in the arena. /// /// [Self] never guarantees that all destructors are going to be run when using such a generic @@ -999,8 +1002,8 @@ impl AstAlloc { // Phony implementation of `Debug` so that we can still derive the trait for structure that holds // onto an allocator. -impl Debug for AstAlloc { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl fmt::Debug for AstAlloc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> fmt::Result { write!(f, "AstAlloc") } } @@ -1028,3 +1031,13 @@ where fn try_convert(alloc: &'ast AstAlloc, from: T) -> Result; } + +//TODO: get rid of this expensive implementation once we migrate pretty::*. +impl fmt::Display for Ast<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use crate::term::RichTerm; + use compat::FromAst as _; + + write!(f, "{}", RichTerm::from_ast(self)) + } +} diff --git a/core/src/bytecode/ast/pattern/mod.rs b/core/src/bytecode/ast/pattern/mod.rs index 50907eaf65..5a17d5a1b6 100644 --- a/core/src/bytecode/ast/pattern/mod.rs +++ b/core/src/bytecode/ast/pattern/mod.rs @@ -272,8 +272,8 @@ impl<'ast> TraverseAlloc<'ast, Ast<'ast>> for Pattern<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Ast<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Ast<'ast>, &S) -> TraverseControl, scope: &S, ) -> Option { match &self.data { @@ -324,8 +324,8 @@ impl<'ast> TraverseAlloc<'ast, Ast<'ast>> for FieldPattern<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Ast<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Ast<'ast>, &S) -> TraverseControl, scope: &S, ) -> Option { self.annotation diff --git a/core/src/bytecode/ast/record.rs b/core/src/bytecode/ast/record.rs index 79f8324a02..3edffd7293 100644 --- a/core/src/bytecode/ast/record.rs +++ b/core/src/bytecode/ast/record.rs @@ -1,11 +1,13 @@ use super::{Annotation, Ast, AstAlloc, TraverseAlloc, TraverseControl, TraverseOrder}; -use crate::{identifier::LocIdent, position::TermPos}; +use crate::{ + identifier::{Ident, LocIdent}, + position::TermPos, + term::IndexMap, +}; pub use crate::term::MergePriority; -use std::rc::Rc; - /// Element of a record field path in a record field definition. For example, in `{ a."%{"hello-" /// ++ "world"}".c = true }`, the path `a."%{b}".c` is composed of three elements: an identifier /// `a`, an expression `"hello" ++ "world"`, and another identifier `c`. @@ -43,8 +45,8 @@ impl<'ast> FieldPathElem<'ast> { alloc.alloc_singleton(FieldPathElem::Expr(expr)) } - /// Try to interpret this element element as a static identifier. Returns `None` if the the - /// element is an expression with interpolation inside. + /// Try to interpret this element element as a static identifier. Returns `None` if the element + /// is an expression with interpolation inside. Dual of [Self::try_as_dyn_expr]. pub fn try_as_ident(&self) -> Option { match self { FieldPathElem::Ident(ident) => Some(*ident), @@ -54,6 +56,17 @@ impl<'ast> FieldPathElem<'ast> { .map(|s| LocIdent::from(s).with_pos(expr.pos)), } } + + /// Tries to interpret this element as a dynamic identifier. Returns `None` if the element is a + /// static identifier (that is, if [Self::try_as_ident] returns `Some(_)`). + pub fn try_as_dyn_expr(&self) -> Option<&Ast<'ast>> { + match self { + FieldPathElem::Expr(expr) if expr.node.try_str_chunk_as_static_str().is_none() => { + Some(expr) + } + _ => None, + } + } } /// A field definition. A field is defined by a dot-separated path of identifier or interpolated @@ -85,19 +98,24 @@ impl FieldDef<'_> { } } - /// Try to get the declared field name, that is the last element of the path, as a static - /// identifier. + /// Returns the declared field name, that is the last element of the path, as a static + /// identifier. Returns `None` if the last element is an expression. pub fn name_as_ident(&self) -> Option { self.path.last().expect("empty field path").try_as_ident() } + + /// Returns the root identifier of the field path, that is the first element of the path, as a + /// static identifier. Returns `None` if the first element is an expression. + pub fn root_as_ident(&self) -> Option { + self.path.first().expect("empty field path").try_as_ident() + } } /// The metadata attached to record fields. #[derive(Debug, PartialEq, Eq, Clone, Default)] pub struct FieldMetadata<'ast> { - /// The documentation of the field. This is allocated once and for all and shared through a - /// reference-counted pointer. - pub doc: Option>, + /// The documentation of the field. + pub doc: Option<&'ast str>, /// Type and contract annotations. pub annotation: Annotation<'ast>, /// If the field is optional. @@ -140,7 +158,7 @@ pub struct Record<'ast> { pub open: bool, } -impl Record<'_> { +impl<'ast> Record<'ast> { /// A record with no fields and the default set of attributes. pub fn empty() -> Self { Default::default() @@ -158,6 +176,69 @@ impl Record<'_> { .iter() .all(|field| field.path.iter().any(|elem| elem.try_as_ident().is_some())) } + + /// Returns the top-level static fields of this record. + /// + /// # Example + /// + /// The top-level static fields of this record are `foo` and `bar`: + /// + /// ```nickel + /// { + /// foo.bar = 1, + /// foo.baz = 2, + /// bar.baz = 3, + /// "%{x}" = false, + /// } + /// ``` + pub fn toplvl_stat_fields(&self) -> Vec { + self.field_defs + .iter() + .filter_map(|field| field.path.first()?.try_as_ident()) + .collect() + } + + /// Returns the top-level dynamically defined fields of this record. + pub fn toplvl_dyn_fields(&self) -> Vec<&Ast<'ast>> { + self.field_defs + .iter() + .filter_map(|field| field.path.first()?.try_as_dyn_expr()) + .collect() + } + + /// Returns all the pieces that defines the field with the given identifier. This requires to + /// make a linear search over this record. + pub fn defs_of(&self, ident: Ident) -> Vec<&FieldDef<'ast>> { + self.field_defs + .iter() + .filter(|field| { + field + .path + .first() + .and_then(FieldPathElem::try_as_ident) + .is_some_and(|i| i.ident() == ident) + }) + .collect() + } + + /// Returns an iterator over all field definitions, grouped by the first identifier of their + /// paths (that is, the field which they are defining). Field that aren't statically defined + /// (i.e. whose path's first element isn't an ident) are ignored. + pub fn group_by_field_id(&self) -> IndexMap>> { + let mut map = IndexMap::new(); + + for (id, field) in self.field_defs.iter().filter_map(|field| { + field + .path + .first() + .and_then(FieldPathElem::try_as_ident) + .map(|i| (i, field)) + }) { + map.entry(id.ident()).or_insert_with(Vec::new).push(field); + } + + map + } } impl<'ast> TraverseAlloc<'ast, Ast<'ast>> for FieldDef<'ast> { @@ -201,8 +282,8 @@ impl<'ast> TraverseAlloc<'ast, Ast<'ast>> for FieldDef<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Ast<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Ast<'ast>, &S) -> TraverseControl, scope: &S, ) -> Option { self.path diff --git a/core/src/bytecode/ast/typ.rs b/core/src/bytecode/ast/typ.rs index 76ff51d015..c9a2a6b7fb 100644 --- a/core/src/bytecode/ast/typ.rs +++ b/core/src/bytecode/ast/typ.rs @@ -1,9 +1,12 @@ //! Representation of Nickel types in the AST. use super::{Ast, AstAlloc, TermPos}; -use crate::{traverse::*, typ as mainline_typ}; +use crate::{identifier::Ident, traverse::*, typ as mainline_typ}; +use iter::*; pub use mainline_typ::{EnumRowF, EnumRowsF, RecordRowF, RecordRowsF, TypeF}; +use std::fmt; + /// The recursive unrolling of a type, that is when we "peel off" the top-level layer to find the actual /// structure represented by an instantiation of `TypeF`. pub type TypeUnr<'ast> = TypeF<&'ast Type<'ast>, RecordRows<'ast>, EnumRows<'ast>, &'ast Ast<'ast>>; @@ -48,8 +51,8 @@ impl<'ast> Type<'ast> { } /// Searches for a [crate::typ::TypeF]. If one is found, returns the term it contains. - pub fn find_contract(&self) -> Option<&'ast Ast<'ast>> { - self.find_map(|ty: &Type| match &ty.typ { + pub fn find_contract(&'ast self) -> Option<&'ast Ast<'ast>> { + self.find_map(|ty: &'ast Type| match &ty.typ { TypeF::Contract(f) => Some(*f), _ => None, }) @@ -97,8 +100,8 @@ impl<'ast> TraverseAlloc<'ast, Type<'ast>> for Type<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Type<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Type<'ast>, &S) -> TraverseControl, state: &S, ) -> Option { let child_state = match f(self, state) { @@ -159,12 +162,12 @@ impl<'ast> TraverseAlloc<'ast, Ast<'ast>> for Type<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Ast<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Ast<'ast>, &S) -> TraverseControl, state: &S, ) -> Option { self.traverse_ref( - &mut |ty: &Type, s: &S| match &ty.typ { + &mut |ty: &'ast Type, s: &S| match &ty.typ { TypeF::Contract(t) => { if let Some(ret) = t.traverse_ref(f, s) { TraverseControl::Return(ret) @@ -202,8 +205,8 @@ impl<'ast> TraverseAlloc<'ast, Type<'ast>> for RecordRows<'ast> { } fn traverse_ref( - &self, - f: &mut dyn FnMut(&Type<'ast>, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast Type<'ast>, &S) -> TraverseControl, state: &S, ) -> Option { match &self.0 { @@ -215,3 +218,160 @@ impl<'ast> TraverseAlloc<'ast, Type<'ast>> for RecordRows<'ast> { } } } + +impl<'ast> RecordRows<'ast> { + /// Find a nested binding in a record row type. The nested field is given as a list of + /// successive fields, that is, as a path. Return `None` if there is no such binding. + /// + /// # Example + /// + /// - self: `{a : {b : Number }}` + /// - path: `["a", "b"]` + /// - result: `Some(Number)` + pub fn find_path<'a>(&'a self, path: &[Ident]) -> Option<&'a RecordRow<'ast>> { + if path.is_empty() { + return None; + } + + // While going through the record rows, we use this helper for recursion instead of + // `find_path`, to avoid cloning a lot of intermediate rows, and rather only clone the + // final one to return. + fn find_path_ref<'a, 'ast>( + rrows: &'a RecordRows<'ast>, + path: &[Ident], + ) -> Option<&'a RecordRow<'ast>> { + let next = rrows.iter().find_map(|item| match item { + RecordRowsItem::Row(row) if row.id.ident() == path[0] => Some(row), + _ => None, + }); + + if path.len() == 1 { + next + } else { + match next.map(|row| &row.typ.typ) { + Some(TypeF::Record(rrows)) => find_path_ref(rrows, &path[1..]), + _ => None, + } + } + } + + find_path_ref(self, path) + } + + /// Find the row with the given identifier in the record type. Return `None` if there is no such + /// row. + /// + /// Equivalent to `find_path(&[id])`. + pub fn find_row(&self, id: Ident) -> Option<&RecordRow<'ast>> { + self.find_path(&[id]) + } + + pub fn iter(&self) -> RecordRowsIter, RecordRows<'ast>> { + RecordRowsIter { + rrows: Some(self), + ty: std::marker::PhantomData, + } + } +} + +impl<'ast> EnumRows<'ast> { + /// Find the row with the given identifier in the enum type. Return `None` if there is no such + /// row. + pub fn find_row<'a>(&'a self, id: Ident) -> Option<&'a EnumRow<'ast>> { + self.iter().find_map(|row_item| match row_item { + EnumRowsItem::Row(row) if row.id.ident() == id => Some(row), + _ => None, + }) + } + + pub fn iter(&self) -> EnumRowsIter, EnumRows<'ast>> { + EnumRowsIter { + erows: Some(self), + ty: std::marker::PhantomData, + } + } +} + +//TODO: get rid of this expensive implementation once we migrate pretty::*. +impl fmt::Display for Type<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use super::compat::FromAst as _; + use crate::typ; + + write!(f, "{}", typ::Type::from_ast(self)) + } +} + +pub mod iter { + use super::*; + use crate::identifier::LocIdent; + + /// An iterator over the rows of a record type. + pub struct RecordRowsIter<'a, Ty, RRows> { + pub(crate) rrows: Option<&'a RRows>, + pub(crate) ty: std::marker::PhantomData, + } + + /// The item produced by an iterator over record rows. + pub enum RecordRowsItem<'a, Ty> { + TailDyn, + TailVar(&'a LocIdent), + Row(&'a RecordRowF), + } + + impl<'a, 'ast> Iterator for RecordRowsIter<'a, Type<'ast>, RecordRows<'ast>> { + type Item = RecordRowsItem<'a, &'ast Type<'ast>>; + + fn next(&mut self) -> Option { + self.rrows.and_then(|next| match next.0 { + RecordRowsF::Empty => { + self.rrows = None; + None + } + RecordRowsF::TailDyn => { + self.rrows = None; + Some(RecordRowsItem::TailDyn) + } + RecordRowsF::TailVar(ref id) => { + self.rrows = None; + Some(RecordRowsItem::TailVar(id)) + } + RecordRowsF::Extend { ref row, tail } => { + self.rrows = Some(tail); + Some(RecordRowsItem::Row(row)) + } + }) + } + } + + pub struct EnumRowsIter<'a, Ty, ERows> { + pub(crate) erows: Option<&'a ERows>, + pub(crate) ty: std::marker::PhantomData, + } + + pub enum EnumRowsItem<'a, Ty> { + TailVar(&'a LocIdent), + Row(&'a EnumRowF), + } + + impl<'a, 'ast> Iterator for EnumRowsIter<'a, Type<'ast>, EnumRows<'ast>> { + type Item = EnumRowsItem<'a, &'ast Type<'ast>>; + + fn next(&mut self) -> Option { + self.erows.and_then(|next| match next.0 { + EnumRowsF::Empty => { + self.erows = None; + None + } + EnumRowsF::TailVar(ref id) => { + self.erows = None; + Some(EnumRowsItem::TailVar(id)) + } + EnumRowsF::Extend { ref row, tail } => { + self.erows = Some(tail); + Some(EnumRowsItem::Row(row)) + } + }) + } + } +} diff --git a/core/src/bytecode/typecheck/error.rs b/core/src/bytecode/typecheck/error.rs index 94329fb349..9b28f7d06f 100644 --- a/core/src/bytecode/typecheck/error.rs +++ b/core/src/bytecode/typecheck/error.rs @@ -422,12 +422,14 @@ impl<'ast> UnifError<'ast> { #[allow(unused_variables)] #[allow(unreachable_code)] UnifError::RecordRowConflict { - row: _, + row, expected, inferred, } => TypecheckError::RecordRowConflict { // We won't convert to mainline when we'll plug-in the migrated typechecker, so it doesn't make sense to try to fix this line now - the error will go away. - row: todo!(), //row.to_type(&state.ast_alloc, names_reg, state.table), + row: row + .to_type(state.ast_alloc, names_reg, state.table) + .to_mainline(), expected: expected .to_type(state.ast_alloc, names_reg, state.table) .to_mainline(), @@ -439,12 +441,14 @@ impl<'ast> UnifError<'ast> { #[allow(unused_variables)] #[allow(unreachable_code)] UnifError::EnumRowConflict { - row: _, + row, expected, inferred, } => TypecheckError::EnumRowConflict { // We won't convert to mainline when we'll plug-in the migrated typechecker, so it doesn't make sense to try to fix this line now - the error will go away. - row: todo!(), + row: row + .to_type(state.ast_alloc, names_reg, state.table) + .to_mainline(), expected: expected .to_type(state.ast_alloc, names_reg, state.table) .to_mainline(), diff --git a/core/src/bytecode/typecheck/mod.rs b/core/src/bytecode/typecheck/mod.rs index 26d4d498f2..df8a323f5c 100644 --- a/core/src/bytecode/typecheck/mod.rs +++ b/core/src/bytecode/typecheck/mod.rs @@ -1,7 +1,7 @@ //! Typechecking and type inference. //! //! Nickel uses a mix of a bidirectional typechecking algorithm, together with standard -//! unification-based type inference. Nickel is gradually typed, and dynamic typing is the default. +//! unification-based type inference.Nickel is gradually typed, and dynamic typing is the default. //! Static typechecking is triggered by a type annotation. //! //! # Modes @@ -54,17 +54,17 @@ //! //! In walk mode, the type of let-bound expressions is inferred in a shallow way (see //! [HasApparentType]). -use super::ast::{ - pattern::bindings::Bindings as _, record::FieldDef, typ::*, Annotation, Ast, AstAlloc, - MatchBranch, Node, StringChunk, TryConvert, -}; - use crate::{ - cache::ImportResolver, + bytecode::ast::{ + compat::ToMainline, pattern::bindings::Bindings as _, record::FieldDef, typ::*, Annotation, + Ast, AstAlloc, Import, LetBinding, MatchBranch, Node, StringChunk, TryConvert, + }, environment::Environment, error::TypecheckError, identifier::{Ident, LocIdent}, - mk_buty_arrow, mk_buty_enum, mk_buty_record, mk_buty_record_row, stdlib as nickel_stdlib, + mk_buty_arrow, mk_buty_enum, mk_buty_record, mk_buty_record_row, + position::TermPos, + stdlib as nickel_stdlib, traverse::TraverseAlloc, typ::{EnumRowsIterator, RecordRowsIterator, VarKind, VarKindDiscriminant}, }; @@ -97,6 +97,27 @@ use self::subtyping::SubsumedBy; /// The max depth parameter used to limit the work performed when inferring the type of the stdlib. const INFER_RECORD_MAX_DEPTH: u8 = 4; +//TODO[RFC007]: once we switch to this typechecker, replace this trait definition by an import of +//`cache::AstImportResolver`. +pub trait AstImportResolver<'ast> { + /// Resolve an import. + /// + /// Read and store the content of an import, put it in the file cache (or get it from there if + /// it is cached), then parse it and return the corresponding term and file id. + /// + /// The term and the path are provided only if the import is processed for the first time. + /// Indeed, at import resolution phase, the term of an import encountered for the first time is + /// queued to be processed (e.g. having its own imports resolved). The path is needed to + /// resolve nested imports relatively to this parent. Only after this processing the term is + /// inserted back in the cache. On the other hand, if it has been resolved before, it is + /// already transformed in the cache and do not need further processing. + fn resolve( + &mut self, + import: &Import<'ast>, + pos: &TermPos, + ) -> Result>, crate::error::ImportError>; +} + /// The typechecker has two modes, one for statically typed code and one for dynamically type code. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum TypecheckMode { @@ -112,6 +133,7 @@ pub type TypeEnv<'ast> = Environment>; /// A term environment defined as a mapping from identifiers to a tuple of a term and an /// environment (i.e. a closure). Used to compute contract equality. #[derive(PartialEq, Clone, Debug)] +//TODO: we should just store `&'ast Ast<'ast>` references here, instead of an owned version pub struct TermEnv<'ast>(pub Environment, TermEnv<'ast>)>); impl TermEnv<'_> { @@ -239,7 +261,7 @@ impl VarLevelsData { /// Unification types and variants that store an upper bound on the level of the unification /// variables they contain, or for which an upper bound can be computed quickly (in constant time). trait VarLevelUpperBound { - // Return an upper bound on the level of the unification variables contained in `self`. + // Returns an upper bound on the level of the unification variables contained in `self`. // Depending on the implementer, the level might refer to different kind of unification // variables (type, record rows or enum rows). fn var_level_upper_bound(&self) -> VarLevel; @@ -457,8 +479,8 @@ impl<'ast> UnifType<'ast> { } } - /// Return the unification root associated with this type. If the type is a unification - /// variable, return the result of `table.root_type`. Return `self` otherwise. + /// Returns the unification root associated with this type. If the type is a unification + /// variable, return the result of `table.root_type`. Returns `self` otherwise. fn into_root(self, table: &UnifTable<'ast>) -> Self { match self { UnifType::UnifVar { id, init_level } => table.root_type(id, init_level), @@ -498,8 +520,8 @@ impl<'ast> UnifRecordRows<'ast> { } } - /// Return the unification root associated with these record rows. If the rows are a unification - /// variable, return the result of `table.root_rrows`. Return `self` otherwise. + /// Returns the unification root associated with these record rows. If the rows are a unification + /// variable, return the result of `table.root_rrows`. Returns `self` otherwise. fn into_root(self, table: &UnifTable<'ast>) -> Self { match self { UnifRecordRows::UnifVar { id, init_level } => table.root_rrows(id, init_level), @@ -538,8 +560,8 @@ impl<'ast> UnifEnumRows<'ast> { } } - /// Return the unification root associated with these enum rows. If the rows are a unification - /// variable, return the result of `table.root_erows`. Return `self` otherwise. + /// Returns the unification root associated with these enum rows. If the rows are a unification + /// variable, return the result of `table.root_erows`. Returns `self` otherwise. fn into_root(self, table: &UnifTable<'ast>) -> Self { match self { UnifEnumRows::UnifVar { id, init_level } => table.root_erows(id, init_level), @@ -628,7 +650,7 @@ impl<'ast> UnifEnumRows<'ast> { } impl<'ast> UnifEnumRows<'ast> { - /// Return an iterator producing immutable references to individual rows. + /// Returns an iterator producing immutable references to individual rows. pub(super) fn iter(&self) -> EnumRowsIterator, UnifEnumRows<'ast>> { EnumRowsIterator { erows: Some(self), @@ -1242,6 +1264,14 @@ impl Context<'_> { var_level: VarLevel::MIN_LEVEL, } } + + /// Returns `true` if this context is empty, or equivalently if it equals to [Self::new()], or + /// equivalently if it equals to [Self::default()]. + pub fn is_empty(&self) -> bool { + self.type_env.is_empty() + && self.term_env.0.is_empty() + && self.var_level == VarLevel::MIN_LEVEL + } } impl Default for Context<'_> { @@ -1258,7 +1288,7 @@ pub enum EnvBuildError<'ast> { /// Populate the initial typing environment from a `Vec` of parsed files. pub fn mk_initial_ctxt<'ast>( ast_alloc: &'ast AstAlloc, - initial_env: &[(nickel_stdlib::StdlibModule, Ast<'ast>)], + initial_env: &[(nickel_stdlib::StdlibModule, &'ast Ast<'ast>)], ) -> Result, EnvBuildError<'ast>> { // Collect the bindings for each module, clone them and flatten the result to a single list. let mut bindings = Vec::new(); @@ -1283,28 +1313,24 @@ pub fn mk_initial_ctxt<'ast>( ( id, - field_def - .value - .as_ref() - .unwrap_or_else(|| { - panic!("expected stdlib module {id} to have a definition") - }) - .clone(), + field_def.value.as_ref().unwrap_or_else(|| { + panic!("expected stdlib module {id} to have a definition") + }), ) })); } (nickel_stdlib::StdlibModule::Internals, _) => { - return Err(EnvBuildError::NotARecord(ast.clone())); + return Err(EnvBuildError::NotARecord((*ast).clone())); } // Otherwise, we insert a value in the environment bound to the name of the module - (module, _) => bindings.push((module.name().into(), ast.clone())), + (module, _) => bindings.push((module.name().into(), *ast)), } } let term_env = bindings .iter() .cloned() - .map(|(id, ast)| (id.ident(), (ast, TermEnv::new()))) + .map(|(id, ast)| (id.ident(), (ast.clone(), TermEnv::new()))) .collect(); let type_env = bindings @@ -1312,7 +1338,7 @@ pub fn mk_initial_ctxt<'ast>( .map(|(id, ast)| { ( id.ident(), - infer_record_type(ast_alloc, &ast, &term_env, INFER_RECORD_MAX_DEPTH), + infer_record_type(ast_alloc, ast, &term_env, INFER_RECORD_MAX_DEPTH), ) }) .collect(); @@ -1332,7 +1358,7 @@ pub fn env_add_term<'ast>( env: &mut TypeEnv<'ast>, ast: &Ast<'ast>, term_env: &TermEnv<'ast>, - resolver: &dyn ImportResolver, + resolver: &mut dyn AstImportResolver<'ast>, ) -> Result<(), EnvBuildError<'ast>> { match &ast.node { Node::Record(record) => { @@ -1358,14 +1384,14 @@ pub fn env_add<'ast>( ast_alloc: &'ast AstAlloc, env: &mut TypeEnv<'ast>, id: LocIdent, - ast: &Ast<'ast>, + ast: &'ast Ast<'ast>, term_env: &TermEnv<'ast>, - resolver: &dyn ImportResolver, + resolver: &mut dyn AstImportResolver<'ast>, ) { env.insert( id.ident(), UnifType::from_apparent_type( - ast.node.apparent_type(ast_alloc, Some(env), Some(resolver)), + ast.apparent_type(ast_alloc, Some(env), Some(resolver)), term_env, ), ); @@ -1381,7 +1407,7 @@ pub fn env_add<'ast>( /// refined/reborrowed during recursive calls. pub struct State<'ast, 'local> { /// The import resolver, to retrieve and typecheck imports. - resolver: &'local dyn ImportResolver, + resolver: &'local mut dyn AstImportResolver<'ast>, /// The unification table. table: &'local mut UnifTable<'ast>, /// Row constraints. @@ -1409,7 +1435,7 @@ pub struct TypeTables<'ast> { /// Typechecks a term. /// /// Returns the inferred type in case of success. This is just a wrapper that calls -/// `type_check_with_visitor` with a blanket implementation for the visitor. +/// `typecheck_visit` with a blanket implementation for the visitor. /// /// Note that this function doesn't recursively typecheck imports (anymore), but just the current /// file. It however still needs the resolver to get the apparent type of imports. @@ -1417,9 +1443,9 @@ pub struct TypeTables<'ast> { /// Returns the type inferred for type wildcards. pub fn typecheck<'ast>( alloc: &'ast AstAlloc, - ast: &Ast<'ast>, + ast: &'ast Ast<'ast>, initial_ctxt: Context<'ast>, - resolver: &impl ImportResolver, + resolver: &mut dyn AstImportResolver<'ast>, initial_mode: TypecheckMode, ) -> Result, TypecheckError> { typecheck_visit(alloc, ast, initial_ctxt, resolver, &mut (), initial_mode) @@ -1429,9 +1455,9 @@ pub fn typecheck<'ast>( /// Typechecks a term while providing the type information to a visitor. pub fn typecheck_visit<'ast, V>( ast_alloc: &'ast AstAlloc, - ast: &Ast<'ast>, + ast: &'ast Ast<'ast>, initial_ctxt: Context<'ast>, - resolver: &impl ImportResolver, + resolver: &mut dyn AstImportResolver<'ast>, visitor: &mut V, initial_mode: TypecheckMode, ) -> Result, TypecheckError> @@ -1470,24 +1496,40 @@ where /// AST components that can be walked (traversed to look for statically typed block). Corresponds /// to typechecking in **walk** mode. -trait Walk<'ast> { +trait Walk<'ast>: Copy { /// Walks the AST of a term looking for statically typed blocks to check. Calls the visitor /// callbacks alongside and store the apparent type of variables inside the type and /// environments. + // [^self-owned]: this method doesn't require to take ownership `self`, as the AST nodes are + // merely traversed. However, we need to implement `Walk` on two different categories of + // objects: + // + // 1. Reference to AST nodes, which needs to be of the form of `&'ast _` in order to produce + // suberefences that are guaranteed to live as long as `'ast`. + // 2. References to temporary objects, that don't live in the allocator and are created on the + // spot by the typechecker. The typical example is [record::ResolvedRecord]. Those can't + // have an `&'ast` lifetime. But this is fine because they hold internal node + // references that are `&'ast`, so we can relax the constraint and implement the trait + // for `&_`. + // + // To have the same interface work on both categories, we implement this trait on _the + // reference type_ instead, such as `&'ast Ast<'ast>`, so that the implementer can freely + // chose the lifetime of the reference. This is why we take ownership of `self` here: it's + // actually of type `&'something _`, for the `_` of interest. fn walk>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, ) -> Result<(), TypecheckError>; } -impl<'ast, T> Walk<'ast> for [T] +impl<'ast, T> Walk<'ast> for &'ast [T] where - T: Walk<'ast>, + &'ast T: Walk<'ast>, { fn walk>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, @@ -1500,12 +1542,12 @@ where } } -impl<'ast, T> Walk<'ast> for StringChunk +impl<'ast, T> Walk<'ast> for &'ast StringChunk where - T: Walk<'ast>, + &'ast T: Walk<'ast>, { fn walk>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, @@ -1518,24 +1560,22 @@ where } } -impl<'ast> Walk<'ast> for Ast<'ast> { +impl<'ast> Walk<'ast> for &'ast Ast<'ast> { fn walk>( - &self, + self, state: &mut State<'ast, '_>, mut ctxt: Context<'ast>, visitor: &mut V, ) -> Result<(), TypecheckError> { - let Ast { node, pos } = self; - visitor.visit_term( self, UnifType::from_apparent_type( - node.apparent_type(state.ast_alloc, Some(&ctxt.type_env), Some(state.resolver)), + self.apparent_type(state.ast_alloc, Some(&ctxt.type_env), Some(state.resolver)), &ctxt.term_env, ), ); - match node { + match &self.node { Node::ParseError(_) | Node::Null | Node::Bool(_) @@ -1547,10 +1587,10 @@ impl<'ast> Walk<'ast> for Ast<'ast> { | Node::Import(_) => Ok(()), Node::Var(x) => ctxt.type_env .get(&x.ident()) - .ok_or(TypecheckError::UnboundIdentifier { id: *x, pos: *pos }) + .ok_or(TypecheckError::UnboundIdentifier { id: *x, pos: self.pos }) .map(|_| ()), Node::StringChunks(chunks) => { - chunks.walk(state, ctxt, visitor) + (*chunks).walk(state, ctxt, visitor) } Node::Fun { args, body } => { // The parameter of an unannotated function is always assigned type `Dyn`, unless the @@ -1578,29 +1618,14 @@ impl<'ast> Walk<'ast> for Ast<'ast> { let start_ctxt = ctxt.clone(); + // We first need to populate the (potentially) recursive environment in this separate + // loop before walking bound values. for binding in bindings.iter() { - let ty_let = binding_type(state, &binding.value.node, &start_ctxt, false); - - // In the case of a let-binding, we want to guess a better type than `Dyn` when we can - // do so cheaply for the whole pattern. - if let Some(alias) = &binding.pattern.alias { - visitor.visit_ident(alias, ty_let.clone()); - ctxt.type_env.insert(alias.ident(), ty_let); - ctxt.term_env.0.insert(alias.ident(), (binding.value.clone(), start_ctxt.term_env.clone())); - } + let ty_let = binding_type(state, binding, &start_ctxt, false); - // [^separate-alias-treatment]: Note that we call `pattern_types` on the inner pattern - // data, which doesn't take into account the potential heading alias `x @ `. - // This is on purpose, as the alias has been treated separately, so we don't want to - // shadow it with a less precise type. - // - // The use of start_ctxt here looks like it might be wrong for let rec, but in fact - // the context is unused in mode `TypecheckMode::Walk` anyway. - let PatternTypeData {bindings: pat_bindings, ..} = binding.pattern.data.pattern_types(state, &start_ctxt, TypecheckMode::Walk)?; - - for (id, typ) in pat_bindings { - visitor.visit_ident(&id, typ.clone()); - ctxt.type_env.insert(id.ident(), typ); + let mut register_binding = |id: &LocIdent, uty: UnifType<'ast>| { + visitor.visit_ident(id, uty.clone()); + ctxt.type_env.insert(id.ident(), uty); // [^term-env-rec-bindings]: we don't support recursive binding when checking // for contract equality. // @@ -1610,13 +1635,34 @@ impl<'ast> Walk<'ast> for Ast<'ast> { // bare references to represent cycles. Everything would be cleaned at the end // of the block. ctxt.term_env.0.insert(id.ident(), (binding.value.clone(), start_ctxt.term_env.clone())); + }; + + // The use of start_ctxt here looks like it might be wrong for let rec, but in fact + // the context is unused in mode `TypecheckMode::Walk` anyway. + let PatternTypeData {bindings: pat_bindings, ..} = binding.pattern.pattern_types(state, &start_ctxt, TypecheckMode::Walk)?; + + for (id, typ) in pat_bindings { + register_binding(&id, typ); + } + + // In the case of a let-binding, we want to guess a better type than `Dyn` when we + // can do so cheaply for the whole pattern, that is when there's an alias and/or + // when the pattern is an `Any` pattern. We do this after the generic loop over + // bindings, so that this more precise type information shadows the previous one. + + if let Some(alias) = &binding.pattern.alias { + register_binding(alias, ty_let.clone()); + } + + if let Some(id) = &binding.pattern.try_as_any() { + register_binding(id, ty_let); } } let value_ctxt = if *rec { ctxt.clone() } else { start_ctxt.clone() }; for binding in bindings.iter() { - binding.value.walk(state, value_ctxt.clone(), visitor)?; + binding.walk(state, value_ctxt.clone(), visitor)?; } body.walk(state, ctxt, visitor) @@ -1628,12 +1674,7 @@ impl<'ast> Walk<'ast> for Ast<'ast> { Node::Match(match_data) => { for MatchBranch { pattern, guard, body } in match_data.branches.iter() { let mut local_ctxt = ctxt.clone(); - let PatternTypeData { bindings: pat_bindings, .. } = pattern.data.pattern_types(state, &ctxt, TypecheckMode::Walk)?; - - if let Some(alias) = &pattern.alias { - visitor.visit_ident(alias, mk_uniftype::dynamic()); - local_ctxt.type_env.insert(alias.ident(), mk_uniftype::dynamic()); - } + let PatternTypeData { bindings: pat_bindings, .. } = pattern.pattern_types(state, &ctxt, TypecheckMode::Walk)?; for (id, typ) in pat_bindings { visitor.visit_ident(&id, typ.clone()); @@ -1658,44 +1699,20 @@ impl<'ast> Walk<'ast> for Ast<'ast> { then_branch.walk(state, ctxt.clone(), visitor)?; else_branch.walk(state, ctxt, visitor) } - Node::Record(record) => { - for field_def in record.field_defs.iter() { - let field_type = field_type( - state, - field_def, - &ctxt, - false, - ); - - if let Some(id) = field_def.name_as_ident() { - ctxt.type_env.insert(id.ident(), field_type.clone()); - visitor.visit_ident(&id, field_type); - } - } - - // Walk the type and contract annotations - // - // We don't bind the fields in the term environment used to check for contract - // equality. See [^term-env-rec-bindings]. - record.field_defs - .iter() - .try_for_each(|field_def| -> Result<(), TypecheckError> { - field_def.walk(state, ctxt.clone(), visitor) - }) - } + Node::Record(record) => record.resolve().with_pos(self.pos).walk(state, ctxt, visitor), Node::EnumVariant { arg: Some(arg), ..} => arg.walk(state, ctxt, visitor), Node::PrimOpApp { args, .. } => args.walk(state, ctxt, visitor), Node::Annotated { annot, inner } => { - walk_with_annot(state, ctxt, visitor, annot, Some(inner)) + walk_with_annot(state, ctxt, visitor, *annot, Some(inner)) } Node::Type(typ) => typ.walk(state, ctxt, visitor), } } } -impl<'ast> Walk<'ast> for Type<'ast> { +impl<'ast> Walk<'ast> for &'ast Type<'ast> { fn walk>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, @@ -1727,9 +1744,9 @@ impl<'ast> Walk<'ast> for Type<'ast> { } } -impl<'ast> Walk<'ast> for RecordRows<'ast> { +impl<'ast> Walk<'ast> for &'ast RecordRows<'ast> { fn walk>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, @@ -1748,9 +1765,20 @@ impl<'ast> Walk<'ast> for RecordRows<'ast> { } } -impl<'ast> Walk<'ast> for FieldDef<'ast> { +impl<'ast, S: AnnotSeqRef<'ast>> Walk<'ast> for &FieldDefCheckView<'ast, S> { + fn walk>( + self, + state: &mut State<'ast, '_>, + ctxt: Context<'ast>, + visitor: &mut V, + ) -> Result<(), TypecheckError> { + walk_with_annot(state, ctxt, visitor, self.annots, self.value) + } +} + +impl<'ast> Walk<'ast> for &'ast FieldDef<'ast> { fn walk>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, @@ -1765,31 +1793,44 @@ impl<'ast> Walk<'ast> for FieldDef<'ast> { } } +impl<'ast> Walk<'ast> for &'ast LetBinding<'ast> { + fn walk>( + self, + state: &mut State<'ast, '_>, + ctxt: Context<'ast>, + visitor: &mut V, + ) -> Result<(), TypecheckError> { + walk_with_annot( + state, + ctxt, + visitor, + &self.metadata.annotation, + Some(&self.value), + ) + } +} + /// Walk an annotated term, either via [crate::term::record::FieldMetadata], or via a standalone /// type and contracts annotation. A type annotation switches the typechecking mode to _enforce_. -fn walk_with_annot<'ast, V: TypecheckVisitor<'ast>>( +fn walk_with_annot<'ast, 'a, S: AnnotSeqRef<'ast>, V: TypecheckVisitor<'ast>>( state: &mut State<'ast, '_>, mut ctxt: Context<'ast>, visitor: &mut V, - annot: &Annotation<'ast>, - value: Option<&Ast<'ast>>, + annots: S, + value: Option<&'ast Ast<'ast>>, ) -> Result<(), TypecheckError> { - annot + annots .iter() .try_for_each(|ty| ty.walk(state, ctxt.clone(), visitor))?; - match (annot, value) { - (Annotation { typ: Some(ty2), .. }, Some(value)) => { + let typ = annots.typ(); + + match (typ, value) { + (Some(ty2), Some(value)) => { let uty2 = UnifType::from_type(ty2.clone(), &ctxt.term_env); value.check(state, ctxt, visitor, uty2) } - ( - Annotation { - typ: None, - contracts, - }, - Some(value), - ) => { + (None, Some(value)) => { // If we see a function annotated with a function contract, we can get the type of the // arguments for free. We use this information both for typechecking (you could see it // as an extension of the philosophy of apparent types, but for function arguments @@ -1797,14 +1838,15 @@ fn walk_with_annot<'ast, V: TypecheckVisitor<'ast>>( // completion. if let Node::Fun { args, body } = value.node { // We look for the first contract of the list that is a function contract. - let domains = contracts.iter().find_map(|c| { - if let TypeF::Arrow(mut domain, _) = &c.typ { + let domains = annots.contracts().find_map(|c| { + if let TypeF::Arrow(domain, mut codomain) = &c.typ { let mut domains = vec![UnifType::from_type((*domain).clone(), &ctxt.term_env)]; - while let TypeF::Arrow(next_domain, _) = &domain.typ { - domains.push(UnifType::from_type((*domain).clone(), &ctxt.term_env)); - domain = next_domain; + while let TypeF::Arrow(next_domain, next_codomain) = &codomain.typ { + domains + .push(UnifType::from_type((*next_domain).clone(), &ctxt.term_env)); + codomain = next_codomain; } Some(domains) @@ -1813,14 +1855,45 @@ fn walk_with_annot<'ast, V: TypecheckVisitor<'ast>>( } }); + let mut register_binding = + |id: LocIdent, ctxt: &mut Context<'ast>, uty: UnifType<'ast>| { + visitor.visit_ident(&id, uty.clone()); + ctxt.type_env.insert(id.ident(), uty); + }; + if let Some(domains) = domains { - for (arg, uty) in args.iter().zip(domains) { + for (arg, uty) in args + .iter() + // We might find fewer domains than arguments (for example, a function `fun + // x y z` can very much be annotated with a contract `Number -> Dyn`). + // However we still need to process all of the arguments, or they will be + // reported as unbound variable when used. So if we're out of domain types, + // we just use `Dyn` for the remaining args. + .zip(domains.into_iter().map(Some).chain(std::iter::repeat(None))) + { + let uty = uty.unwrap_or_else(mk_uniftype::dynamic); + // Because the normal code path in `walk` sets the function argument to `Dyn`, // we need to short-circuit it. We manually visit the argument, augment the // typing environment and walk the body of the function. if let Some(id) = arg.try_as_any() { - visitor.visit_ident(&id, uty.clone()); - ctxt.type_env.insert(id.ident(), uty); + register_binding(id, &mut ctxt, uty.clone()); + + if let Some(alias) = arg.alias { + register_binding(alias, &mut ctxt, uty); + } + } + // However, if the pattern is a single variable, we need to properly fill + // the environment with pattern variables. + else { + let PatternTypeData { + bindings: pat_bindings, + .. + } = arg.pattern_types(state, &ctxt, TypecheckMode::Walk)?; + + ctxt.type_env.extend( + pat_bindings.into_iter().map(|(id, typ)| (id.ident(), typ)), + ); } } @@ -1886,8 +1959,9 @@ fn walk_with_annot<'ast, V: TypecheckVisitor<'ast>>( /// [bidirectional-typing]: (https://arxiv.org/abs/1908.05839) trait Check<'ast> { /// Checks `self` against a given type. + // We take ownership of `self`: see [^self-owned] in `Walk`. fn check>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, @@ -1895,9 +1969,9 @@ trait Check<'ast> { ) -> Result<(), TypecheckError>; } -impl<'ast> Check<'ast> for Ast<'ast> { +impl<'ast> Check<'ast> for &'ast Ast<'ast> { fn check>( - &self, + self, state: &mut State<'ast, '_>, mut ctxt: Context<'ast>, visitor: &mut V, @@ -2024,53 +2098,84 @@ impl<'ast> Check<'ast> for Ast<'ast> { let typed_bindings: Result, _> = bindings .iter() .map(|binding| -> Result<_, TypecheckError> { - // See [^separate-alias-treatment]. - let pat_types = binding.pattern.pattern_types( - state, - &start_ctxt, - TypecheckMode::Enforce, - )?; - - // In the destructuring case, there's no alternative pattern, and we must thus - // immediatly close all the row types. - pattern::close_all_enums(pat_types.enum_open_tails, state); - // The inferred type of the expr being bound - let ty_let = binding_type(state, &binding.value.node, &start_ctxt, true); + let ty_bound = binding_type(state, binding, &start_ctxt, true); - pat_types - .typ - .unify(ty_let.clone(), state, &start_ctxt) - .map_err(|e| e.into_typecheck_err(state, binding.value.pos))?; - - if let Some(alias) = &binding.pattern.alias { - visitor.visit_ident(alias, ty_let.clone()); - ctxt.type_env.insert(alias.ident(), ty_let.clone()); - ctxt.term_env.0.insert( - alias.ident(), - (binding.value.clone(), start_ctxt.term_env.clone()), - ); - } - - for (id, typ) in pat_types.bindings { - visitor.visit_ident(&id, typ.clone()); - ctxt.type_env.insert(id.ident(), typ); + let mut register_binding = |id: &LocIdent, uty: UnifType<'ast>| { + visitor.visit_ident(id, uty.clone()); + ctxt.type_env.insert(id.ident(), uty.clone()); // See [^term-env-rec-bindings] for why we use `start_ctxt` independently // from `rec`. ctxt.term_env.0.insert( id.ident(), (binding.value.clone(), start_ctxt.term_env.clone()), ); + }; + + // In the case of a simple binding (a variable), we want to use the binding + // type directly for this variable without going through unification. + // + // Currently, it doesn't make a whole lot of difference (we could get rid + // of the `if` branch and only keep the `else` branch to mostly the same + // effect), because we happily unify unification variables with polymorphic + // types. However this situation isn't ideal and might change. + // Distinguishing the two cases is more future-proof. + if let Some(id) = binding.pattern.try_as_any() { + if let Some(alias) = &binding.pattern.alias { + register_binding(alias, ty_bound.clone()); + } + + register_binding(&id, ty_bound.clone()); + } else { + // We treat the alias separately, so we only call `pattern_types` on + // the underlying `data` here. + let pat_types = binding.pattern.data.pattern_types( + state, + &start_ctxt, + TypecheckMode::Enforce, + )?; + + // In the destructuring case, there's no alternative pattern, and we must thus + // immediately close all the row types. + pattern::close_all_enums(pat_types.enum_open_tails, state); + + pat_types + .typ + .unify(ty_bound.clone(), state, &start_ctxt) + .map_err(|e| e.into_typecheck_err(state, binding.value.pos))?; + + if let Some(alias) = &binding.pattern.alias { + register_binding(alias, ty_bound.clone()); + } + + for (id, typ) in pat_types.bindings { + register_binding(&id, typ); + } } - Ok((&binding.value, ty_let)) + Ok((&binding.value, ty_bound, &binding.metadata.annotation)) }) .collect(); let re_ctxt = if *rec { &ctxt } else { &start_ctxt }; - for (value, ty_let) in typed_bindings? { - value.check(state, re_ctxt.clone(), visitor, ty_let)?; + for (value, ty_bound, annot) in typed_bindings? { + // If the binding is annotated, we implement the same behavior as for a + // free-standing annotation `foo | T`, or any other annotated value: the mode + // is switched to infer and we let `infer_with_annot` handles the rest. + if !annot.is_empty() { + // Note that the loop above already checked that `ty_bound` agrees with the + // type inferred from the pattern. In the case of an annotated binding, + // `ty_bound` is coming from the annotation. So we don't have to check the + // inferred type against anything else here; we call `infer_with_annot` so + // that it correctly handles checking (or walking, if the annotation is a + // contract annotation) the underlying term, but we don't actually use the + // result of inference. + let _ = + infer_with_annot(state, re_ctxt.clone(), visitor, annot, Some(value))?; + } else { + value.check(state, re_ctxt.clone(), visitor, ty_bound)?; + } } body.check(state, ctxt, visitor, ty) @@ -2085,7 +2190,7 @@ impl<'ast> Check<'ast> for Ast<'ast> { // record types, we thus just take the intersection of the types, which amounts to // unify all pattern types together. While it might fail most of the time (including // for the `{x}` and `{y}` example), it can still typecheck interesting expressions - // when the record pattern are similar enough: + // when the record patterns are compatible: // // ```nickel // x |> match { @@ -2096,7 +2201,7 @@ impl<'ast> Check<'ast> for Ast<'ast> { // // We can definitely find a type for `x`: `{foo: a, bar: [| 'Baz, 'Qux |]}`. // - // For enum types, we can express union: for example, the union of `[|'Foo, 'Bar|]` and + // For enum types, we can express unions: for example, the union of `[|'Foo, 'Bar|]` and // `[|'Bar, 'Baz|]` is `[|'Foo, 'Bar, 'Baz|]`. We can even turn this into a unification // problem: "open" the initial row types as `[| 'Foo, 'Bar; ?a |]` and `[|'Bar, 'Baz; // ?b |]`, unify them together, and close the result (unify the tail with an empty row @@ -2265,8 +2370,26 @@ impl<'ast> Check<'ast> for Ast<'ast> { .resolve() .with_pos(self.pos) .check(state, ctxt, visitor, ty), - Node::Import(_) => { - todo!("need to figure out import resolution with the new AST first") + // We use the apparent type of the import for checking. This function doesn't + // recursively typecheck imports: this is the responsibility of the caller. + Node::Import(import) => { + let imported = state.resolver.resolve(import, &self.pos)?; + + if let Some(ast) = imported { + let ty_import: UnifType<'ast> = UnifType::from_apparent_type( + ast.apparent_type( + state.ast_alloc, + Some(&ctxt.type_env), + Some(state.resolver), + ), + &ctxt.term_env, + ); + + ty.unify(ty_import, state, &ctxt) + .map_err(|err| err.into_typecheck_err(state, self.pos))?; + } + + Ok(()) } // Node::Import(_) => ty // .unify(mk_uniftype::dynamic(), state, &ctxt) @@ -2286,12 +2409,11 @@ impl<'ast> Check<'ast> for Ast<'ast> { // .map_err(|err| err.into_typecheck_err(state, self.pos)) // } Node::Type(typ) => { - if let Some(_contract) = typ.find_contract() { - todo!("needs to update `error::TypecheckError` first, but not ready to switch to the new typechecker yet") - // Err(TypecheckError::CtrTypeInTermPos { - // contract, - // pos: *pos, - // }) + if let Some(contract) = typ.find_contract() { + Err(TypecheckError::CtrTypeInTermPos { + contract: contract.to_mainline(), + pos: self.pos, + }) } else { Ok(()) } @@ -2300,19 +2422,94 @@ impl<'ast> Check<'ast> for Ast<'ast> { } } -impl<'ast> Check<'ast> for FieldDef<'ast> { +/// Metadata can be combined using [crate::combine::CombineAlloc]. However, this requires to +/// allocate a fresh annotation object. During record resolution and typechecking, we just want to +/// walk the annotations as if they were combined, but without actually allocating. This trait +/// provides a common interface to either a single annotation or a sequence of annotations, as used +/// by the typechecker. +// We take ownership of `self`: see [^self-owned] in `Walk`. +pub trait AnnotSeqRef<'ast>: Copy { + /// Returns the first type annotation, if any. + fn typ(self) -> Option<&'ast Type<'ast>>; + + /// Return the sequence of contracts, that is all annotations but the first type annotation, if + /// any. + fn contracts(self) -> impl Iterator>; + + /// Returns the main annotation, which is either the type annotation if any, or the first + /// contract annotation. + fn first(self) -> Option<&'ast Type<'ast>>; + + /// Iterates over the annotations, starting by the type and followed by the contracts. + fn iter(self) -> impl Iterator>; +} + +impl<'ast> AnnotSeqRef<'ast> for &'ast Annotation<'ast> { + fn first(self) -> Option<&'ast Type<'ast>> { + self.typ.as_ref().or(self.contracts.iter().next()) + } + + fn iter(self) -> impl Iterator> { + self.typ.iter().chain(self.contracts.iter()) + } + + fn typ(self) -> Option<&'ast Type<'ast>> { + self.typ.as_ref() + } + + fn contracts(self) -> impl Iterator> { + self.contracts.iter() + } +} + +// Implementation used for a piecewise definition, where at most one value is defined. This can be +// considered the same as a single field definition with all annotations combined. +impl<'ast> AnnotSeqRef<'ast> for &[&'ast FieldDef<'ast>] { + fn typ(self) -> Option<&'ast Type<'ast>> { + self.iter() + .find_map(|def| def.metadata.annotation.typ.as_ref()) + } + + fn first(self) -> Option<&'ast Type<'ast>> { + self.iter().find_map(|def| def.metadata.annotation.first()) + } + + fn iter(self) -> impl Iterator> { + self.iter().flat_map(|def| def.metadata.annotation.iter()) + } + + fn contracts(self) -> impl Iterator> { + self.iter() + .flat_map(|def| def.metadata.annotation.typ.as_ref()) + .skip(1) + .chain( + self.iter() + .flat_map(|def| def.metadata.annotation.contracts.iter()), + ) + } +} + +/// Common structure for checking either a standard field definition, or a field definition +/// reconstituted from record resolution. +struct FieldDefCheckView<'ast, S> { + /// The annotation or sequence of annotations. + annots: S, + /// The position of the identifier (the last of the field path). + pos_id: TermPos, + /// The field value, if any. + value: Option<&'ast Ast<'ast>>, +} + +impl<'ast, S: AnnotSeqRef<'ast>> Check<'ast> for &FieldDefCheckView<'ast, S> { fn check>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, ty: UnifType<'ast>, ) -> Result<(), TypecheckError> { - //unwrap(): a field path is always assumed to be non-empty - let pos_id = self.path.last().unwrap().pos(); - // If there's no annotation, we simply check the underlying value, if any. - if self.metadata.annotation.is_empty() { + if self.annots.iter().next().is_none() { if let Some(value) = self.value.as_ref() { value.check(state, ctxt, visitor, ty) } else { @@ -2320,18 +2517,12 @@ impl<'ast> Check<'ast> for FieldDef<'ast> { // act a bit like a function parameter). But for now, we play safe and implement a more // restrictive rule, which is that a value without a definition has type `Dyn` ty.unify(mk_uniftype::dynamic(), state, &ctxt) - .map_err(|err| err.into_typecheck_err(state, pos_id)) + .map_err(|err| err.into_typecheck_err(state, self.pos_id)) } } else { - let pos = self.value.as_ref().map(|v| v.pos).unwrap_or(pos_id); + let pos = self.value.as_ref().map(|v| v.pos).unwrap_or(self.pos_id); - let inferred = infer_with_annot( - state, - ctxt.clone(), - visitor, - &self.metadata.annotation, - self.value.as_ref(), - )?; + let inferred = infer_with_annot(state, ctxt.clone(), visitor, self.annots, self.value)?; inferred .subsumed_by(ty, state, ctxt) @@ -2340,6 +2531,23 @@ impl<'ast> Check<'ast> for FieldDef<'ast> { } } +impl<'ast> Check<'ast> for &'ast FieldDef<'ast> { + fn check>( + self, + state: &mut State<'ast, '_>, + ctxt: Context<'ast>, + visitor: &mut V, + ty: UnifType<'ast>, + ) -> Result<(), TypecheckError> { + FieldDefCheckView { + annots: &self.metadata.annotation, + pos_id: self.path.last().unwrap().pos(), + value: self.value.as_ref(), + } + .check(state, ctxt, visitor, ty) + } +} + /// Function handling the common part of inferring the type of terms with type or contract /// annotation, with or without definitions. This encompasses both standalone type annotation /// (where `value` is always `Some(_)`) as well as field definitions (where `value` may or may not @@ -2348,38 +2556,35 @@ impl<'ast> Check<'ast> for FieldDef<'ast> { /// As for [check_visited] and [infer_visited], the additional `item_id` is provided when the term /// has been added to the visitor before but can still benefit from updating its information /// with the inferred type. -fn infer_with_annot<'ast, V: TypecheckVisitor<'ast>>( +fn infer_with_annot<'ast, V: TypecheckVisitor<'ast>, S: AnnotSeqRef<'ast>>( state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, - annot: &Annotation<'ast>, - value: Option<&Ast<'ast>>, + annots: S, + value: Option<&'ast Ast<'ast>>, ) -> Result, TypecheckError> { - for ty in annot.iter() { + for ty in annots.iter() { ty.walk(state, ctxt.clone(), visitor)?; } - match (annot, value) { - (Annotation { typ: Some(ty2), .. }, Some(value)) => { + let typ = annots.typ(); + let mut contracts = annots.contracts().peekable(); + + match (typ, value) { + (Some(ty2), Some(value)) => { let uty2 = UnifType::from_type(ty2.clone(), &ctxt.term_env); visitor.visit_term(value, uty2.clone()); - value.check(state, ctxt, visitor, uty2.clone())?; + Ok(uty2) } // An annotation without a type but with a contract switches the typechecker back to walk // mode. If there are several contracts, we arbitrarily chose the first one as the apparent // type (the most precise type would be the intersection of all contracts, but Nickel's // type system doesn't feature intersection types). - ( - Annotation { - typ: None, - contracts, - }, - value_opt, - ) if !contracts.is_empty() => { - let ty2 = contracts.first().unwrap(); + (None, value_opt) if contracts.peek().is_some() => { + let ty2 = contracts.next().unwrap(); let uty2 = UnifType::from_type(ty2.clone(), &ctxt.term_env); if let Some(value) = &value_opt { @@ -2398,12 +2603,12 @@ fn infer_with_annot<'ast, V: TypecheckVisitor<'ast>>( // as its inner value. This case should only happen for record fields, as the parser can't // produce an annotated term without an actual annotation. Still, such terms could be // produced programmatically, and aren't necessarily an issue. - (_, Some(value)) => value.infer(state, ctxt, visitor), + (None, Some(value)) => value.infer(state, ctxt, visitor), // An empty value is a record field without definition. We don't check anything, and infer // its type to be either the first annotation defined if any, or `Dyn` otherwise. // We can only hit this case for record fields. - _ => { - let inferred = annot + (_, None) => { + let inferred = annots .first() .map(|ty| UnifType::from_type(ty.clone(), &ctxt.term_env)) .unwrap_or_else(mk_uniftype::dynamic); @@ -2417,8 +2622,10 @@ trait Infer<'ast> { /// /// `infer` corresponds to the inference mode of bidirectional typechecking. Nickel uses a mix of /// bidirectional typechecking and traditional ML-like unification. + // Note that [^self-owned] of `Walk` doesn't apply here, so we can take implement this trait + // directly on the types of interest. fn infer>( - &self, + &'ast self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, @@ -2427,7 +2634,7 @@ trait Infer<'ast> { impl<'ast> Infer<'ast> for Ast<'ast> { fn infer>( - &self, + &'ast self, state: &mut State<'ast, '_>, mut ctxt: Context<'ast>, visitor: &mut V, @@ -2447,7 +2654,7 @@ impl<'ast> Infer<'ast> for Ast<'ast> { } // Theoretically, we need to instantiate the type of the head of the primop application, // that is, the primop itself. In practice, - // [crate::bytecode::typecheck::operation::PrimOpType::primop_type] returns types that are + // [crate::typecheck::operation::PrimOpType::primop_type] returns types that are // already instantiated with free unification variables, to save building a polymorphic // type that would be instantiated immediately. Thus, the type of a primop is currently // always monomorphic. @@ -2476,10 +2683,11 @@ impl<'ast> Infer<'ast> for Ast<'ast> { std::iter::repeat_with(|| state.table.fresh_type_uvar(ctxt.var_level)) .take(args.len()) .collect(); + let codomain = state.table.fresh_type_uvar(ctxt.var_level); let fun_type = mk_uniftype::nary_arrow(arg_types.clone(), codomain.clone()); - // "Match" the type of the head with `dom -> codom` + // "Match" the type of the head with `arg1 -> ... -> argn -> codom` fun_type .unify(head_type, state, &ctxt) .map_err(|err| err.into_typecheck_err(state, head.pos))?; @@ -2493,7 +2701,7 @@ impl<'ast> Infer<'ast> for Ast<'ast> { Ok(codomain) } Node::Annotated { annot, inner } => { - infer_with_annot(state, ctxt, visitor, annot, Some(inner)) + infer_with_annot(state, ctxt, visitor, *annot, Some(inner)) } _ => { // The remaining cases can't produce polymorphic types, and thus we can reuse the @@ -2530,31 +2738,12 @@ impl<'ast> Infer<'ast> for Ast<'ast> { /// corresponding to it. fn binding_type<'ast>( state: &mut State<'ast, '_>, - node: &Node<'ast>, - ctxt: &Context<'ast>, - strict: bool, -) -> UnifType<'ast> { - apparent_or_infer( - state, - node.apparent_type(state.ast_alloc, Some(&ctxt.type_env), Some(state.resolver)), - ctxt, - strict, - ) -} - -/// Same as `binding_type` but for record field definition. -fn field_type<'ast>( - state: &mut State<'ast, '_>, - field_def: &FieldDef<'ast>, + binding: &'ast LetBinding<'ast>, ctxt: &Context<'ast>, strict: bool, ) -> UnifType<'ast> { - apparent_or_infer( - state, - field_def.apparent_type(state.ast_alloc, Some(&ctxt.type_env), Some(state.resolver)), - ctxt, - strict, - ) + let uty = binding.apparent_type(state.ast_alloc, Some(&ctxt.type_env), Some(state.resolver)); + apparent_or_infer(state, uty, ctxt, strict) } /// Either returns the exact type annotation extracted as an apparent type, or return a fresh @@ -2576,7 +2765,8 @@ fn apparent_or_infer<'ast>( } } -/// Substitute wildcards in a type for their unification variable. +/// Substitute wildcards in a type for their unification variable, and converts the result to a +/// [UnifType]. fn replace_wildcards_with_var<'ast>( table: &mut UnifTable<'ast>, ctxt: &Context<'ast>, @@ -2666,6 +2856,12 @@ pub enum ApparentType<'ast> { Approximated(Type<'ast>), } +impl Default for ApparentType<'_> { + fn default() -> Self { + ApparentType::Approximated(Type::from(TypeF::Dyn)) + } +} + impl<'ast> TryConvert<'ast, ApparentType<'ast>> for Type<'ast> { type Error = std::convert::Infallible; @@ -2699,46 +2895,72 @@ pub trait HasApparentType<'ast> { /// `Dyn` if the resolver is not passed as a parameter to the function. /// - Otherwise, return an approximation of the type (currently `Dyn`, but could be more precise in /// the future, such as `Dyn -> Dyn` for functions, `{ | Dyn}` for records, and so on). + // We take ownership of `self`: see [^self-owned] in `Walk`. fn apparent_type( - &self, + self, ast_alloc: &'ast AstAlloc, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, + resolver: Option<&mut dyn AstImportResolver<'ast>>, ) -> ApparentType<'ast>; } -impl<'ast> HasApparentType<'ast> for FieldDef<'ast> { - // Return the apparent type of a field, by first looking at the type annotation, if any, then at - // the contracts annotation, and if there is none, fall back to the apparent type of the value. If - // there is no value, `Approximated(Dyn)` is returned. +// Common implementation for FieldDef and LetBinding. +impl<'ast> HasApparentType<'ast> for &(&'ast Annotation<'ast>, Option<&'ast Ast<'ast>>) { fn apparent_type( - &self, + self, ast_alloc: &'ast AstAlloc, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, + resolver: Option<&mut dyn AstImportResolver<'ast>>, ) -> ApparentType<'ast> { - self.metadata - .annotation + self.0 .first() .cloned() .map(ApparentType::Annotated) .or_else(|| { - self.value + self.1 .as_ref() - .map(|v| v.node.apparent_type(ast_alloc, env, resolver)) + .map(|v| v.apparent_type(ast_alloc, env, resolver)) }) - .unwrap_or(ApparentType::Approximated(Type::from(TypeF::Dyn))) + .unwrap_or_default() + } +} + +impl<'ast> HasApparentType<'ast> for &'ast FieldDef<'ast> { + // Return the apparent type of a field, by first looking at the type annotation, if any, then at + // the contracts annotation, and if there is none, fall back to the apparent type of the value. If + // there is no value, `Approximated(Dyn)` is returned. + fn apparent_type( + self, + ast_alloc: &'ast AstAlloc, + env: Option<&TypeEnv<'ast>>, + resolver: Option<&mut dyn AstImportResolver<'ast>>, + ) -> ApparentType<'ast> { + (&self.metadata.annotation, self.value.as_ref()).apparent_type(ast_alloc, env, resolver) + } +} + +impl<'ast> HasApparentType<'ast> for &'ast LetBinding<'ast> { + // Return the apparent type of a binding, by first looking at a potential type annotation, if + // any, then at the contracts annotation, and if there is none, fall back to the apparent type + // of the value. If there is no value, `Approximated(Dyn)` is returned. + fn apparent_type( + self, + ast_alloc: &'ast AstAlloc, + env: Option<&TypeEnv<'ast>>, + resolver: Option<&mut dyn AstImportResolver<'ast>>, + ) -> ApparentType<'ast> { + (&self.metadata.annotation, Some(&self.value)).apparent_type(ast_alloc, env, resolver) } } -impl<'ast> HasApparentType<'ast> for Node<'ast> { +impl<'ast> HasApparentType<'ast> for &'ast Ast<'ast> { fn apparent_type( - &self, + self, ast_alloc: &'ast AstAlloc, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, + resolver: Option<&mut dyn AstImportResolver<'ast>>, ) -> ApparentType<'ast> { - use crate::files::FileId; + use crate::bytecode::ast::Import; // Check the apparent type while avoiding cycling through direct imports loops. Indeed, // `apparent_type` tries to see through imported terms. But doing so can lead to an infinite @@ -2753,16 +2975,16 @@ impl<'ast> HasApparentType<'ast> for Node<'ast> { // returns `Dyn` if it detects a cycle. fn apparent_type_check_cycle<'ast>( ast_alloc: &'ast AstAlloc, - node: &Node<'ast>, + ast: &Ast<'ast>, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, - _imports_seen: HashSet, + resolver: Option<&mut dyn AstImportResolver<'ast>>, + mut imports_seen: HashSet>, ) -> ApparentType<'ast> { - match node { + match &ast.node { Node::Annotated { annot, inner } => annot .first() .map(|typ| ApparentType::Annotated(typ.clone())) - .unwrap_or_else(|| inner.node.apparent_type(ast_alloc, env, resolver)), + .unwrap_or_else(|| inner.apparent_type(ast_alloc, env, resolver)), Node::Number(_) => ApparentType::Inferred(Type::from(TypeF::Number)), Node::Bool(_) => ApparentType::Inferred(Type::from(TypeF::Bool)), Node::String(_) | Node::StringChunks(_) => { @@ -2774,19 +2996,35 @@ impl<'ast> HasApparentType<'ast> for Node<'ast> { Node::Var(id) => env .and_then(|envs| envs.get(&id.ident()).cloned()) .map(ApparentType::FromEnv) - .unwrap_or(ApparentType::Approximated(Type::from(TypeF::Dyn))), - //TODO: import - // Node::ResolvedImport(file_id) => match resolver { - // Some(r) if !imports_seen.contains(file_id) => { - // imports_seen.insert(*file_id); - // - // let t = r - // .get(*file_id) - // .expect("Internal error: resolved import not found during typechecking."); - // apparent_type_check_cycle(&t.term, env, Some(r), imports_seen) - // } - // _ => ApparentType::Approximated(Type::from(TypeF::Dyn)), - // }, + .unwrap_or_default(), + Node::Import(import) => { + let Some(resolver) = resolver else { + return ApparentType::default(); + }; + + // We are in an import cycle. We stop there and return `Dyn`. + if !imports_seen.insert(import.clone()) { + return ApparentType::default(); + } + + // We don't handle import errors here. it's just too annoying to have + // `apparent_type` return a `Result`. + // + // The import error will be bubbled up anyway by typechecking, because either + // we're in walk mode and the import will be walked just after, or we're in + // check mode and the import will be assigned to a pretty liberal type `?a`. + // + // What's important is that we don't create a spurious typechecking error + // "expected Foo, got Dyn" that would be reported first and hide the actual, + // underlying import error. + let imported = resolver.resolve(import, &ast.pos).unwrap_or_default(); + + if let Some(ast) = imported { + apparent_type_check_cycle(ast_alloc, ast, env, Some(resolver), imports_seen) + } else { + ApparentType::default() + } + } _ => ApparentType::Approximated(Type::from(TypeF::Dyn)), } } @@ -2812,7 +3050,7 @@ impl<'ast> HasApparentType<'ast> for Node<'ast> { /// some, the inferred type could be wrong. pub fn infer_record_type<'ast>( ast_alloc: &'ast AstAlloc, - ast: &Ast<'ast>, + ast: &'ast Ast<'ast>, term_env: &TermEnv<'ast>, max_depth: u8, ) -> UnifType<'ast> { @@ -2851,8 +3089,8 @@ pub fn infer_record_type<'ast>( }, )), )), - node => { - UnifType::from_apparent_type(node.apparent_type(ast_alloc, None, None), &TermEnv::new()) + _ => { + UnifType::from_apparent_type(ast.apparent_type(ast_alloc, None, None), &TermEnv::new()) } } } @@ -3006,7 +3244,7 @@ pub trait TypecheckVisitor<'ast> { /// /// It's possible for a single term to be visited multiple times, for example, if type /// inference kicks in. - fn visit_term(&mut self, _ast: &Ast<'ast>, _ty: UnifType<'ast>) {} + fn visit_term(&mut self, _ast: &'ast Ast<'ast>, _ty: UnifType<'ast>) {} /// Record the type of a bound identifier. fn visit_ident(&mut self, _ident: &LocIdent, _new_type: UnifType<'ast>) {} diff --git a/core/src/bytecode/typecheck/pattern.rs b/core/src/bytecode/typecheck/pattern.rs index 87982c401b..4ac10afc49 100644 --- a/core/src/bytecode/typecheck/pattern.rs +++ b/core/src/bytecode/typecheck/pattern.rs @@ -52,8 +52,8 @@ pub struct PatternTypeData<'ast, T> { /// /// Those variables (or their descendent in a row type) might need to be closed after the type /// of all the patterns of a match expression have been unified, depending on the presence of a - /// wildcard pattern. The path of the corresponding sub-pattern is stored as well, since enum - /// patterns in different positions might need different treatment. For example: + /// wildcard or an _any_ pattern. The path of the corresponding sub-pattern is stored as well, + /// since enum patterns in different positions might need different treatment. For example: /// /// ```nickel /// match { @@ -68,7 +68,8 @@ pub struct PatternTypeData<'ast, T> { /// must be closed, because this match expression can't handle `'Foo ('Other 0)`. The type of /// the match expression is thus `[| 'Foo [| 'Bar: a, 'Qux: b |]; c|] -> d`. /// - /// Wildcard can occur anywhere, so the previous case can also happen within a record pattern: + /// Wildcards and any patterns can occur anywhere, so the previous case can also happen within + /// a record pattern: /// /// ```nickel /// match { @@ -82,8 +83,8 @@ pub struct PatternTypeData<'ast, T> { /// /// See [^typechecking-match-expression] in [typecheck] for more details. pub enum_open_tails: Vec<(PatternPath, UnifEnumRows<'ast>)>, - /// Paths of the occurrence of wildcard patterns encountered. This is used to determine which - /// tails in [Self::enum_open_tails] should be left open. + /// Paths of the occurrence of wildcard or any patterns encountered. This is used to determine + /// which tails in [Self::enum_open_tails] should be left open. pub wildcard_occurrences: HashSet, } /// Close all the enum row types left open when typechecking a match expression. Special case of @@ -173,8 +174,8 @@ pub(super) trait PatternTypes<'ast> { /// added to the type environment. /// /// The type of each "leaf" identifier will be assigned based on the `mode` argument. The - /// current possibilities are for each leaf to have type `Dyn`, to use an explicit type - /// annotation, or to be assigned a fresh unification variable. + /// current possibilities are for each leaf to have type `Dyn`, to use their explicit type + /// annotation if any, or to be assigned a fresh unification variable. fn pattern_types( &self, state: &mut State<'ast, '_>, @@ -361,6 +362,7 @@ impl<'ast> PatternTypes<'ast> for PatternData<'ast> { Ok(any_type(mode, state, ctxt)) } PatternData::Any(id) => { + pt_state.wildcard_pat_paths.insert(path); let typ = any_type(mode, state, ctxt); pt_state.bindings.push((*id, typ.clone())); diff --git a/core/src/bytecode/typecheck/record.rs b/core/src/bytecode/typecheck/record.rs index 8b23883f09..63a2903c79 100644 --- a/core/src/bytecode/typecheck/record.rs +++ b/core/src/bytecode/typecheck/record.rs @@ -21,9 +21,9 @@ pub(super) trait Resolve<'ast> { } /// A resolved record literal, without field paths or piecewise definitions. Piecewise definitions -/// of fields have been grouped together, path have been broken into proper levels and top-level +/// of fields have been grouped together, paths have been broken into proper levels and top-level /// fields are partitioned between static and dynamic. -#[derive(Default)] +#[derive(Default, Debug)] pub(super) struct ResolvedRecord<'ast> { /// The static fields of the record. pub stat_fields: IndexMap>, @@ -155,34 +155,30 @@ impl<'ast> ResolvedRecord<'ast> { // We build a vector of unification variables representing the type of the fields of // the record. - // - // Since `IndexMap` guarantees a stable order of iteration, we use a vector instead of - // hashmap here. To find the type associated to the field `foo`, retrieve the index of - // `foo` in `self.stat_fields.keys()` and index into `field_types`. - let mut field_types: Vec> = + let field_types: Vec> = iter::repeat_with(|| state.table.fresh_type_uvar(ctxt.var_level)) .take(self.stat_fields.len()) .collect(); // Build the type {id1 : ?a1, id2: ?a2, .., idn: ?an}, which is the type of the whole // record. - let rows = self.stat_fields.keys().zip(field_types.iter()).fold( - mk_buty_record_row!(), - |acc, (id, row_ty)| mk_buty_record_row!((*id, row_ty.clone()); acc), - ); + let rows = self + .stat_fields + .keys() + .rev() + .zip(field_types.iter().rev()) + .fold( + mk_buty_record_row!(), + |acc, (id, row_ty)| mk_buty_record_row!((*id, row_ty.clone()); acc), + ); ty.unify(mk_buty_record!(; rows), state, &ctxt) .map_err(|err| err.into_typecheck_err(state, self.pos))?; - // We reverse the order of `field_types`. The idea is that we can then pop each - // field type as we iterate a last time over the fields, taking ownership, instead of - // having to clone elements if we indexed instead. - field_types.reverse(); - - for (id, field) in self.stat_fields.iter() { + for ((id, field), field_type) in self.stat_fields.iter().zip(field_types) { // unwrap(): `field_types` has exactly the same length as `self.stat_fields`, as it // was constructed with `.take(self.stat_fields.len()).collect()`. - let field_type = field_types.pop().unwrap(); + // let field_type = field_types.pop().unwrap(); // For a recursive record and a field which requires the additional unification // step (whose type wasn't known when building the recursive environment), we @@ -214,9 +210,9 @@ impl<'ast> ResolvedRecord<'ast> { } } -impl<'ast> Check<'ast> for ResolvedRecord<'ast> { +impl<'ast> Check<'ast> for &ResolvedRecord<'ast> { fn check>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, @@ -235,6 +231,98 @@ impl<'ast> Check<'ast> for ResolvedRecord<'ast> { } } +impl<'ast> Walk<'ast> for &ResolvedRecord<'ast> { + fn walk>( + self, + state: &mut State<'ast, '_>, + mut ctxt: Context<'ast>, + visitor: &mut V, + ) -> Result<(), TypecheckError> { + // We first build the recursive environment + for (id, field) in self.stat_fields.iter() { + let field_type = UnifType::from_apparent_type( + field.apparent_type(state.ast_alloc, Some(&ctxt.type_env), Some(state.resolver)), + // We can reuse `ctxt` here instead of needing to save the initial context, even if + // we're mutating it in the loop, because we don't touch `term_env`. + &ctxt.term_env, + ); + + visitor.visit_ident(id, field_type.clone()); + ctxt.type_env.insert(id.ident(), field_type); + } + + for (ast, field) in self.dyn_fields.iter() { + ast.walk(state, ctxt.clone(), visitor)?; + field.walk(state, ctxt.clone(), visitor)?; + } + + // Then we check the fields in the recursive environment + for (_, field) in self.stat_fields.iter() { + field.walk(state, ctxt.clone(), visitor)?; + } + + Ok(()) + } +} + +impl<'ast> Walk<'ast> for &ResolvedField<'ast> { + fn walk>( + self, + state: &mut State<'ast, '_>, + ctxt: Context<'ast>, + visitor: &mut V, + ) -> Result<(), TypecheckError> { + match (self.resolved.is_empty(), self.defs.as_slice()) { + // This shouldn't happen (fields present in the record should either have a definition + // or comes from record resolution). + (true, []) => { + unreachable!("typechecker internal error: checking a vacant field") + } + (false, []) => self.resolved.walk(state, ctxt, visitor), + // Special case for a piecewise definition where at most one definition has a value. + // This won't result in a runtime merge. Instead, it's always equivalent to one field + // definition where the annotations have been combined. We reuse the same logic as for + // checking a standard single field definition thanks to `FieldDefCheckView`. + (true, defs) if defs.iter().filter(|def| def.value.is_some()).count() <= 1 => { + let value = defs.iter().find_map(|def| def.value.as_ref()); + + FieldDefCheckView { + annots: defs, + value, + // unwrap(): + // + // 1. We treated the case `(true, [])` in the first branch of this pattern, so + // there must be at least one element in `defs` + // 2. The path of a field definition must always be non-empty + pos_id: defs + .first() + .unwrap() + .path + .last() + .expect("empty field path in field definition") + .pos(), + } + .walk(state, ctxt, visitor) + } + // In all other cases, we have either several definitions or at least one definition + // and a resolved part. Those cases will result in a merge (even if it can be sometimes + // optimized to a static merge that happens before runtime), so we type everything as + // `Dyn`. + (_, defs) => { + for def in defs.iter() { + def.walk(state, ctxt.clone(), visitor)?; + } + + // If `resolved` is empty, this will be a no-op. We thus don't bother guarding it + // behind a `if resolved.is_empty()` + self.resolved.walk(state, ctxt, visitor)?; + + Ok(()) + } + } + } +} + impl<'ast> Combine for ResolvedRecord<'ast> { fn combine(this: ResolvedRecord<'ast>, other: ResolvedRecord<'ast>) -> Self { use crate::eval::merge::split; @@ -319,7 +407,7 @@ impl<'ast> PoslessResolvedRecord<'ast> { /// Rather than having an ad-hoc enum with all those cases (that would just take up more memory), /// we consider the general combined case directly. Others are special cases with an empty /// `resolved`, or an empty or one-element `values`. -#[derive(Default)] +#[derive(Default, Debug)] pub(super) struct ResolvedField<'ast> { /// The resolved part of the field, coming from piecewise definitions where this field appears /// in the middle of the path. @@ -328,7 +416,7 @@ pub(super) struct ResolvedField<'ast> { /// appears last in the path. /// /// We store the whole [crate::bytecode::ast::record::FieldDef] here, although we don't need - /// the path anymore, because it's easier and less costly that create an ad-hoc structure to + /// the path anymore, because it's easier and less costly than creating an ad-hoc structure to /// store only the value and the metadata. defs: Vec<&'ast FieldDef<'ast>>, } @@ -362,9 +450,9 @@ impl<'ast> ResolvedField<'ast> { } } -impl<'ast> Check<'ast> for ResolvedField<'ast> { +impl<'ast> Check<'ast> for &ResolvedField<'ast> { fn check>( - &self, + self, state: &mut State<'ast, '_>, ctxt: Context<'ast>, visitor: &mut V, @@ -376,11 +464,38 @@ impl<'ast> Check<'ast> for ResolvedField<'ast> { (true, []) => { unreachable!("typechecker internal error: checking a vacant field") } - (true, [def]) if def.metadata.is_empty() => def.check(state, ctxt, visitor, ty), + // When there's just one classic field definition and no resolved form, we offload the + // work to `FieldDef::check`. (false, []) => self.resolved.check(state, ctxt, visitor, ty), + // Special case for a piecewise definition where at most one definition has a value. + // This won't result in a runtime merge. Instead, it's always equivalent to one field + // definition where the annotations have been combined. We reuse the same logic as for + // checking a standard single field definition thanks to `FieldDefCheckView`. + (true, defs) if defs.iter().filter(|def| def.value.is_some()).count() <= 1 => { + let value = defs.iter().find_map(|def| def.value.as_ref()); + + FieldDefCheckView { + annots: defs, + value, + // unwrap(): + // + // 1. We treated the case `(true, [])` in the first branch of this pattern, so + // there must be at least one element in `defs` + // 2. The path of a field definition must always be non-empty + pos_id: defs + .first() + .unwrap() + .path + .last() + .expect("empty field path") + .pos(), + } + .check(state, ctxt, visitor, ty) + } // In all other cases, we have either several definitions or at least one definition - // and a resolved part. Those cases will result in a runtime merge, so we type - // everything as `Dyn`. + // and a resolved part. Those cases will result in a merge (even if it can be sometimes + // optimized to a static merge that happens before runtime), so we type everything as + // `Dyn`. (_, defs) => { for def in defs.iter() { def.check(state, ctxt.clone(), visitor, mk_uniftype::dynamic())?; @@ -388,7 +503,7 @@ impl<'ast> Check<'ast> for ResolvedField<'ast> { if !self.resolved.is_empty() { // This will always raise an error, since the resolved part is equivalent to a - // record literal which doens't type against `Dyn` (at least currently). We + // record literal which doesn't type against `Dyn` (at least currently). We // could raise the error directly, but it's simpler to call `check` on // `self.resolved`, which will handle that for us. // @@ -520,15 +635,15 @@ impl<'ast> Resolve<'ast> for FieldDef<'ast> { } } -impl<'ast> HasApparentType<'ast> for ResolvedField<'ast> { +impl<'ast> HasApparentType<'ast> for &ResolvedField<'ast> { // Return the apparent type of a field, by first looking at the type annotation, if any, then at // the contracts annotation, and if there is none, fall back to the apparent type of the value. If // there is no value, `Approximated(Dyn)` is returned. fn apparent_type( - &self, + self, ast_alloc: &'ast AstAlloc, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, + resolver: Option<&mut dyn AstImportResolver<'ast>>, ) -> ApparentType<'ast> { match self.defs.as_slice() { // If there is a resolved part, the apparent type is `Dyn`: a resolved part itself is a diff --git a/core/src/bytecode/typecheck/subtyping.rs b/core/src/bytecode/typecheck/subtyping.rs index 67f93b03cc..20c5009a02 100644 --- a/core/src/bytecode/typecheck/subtyping.rs +++ b/core/src/bytecode/typecheck/subtyping.rs @@ -32,6 +32,12 @@ pub(super) trait SubsumedBy<'ast> { type Error; /// Checks if `self` is subsumed by `t2`, that is if `self <: t2`. Returns an error otherwise. + /// + /// For error reporting, note that the order of "expected" and "inferred" type is reversed when + /// compared to `unify`: in the latter, `self` is the "expected" type (coming from the context) + /// and "inferred" the type deduced from the expression, while here, the inferred type is + /// usually `self` and the expected type is `t2`. This is made to match the usual order of + /// subtyping. fn subsumed_by( self, t2: Self, @@ -155,42 +161,67 @@ impl<'ast> SubsumedBy<'ast> for UnifRecordRows<'ast> { state: &mut State<'ast, '_>, ctxt: Context<'ast>, ) -> Result<(), Self::Error> { - // This code is almost taken verbatim fro `unify`, but where some recursive calls are - // changed to be `subsumed_by` instead of `unify`. We can surely factorize both into a - // generic function, but this is left for future work. + // This code is almost taken verbatim from `unify`, but where some recursive calls are + // changed to be `subsumed_by` instead of `unify`, and where missing row and extra row + // errors have been swapped (since `subsumed_by` and `unify` are reversed with respect to + // which argument is the expected type and which is the inferred type). + // + // We can surely factorize both into a generic function, but this is left for future work. let inferred = self.into_root(state.table); let checked = t2.into_root(state.table); match (inferred, checked) { ( - UnifRecordRows::Concrete { rrows: rrows1, .. }, UnifRecordRows::Concrete { - rrows: rrows2, - var_levels_data: levels2, + rrows: rrows1, + var_levels_data: levels1, }, + UnifRecordRows::Concrete { rrows: rrows2, .. }, ) => match (rrows1, rrows2) { - (RecordRowsF::Extend { row, tail }, rrows2 @ RecordRowsF::Extend { .. }) => { - let urrows2 = UnifRecordRows::Concrete { - rrows: rrows2, - var_levels_data: levels2, + // We reverse the order of the arguments when compared to `unify`. It shouldn't + // really matter, but experience shows that the inferred type - here on the left - + // is often much bigger in a structural type system, than the expected type (coming + // from constraints or annotations). + // + // For example, when checking `array.fold_left` where `array` is `std.array`. The + // expected type (right hand side of `subsume`j) is `{ fold_left : ?a; ?b }`. The + // inferred type is, on the other hand, quite big: it's the list of all functions + // in the `array` module. + // + // If we started to pop stuff from the inferred type, we would iterate through all + // array functions - until `fold_left` - and consecutively extend `?b` to match + // that, which is useless. In the end, we just want to find `fold_left` in the type + // of `array` and match `?a` to its type. Starting to pop stuff from the expected + // type instead achieve that in one step. + ( + rrows1 @ RecordRowsF::Extend { .. }, + RecordRowsF::Extend { + row: row2, + tail: tail2, + }, + ) => { + let urrows1 = UnifRecordRows::Concrete { + rrows: rrows1, + var_levels_data: levels1, }; - let (ty_res, urrows_without_ty_res) = urrows2 - .remove_row(&row.id, &row.typ, state, ctxt.var_level) + + let (ty_res, urrows_without_ty_res) = urrows1 + .remove_row(&row2.id, &row2.typ, state, ctxt.var_level) .map_err(|err| match err { - RemoveRowError::Missing => RowUnifError::MissingRow(row.id), + RemoveRowError::Missing => RowUnifError::MissingRow(row2.id), RemoveRowError::Conflict => { - RowUnifError::RecordRowConflict(row.clone()) + RowUnifError::RecordRowConflict(row2.clone()) } })?; if let RemoveRowResult::Extracted(ty) = ty_res { - row.typ - .subsumed_by(ty, state, ctxt.clone()) + ty.subsumed_by(*row2.typ, state, ctxt.clone()) .map_err(|err| RowUnifError::RecordRowMismatch { - id: row.id, + id: row2.id, cause: Box::new(err), })?; } - tail.subsumed_by(urrows_without_ty_res, state, ctxt) + + urrows_without_ty_res.subsumed_by(*tail2, state, ctxt) } (RecordRowsF::TailVar(id), _) | (_, RecordRowsF::TailVar(id)) => { Err(RowUnifError::UnboundTypeVariable(id)) diff --git a/core/src/bytecode/typecheck/unif.rs b/core/src/bytecode/typecheck/unif.rs index 68df32d0f7..fbdb57b09e 100644 --- a/core/src/bytecode/typecheck/unif.rs +++ b/core/src/bytecode/typecheck/unif.rs @@ -1174,6 +1174,14 @@ pub(super) trait Unify<'ast> { /// Try to unify two types. Unification corresponds to imposing an equality constraints on /// those types. This can fail if the types can't be matched. + /// + /// While unification is symmetric in principle, we make a difference in Nickel between the + /// expected type (coming from the context, from annotations, etc.) and the inferred type, + /// corresponding to the type of the expression as determined by its shape or other intrinsic + /// properties. Note that "inferred" here doesn't really map precisely to the bidirectional + /// typechecking vocabulary used elsewhere in the typechecker. + /// + /// Here, `self` is the expected type, and `t2` is the inferred type. fn unify( self, t2: Self, diff --git a/core/src/environment.rs b/core/src/environment.rs index e7aebec1ef..deb3c1ad70 100644 --- a/core/src/environment.rs +++ b/core/src/environment.rs @@ -166,6 +166,12 @@ impl Environment { prev_layers_eq && curr_layers_eq } + + /// Returns `true` if this environment is empty, that is if the current layer is empty and + /// there's no previous layer. + pub fn is_empty(&self) -> bool { + self.current.is_empty() && self.previous.borrow().is_none() + } } impl FromIterator<(K, V)> for Environment { diff --git a/core/src/error/mod.rs b/core/src/error/mod.rs index 6d9fe14e18..483d8ab823 100644 --- a/core/src/error/mod.rs +++ b/core/src/error/mod.rs @@ -462,6 +462,13 @@ pub enum TypecheckError { /// The position of the whole or-pattern. pos: TermPos, }, + /// An error occured during the resolution of an import. + /// + /// Since RFC007, imports aren't pre-processed anymore, and import resolution can happen + /// interleaved with typechecking. In particular, in order to typecheck expressions of the form + /// `import "file.ncl"`, the typechecker might ask to resolve the import, which can lead to any + /// import error. + ImportError(ImportError), } #[derive(Debug, PartialEq, Eq, Clone, Default)] @@ -768,6 +775,12 @@ impl From for EvalError { } } +impl From for TypecheckError { + fn from(error: ImportError) -> Self { + TypecheckError::ImportError(error) + } +} + /// Return an escaped version of a string. Used to sanitize strings before inclusion in error /// messages, which can contain ASCII code sequences, and in particular ANSI escape codes, that /// could alter Nickel's error messages. @@ -2598,6 +2611,7 @@ impl IntoDiagnostics for TypecheckError { .into(), ])] } + TypecheckError::ImportError(err) => err.into_diagnostics(files), } } } diff --git a/core/src/parser/grammar.lalrpop b/core/src/parser/grammar.lalrpop index 22c45db243..fafaff2938 100644 --- a/core/src/parser/grammar.lalrpop +++ b/core/src/parser/grammar.lalrpop @@ -166,7 +166,7 @@ LetAnnotAtom: LetMetadata<'ast> = { ..Default::default() }, "|" "doc" => LetMetadata { - doc: Some(<>.into()), + doc: Some(alloc.alloc_str(&<>)), ..Default::default() }, } diff --git a/core/src/position.rs b/core/src/position.rs index 798bae3107..02def37e08 100644 --- a/core/src/position.rs +++ b/core/src/position.rs @@ -127,6 +127,15 @@ impl TermPos { } } + /// Returns the file id associated to this position, if the position is defined, or `None` + /// otherwise. + pub fn src_id(&self) -> Option { + match self { + TermPos::Original(raw_span) | TermPos::Inherited(raw_span) => Some(raw_span.src_id), + TermPos::None => None, + } + } + /// Return either `self` or `other` if and only if exactly one of them is defined. If both are /// `None` or both are defined, `None` is returned. pub fn xor(self, other: Self) -> Self { diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index abf7d0563f..c0bfde99bd 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -391,7 +391,7 @@ impl PartialEq for Term { } } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] /// Specifies where something should be imported from. pub enum Import { Path { diff --git a/core/src/traverse.rs b/core/src/traverse.rs index 4ef6efb515..3380cef6f6 100644 --- a/core/src/traverse.rs +++ b/core/src/traverse.rs @@ -94,15 +94,21 @@ pub trait TraverseAlloc<'ast, T>: Sized { F: FnMut(T) -> Result; /// Same as [Traverse::traverse_ref], but takes an additional AST allocator. + /// + /// There is as small difference though: this function guarantees that the lifetime of the + /// references is bound to the lifetime of the AST allocator, which the signature in + /// [Traverse::traverse_ref] does not. This is useful e.g. in the LSP to extract references and + /// store them in separate data structure. We can guarantee that those reference won't be + /// dangling as long as the allocator is around. fn traverse_ref( - &self, - f: &mut dyn FnMut(&T, &S) -> TraverseControl, + &'ast self, + f: &mut dyn FnMut(&'ast T, &S) -> TraverseControl, scope: &S, ) -> Option; - fn find_map(&self, mut pred: impl FnMut(&T) -> Option) -> Option + fn find_map(&'ast self, mut pred: impl FnMut(&'ast T) -> Option) -> Option where - T: Clone, + T: Clone + 'ast, { self.traverse_ref( &mut |t, _state: &()| {