From 1450dca21e5bd6dddd01b39a83834d8994640cf5 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Sun, 2 Feb 2025 16:37:02 +0000 Subject: [PATCH] refactor(ast_tools): re-vamp `oxc_ast_tools` --- .github/.generated_ast_watch_list.yml | 42 +- Cargo.lock | 5 +- .../oxc_ast/src/generated/assert_layouts.rs | 6 + tasks/ast_tools/Cargo.toml | 9 +- tasks/ast_tools/src/codegen.rs | 230 ++- tasks/ast_tools/src/derives/clone_in.rs | 127 +- tasks/ast_tools/src/derives/content_eq.rs | 104 +- tasks/ast_tools/src/derives/estree.rs | 375 +++-- tasks/ast_tools/src/derives/get_address.rs | 43 +- tasks/ast_tools/src/derives/get_span.rs | 239 +-- tasks/ast_tools/src/derives/mod.rs | 334 ++-- .../src/generators/assert_layouts.rs | 488 +++++- tasks/ast_tools/src/generators/ast_builder.rs | 898 ++++++----- tasks/ast_tools/src/generators/ast_kind.rs | 246 +-- tasks/ast_tools/src/generators/get_id.rs | 66 +- tasks/ast_tools/src/generators/mod.rs | 120 +- tasks/ast_tools/src/generators/typescript.rs | 276 ++-- tasks/ast_tools/src/generators/visit.rs | 1350 +++++++++++------ tasks/ast_tools/src/layout.rs | 170 --- tasks/ast_tools/src/logger.rs | 24 +- tasks/ast_tools/src/main.rs | 354 ++++- tasks/ast_tools/src/markers.rs | 476 ------ tasks/ast_tools/src/output/javascript.rs | 2 +- tasks/ast_tools/src/output/mod.rs | 9 +- tasks/ast_tools/src/output/rust.rs | 13 +- tasks/ast_tools/src/parse/attr.rs | 141 ++ tasks/ast_tools/src/parse/load.rs | 141 ++ tasks/ast_tools/src/parse/mod.rs | 114 ++ tasks/ast_tools/src/parse/parse.rs | 798 ++++++++++ tasks/ast_tools/src/parse/skeleton.rs | 31 + tasks/ast_tools/src/passes/calc_layout.rs | 306 ---- tasks/ast_tools/src/passes/linker.rs | 91 -- tasks/ast_tools/src/passes/mod.rs | 70 - tasks/ast_tools/src/rust_ast.rs | 451 ------ tasks/ast_tools/src/schema/defs.rs | 264 ---- tasks/ast_tools/src/schema/defs/box.rs | 81 + tasks/ast_tools/src/schema/defs/cell.rs | 79 + tasks/ast_tools/src/schema/defs/enum.rs | 240 +++ tasks/ast_tools/src/schema/defs/mod.rs | 
127 ++ tasks/ast_tools/src/schema/defs/option.rs | 79 + tasks/ast_tools/src/schema/defs/primitive.rs | 89 ++ tasks/ast_tools/src/schema/defs/struct.rs | 174 +++ tasks/ast_tools/src/schema/defs/type.rs | 255 ++++ tasks/ast_tools/src/schema/defs/vec.rs | 81 + tasks/ast_tools/src/schema/derives.rs | 135 ++ .../src/schema/extensions/clone_in.rs | 6 + .../ast_tools/src/schema/extensions/estree.rs | 37 + tasks/ast_tools/src/schema/extensions/kind.rs | 6 + .../ast_tools/src/schema/extensions/layout.rs | 92 ++ tasks/ast_tools/src/schema/extensions/span.rs | 6 + .../ast_tools/src/schema/extensions/visit.rs | 34 + tasks/ast_tools/src/schema/file.rs | 63 + tasks/ast_tools/src/schema/get_generics.rs | 38 - tasks/ast_tools/src/schema/get_ident.rs | 30 - tasks/ast_tools/src/schema/mod.rs | 438 ++---- tasks/ast_tools/src/schema/serialize.rs | 49 - tasks/ast_tools/src/schema/to_type.rs | 65 - tasks/ast_tools/src/util.rs | 319 ---- tasks/ast_tools/src/utils.rs | 52 + 59 files changed, 6410 insertions(+), 4578 deletions(-) delete mode 100644 tasks/ast_tools/src/layout.rs delete mode 100644 tasks/ast_tools/src/markers.rs create mode 100644 tasks/ast_tools/src/parse/attr.rs create mode 100644 tasks/ast_tools/src/parse/load.rs create mode 100644 tasks/ast_tools/src/parse/mod.rs create mode 100644 tasks/ast_tools/src/parse/parse.rs create mode 100644 tasks/ast_tools/src/parse/skeleton.rs delete mode 100644 tasks/ast_tools/src/passes/calc_layout.rs delete mode 100644 tasks/ast_tools/src/passes/linker.rs delete mode 100644 tasks/ast_tools/src/passes/mod.rs delete mode 100644 tasks/ast_tools/src/rust_ast.rs delete mode 100644 tasks/ast_tools/src/schema/defs.rs create mode 100644 tasks/ast_tools/src/schema/defs/box.rs create mode 100644 tasks/ast_tools/src/schema/defs/cell.rs create mode 100644 tasks/ast_tools/src/schema/defs/enum.rs create mode 100644 tasks/ast_tools/src/schema/defs/mod.rs create mode 100644 tasks/ast_tools/src/schema/defs/option.rs create mode 100644 
tasks/ast_tools/src/schema/defs/primitive.rs create mode 100644 tasks/ast_tools/src/schema/defs/struct.rs create mode 100644 tasks/ast_tools/src/schema/defs/type.rs create mode 100644 tasks/ast_tools/src/schema/defs/vec.rs create mode 100644 tasks/ast_tools/src/schema/derives.rs create mode 100644 tasks/ast_tools/src/schema/extensions/clone_in.rs create mode 100644 tasks/ast_tools/src/schema/extensions/estree.rs create mode 100644 tasks/ast_tools/src/schema/extensions/kind.rs create mode 100644 tasks/ast_tools/src/schema/extensions/layout.rs create mode 100644 tasks/ast_tools/src/schema/extensions/span.rs create mode 100644 tasks/ast_tools/src/schema/extensions/visit.rs create mode 100644 tasks/ast_tools/src/schema/file.rs delete mode 100644 tasks/ast_tools/src/schema/get_generics.rs delete mode 100644 tasks/ast_tools/src/schema/get_ident.rs delete mode 100644 tasks/ast_tools/src/schema/serialize.rs delete mode 100644 tasks/ast_tools/src/schema/to_type.rs delete mode 100644 tasks/ast_tools/src/util.rs create mode 100644 tasks/ast_tools/src/utils.rs diff --git a/.github/.generated_ast_watch_list.yml b/.github/.generated_ast_watch_list.yml index f4f41338f0c20..3cfb256e6fb70 100644 --- a/.github/.generated_ast_watch_list.yml +++ b/.github/.generated_ast_watch_list.yml @@ -2,36 +2,36 @@ # To edit this generated file you have to edit `tasks/ast_tools/src/main.rs` src: - - 'crates/oxc_ast/src/ast/literal.rs' + - '.github/.generated_ast_watch_list.yml' + - 'crates/oxc_ast/src/ast/comment.rs' - 'crates/oxc_ast/src/ast/js.rs' - - 'crates/oxc_ast/src/ast/ts.rs' - 'crates/oxc_ast/src/ast/jsx.rs' - - 'crates/oxc_ast/src/ast/comment.rs' - - 'crates/oxc_syntax/src/number.rs' - - 'crates/oxc_syntax/src/operator.rs' - - 'crates/oxc_span/src/span/types.rs' - - 'crates/oxc_span/src/source_type/mod.rs' - - 'crates/oxc_regular_expression/src/ast.rs' + - 'crates/oxc_ast/src/ast/literal.rs' + - 'crates/oxc_ast/src/ast/ts.rs' + - 'crates/oxc_ast/src/generated/assert_layouts.rs' + - 
'crates/oxc_ast/src/generated/ast_builder.rs' + - 'crates/oxc_ast/src/generated/ast_kind.rs' - 'crates/oxc_ast/src/generated/derive_clone_in.rs' - - 'crates/oxc_regular_expression/src/generated/derive_clone_in.rs' - - 'crates/oxc_syntax/src/generated/derive_clone_in.rs' + - 'crates/oxc_ast/src/generated/derive_content_eq.rs' + - 'crates/oxc_ast/src/generated/derive_estree.rs' - 'crates/oxc_ast/src/generated/derive_get_address.rs' - - 'crates/oxc_regular_expression/src/generated/derive_get_address.rs' - 'crates/oxc_ast/src/generated/derive_get_span.rs' - 'crates/oxc_ast/src/generated/derive_get_span_mut.rs' - - 'crates/oxc_ast/src/generated/derive_content_eq.rs' + - 'crates/oxc_ast/src/generated/get_id.rs' + - 'crates/oxc_ast/src/generated/visit.rs' + - 'crates/oxc_ast/src/generated/visit_mut.rs' + - 'crates/oxc_regular_expression/src/ast.rs' + - 'crates/oxc_regular_expression/src/generated/derive_clone_in.rs' - 'crates/oxc_regular_expression/src/generated/derive_content_eq.rs' - - 'crates/oxc_syntax/src/generated/derive_content_eq.rs' - - 'crates/oxc_ast/src/generated/derive_estree.rs' - 'crates/oxc_regular_expression/src/generated/derive_estree.rs' + - 'crates/oxc_regular_expression/src/generated/derive_get_address.rs' - 'crates/oxc_span/src/generated/derive_estree.rs' + - 'crates/oxc_span/src/source_type/mod.rs' + - 'crates/oxc_span/src/span/types.rs' + - 'crates/oxc_syntax/src/generated/derive_clone_in.rs' + - 'crates/oxc_syntax/src/generated/derive_content_eq.rs' - 'crates/oxc_syntax/src/generated/derive_estree.rs' - - 'crates/oxc_ast/src/generated/assert_layouts.rs' - - 'crates/oxc_ast/src/generated/ast_kind.rs' - - 'crates/oxc_ast/src/generated/ast_builder.rs' - - 'crates/oxc_ast/src/generated/get_id.rs' - - 'crates/oxc_ast/src/generated/visit.rs' - - 'crates/oxc_ast/src/generated/visit_mut.rs' + - 'crates/oxc_syntax/src/number.rs' + - 'crates/oxc_syntax/src/operator.rs' - 'npm/oxc-types/types.d.ts' - 'tasks/ast_tools/src/**' - - 
'.github/.generated_ast_watch_list.yml' diff --git a/Cargo.lock b/Cargo.lock index 139e791cc1f78..e0dc95aa5aaac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1605,18 +1605,21 @@ dependencies = [ name = "oxc_ast_tools" version = "0.0.0" dependencies = [ + "bitflags 2.7.0", "bpaf", "convert_case 0.7.1", "cow-utils", + "indexmap", "itertools", "lazy_static", + "oxc_index", "prettyplease", "proc-macro2", "quote", + "rayon", "regex", "rustc-hash", "serde", - "serde_json", "syn", ] diff --git a/crates/oxc_ast/src/generated/assert_layouts.rs b/crates/oxc_ast/src/generated/assert_layouts.rs index 45e1b8d6e59cf..6bae73ef2fe45 100644 --- a/crates/oxc_ast/src/generated/assert_layouts.rs +++ b/crates/oxc_ast/src/generated/assert_layouts.rs @@ -1568,6 +1568,9 @@ const _: () = { assert!(align_of::() == 8usize); assert!(offset_of!(NamedReference, span) == 0usize); assert!(offset_of!(NamedReference, name) == 8usize); + + assert!(size_of::() == 1usize); + assert!(align_of::() == 1usize); }; #[cfg(target_pointer_width = "32")] @@ -3131,6 +3134,9 @@ const _: () = { assert!(align_of::() == 4usize); assert!(offset_of!(NamedReference, span) == 0usize); assert!(offset_of!(NamedReference, name) == 8usize); + + assert!(size_of::() == 1usize); + assert!(align_of::() == 1usize); }; #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))] diff --git a/tasks/ast_tools/Cargo.toml b/tasks/ast_tools/Cargo.toml index 03258899a0bfa..e0d3dbaaedaf4 100644 --- a/tasks/ast_tools/Cargo.toml +++ b/tasks/ast_tools/Cargo.toml @@ -10,20 +10,23 @@ workspace = true [[bin]] name = "oxc_ast_tools" -test = false +test = true doctest = false [dependencies] +bitflags = { workspace = true } bpaf = { workspace = true, features = ["autocomplete", "bright-color", "derive"] } convert_case = { workspace = true } cow-utils = { workspace = true } +indexmap = { workspace = true } itertools = { workspace = true } lazy_static = { workspace = true } +oxc_index = { workspace = true } prettyplease = { workspace 
= true } proc-macro2 = { workspace = true } quote = { workspace = true } +rayon = { workspace = true } regex = { workspace = true } rustc-hash = { workspace = true } -serde = { workspace = true, features = ["derive"] } -serde_json = { workspace = true } +serde = { workspace = true } syn = { workspace = true, features = ["clone-impls", "derive", "extra-traits", "full", "parsing", "printing", "proc-macro"] } diff --git a/tasks/ast_tools/src/codegen.rs b/tasks/ast_tools/src/codegen.rs index 6ee18d26c13a1..73df7ce55b976 100644 --- a/tasks/ast_tools/src/codegen.rs +++ b/tasks/ast_tools/src/codegen.rs @@ -1,149 +1,133 @@ -use std::{cell::RefCell, path::PathBuf}; - -use itertools::Itertools; -use rustc_hash::{FxBuildHasher, FxHashMap}; +use rustc_hash::FxHashMap; use crate::{ - log, log_result, - output::{Output, RawOutput}, - passes::Pass, - rust_ast::{AstRef, Module}, - schema::{lower_ast_types, Schema}, - Result, TypeId, + logln, + parse::attr::{AttrPositions, AttrProcessor}, + Derive, Generator, Output, RawOutput, Result, Schema, DERIVES, GENERATORS, }; -#[derive(Default)] -pub struct AstCodegen { - files: Vec, - passes: Vec>>, - generators: Vec>>, -} - -pub struct AstCodegenResult { - pub outputs: Vec, - pub schema: Schema, -} - -pub trait Runner { - type Context; - fn verb(&self) -> &'static str; - fn name(&self) -> &'static str; - fn file_path(&self) -> &'static str; - fn run(&mut self, ctx: &Self::Context) -> Result>; -} - -pub struct EarlyCtx { - ty_table: Vec, - ident_table: FxHashMap, - mods: RefCell>, +pub type DeriveId = usize; +pub type GeneratorId = usize; + +/// [`Codegen`] contains all data relating to the running of the codegen overall. +/// +/// [`Schema`] is the source of truth on types, and which generators and derives act upon. +/// [`Codegen`] is the engine which runs the generators and derives. 
+pub struct Codegen { + /// Mapping from derive name to `DeriveId` + derive_name_to_id: FxHashMap<&'static str, DeriveId>, + /// Mapping from attribute name to ID of derive/generator which uses the attr, + /// and legal positions for the attribute + attr_processors: FxHashMap<&'static str, (AttrProcessor, AttrPositions)>, } -impl EarlyCtx { - fn new(mods: Vec) -> Self { - // worst case len - let len = mods.iter().fold(0, |acc, it| acc + it.items.len()); - let adts = mods.iter().flat_map(|it| it.items.iter()); - - let mut ty_table = Vec::with_capacity(len); - let mut ident_table = FxHashMap::with_capacity_and_hasher(len, FxBuildHasher); - for adt in adts { - if let Some(ident) = adt.borrow().ident() { - let ident = ident.to_string(); - let type_id = ty_table.len(); - ty_table.push(AstRef::clone(adt)); - ident_table.insert(ident, type_id); +impl Codegen { + /// Create new [`Codegen`]. + pub fn new() -> Self { + let mut derive_name_to_id = FxHashMap::default(); + + let mut attr_processors = FxHashMap::default(); + + for (id, &derive) in DERIVES.iter().enumerate() { + derive_name_to_id.insert(derive.trait_name(), id); + + let processor = AttrProcessor::Derive(id); + for &(name, positions) in derive.attrs() { + let existing = attr_processors.insert(name, (processor, positions)); + if let Some((existing_processor, _)) = existing { + panic!( + "Two derives expect same attr `#[{name:?}]`: {} and {}", + existing_processor.name(), + processor.name() + ); + } } } - Self { ty_table, ident_table, mods: RefCell::new(mods) } - } - - pub fn chronological_idents(&self) -> impl Iterator { - self.ident_table.iter().sorted_by_key(|it| it.1).map(|it| it.0) - } - - pub fn mods(&self) -> &RefCell> { - &self.mods - } - - pub fn find(&self, key: &String) -> Option { - self.type_id(key).map(|id| AstRef::clone(&self.ty_table[id])) - } + for (id, &generator) in GENERATORS.iter().enumerate() { + let processor = AttrProcessor::Generator(id); + + for &(name, positions) in generator.attrs() { + 
let existing_processor = attr_processors.insert(name, (processor, positions)); + if let Some((existing_processor, _)) = existing_processor { + panic!( + "Two derives/generators expect same attr {name:?}: {} and {}", + existing_processor.name(), + processor.name() + ); + } + } + } - pub fn type_id(&self, key: &String) -> Option { - self.ident_table.get(key).copied() + Self { derive_name_to_id, attr_processors } } - pub fn ast_ref(&self, id: TypeId) -> AstRef { - AstRef::clone(&self.ty_table[id]) + /// Get a [`Derive`] by its name. + pub fn get_derive_id_by_name(&self, name: &str) -> DeriveId { + self.derive_name_to_id.get(name).copied().unwrap_or_else(|| { + panic!("Unknown derive trait {name:?}"); + }) } - fn into_schema(self) -> Schema { - lower_ast_types(&self) + /// Get processor (derive or generator) for an attribute, and legal positions for the attribute + pub fn attr_processor(&self, attr_name: &str) -> Option<(AttrProcessor, AttrPositions)> { + self.attr_processors.get(attr_name).copied() } } -impl AstCodegen { - #[must_use] - pub fn add_file

(mut self, path: P) -> Self - where - P: AsRef, - { - self.files.push(path.as_ref().into()); - self - } +/// Runner trait. +/// +/// This is the super-trait of [`Derive`] and [`Generator`]. +/// +/// [`Generator`]: crate::Generator +pub trait Runner { + fn name(&self) -> &'static str; - #[must_use] - pub fn pass

(mut self, pass: P) -> Self - where - P: Pass + Runner + 'static, - { - self.passes.push(Box::new(pass)); - self - } + fn file_path(&self) -> &'static str; - #[must_use] - pub fn generate(mut self, generator: G) -> Self - where - G: Runner + 'static, - { - self.generators.push(Box::new(generator)); - self - } + fn run(&self, schema: &Schema, codegen: &Codegen) -> Result>; +} - pub fn run(mut self) -> Result { - let modules = self - .files - .into_iter() - .map(Module::with_path) - .map(Module::load) - .map_ok(Module::expand) - .map_ok(|it| it.map(Module::analyze)) - .collect::>>>>()???; - - // Early passes - let early_ctx = EarlyCtx::new(modules); - let mut outputs = run_passes(&mut self.passes, &early_ctx)?; - - // Late passes - let schema = early_ctx.into_schema(); - outputs.extend(run_passes(&mut self.generators, &schema)?); - - Ok(AstCodegenResult { outputs, schema }) - } +/// Get all runners (generators and derives). +pub fn get_runners() -> Vec { + GENERATORS + .iter() + .map(|&gen| GeneratorOrDerive::Generator(gen)) + .chain(DERIVES.iter().map(|&derive| GeneratorOrDerive::Derive(derive))) + .collect() } -fn run_passes(runners: &mut [Box>], ctx: &C) -> Result> { - let mut outputs = vec![]; - for runner in runners { - log!("{} {}... ", runner.verb(), runner.name()); +/// A `Generator` or a `Derive`. +/// +/// Provides a single interface for running either. +#[derive(Clone, Copy)] +pub enum GeneratorOrDerive { + Generator(&'static (dyn Generator + Sync)), + Derive(&'static (dyn Derive + Sync)), +} - let result = runner.run(ctx); - log_result!(result); - let runner_outputs = result?; +impl GeneratorOrDerive { + /// Execute `prepare` method on the [`Generator`] or [`Derive`]. 
+ pub fn prepare(self, schema: &mut Schema) { + match self { + Self::Generator(generator) => generator.prepare(schema), + Self::Derive(derive) => derive.prepare(schema), + } + } - let generator_path = runner.file_path(); - outputs.extend(runner_outputs.into_iter().map(|output| output.into_raw(generator_path))); + /// Run the [`Generator`] or [`Derive`]. + pub fn run(self, schema: &Schema, codegen: &Codegen) -> Vec { + let (runner_path, result) = match self { + Self::Generator(generator) => { + logln!("Generate {}... ", generator.name()); + (generator.file_path(), generator.run(schema, codegen)) + } + Self::Derive(derive) => { + logln!("Derive {}... ", derive.name()); + (derive.file_path(), derive.run(schema, codegen)) + } + }; + let runner_outputs = result.unwrap(); + runner_outputs.into_iter().map(|output| output.into_raw(runner_path)).collect() } - Ok(outputs) } diff --git a/tasks/ast_tools/src/derives/clone_in.rs b/tasks/ast_tools/src/derives/clone_in.rs index 48e6b20669c9b..ecbccf39253e2 100644 --- a/tasks/ast_tools/src/derives/clone_in.rs +++ b/tasks/ast_tools/src/derives/clone_in.rs @@ -1,25 +1,46 @@ -use itertools::Itertools; +//! Derive for `CloneIn` trait. + use proc_macro2::TokenStream; use quote::{format_ident, quote}; use syn::Ident; use crate::{ - markers::CloneInAttribute, - schema::{EnumDef, GetIdent, Schema, StructDef, TypeDef}, + schema::{Def, EnumDef, Schema, StructDef}, + Result, }; -use super::{define_derive, Derive}; +use super::{define_derive, AttrLocation, AttrPart, AttrPositions, Derive, StructOrEnum}; +/// Derive for `CloneIn` trait. pub struct DeriveCloneIn; define_derive!(DeriveCloneIn); impl Derive for DeriveCloneIn { - fn trait_name() -> &'static str { + fn trait_name(&self) -> &'static str { "CloneIn" } - fn prelude() -> TokenStream { + /// Register that accept `#[clone_in]` attr on struct fields. 
+ fn attrs(&self) -> &[(&'static str, AttrPositions)] { + &[("clone_in", AttrPositions::StructField)] + } + + /// Parse `#[clone_in(default)]` on struct field. + fn parse_attr(&self, _attr_name: &str, location: AttrLocation, part: AttrPart) -> Result<()> { + // No need to check attr name is `clone_in`, because that's the only attribute this derive handles. + // Ditto location can only be `StructField`. + let AttrLocation::StructField(struct_def, field_index) = location else { unreachable!() }; + + if matches!(part, AttrPart::Tag("default")) { + struct_def.fields[field_index].clone_in.is_default = true; + Ok(()) + } else { + Err(()) + } + } + + fn prelude(&self) -> TokenStream { quote! { #![allow(clippy::default_trait_access)] @@ -28,72 +49,70 @@ impl Derive for DeriveCloneIn { } } - fn derive(&mut self, def: &TypeDef, _: &Schema) -> TokenStream { - match &def { - TypeDef::Enum(it) => derive_enum(it), - TypeDef::Struct(it) => derive_struct(it), + fn derive(&self, type_def: StructOrEnum, schema: &Schema) -> TokenStream { + match type_def { + StructOrEnum::Struct(struct_def) => derive_struct(struct_def), + StructOrEnum::Enum(enum_def) => derive_enum(enum_def, schema), } } } -fn derive_enum(def: &EnumDef) -> TokenStream { - let ty_ident = def.ident(); +fn derive_struct(struct_def: &StructDef) -> TokenStream { + let type_ident = struct_def.ident(); - let mut used_alloc = false; - let matches = def - .all_variants() - .map(|var| { - let ident = var.ident(); - if var.is_unit() { - quote!(Self :: #ident => #ty_ident :: #ident) + let has_fields = !struct_def.fields.is_empty(); + let body = if has_fields { + let fields = struct_def.fields.iter().map(|field| { + let field_ident = field.ident(); + if field.clone_in.is_default { + quote!( #field_ident: Default::default() ) } else { - used_alloc = true; - quote!(Self :: #ident(it) => #ty_ident :: #ident(CloneIn::clone_in(it, allocator))) + quote!( #field_ident: CloneIn::clone_in(&self.#field_ident, allocator) ) } - }) - 
.collect_vec(); - - let alloc_ident = if used_alloc { format_ident!("allocator") } else { format_ident!("_") }; - let body = quote! { - match self { - #(#matches),* - } + }); + quote!( #type_ident { #(#fields),* } ) + } else { + quote!( #type_ident ) }; - impl_clone_in(&ty_ident, def.has_lifetime, &alloc_ident, &body) + generate_impl(&type_ident, &body, struct_def.has_lifetime, has_fields) } -fn derive_struct(def: &StructDef) -> TokenStream { - let ty_ident = def.ident(); +fn derive_enum(enum_def: &EnumDef, schema: &Schema) -> TokenStream { + let type_ident = enum_def.ident(); + + let mut uses_allocator = false; + let match_arms = enum_def.all_variants(schema).map(|variant| { + let ident = variant.ident(); + if variant.is_fieldless() { + quote!( Self::#ident => #type_ident::#ident ) + } else { + uses_allocator = true; + quote!( Self::#ident(it) => #type_ident::#ident(CloneIn::clone_in(it, allocator)) ) + } + }); - let (alloc_ident, body) = if def.fields.is_empty() { - (format_ident!("_"), quote!(#ty_ident)) - } else { - let fields = def.fields.iter().map(|field| { - let ident = field.ident(); - match field.markers.derive_attributes.clone_in { - CloneInAttribute::Default => quote!(#ident: Default::default()), - CloneInAttribute::None => { - quote!(#ident: CloneIn::clone_in(&self.#ident, allocator)) - } - } - }); - (format_ident!("allocator"), quote!(#ty_ident { #(#fields),* })) + let body = quote! { + match self { + #(#match_arms),* + } }; - impl_clone_in(&ty_ident, def.has_lifetime, &alloc_ident, &body) + generate_impl(&type_ident, &body, enum_def.has_lifetime, uses_allocator) } -fn impl_clone_in( - ty_ident: &Ident, - has_lifetime: bool, - alloc_ident: &Ident, +fn generate_impl( + type_ident: &Ident, body: &TokenStream, + has_lifetime: bool, + uses_allocator: bool, ) -> TokenStream { + let alloc_ident = format_ident!("{}", if uses_allocator { "allocator" } else { "_" }); + if has_lifetime { quote! 
{ - impl <'new_alloc> CloneIn<'new_alloc> for #ty_ident<'_> { - type Cloned = #ty_ident<'new_alloc>; + impl<'new_alloc> CloneIn<'new_alloc> for #type_ident<'_> { + type Cloned = #type_ident<'new_alloc>; fn clone_in(&self, #alloc_ident: &'new_alloc Allocator) -> Self::Cloned { #body } @@ -101,8 +120,8 @@ fn impl_clone_in( } } else { quote! { - impl <'alloc> CloneIn<'alloc> for #ty_ident { - type Cloned = #ty_ident; + impl<'alloc> CloneIn<'alloc> for #type_ident { + type Cloned = #type_ident; fn clone_in(&self, #alloc_ident: &'alloc Allocator) -> Self::Cloned { #body } diff --git a/tasks/ast_tools/src/derives/content_eq.rs b/tasks/ast_tools/src/derives/content_eq.rs index df94790744293..87d255e92b9da 100644 --- a/tasks/ast_tools/src/derives/content_eq.rs +++ b/tasks/ast_tools/src/derives/content_eq.rs @@ -1,31 +1,25 @@ -use itertools::Itertools; +//! Derive for `ContentEq` trait. + use proc_macro2::TokenStream; -use quote::quote; +use quote::{format_ident, quote}; -use crate::{ - schema::{EnumDef, GetGenerics, Schema, StructDef, ToType, TypeDef}, - util::ToIdent, -}; +use crate::schema::{Def, EnumDef, Schema, StructDef}; -use super::{define_derive, Derive}; +use super::{define_derive, Derive, StructOrEnum}; -const IGNORE_FIELD_TYPES: [/* type name */ &str; 4] = [ - "Span", - "ScopeId", - "SymbolId", - "ReferenceId", -]; +const IGNORE_FIELD_TYPES: [&str; 4] = ["Span", "ScopeId", "SymbolId", "ReferenceId"]; +/// Derive for `ContentEq` trait. pub struct DeriveContentEq; define_derive!(DeriveContentEq); impl Derive for DeriveContentEq { - fn trait_name() -> &'static str { + fn trait_name(&self) -> &'static str { "ContentEq" } - fn prelude() -> TokenStream { + fn prelude(&self) -> TokenStream { quote! { // NOTE: writing long match expressions formats better than using `matches` macro. 
#![allow(clippy::match_like_matches_macro)] @@ -35,24 +29,45 @@ impl Derive for DeriveContentEq { } } - fn derive(&mut self, def: &TypeDef, _: &Schema) -> TokenStream { - let (other, body) = match &def { - TypeDef::Enum(it) => derive_enum(it), - TypeDef::Struct(it) => derive_struct(it), - }; - - impl_content_eq(def, other, &body) + fn derive(&self, type_def: StructOrEnum, schema: &Schema) -> TokenStream { + match type_def { + StructOrEnum::Struct(struct_def) => derive_struct(struct_def, schema), + StructOrEnum::Enum(enum_def) => derive_enum(enum_def, schema), + } } } -fn derive_enum(def: &EnumDef) -> (&str, TokenStream) { - let body = if def.is_unit() { - // we assume unit enums implement `PartialEq` +fn derive_struct(struct_def: &StructDef, schema: &Schema) -> TokenStream { + let fields = struct_def + .fields + .iter() + .filter(|field| { + let innermost_type = field.type_def(schema).innermost_type(schema); + !IGNORE_FIELD_TYPES.contains(&innermost_type.name()) + }) + .map(|field| { + let ident = field.ident(); + quote!( ContentEq::content_eq(&self.#ident, &other.#ident) ) + }); + + let mut body = quote!( #(#fields)&&* ); + let mut other_name = "other"; + if body.is_empty() { + body = quote!(true); + other_name = "_"; + }; + + generate_impl(&struct_def.ty_anon(schema), other_name, &body) +} + +fn derive_enum(enum_def: &EnumDef, schema: &Schema) -> TokenStream { + let body = if enum_def.is_fieldless() { + // We assume fieldless enums implement `PartialEq` quote!(self == other) } else { - let matches = def.all_variants().map(|var| { - let ident = var.ident(); - if var.is_unit() { + let matches = enum_def.all_variants(schema).map(|variant| { + let ident = variant.ident(); + if variant.is_fieldless() { quote!( (Self::#ident, Self::#ident) => true ) } else { quote!( (Self::#ident(a), Self::#ident(b)) => a.content_eq(b) ) @@ -67,39 +82,14 @@ fn derive_enum(def: &EnumDef) -> (&str, TokenStream) { } }; - ("other", body) + generate_impl(&enum_def.ty_anon(schema), "other", 
&body) } -fn derive_struct(def: &StructDef) -> (&str, TokenStream) { - if def.fields.is_empty() { - ("_", quote!(true)) - } else { - let fields = def - .fields - .iter() - .filter(|field| { - !IGNORE_FIELD_TYPES.iter().any(|it| field.typ.name().inner_name() == *it) - }) - .map(|field| { - let ident = field.ident(); - quote!(ContentEq::content_eq(&self.#ident, &other.#ident)) - }) - .collect_vec(); - if fields.is_empty() { - ("_", quote!(true)) - } else { - ("other", quote!(#(#fields)&&*)) - } - } -} - -fn impl_content_eq(def: &TypeDef, other_name: &str, body: &TokenStream) -> TokenStream { - let ty = if def.has_lifetime() { def.to_elided_type() } else { def.to_type_elide() }; - let other = other_name.to_ident(); - +fn generate_impl(ty: &TokenStream, other_name: &str, body: &TokenStream) -> TokenStream { + let other_ident = format_ident!("{other_name}"); quote! { impl ContentEq for #ty { - fn content_eq(&self, #other: &Self) -> bool { + fn content_eq(&self, #other_ident: &Self) -> bool { #body } } diff --git a/tasks/ast_tools/src/derives/estree.rs b/tasks/ast_tools/src/derives/estree.rs index 7ca285f91195d..b9ec100b606f0 100644 --- a/tasks/ast_tools/src/derives/estree.rs +++ b/tasks/ast_tools/src/derives/estree.rs @@ -1,32 +1,46 @@ -use convert_case::{Case, Casing}; +//! Derive for `Serialize` impls, which serialize AST to ESTree format in JSON. 
+ +use std::borrow::Cow; + use proc_macro2::TokenStream; use quote::quote; -use rustc_hash::FxHashMap; +use syn::{parse_str, Type}; use crate::{ - markers::ESTreeStructTagMode, - schema::{ - serialize::{enum_variant_name, get_always_flatten_structs, get_type_tag}, - EnumDef, FieldDef, GetGenerics, GetIdent, Schema, StructDef, TypeDef, - }, + schema::{Def, EnumDef, FieldDef, Schema, StructDef, TypeDef, VariantDef}, + Result, }; -use super::{define_derive, Derive}; +use super::{ + attr_positions, define_derive, AttrLocation, AttrPart, AttrPositions, Derive, StructOrEnum, +}; +/// Derive for `Serialize` impls, which serialize AST to ESTree format in JSON. pub struct DeriveESTree; define_derive!(DeriveESTree); impl Derive for DeriveESTree { - fn trait_name() -> &'static str { + fn trait_name(&self) -> &'static str { "ESTree" } - fn snake_name() -> String { + fn snake_name(&self) -> String { "estree".to_string() } - fn prelude() -> TokenStream { + /// Register that accept `#[estree]` attr on structs, enums, struct fields, or enum variants. + fn attrs(&self) -> &[(&'static str, AttrPositions)] { + &[("estree", attr_positions!(Struct | Enum | StructField | EnumVariant))] + } + + /// Parse `#[estree]` attr. + fn parse_attr(&self, _attr_name: &str, location: AttrLocation, part: AttrPart) -> Result<()> { + // No need to check attr name is `estree`, because that's the only attribute this derive handles + parse_estree_attr(location, part) + } + + fn prelude(&self) -> TokenStream { quote! { #![allow(unused_imports, unused_mut, clippy::match_same_arms)] @@ -35,28 +49,22 @@ impl Derive for DeriveESTree { } } - fn derive(&mut self, def: &TypeDef, schema: &Schema) -> TokenStream { - if let TypeDef::Struct(def) = def { - if def - .markers - .estree - .as_ref() - .and_then(|e| e.tag_mode.as_ref()) - .is_some_and(|e| e == &ESTreeStructTagMode::CustomSerialize) - { - return TokenStream::new(); + /// Generate implementation of `Serialize` for a struct or enum. 
+ fn derive(&self, type_def: StructOrEnum, schema: &Schema) -> TokenStream { + let body = match type_def { + StructOrEnum::Struct(struct_def) => { + if struct_def.estree.custom_serialize { + return quote!(); + } + generate_body_for_struct(struct_def, schema) } - } - - let body = match def { - TypeDef::Enum(def) => serialize_enum(def), - TypeDef::Struct(def) => serialize_struct(def, schema), + StructOrEnum::Enum(enum_def) => generate_body_for_enum(enum_def, schema), }; - let ident = def.ident(); - let lifetime = if def.has_lifetime() { quote!(<'_>) } else { TokenStream::new() }; + let ty = type_def.ty_anon(schema); + quote! { - impl Serialize for #ident #lifetime { + impl Serialize for #ty { fn serialize(&self, serializer: S) -> Result { #body } @@ -65,150 +73,235 @@ impl Derive for DeriveESTree { } } -fn serialize_struct(def: &StructDef, schema: &Schema) -> TokenStream { - if let Some(via) = &def.markers.estree.as_ref().and_then(|e| e.via.as_ref()) { - let via: TokenStream = via.parse().unwrap(); +/// Parse `#[estree]` attr. 
+fn parse_estree_attr(location: AttrLocation, part: AttrPart) -> Result<()> { + // No need to check attr name is `estree`, because that's the only attribute this derive handles + match location { + // `#[estree]` attr on struct + AttrLocation::Struct(struct_def) => match part { + AttrPart::Tag("always_flatten") => struct_def.estree.always_flatten = true, + AttrPart::Tag("no_type") => struct_def.estree.no_type = true, + AttrPart::Tag("custom_serialize") => struct_def.estree.custom_serialize = true, + AttrPart::String("rename", value) => struct_def.estree.rename = Some(value), + AttrPart::String("via", value) => struct_def.estree.via = Some(value), + AttrPart::String("add_ts", value) => struct_def.estree.add_ts = Some(value), + _ => return Err(()), + }, + // `#[estree]` attr on enum + AttrLocation::Enum(enum_def) => match part { + AttrPart::Tag("no_rename_variants") => enum_def.estree.no_rename_variants = true, + AttrPart::Tag("custom_ts_def") => enum_def.estree.custom_ts_def = true, + _ => return Err(()), + }, + // `#[estree]` attr on struct field + AttrLocation::StructField(struct_def, field_index) => match part { + AttrPart::Tag("skip") => struct_def.fields[field_index].estree.skip = true, + AttrPart::Tag("flatten") => struct_def.fields[field_index].estree.flatten = true, + AttrPart::String("rename", value) => { + struct_def.fields[field_index].estree.rename = Some(value); + } + AttrPart::String("via", value) => { + struct_def.fields[field_index].estree.via = Some(value); + } + AttrPart::String("append_to", value) => { + // Find field this field is to be appended to + let target_field_index = struct_def + .fields + .iter() + .enumerate() + .find(|(_, other_field)| other_field.name() == value) + .map(|(field_index, _)| field_index) + .ok_or(())?; + if target_field_index == field_index { + // Can't append field to itself + return Err(()); + } + let target_field = &mut struct_def.fields[target_field_index]; + if target_field.estree.append_field_index.is_some() { + // 
Can't append twice to same field + return Err(()); + } + target_field.estree.append_field_index = Some(field_index); + struct_def.fields[field_index].estree.skip = true; + } + AttrPart::String("ts_type", value) => { + struct_def.fields[field_index].estree.ts_type = Some(value); + } + _ => return Err(()), + }, + // `#[estree]` attr on enum variant + AttrLocation::EnumVariant(enum_def, variant_index) => match part { + AttrPart::String("rename", value) => { + enum_def.variants[variant_index].estree.rename = Some(value); + } + _ => return Err(()), + }, + _ => unreachable!(), + } + + Ok(()) +} + +/// Generate body of `serialize` method for a struct. +fn generate_body_for_struct(struct_def: &StructDef, schema: &Schema) -> TokenStream { + if let Some(via_str) = struct_def.estree.via.as_deref() { + let via_ty = parse_str::(via_str).unwrap(); return quote! { - #via::from(self).serialize(serializer) + #via_ty::from(self).serialize(serializer) }; } - let ident = def.ident(); - // If type_tag is Some, we serialize it manually. If None, either one of - // the fields is named r#type, or the struct does not need a "type" field. - let type_tag = get_type_tag(def); + let mut stmts = quote!(); - let mut fields = vec![]; - if let Some(ty) = &type_tag { - fields.push(quote! 
{ map.serialize_entry("type", #ty)?; }); + if should_add_type_field_to_struct(struct_def) { + let type_name = struct_def.estree.rename.as_deref().unwrap_or_else(|| struct_def.name()); + stmts.extend(quote!( map.serialize_entry("type", #type_name)?; )); } - let mut append_to: FxHashMap = FxHashMap::default(); + for field in &struct_def.fields { + if !field.estree.skip { + stmts.extend(generate_stmt_for_struct_field(field, struct_def, schema)); + } + } - // Scan through to find all append_to fields - for field in &def.fields { - let Some(parent) = field.markers.derive_attributes.estree.append_to.as_ref() else { - continue; - }; - assert!( - append_to.insert(parent.clone(), field).is_none(), - "Duplicate append_to target (on {ident})" - ); + quote! { + let mut map = serializer.serialize_map(None)?; + #stmts + map.end() } +} - for field in &def.fields { - if field.markers.derive_attributes.estree.skip - || field.markers.derive_attributes.estree.append_to.is_some() - { - continue; - } - let ident = field.ident().unwrap(); - let name = match &field.markers.derive_attributes.estree.rename { - Some(rename) => rename.to_string(), - None => field.name.clone().unwrap().to_case(Case::Camel), - }; - assert!( - !(name == "type" && type_tag.is_some()), - "Unexpected r#type field when #[estree(type = ...)] is specified (on {ident})" - ); - - let ident = field.ident().unwrap(); - let always_flatten = match field.typ.type_id() { - Some(id) => get_always_flatten_structs(schema).contains(&id), - None => false, +/// Generate code to serialize a struct field. +fn generate_stmt_for_struct_field( + field: &FieldDef, + struct_def: &StructDef, + schema: &Schema, +) -> TokenStream { + let field_name_ident = field.ident(); + + if should_flatten_field(field, schema) { + return quote! 
{ + self.#field_name_ident.serialize(serde::__private::ser::FlatMapSerializer(&mut map))?; }; + } - let append_after = append_to.get(&ident.to_string()); - - if always_flatten || field.markers.derive_attributes.estree.flatten { - assert!( - append_after.is_none(), - "Cannot flatten and append to the same field (on {ident})" - ); - fields.push(quote! { - self.#ident.serialize( - serde::__private::ser::FlatMapSerializer(&mut map) - )?; - }); - } else if let Some(append_after) = append_after { - let after_ident = append_after.ident().unwrap(); - fields.push(quote! { - map.serialize_entry( - #name, - &oxc_estree::ser::AppendTo { - array: &self.#ident, - after: &self.#after_ident - } - )?; - }); - } else if let Some(via) = &field.markers.derive_attributes.estree.via { - let via_tokens: TokenStream = via.parse().unwrap(); - fields.push(quote! { - map.serialize_entry( - #name, - &#via_tokens(&self.#ident) - )?; - }); - } else { - fields.push(quote! { - map.serialize_entry(#name, &self.#ident)?; - }); - } + let field_camel_name = get_struct_field_name(field); + + let mut value = quote!( &self.#field_name_ident ); + if let Some(via_str) = field.estree.via.as_deref() { + let via_ty = parse_str::(via_str).unwrap(); + value = quote!( &#via_ty(#value) ); + } else if let Some(append_field_index) = field.estree.append_field_index { + let append_from_ident = struct_def.fields[append_field_index].ident(); + value = quote! { + &oxc_estree::ser::AppendTo { array: #value, after: &self.#append_from_ident } + }; } quote! { - let mut map = serializer.serialize_map(None)?; - #(#fields)* - map.end() + map.serialize_entry(#field_camel_name, #value)?; } } -// 3 different kinds of AST enums: -// 1. Transparent enums, which would be #[serde(untagged)]. These take their -// type tag from their children. Each of the variants is its own struct. -// 2. Type enums, which are not camelCased. 
These are for example the -// r#type field of a Function, and are used instead of the struct name -// as the type field on the JSON. -// 3. All other enums, which are camelCased. -fn serialize_enum(def: &EnumDef) -> TokenStream { - let ident = def.ident(); - - let is_untagged = def.all_variants().all(|var| var.fields.len() == 1); - - if is_untagged { - let match_branches = def.all_variants().map(|var| { - let var_ident = var.ident(); +/// Generate body of `serialize` method for an enum. +fn generate_body_for_enum(enum_def: &EnumDef, schema: &Schema) -> TokenStream { + let enum_ident = enum_def.ident(); + + if enum_def.is_fieldless() { + let enum_name = enum_def.name(); + let match_branches = enum_def.all_variants(schema).map(|variant| { + let variant_ident = variant.ident(); + // TODO: Don't print numbers as `0u32` - just `0` is fine + let discriminant = u32::from(variant.discriminant); + let value = get_fieldless_variant_value(enum_def, variant); + quote! { - #ident::#var_ident(x) => { - Serialize::serialize(x, serializer) + #enum_ident::#variant_ident => { + serializer.serialize_unit_variant(#enum_name, #discriminant, #value) } } }); + quote! { - match self { - #(#match_branches),* + match *self { + #(#match_branches)* } } } else { - let match_branches = def.all_variants().map(|var| { - let var_ident = var.ident(); - let enum_name = ident.to_string(); - let discriminant = u32::from(var.discriminant); - let serialized_to = enum_variant_name(var, def); - assert!( - var.fields.is_empty(), - "Tagged enums must not have inner fields (on {ident}::{var_ident})" - ); + let match_branches = enum_def.all_variants(schema).map(|variant| { + let variant_ident = variant.ident(); + // TODO: Rename `x` to `it` to match other generated code quote! { - #ident::#var_ident => { - serializer.serialize_unit_variant(#enum_name, #discriminant, #serialized_to) + #enum_ident::#variant_ident(x) => { + Serialize::serialize(x, serializer) } } }); + quote! 
{ - match *self { - #(#match_branches),* + match self { + #(#match_branches)* } } } } + +/// Get if should generate a `type` field. +/// +/// Type field should be added unless struct has an `#[estree(no_type)]` attr +/// or struct has an existing field called `type`. +/// +/// This function also used by Typescript generator. +pub fn should_add_type_field_to_struct(struct_def: &StructDef) -> bool { + if struct_def.estree.no_type { + false + } else { + !struct_def.fields.iter().any(|field| matches!(field.name(), "type")) + } +} + +/// Get if should flatten a struct field. +/// +/// Returns `true` if either the field has an `#[estree(flatten)]` attr on it, +/// or the type that the field contains has an `#[estree(always_flatten)]` attr. +/// +/// This function also used by Typescript generator. +pub fn should_flatten_field(field: &FieldDef, schema: &Schema) -> bool { + if field.estree.flatten { + true + } else { + let field_type = field.type_def(schema); + matches!(field_type, TypeDef::Struct(field_struct_def) if field_struct_def.estree.always_flatten) + } +} + +/// Get value of a fieldless enum variant. +/// +/// Value is determined by: +/// * `#[estree(rename)]` attr on variant. +/// * `#[estree(no_rename_variants)]` attr on enum. +/// +/// This function also used by Typescript generator. +pub fn get_fieldless_variant_value<'s>( + enum_def: &'s EnumDef, + variant: &'s VariantDef, +) -> Cow<'s, str> { + if let Some(variant_name) = variant.estree.rename.as_deref() { + Cow::Borrowed(variant_name) + } else if enum_def.estree.no_rename_variants { + Cow::Borrowed(variant.name()) + } else { + Cow::Owned(variant.camel_name()) + } +} + +/// Get ESTree name for struct field. +/// +/// This function also used by Typescript generator. 
+pub fn get_struct_field_name(field: &FieldDef) -> Cow<'_, str> { + if let Some(field_name) = field.estree.rename.as_deref() { + Cow::Borrowed(field_name) + } else { + Cow::Owned(field.camel_name()) + } +} diff --git a/tasks/ast_tools/src/derives/get_address.rs b/tasks/ast_tools/src/derives/get_address.rs index c3ddd6674c52b..1d3eccd46c83d 100644 --- a/tasks/ast_tools/src/derives/get_address.rs +++ b/tasks/ast_tools/src/derives/get_address.rs @@ -1,23 +1,23 @@ +//! Derive for `GetAddress` trait. + use proc_macro2::TokenStream; use quote::quote; -use crate::{ - schema::{EnumDef, Schema, ToType, TypeDef}, - util::TypeWrapper, -}; +use crate::schema::{Def, EnumDef, Schema}; -use super::{define_derive, Derive}; +use super::{define_derive, Derive, StructOrEnum}; +/// Derive for `GetAddress` trait. pub struct DeriveGetAddress; define_derive!(DeriveGetAddress); impl Derive for DeriveGetAddress { - fn trait_name() -> &'static str { + fn trait_name(&self) -> &'static str { "GetAddress" } - fn prelude() -> TokenStream { + fn prelude(&self) -> TokenStream { quote! 
{ #![allow(clippy::match_same_arms)] @@ -26,31 +26,36 @@ impl Derive for DeriveGetAddress { } } - fn derive(&mut self, def: &TypeDef, _schema: &Schema) -> TokenStream { - if let TypeDef::Enum(enum_def) = def { - derive_enum(enum_def) + fn derive(&self, type_def: StructOrEnum, schema: &Schema) -> TokenStream { + if let StructOrEnum::Enum(enum_def) = type_def { + derive_enum(enum_def, schema) } else { - panic!("`GetAddress` can only be implemented with `#[generate_derive]` on enums"); + panic!( + "`GetAddress` can only be implemented with `#[generate_derive]` on enums: `{}`", + type_def.name() + ); } } } -fn derive_enum(def: &EnumDef) -> TokenStream { - let target_type = def.to_elided_type(); +fn derive_enum(enum_def: &EnumDef, schema: &Schema) -> TokenStream { + let ty = enum_def.ty_anon(schema); - let matches = def.all_variants().map(|variant| { + let matches = enum_def.all_variants(schema).map(|variant| { + let variant_type = variant.field_type(schema).unwrap(); assert!( - variant.fields.len() == 1 - && variant.fields[0].typ.analysis().wrapper == TypeWrapper::Box, - "`GetAddress` can only be derived on enums where all variants are boxed" + variant_type.is_box(), + "`GetAddress` can only be derived on enums where all variants are boxed: `{}::{}`", + enum_def.name(), + variant.name(), ); let ident = variant.ident(); - quote!(Self::#ident(it) => GetAddress::address(it)) + quote!( Self::#ident(it) => GetAddress::address(it) ) }); quote! { - impl GetAddress for #target_type { + impl GetAddress for #ty { ///@ `#[inline]` because compiler should boil this down to a single assembly instruction #[inline] fn address(&self) -> Address { diff --git a/tasks/ast_tools/src/derives/get_span.rs b/tasks/ast_tools/src/derives/get_span.rs index 3f9d0d206b4e4..a58ecb3991f0c 100644 --- a/tasks/ast_tools/src/derives/get_span.rs +++ b/tasks/ast_tools/src/derives/get_span.rs @@ -1,24 +1,46 @@ +//! Derive for `GetSpan` trait. 
+ use proc_macro2::TokenStream; -use quote::quote; +use quote::{format_ident, quote}; use syn::Ident; use crate::{ - schema::{EnumDef, GetGenerics, Schema, StructDef, ToType, TypeDef}, - util::{ToIdent, TypeWrapper}, + schema::{Def, EnumDef, Schema, StructDef}, + Result, }; -use super::{define_derive, Derive}; +use super::{define_derive, AttrLocation, AttrPart, AttrPositions, Derive, StructOrEnum}; +/// Derive for `GetSpan` trait. pub struct DeriveGetSpan; define_derive!(DeriveGetSpan); impl Derive for DeriveGetSpan { - fn trait_name() -> &'static str { + fn trait_name(&self) -> &'static str { "GetSpan" } - fn prelude() -> TokenStream { + /// Register that accept `#[span]` attr on struct fields. + fn attrs(&self) -> &[(&'static str, AttrPositions)] { + &[("span", AttrPositions::StructField)] + } + + /// Parse `#[span]` on struct field. + fn parse_attr(&self, _attr_name: &str, location: AttrLocation, part: AttrPart) -> Result<()> { + // No need to check attr name is `span`, because that's the only attribute this derive handles. + // Ditto location can only be `StructField`. + let AttrLocation::StructField(struct_def, field_index) = location else { unreachable!() }; + + if matches!(part, AttrPart::None) { + struct_def.span.span_field_index = Some(field_index); + Ok(()) + } else { + Err(()) + } + } + + fn prelude(&self) -> TokenStream { quote! 
{ #![allow(clippy::match_same_arms)] @@ -27,22 +49,23 @@ impl Derive for DeriveGetSpan { } } - fn derive(&mut self, def: &TypeDef, _: &Schema) -> TokenStream { - let self_type = quote!(&self); - let result_type = quote!(Span); + fn derive(&self, type_def: StructOrEnum, schema: &Schema) -> TokenStream { + let self_ty = quote!(&self); + let result_ty = quote!(Span); let result_expr = quote!(self.span); - let unbox = |it| quote!(#it.as_ref()); - let reference = |it| quote!(&#it); + let reference = quote!( & ); + let unboxed_ref = quote!(it.as_ref()); - derive( - Self::trait_name(), + derive_type( + type_def, + "GetSpan", "span", - &self_type, - &result_type, + &self_ty, + &result_ty, &result_expr, - def, - unbox, - reference, + &reference, + &unboxed_ref, + schema, ) } } @@ -52,11 +75,11 @@ pub struct DeriveGetSpanMut; define_derive!(DeriveGetSpanMut); impl Derive for DeriveGetSpanMut { - fn trait_name() -> &'static str { + fn trait_name(&self) -> &'static str { "GetSpanMut" } - fn prelude() -> TokenStream { + fn prelude(&self) -> TokenStream { quote! { #![allow(clippy::match_same_arms)] @@ -65,120 +88,126 @@ impl Derive for DeriveGetSpanMut { } } - fn derive(&mut self, def: &TypeDef, _: &Schema) -> TokenStream { - let self_type = quote!(&mut self); - let result_type = quote!(&mut Span); + fn derive(&self, type_def: StructOrEnum, schema: &Schema) -> TokenStream { + let self_ty = quote!(&mut self); + let result_ty = quote!(&mut Span); let result_expr = quote!(&mut self.span); - let unbox = |it| quote!(&mut **#it); - let reference = |it| quote!(&mut #it); + let reference = quote!( &mut ); + let unboxed_ref = quote!(&mut **it); - derive( - Self::trait_name(), + derive_type( + type_def, + "GetSpanMut", "span_mut", - &self_type, - &result_type, + &self_ty, + &result_ty, &result_expr, - def, - unbox, - reference, + &reference, + &unboxed_ref, + schema, ) } } +/// Generate `GetSpan` / `GetSpanMut` trait implementation for a type. 
#[expect(clippy::too_many_arguments)] -fn derive( +fn derive_type( + type_def: StructOrEnum, trait_name: &str, method_name: &str, - self_type: &TokenStream, - result_type: &TokenStream, + self_ty: &TokenStream, + result_ty: &TokenStream, result_expr: &TokenStream, - def: &TypeDef, - unbox: U, - reference: R, -) -> TokenStream -where - U: Fn(TokenStream) -> TokenStream, - R: Fn(TokenStream) -> TokenStream, -{ - let trait_ident = trait_name.to_ident(); - let method_ident = method_name.to_ident(); - match &def { - TypeDef::Enum(def) => { - derive_enum(def, &trait_ident, &method_ident, self_type, result_type, unbox) - } - TypeDef::Struct(def) => derive_struct( - def, + reference: &TokenStream, + unboxed_ref: &TokenStream, + schema: &Schema, +) -> TokenStream { + let trait_ident = format_ident!("{trait_name}"); + let method_ident = format_ident!("{method_name}"); + match type_def { + StructOrEnum::Struct(struct_def) => derive_struct( + struct_def, &trait_ident, &method_ident, - self_type, - result_type, + self_ty, + result_ty, result_expr, reference, + schema, + ), + StructOrEnum::Enum(enum_def) => derive_enum( + enum_def, + &trait_ident, + &method_ident, + self_ty, + result_ty, + unboxed_ref, + schema, ), } } -fn derive_enum( - def: &EnumDef, - trait_name: &Ident, - method_name: &Ident, - self_type: &TokenStream, - result_type: &TokenStream, - unbox: U, -) -> TokenStream -where - U: Fn(TokenStream) -> TokenStream, -{ - let target_type = if def.has_lifetime() { def.to_elided_type() } else { def.to_type_elide() }; - - let matches = def.all_variants().map(|var| { - let ident = var.ident(); - let mut it = quote!(it); - if var.fields.first().is_some_and(|it| it.typ.analysis().wrapper == TypeWrapper::Box) { - it = unbox(it); - } - quote!(Self :: #ident(it) => #trait_name :: #method_name(#it)) - }); +/// Generate `GetSpan` / `GetSpanMut` trait implementation for a struct. 
+#[expect(clippy::too_many_arguments)] +fn derive_struct( + struct_def: &StructDef, + trait_ident: &Ident, + method_ident: &Ident, + self_ty: &TokenStream, + result_ty: &TokenStream, + result_expr: &TokenStream, + reference: &TokenStream, + schema: &Schema, +) -> TokenStream { + let ty = struct_def.ty_anon(schema); + + let result_expr = if let Some(field_index) = struct_def.span.span_field_index { + let field_ident = struct_def.fields[field_index].ident(); + &quote!( #trait_ident::#method_ident(#reference self.#field_ident) ) + } else { + result_expr + }; quote! { - impl #trait_name for #target_type { - fn #method_name(#self_type) -> #result_type { - match self { - #(#matches),* - } + impl #trait_ident for #ty { + #[inline] + fn #method_ident(#self_ty) -> #result_ty { + #result_expr } } } } -fn derive_struct<R>( - def: &StructDef, - trait_name: &Ident, - method_name: &Ident, - self_type: &TokenStream, - result_type: &TokenStream, - result_expr: &TokenStream, - reference: R, -) -> TokenStream -where - R: Fn(TokenStream) -> TokenStream, -{ - let target_type = if def.has_lifetime() { def.to_elided_type() } else { def.to_type_elide() }; - - let span_field = def.fields.iter().find(|field| field.markers.span); - let result_expr = if let Some(span_field) = span_field { - let ident = span_field.name.as_ref().map(ToIdent::to_ident).unwrap(); - let reference = reference(quote!(self.#ident)); - quote!(#trait_name :: #method_name (#reference)) - } else { - result_expr.clone() - }; +/// Generate `GetSpan` / `GetSpanMut` trait implementation for an enum. 
+fn derive_enum( + enum_def: &EnumDef, + trait_ident: &Ident, + method_ident: &Ident, + self_ty: &TokenStream, + result_ty: &TokenStream, + unboxed_ref: &TokenStream, + schema: &Schema, +) -> TokenStream { + let ty = enum_def.ty_anon(schema); + + let matches = enum_def.all_variants(schema).map(|variant| { + let variant_ident = variant.ident(); + let variant_type = variant.field_type(schema).unwrap(); + // TODO: Just generate `it.span()` or `it.span_mut()`. + // Then output is the same whether variant is boxed or not, and `unboxed_ref` is not needed. + if variant_type.is_box() { + quote!( Self::#variant_ident(it) => #trait_ident::#method_ident(#unboxed_ref) ) + } else { + quote!( Self::#variant_ident(it) => #trait_ident::#method_ident(it) ) + } + }); quote! { - impl #trait_name for #target_type { - #[inline] - fn #method_name(#self_type) -> #result_type { - #result_expr + impl #trait_ident for #ty { + fn #method_ident(#self_ty) -> #result_ty { + match self { + #(#matches),* + } } } } diff --git a/tasks/ast_tools/src/derives/mod.rs b/tasks/ast_tools/src/derives/mod.rs index 0deb163f5c64d..b65d5121d31d5 100644 --- a/tasks/ast_tools/src/derives/mod.rs +++ b/tasks/ast_tools/src/derives/mod.rs @@ -1,19 +1,19 @@ use convert_case::{Case, Casing}; -use itertools::Itertools; use proc_macro2::TokenStream; use quote::quote; use rustc_hash::{FxHashMap, FxHashSet}; -use syn::{parse_str, ItemUse}; +use syn::{parse_str, Path}; use crate::{ output::{output_path, Output}, - schema::{Schema, TypeDef}, - Result, + parse::attr::{attr_positions, AttrLocation, AttrPart, AttrPositions}, + schema::{Def, Derives, EnumDef, FileId, Schema, StructDef, TypeDef, TypeId}, + Codegen, Result, Runner, }; mod clone_in; mod content_eq; -mod estree; +pub mod estree; mod get_address; mod get_span; @@ -23,36 +23,179 @@ pub use estree::DeriveESTree; pub use get_address::DeriveGetAddress; pub use get_span::{DeriveGetSpan, DeriveGetSpanMut}; -pub trait Derive { - // Methods defined by implementer +/// Trait 
to define a derive. +pub trait Derive: Runner { + // Methods which can/must be defined by implementer. - fn trait_name() -> &'static str; + /// Get trait name. + fn trait_name(&self) -> &'static str; - fn snake_name() -> String { - Self::trait_name().to_case(Case::Snake) + /// Get snake case trait name. + /// + /// Defaults to `trait_name()` converted to snake case. + /// Can be overridden. + fn snake_name(&self) -> String { + self.trait_name().to_case(Case::Snake) } - fn prelude() -> TokenStream { - TokenStream::default() + /// Attributes that this derive uses. + /// + /// If this [`Derive`] handles any attributes, override this method to return the details of where + /// those attributes can legally be used. + /// + /// [`parse_attr`] will be called with any attributes on structs/enums matching these patterns. + /// + /// e.g.: + /// + /// ```ignore + /// fn attrs(&self) -> &[(&'static str, AttrPositions)] { + /// &[("clone_in", AttrPositions::StructField)] + /// } + /// ``` + /// + /// ```ignore + /// fn attrs(&self) -> &[(&'static str, AttrPositions)] { + /// &[ + /// ("visit", attr_positions!(AstAttr | StructField | EnumVariant)), + /// ("scope", attr_positions!(Struct | Enum | StructField)), + /// ] + /// } + /// ``` + /// + /// [`parse_attr`]: Derive::parse_attr + fn attrs(&self) -> &[(&'static str, AttrPositions)] { + &[] } - fn derive(&mut self, def: &TypeDef, schema: &Schema) -> TokenStream; + /// Parse an attribute part and record information from it on type definition. + /// + /// `parse_attr` will only be called with attributes which this [`Derive`] has registered + /// its ownership of by returning their details from [`attrs`] method. + /// + /// * `attr_name` is name of the attribute. + /// * `location` is location attribute appears (e.g. on a struct field). + /// * `part` contains the details of this part of the attribute. 
+ /// + /// e.g.: + /// + /// ``` + /// #[ast(visit)] + /// #[estree(rename = "FooFoo")] + /// struct Foo { + /// #[estree(skip, rename = "Blah")] + /// #[span] + /// blip: Bar, + /// } + /// ``` + /// + /// `parse_attr` will be called 5 times, with arguments: + /// + /// * `"visit", AttrLocation::StructAstAttr(struct_def), AttrPart::None` + /// * `"estree", AttrLocation::Struct(struct_def), AttrPart::String("rename", "FooFoo")` + /// * `"estree", AttrLocation::StructField(struct_def, 0), AttrPart::Tag("skip")` + /// * `"estree", AttrLocation::StructField(struct_def, 0), AttrPart::String("rename", "Blah")` + /// * `"span", AttrLocation::StructField(struct_def, 0), AttrPart::None` + /// + /// [`attrs`]: Derive::attrs + #[expect(unused_variables)] + fn parse_attr( + &self, + attr_name: &str, + location: AttrLocation<'_>, + part: AttrPart<'_>, + ) -> Result<()> { + Ok(()) + } + + /// Generate prelude to be output at top of generated files. + /// + /// Defaults to no prelude. + /// Can be overridden. + fn prelude(&self) -> TokenStream { + quote!() + } + + /// Prepare for generation, modifying schema. + /// + /// Runs before any `generate` or `derive` method runs. + #[expect(unused_variables)] + fn prepare(&self, schema: &mut Schema) {} + + /// Generate trait implementation for a type. + fn derive(&self, type_def: StructOrEnum<'_>, schema: &Schema) -> TokenStream; + + // Standard methods. Should not be overridden. + + /// Run derive on all types which derive the trait, and compile into 1 file per crate. + fn output(&self, schema: &Schema, codegen: &Codegen) -> Vec<Output> { + #[derive(Default)] + struct CrateContent { + import_file_ids: FxHashSet<FileId>, + output: TokenStream, + } - // Standard methods + // Run derive on all types which have `#[generate_derive]` attr for this trait. + // Store results in a hash map indexed by crate name. 
+ let derive_id = codegen.get_derive_id_by_name(self.trait_name()); - fn template(module_paths: Vec<&str>, impls: TokenStream) -> TokenStream { - let prelude = Self::prelude(); + let mut crate_contents = FxHashMap::<&str, CrateContent>::default(); + for type_def in &schema.types { + let derived = match type_def { + TypeDef::Struct(struct_def) if struct_def.generates_derive(derive_id) => { + self.derive(StructOrEnum::Struct(struct_def), schema) + } + TypeDef::Enum(enum_def) if enum_def.generates_derive(derive_id) => { + self.derive(StructOrEnum::Enum(enum_def), schema) + } + _ => continue, + }; + + let file_id = type_def.file_id().unwrap(); + let content = crate_contents.entry(schema.files[file_id].krate()).or_default(); + content.import_file_ids.insert(file_id); + + content.output.extend(quote! { + ///@@line_break + #derived + }); + } - // from `x::y::z` to `crate::y::z::*` - let use_modules = module_paths.into_iter().map(|module_path| { - let module_path = module_path.strip_suffix("::mod").unwrap_or(module_path); - let local_path = ["crate"] - .into_iter() - .chain(module_path.split("::").skip(1)) - .chain(["*"]) - .join("::"); - let use_module: ItemUse = parse_str(format!("use {local_path};").as_str()).unwrap(); - quote!( #use_module ) + // Generate an output for each crate. + // Wrap each output in template with `use` statements to import types which were derived. + let filename = format!("derive_{}.rs", self.snake_name()); + crate_contents + .into_iter() + .map(|(krate, content)| { + let mut import_paths = content + .import_file_ids + .into_iter() + .map(|file_id| schema.files[file_id].import_path()) + .collect::<Vec<_>>(); + import_paths.sort_unstable(); + + Output::Rust { + path: output_path(&format!("crates/{krate}"), &filename), + tokens: self.template(&import_paths, content.output), + } + }) + .collect() + } + + /// Wrap derived output for a crate in template. + /// Add prelude, and `use` statements to import types which were derived. 
+ fn template(&self, import_paths: &[&str], impls: TokenStream) -> TokenStream { + let prelude = self.prelude(); + + let use_modules = import_paths.iter().map(|import_path| { + if import_path.is_empty() { + quote! { + use crate::*; + } + } else { + // `::ast::js` -> `use crate::ast::js::*;` + let import_path: Path = parse_str(import_path).unwrap(); + quote!( use crate #import_path ::*; ) + } }); quote! { @@ -65,70 +208,23 @@ pub trait Derive { #impls } } - - fn output(&mut self, schema: &Schema) -> Result> { - let trait_name = Self::trait_name(); - let filename = format!("derive_{}.rs", Self::snake_name()); - let output = schema - .defs - .iter() - .filter(|def| def.generates_derive(trait_name)) - .map(|def| (def, self.derive(def, schema))) - .fold( - FxHashMap::<&str, (FxHashSet<&str>, Vec)>::default(), - |mut acc, (def, stream)| { - let module_path = def.module_path(); - let krate = module_path.split("::").next().unwrap(); - let streams = acc.entry(krate).or_default(); - streams.0.insert(module_path); - streams.1.push(stream); - acc - }, - ) - .into_iter() - .sorted_by(|lhs, rhs| lhs.0.cmp(rhs.0)) - .fold(Vec::new(), |mut acc, (krate, (modules, streams))| { - let mut modules = Vec::from_iter(modules); - modules.sort_unstable(); - - let output = Output::Rust { - path: output_path(&format!("crates/{krate}"), &filename), - tokens: Self::template( - modules, - streams.into_iter().fold(TokenStream::new(), |mut acc, it| { - acc.extend(quote! { - ///@@line_break - }); - acc.extend(it); - acc - }), - ), - }; - - acc.push(output); - acc - }); - Ok(output) - } } +/// Macro to implement [`Runner`] for a [`Derive`]. +/// +/// Must be used on every [`Derive`]. +/// +/// # Example +/// ``` +/// struct DeriveCloneIn; +/// define_derive!(DeriveCloneIn); +/// ``` macro_rules! define_derive { ($ident:ident $($lifetime:lifetime)?) 
=> { const _: () = { - use $crate::{ - codegen::Runner, - output::Output, - schema::Schema, - Result, - }; + use $crate::{Output, Runner, Schema, Result, Codegen}; impl $($lifetime)? Runner for $ident $($lifetime)? { - type Context = Schema; - - fn verb(&self) -> &'static str { - "Derive" - } - fn name(&self) -> &'static str { stringify!($ident) } @@ -137,11 +233,79 @@ macro_rules! define_derive { file!() } - fn run(&mut self, schema: &Schema) -> Result<Vec<Output>> { - self.output(schema) + fn run(&self, schema: &Schema, codegen: &Codegen) -> Result<Vec<Output>> { + Ok(self.output(schema, codegen)) } } }; }; } pub(crate) use define_derive; + +/// Reference to a [`StructDef`] or [`EnumDef`]. +/// +/// This type is what's passed to [`Derive::derive`] method. +#[derive(Clone, Copy)] +pub enum StructOrEnum<'d> { + Struct(&'d StructDef), + Enum(&'d EnumDef), +} + +impl Def for StructOrEnum<'_> { + /// Get [`TypeId`] for type. + fn id(&self) -> TypeId { + match self { + Self::Struct(struct_def) => struct_def.id(), + Self::Enum(enum_def) => enum_def.id(), + } + } + + /// Get type name. + fn name(&self) -> &str { + match self { + Self::Struct(struct_def) => struct_def.name(), + Self::Enum(enum_def) => enum_def.name(), + } + } + + /// Get [`FileId`] of file containing definition of this type. + fn file_id(&self) -> Option<FileId> { + match self { + Self::Struct(struct_def) => struct_def.file_id(), + Self::Enum(enum_def) => enum_def.file_id(), + } + } + + /// Get all traits which have derives generated for this type. + fn generated_derives(&self) -> Derives { + match self { + Self::Struct(struct_def) => struct_def.generated_derives(), + Self::Enum(enum_def) => enum_def.generated_derives(), + } + } + + /// Get if type has a lifetime. + fn has_lifetime(&self, schema: &Schema) -> bool { + match self { + Self::Struct(struct_def) => struct_def.has_lifetime(schema), + Self::Enum(enum_def) => enum_def.has_lifetime(schema), + } + } + + /// Get type signature (including lifetimes). 
+ /// Lifetimes are anonymous (`'_`) if `anon` is true. + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream { + match self { + Self::Struct(struct_def) => struct_def.ty_with_lifetime(schema, anon), + Self::Enum(enum_def) => enum_def.ty_with_lifetime(schema, anon), + } + } + + /// Get inner type, if type has one. + /// + /// Structs and enums don't have a single inner type, so returns `None`. + #[expect(unused_variables)] + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + None + } +} diff --git a/tasks/ast_tools/src/generators/assert_layouts.rs b/tasks/ast_tools/src/generators/assert_layouts.rs index b7edfe115562d..a15a8e1f74b84 100644 --- a/tasks/ast_tools/src/generators/assert_layouts.rs +++ b/tasks/ast_tools/src/generators/assert_layouts.rs @@ -1,105 +1,449 @@ +//! Calculate memory layout of all types. +//! Generate const assertions for the correctness of those calculations. +//! +//! Memory layouts are different on 64-bit and 32-bit platforms. +//! Calculate each separately, and generate assertions for each. + +use std::cmp::{max, min}; + use proc_macro2::TokenStream; use quote::quote; -use syn::Type; +use syn::Ident; use crate::{ output::{output_path, Output}, - schema::{FieldDef, Schema, ToType, TypeDef}, - util::ToIdent, - Generator, + schema::{ + extensions::layout::{Layout, Niche, Offset, PlatformLayout}, + Def, Discriminant, EnumDef, PrimitiveDef, Schema, StructDef, TypeDef, TypeId, Visibility, + }, + Codegen, Generator, }; use super::define_generator; +/// Generator for memory layout assertions. pub struct AssertLayouts; define_generator!(AssertLayouts); impl Generator for AssertLayouts { - fn generate(&mut self, schema: &Schema) -> Output { - let (assertions_64, assertions_32) = schema - .defs - .iter() - .map(|def| { - let typ = def.to_type_elide(); - assert_type(&typ, def) - }) - .collect::<(Vec, Vec)>(); + /// Calculate layouts of all types. 
+ fn prepare(&self, schema: &mut Schema) { + for type_id in schema.types.indices() { + calculate_layout(type_id, schema); + } + } + + /// Generate assertions that calculated layouts are correct. + fn generate(&self, schema: &Schema, _codegen: &Codegen) -> Output { + let (assertions_64, assertions_32): (TokenStream, TokenStream) = + schema.types.iter().map(generate_layout_assertions).unzip(); - Output::Rust { - path: output_path(crate::AST_CRATE, "assert_layouts.rs"), - tokens: quote! { - use std::mem::{align_of, offset_of, size_of}; + let output = quote! { + use std::mem::{align_of, offset_of, size_of}; - ///@@line_break - use oxc_regular_expression::ast::*; + ///@@line_break + use oxc_regular_expression::ast::*; - ///@@line_break - use crate::ast::*; + ///@@line_break + use crate::ast::*; - ///@@line_break - #[cfg(target_pointer_width = "64")] - const _: () = { #(#assertions_64)* }; + ///@@line_break + #[cfg(target_pointer_width = "64")] + const _: () = { #assertions_64 }; - ///@@line_break - #[cfg(target_pointer_width = "32")] - const _: () = { #(#assertions_32)* }; + ///@@line_break + #[cfg(target_pointer_width = "32")] + const _: () = { #assertions_32 }; + + ///@@line_break + #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))] + const _: () = panic!("Platforms with pointer width other than 64 or 32 bit are not supported"); + }; + + Output::Rust { path: output_path(crate::AST_CRATE, "assert_layouts.rs"), tokens: output } + } +} + +/// Calculate layout for a type. +/// +/// If layout was calculated already, just return the existing `Layout`. 
+fn calculate_layout(type_id: TypeId, schema: &mut Schema) -> &Layout { + fn is_not_calculated(layout: &Layout) -> bool { + // `align` field is set to 0 initially, but that's an illegal value + layout.layout_64.align == 0 + } - ///@@line_break - #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))] - const _: () = panic!("Platforms with pointer width other than 64 or 32 bit are not supported"); - }, + let type_def = &schema.types[type_id]; + match type_def { + TypeDef::Struct(struct_def) => { + if is_not_calculated(&struct_def.layout) { + schema.struct_def_mut(type_id).layout = + calculate_layout_for_struct(type_id, schema); + } + &schema.struct_def(type_id).layout + } + TypeDef::Enum(enum_def) => { + if is_not_calculated(&enum_def.layout) { + schema.enum_def_mut(type_id).layout = calculate_layout_for_enum(type_id, schema); + } + &schema.enum_def(type_id).layout + } + TypeDef::Primitive(primitive_def) => { + if is_not_calculated(&primitive_def.layout) { + schema.primitive_def_mut(type_id).layout = + calculate_layout_for_primitive(primitive_def); + } + &schema.primitive_def(type_id).layout + } + TypeDef::Option(option_def) => { + if is_not_calculated(&option_def.layout) { + schema.option_def_mut(type_id).layout = + calculate_layout_for_option(type_id, schema); + } + &schema.option_def(type_id).layout + } + TypeDef::Box(box_def) => { + if is_not_calculated(&box_def.layout) { + schema.box_def_mut(type_id).layout = calculate_layout_for_box(); + } + &schema.box_def(type_id).layout + } + TypeDef::Vec(vec_def) => { + if is_not_calculated(&vec_def.layout) { + schema.vec_def_mut(type_id).layout = calculate_layout_for_vec(); + } + &schema.vec_def(type_id).layout + } + TypeDef::Cell(cell_def) => { + if is_not_calculated(&cell_def.layout) { + schema.cell_def_mut(type_id).layout = calculate_layout_for_cell(type_id, schema); + } + &schema.cell_def(type_id).layout } } } -fn assert_type(ty: &Type, def: &TypeDef) -> (TokenStream, TokenStream) { - match def { - 
TypeDef::Struct(def) => ( - with_offsets_assertion( - assert_size_align(ty, def.size_64, def.align_64), - ty, - &def.fields, - def.offsets_64.as_deref(), - ), - with_offsets_assertion( - assert_size_align(ty, def.size_32, def.align_32), - ty, - &def.fields, - def.offsets_32.as_deref(), - ), - ), - TypeDef::Enum(def) => ( - assert_size_align(ty, def.size_64, def.align_64), - assert_size_align(ty, def.size_32, def.align_32), - ), +/// Calculate layout for a struct. +/// +/// All structs in AST are `#[repr(C)]`. In a `#[repr(C)]` struct, compiler does not re-order the fields, +/// so they are stored in memory in same order as they're defined. +/// +/// Each field is aligned to the alignment of the field type. Padding bytes are added between fields +/// as necessary to ensure this. +/// +/// Alignment of the struct is the highest alignment of its fields (or 1 if no fields). +/// Size of struct is a multiple of its alignment. +/// +/// A struct has a niche if any of its fields has a niche. The niche will be the largest niche +/// in any of its fields. Padding bytes are not used as niches. +fn calculate_layout_for_struct(type_id: TypeId, schema: &mut Schema) -> Layout { + let mut layout_64 = PlatformLayout::from_size_align(0, 1); + let mut layout_32 = PlatformLayout::from_size_align(0, 1); + + for field_index in schema.struct_def(type_id).field_indices() { + let field_type_id = schema.struct_def(type_id).fields[field_index].type_id; + let field_layout = calculate_layout(field_type_id, schema); + + #[expect(clippy::items_after_statements)] + fn update(layout: &mut PlatformLayout, field_layout: &PlatformLayout) -> u32 { + // Field needs to be aligned + let offset = layout.size.next_multiple_of(field_layout.align); + + // Update alignment + layout.align = max(layout.align, field_layout.align); + + // Update niche. + // Take the largest niche. Preference for earlier niche if 2 fields have niches of same size. 
+ if let Some(field_niche) = &field_layout.niche { + if layout.niche.as_ref().is_none_or(|niche| field_niche.count > niche.count) { + let mut niche = field_niche.clone(); + niche.offset += offset; + layout.niche = Some(niche); + } + } + + // Next field starts after this one + layout.size = offset + field_layout.size; + + // Return offset of this field + offset + } + + let offset_64 = update(&mut layout_64, &field_layout.layout_64); + let offset_32 = update(&mut layout_32, &field_layout.layout_32); + + // Store offset on `field` + let field = &mut schema.struct_def_mut(type_id).fields[field_index]; + field.offset = Offset { offset_64, offset_32 }; } + + // Round up size to alignment + layout_64.size = layout_64.size.next_multiple_of(layout_64.align); + layout_32.size = layout_32.size.next_multiple_of(layout_32.align); + + Layout { layout_64, layout_32 } } -fn assert_size_align(ty: &Type, size: usize, align: usize) -> TokenStream { - quote! { - ///@@line_break - assert!(size_of::<#ty>() == #size); - assert!(align_of::<#ty>() == #align); +/// Calculate layout for an enum. +/// +/// All enums in AST are `#[repr(C, u8)]` (if has fields) or `#[repr(u8)]` if fieldless. +/// +/// `#[repr(C, u8)]` enums have alignment of highest-aligned variant. +/// Size is size of largest variant + alignment of highest-aligned variant. +/// +/// Fieldless `#[repr(u8)]` enums obey the same rules. Fieldless variants act as size 0, align 1. +/// +/// `#[repr(C, u8)]` and `#[repr(u8)]` enums must always have at least one variant. +/// +/// Any unused discriminant values at start or end of the range form a niche.
+fn calculate_layout_for_enum(type_id: TypeId, schema: &mut Schema) -> Layout { + struct State { + min_discriminant: Discriminant, + max_discriminant: Discriminant, + layout_64: PlatformLayout, + layout_32: PlatformLayout, } + + fn process_variants(type_id: TypeId, state: &mut State, schema: &mut Schema) { + let State { min_discriminant, max_discriminant, layout_64, layout_32 } = state; + + for variant_index in schema.enum_def(type_id).variant_indices() { + let variant = &schema.enum_def(type_id).variants[variant_index]; + + *min_discriminant = min(*min_discriminant, variant.discriminant); + *max_discriminant = max(*max_discriminant, variant.discriminant); + + if let Some(variant_type_id) = variant.field_type_id { + let variant_layout = calculate_layout(variant_type_id, schema); + + layout_64.size = max(layout_64.size, variant_layout.layout_64.size); + layout_64.align = max(layout_64.align, variant_layout.layout_64.align); + layout_32.size = max(layout_32.size, variant_layout.layout_32.size); + layout_32.align = max(layout_32.align, variant_layout.layout_32.align); + } + } + + for inherits_index in schema.enum_def(type_id).inherits_indices() { + let inherits_type_id = schema.enum_def(type_id).inherits[inherits_index]; + process_variants(inherits_type_id, state, schema); + } + } + + let mut state = State { + min_discriminant: Discriminant::MAX, + max_discriminant: 0, + layout_64: PlatformLayout::from_size_align(0, 1), + layout_32: PlatformLayout::from_size_align(0, 1), + }; + process_variants(type_id, &mut state, schema); + let State { min_discriminant, max_discriminant, mut layout_64, mut layout_32 } = state; + + layout_64.size += layout_64.align; + layout_32.size += layout_32.align; + + // Any unused discriminant values at start or end of the range form a niche. + // Note: The unused discriminants must be at start or end of range, *not* in the middle. + // `#[repr(u8)] enum Foo { A = 0, B = 255 }` has no niche.
+ // The largest available range (from start or from end) is used for the niche. + let niches_start = min_discriminant; + let niches_end = Discriminant::MAX - max_discriminant; + + if niches_start != 0 || niches_end != 0 { + let is_range_start = niches_start >= niches_end; + let count = u32::from(if is_range_start { niches_start } else { niches_end }); + let niche = Niche::new(0, 1, is_range_start, count); + layout_64.niche = Some(niche.clone()); + layout_32.niche = Some(niche); + } + + Layout { layout_64, layout_32 } } -fn with_offsets_assertion( - mut tk: TokenStream, - ty: &Type, - fields: &[FieldDef], - offsets: Option<&[usize]>, -) -> TokenStream { - let Some(offsets) = offsets else { return tk }; - - let assertions = fields.iter().zip(offsets).filter(|(field, _)| field.vis.is_pub()).map( - |(field, offset)| { - let field = field.name.as_ref().map(ToIdent::to_ident); - quote! { - assert!(offset_of!(#ty, #field) == #offset); +/// Calculate layout for an `Option`. +/// +/// * If inner type has a niche: +/// `Option` uses that niche to represent `None`. +/// The `Option` is same size and alignment as the inner type. +/// * If inner type has no niche: +/// The `Option`'s size = inner type size + inner type alignment. +/// `Some` / `None` discriminant is stored as a `bool` in first byte. +/// This introduces a new niche, identical to a struct with `bool` as first field. 
+fn calculate_layout_for_option(type_id: TypeId, schema: &mut Schema) -> Layout { + let option_def = schema.option_def(type_id); + let inner_layout = calculate_layout(option_def.inner_type_id, schema); + + #[expect(clippy::items_after_statements)] + fn consume_niche(layout: &mut PlatformLayout) { + if let Some(niche) = &mut layout.niche { + if niche.count == 1 { + layout.niche = None; + } else { + niche.count -= 1; } + } else { + layout.size += layout.align; + layout.niche = Some(Niche::new(0, 1, false, 254)); + } + } + + let mut layout = inner_layout.clone(); + consume_niche(&mut layout.layout_64); + consume_niche(&mut layout.layout_32); + layout +} + +/// Calculate layout for a `Box`. +/// +/// All `Box`es have same layout, regardless of the inner type. +/// `Box`es are pointer-sized, with a single niche (like `NonNull`). +fn calculate_layout_for_box() -> Layout { + Layout { + layout_64: PlatformLayout::from_size_align_niche(8, 8, Niche::new(0, 8, true, 1)), + layout_32: PlatformLayout::from_size_align_niche(4, 4, Niche::new(0, 4, true, 1)), + } +} + +/// Calculate layout for a `Vec`. +/// +/// All `Vec`s have same layout, regardless of the inner type. +/// `Vec`s contain 4 x pointer-sized fields. +/// They have a single niche on the first field - the pointer which is `NonNull`. +fn calculate_layout_for_vec() -> Layout { + Layout { + layout_64: PlatformLayout::from_size_align_niche(32, 8, Niche::new(0, 8, true, 1)), + layout_32: PlatformLayout::from_size_align_niche(16, 4, Niche::new(0, 4, true, 1)), + } +} + +/// Calculate layout for a `Cell`. +/// +/// `Cell`s have same layout as their inner type, but with no niche. 
+fn calculate_layout_for_cell(type_id: TypeId, schema: &mut Schema) -> Layout { + let cell_def = schema.cell_def(type_id); + let inner_layout = calculate_layout(cell_def.inner_type_id, schema); + + let mut layout = inner_layout.clone(); + layout.layout_64.niche = None; + layout.layout_32.niche = None; + layout +} + +/// Calculate layout for a primitive. +/// +/// Primitives have varying layouts. Some have niches, most don't. +fn calculate_layout_for_primitive(primitive_def: &PrimitiveDef) -> Layout { + // `ScopeId`, `SymbolId` and `ReferenceId` are a `NonZeroU32`, with a niche for 0 + let semantic_id_layout = Layout::from_size_align_niche(4, 4, Niche::new(0, 4, true, 1)); + // `&str` and `Atom` are a `NonNull` pointer + `usize` pair. Niche for 0 on the pointer field + let str_layout = Layout { + layout_64: PlatformLayout::from_size_align_niche(16, 8, Niche::new(0, 8, true, 1)), + layout_32: PlatformLayout::from_size_align_niche(8, 4, Niche::new(0, 4, true, 1)), + }; + // `usize` and `isize` are pointer-sized, but with no niche + let usize_layout = Layout { + layout_64: PlatformLayout::from_size_align(8, 8), + layout_32: PlatformLayout::from_size_align(4, 4), + }; + + #[expect(clippy::match_same_arms)] + match primitive_def.name() { + "bool" => Layout::from_size_align_niche(1, 1, Niche::new(0, 1, false, 254)), + "u8" => Layout::from_type::(), + "u16" => Layout::from_type::(), + "u32" => Layout::from_type::(), + "u64" => Layout::from_type::(), + "u128" => { + panic!("Cannot calculate alignment for `u128`. It differs depending on Rust version.") + } + "usize" => usize_layout.clone(), + "i8" => Layout::from_type::(), + "i16" => Layout::from_type::(), + "i32" => Layout::from_type::(), + "i64" => Layout::from_type::(), + "i128" => { + panic!("Cannot calculate alignment for `i128`. 
It differs depending on Rust version.") + } + "isize" => usize_layout.clone(), + "f32" => Layout::from_type::(), + "f64" => Layout::from_type::(), + "&str" => str_layout.clone(), + "Atom" => str_layout, + "ScopeId" => semantic_id_layout.clone(), + "SymbolId" => semantic_id_layout.clone(), + "ReferenceId" => semantic_id_layout, + "PointerAlign" => Layout { + layout_64: PlatformLayout::from_size_align(0, 8), + layout_32: PlatformLayout::from_size_align(0, 4), }, - ); - tk.extend(assertions); - tk + name => panic!("Unknown primitive type: {name}"), + } +} + +/// Generate layout assertions for a type +fn generate_layout_assertions( + type_def: &TypeDef, +) -> (/* 64 bit */ TokenStream, /* 32 bit */ TokenStream) { + match type_def { + TypeDef::Struct(struct_def) => generate_layout_assertions_for_struct(struct_def), + TypeDef::Enum(enum_def) => generate_layout_assertions_for_enum(enum_def), + _ => (quote!(), quote!()), + } +} + +/// Generate layout assertions for a struct. +/// This includes size and alignment assertions, plus assertions about offset of fields. +fn generate_layout_assertions_for_struct(struct_def: &StructDef) -> (TokenStream, TokenStream) { + fn gen(struct_def: &StructDef, is_64: bool, struct_ident: &Ident) -> TokenStream { + let layout = + if is_64 { &struct_def.layout.layout_64 } else { &struct_def.layout.layout_32 }; + + let size_align_assertions = generate_size_align_assertions(layout, struct_ident); + + let offset_asserts = struct_def.fields.iter().filter_map(|field| { + if field.visibility != Visibility::Public { + // Cannot create assertions for fields which are not public, as assertions + // are generated in `oxc_ast` crate, and those types are in other crates + return None; + } + + let field_ident = field.ident(); + let offset = + if is_64 { field.offset.offset_64 } else { field.offset.offset_32 } as usize; + // TODO: Don't print numbers as `4usize` - just `4` would be fine + Some(quote! 
{ + assert!(offset_of!(#struct_ident, #field_ident) == #offset); + }) + }); + + quote! { + #size_align_assertions + #(#offset_asserts)* + } + } + + let ident = struct_def.ident(); + (gen(struct_def, true, &ident), gen(struct_def, false, &ident)) +} + +/// Generate layout assertions for an enum. +/// This is just size and alignment assertions. +fn generate_layout_assertions_for_enum(enum_def: &EnumDef) -> (TokenStream, TokenStream) { + let ident = enum_def.ident(); + ( + generate_size_align_assertions(&enum_def.layout.layout_64, &ident), + generate_size_align_assertions(&enum_def.layout.layout_32, &ident), + ) +} + +/// Generate size and alignment assertions for a type. +fn generate_size_align_assertions(layout: &PlatformLayout, ident: &Ident) -> TokenStream { + let size = layout.size as usize; + let align = layout.align as usize; + // TODO: Don't print numbers as `4usize` - just `4` would be fine + quote! { + ///@@line_break + assert!(size_of::<#ident>() == #size); + assert!(align_of::<#ident>() == #align); + } } diff --git a/tasks/ast_tools/src/generators/ast_builder.rs b/tasks/ast_tools/src/generators/ast_builder.rs index bc27758b0e1c3..f24d63cbe34d6 100644 --- a/tasks/ast_tools/src/generators/ast_builder.rs +++ b/tasks/ast_tools/src/generators/ast_builder.rs @@ -1,569 +1,517 @@ -use std::{borrow::Cow, stringify}; +//! Generator for `AstBuilder`. 
-use convert_case::{Case, Casing}; use itertools::Itertools; use proc_macro2::TokenStream; -use quote::{format_ident, quote, ToTokens}; -use syn::{parse_quote, Ident, Type}; +use quote::{format_ident, quote}; +use syn::Ident; use crate::{ output::{output_path, Output}, - schema::{ - EnumDef, FieldDef, GetIdent, Schema, StructDef, ToType, TypeDef, TypeName, VariantDef, - }, - util::{is_reserved_name, TypeAnalysis, TypeWrapper}, - Generator, + schema::{Def, EnumDef, FieldDef, Schema, StructDef, TypeDef, VariantDef}, + utils::is_reserved_name, + Codegen, Generator, }; use super::define_generator; -pub const BLACK_LIST: [&str; 1] = ["Span"]; +/// Types to omit builder method for. +const BLACK_LIST: [&str; 1] = ["Span"]; +/// Semantic ID types. +/// We generate builder methods both with and without these fields for types which include any of them. +const SEMANTIC_ID_TYPES: [&str; 3] = ["ScopeId", "SymbolId", "ReferenceId"]; + +/// Generator for `AstBuilder`. pub struct AstBuilderGenerator; define_generator!(AstBuilderGenerator); impl Generator for AstBuilderGenerator { - fn generate(&mut self, schema: &Schema) -> Output { + /// Generate `AstBuilder`. + fn generate(&self, schema: &Schema, _codegen: &Codegen) -> Output { let fns = schema - .defs + .types .iter() - .filter(|def| { - let is_visitable = def.is_visitable(); - let is_blacklisted = BLACK_LIST.contains(&def.name()); - is_visitable && !is_blacklisted + .filter(|&type_def| { + let is_visited = match type_def { + TypeDef::Struct(struct_def) => struct_def.visit.is_visited, + TypeDef::Enum(enum_def) => enum_def.visit.is_visited, + _ => false, + }; + let is_blacklisted = BLACK_LIST.contains(&type_def.name()); + is_visited && !is_blacklisted }) - .map(|it| generate_builder_fn(it, schema)) - .collect_vec(); - - Output::Rust { - path: output_path(crate::AST_CRATE, "ast_builder.rs"), - tokens: quote! { - //! 
AST node factories - - //!@@line_break - #![allow( - clippy::default_trait_access, - clippy::too_many_arguments, - clippy::fn_params_excessive_bools, - )] - - ///@@line_break - use std::cell::Cell; - - ///@@line_break - use oxc_allocator::{Allocator, Box, IntoIn, Vec}; - use oxc_syntax::{scope::ScopeId, symbol::SymbolId, reference::ReferenceId}; - - ///@@line_break - use crate::ast::*; - - ///@@line_break - /// AST builder for creating AST nodes - #[derive(Clone, Copy)] - pub struct AstBuilder<'a> { - /// The memory allocator used to allocate AST nodes in the arena. - pub allocator: &'a Allocator, - } + .map(|type_def| generate_builder_methods(type_def, schema)) + .collect::(); - ///@@line_break - impl<'a> AstBuilder<'a> { - #(#fns)* - } - }, - } + let output = quote! { + //! AST node factories + + //!@@line_break + #![allow( + clippy::default_trait_access, + clippy::too_many_arguments, + clippy::fn_params_excessive_bools, + )] + + ///@@line_break + use std::cell::Cell; + + ///@@line_break + use oxc_allocator::{Allocator, Box, IntoIn, Vec}; + use oxc_syntax::{scope::ScopeId, symbol::SymbolId, reference::ReferenceId}; + + ///@@line_break + use crate::ast::*; + + ///@@line_break + /// AST builder for creating AST nodes + #[derive(Clone, Copy)] + pub struct AstBuilder<'a> { + /// The memory allocator used to allocate AST nodes in the arena. + pub allocator: &'a Allocator, + } + + ///@@line_break + impl<'a> AstBuilder<'a> { + #fns + } + }; + + Output::Rust { path: output_path(crate::AST_CRATE, "ast_builder.rs"), tokens: output } } } -fn fn_ident_name>(ident: S) -> String { - ident.as_ref().to_case(Case::Snake) +/// Param for a builder function. +/// +/// Contains reference to the struct field, and various other bits of data derived from it. +#[expect(clippy::struct_field_names)] +struct Param<'d> { + /// Struct field which this param is for + field: &'d FieldDef, + /// Struct field name identifier + ident: TokenStream, + /// Function parameter e.g. 
`span: Span` + fn_param: TokenStream, + /// `true` if is a default param (semantic ID) + is_default: bool, + /// `true` if this param has a generic param e.g. `type_annotation: T1` (`T1` is generic) + has_generic_param: bool, +} + +/// Generate builder methods for a type. +fn generate_builder_methods(type_def: &TypeDef, schema: &Schema) -> TokenStream { + match type_def { + TypeDef::Struct(struct_def) => generate_builder_methods_for_struct(struct_def, schema), + TypeDef::Enum(enum_def) => generate_builder_methods_for_enum(enum_def, schema), + _ => unreachable!(), + } } -fn enum_builder_name(enum_name: String, var_name: String) -> Ident { - // replace `xxx_yyy_xxx` with `xxx_yyy`. - let var_name = if var_name.ends_with(enum_name.as_str()) { - var_name.chars().take(var_name.len() - enum_name.len()).collect::() - // replace `ts_xxx_ts_yyy` with `ts_xxx_yyy` - } else if enum_name.starts_with("TS") && var_name.starts_with("TS") { - var_name.chars().skip(2).collect::() +/// Generate builder methods for a struct. +/// +/// Generates two builder methods: +/// 1. To build an owned type e.g. `boolean_literal`. +/// 2. To build a boxed type e.g. `alloc_boolean_literal`. 
+fn generate_builder_methods_for_struct(struct_def: &StructDef, schema: &Schema) -> TokenStream { + let (mut params, generic_params, where_clause, has_default_fields) = + get_struct_params(struct_def, schema); + let (fn_params, fields) = get_struct_fn_params_and_fields(¶ms, true); + + let (fn_name_postfix, doc_postfix) = if has_default_fields { + let default_params = params.iter().filter(|param| param.is_default); + let fn_name_postfix = format!( + "_with_{}", + default_params.clone().map(|param| param.field.name()).join("_and_") + ); + let doc_postfix = format!( + " with `{}`", + default_params + .map(|param| { param.field.type_def(schema).innermost_type(schema).name() }) + .join("` and `") + ); + (fn_name_postfix, doc_postfix) } else { - var_name + (String::new(), String::new()) }; - format_ident!("{}_{}", fn_ident_name(enum_name), fn_ident_name(var_name)) -} + // Generate builder functions including all fields (inc default fields) + let output = generate_builder_methods_for_struct_impl( + struct_def, + ¶ms, + &fn_params, + &fields, + &generic_params, + &where_clause, + &fn_name_postfix, + &doc_postfix, + schema, + ); -fn struct_builder_name(name: &str, does_alloc: bool) -> Ident { - if does_alloc { - format_ident!("alloc_{name}") - } else if is_reserved_name(name) { - format_ident!("{name}_") - } else { - format_ident!("{name}") + if !has_default_fields { + return output; } -} -fn generate_builder_fn(def: &TypeDef, schema: &Schema) -> TokenStream { - match def { - TypeDef::Enum(def) => generate_enum_builder_fn(def, schema), - TypeDef::Struct(def) => generate_struct_builder_fn(def, schema), - } -} + // Generate builder functions excluding default fields + let (fn_params, fields) = get_struct_fn_params_and_fields(¶ms, false); + params.retain(|param| !param.is_default); + let mut output2 = generate_builder_methods_for_struct_impl( + struct_def, + ¶ms, + &fn_params, + &fields, + &generic_params, + &where_clause, + "", + "", + schema, + ); + + output2.extend(output); 
-fn generate_enum_builder_fn(def: &EnumDef, schema: &Schema) -> TokenStream { - def.variants.iter().map(|it| generate_enum_variant_builder_fn(def, it, schema)).collect() + output2 } -/// Create a builder function for an enum variant (e.g. for `Expression::Binary`) -fn generate_enum_variant_builder_fn( - enum_: &EnumDef, - variant: &VariantDef, +/// Build a pair of builder methods for a struct. +/// +/// This is a separate function as may need to be called twice, with and without semantic ID fields. +#[expect(clippy::too_many_arguments)] +fn generate_builder_methods_for_struct_impl( + struct_def: &StructDef, + params: &[Param], + fn_params: &TokenStream, + fields: &TokenStream, + generic_params: &TokenStream, + where_clause: &TokenStream, + fn_name_postfix: &str, + doc_postfix: &str, schema: &Schema, ) -> TokenStream { - assert_eq!(variant.fields.len(), 1); - let enum_ident = enum_.ident(); - let enum_type = &enum_.to_type(); - let var_ident = &variant.ident(); - let var_type = &variant.fields.first().expect("we have already asserted this one!").typ; - let var_type_name = &var_type.name(); - let fn_name = enum_builder_name(enum_ident.to_string(), var_type_name.inner_name().to_string()); - let ty = var_type - .type_id() - .or_else(|| var_type.transparent_type_id()) - .and_then(|id| schema.get(id)) - .expect("type not found!"); - - let TypeDef::Struct(field_def) = ty else { panic!("Unsupported!") }; - - let params = get_struct_params(field_def, schema); - let params = params.into_iter().filter(Param::not_default).collect_vec(); - let fields = params.iter().map(|it| it.ident.clone()); - let (generic_params, where_clause) = get_generic_params(¶ms); - - let does_alloc = matches!(var_type_name, TypeName::Box(_)); - let inner_builder = struct_builder_name(&fn_ident_name(&field_def.name), does_alloc); - let inner = quote!(self.#inner_builder(#(#fields),*)); - - let article = article_for(enum_ident.to_string()); - let mut docs = DocComment::new(format!(" Build {article} 
[`{enum_ident}::{var_ident}`].")) - .with_params(¶ms); - if does_alloc { - let inner_name = var_type_name.inner_name(); - let inner_article = article_for(inner_name); - docs = docs.with_description(format!( - "This node contains {inner_article} [`{inner_name}`] that will be stored in the memory arena." - )); + let struct_ident = struct_def.ident(); + let struct_ty = struct_def.ty(schema); + + let args = params.iter().map(|param| ¶m.ident); + + let mut fn_name_base = struct_def.snake_name(); + if !fn_name_postfix.is_empty() { + fn_name_base.push_str(fn_name_postfix); } + let fn_name = struct_builder_name(&fn_name_base, false); + let alloc_fn_name = struct_builder_name(&fn_name_base, true); + + // Generate doc comments + let struct_name = struct_def.name(); + let article = article_for(struct_name); + let fn_doc1 = format!(" Build {article} [`{struct_name}`]{doc_postfix}."); + let fn_doc2 = format!(" If you want the built node to be allocated in the memory arena, use [`AstBuilder::{alloc_fn_name}`] instead."); + let alloc_doc1 = format!( + " Build {article} [`{struct_name}`]{doc_postfix}, and store it in the memory arena." + ); + let alloc_doc2 = format!(" Returns a [`Box`] containing the newly-allocated node. If you want a stack-allocated node, use [`AstBuilder::{fn_name}`] instead."); + let params_docs = generate_doc_comment_for_params(params); quote! 
{ ///@@line_break - #docs + #[doc = #fn_doc1] + #[doc = ""] + #[doc = #fn_doc2] + #params_docs #[inline] - pub fn #fn_name #generic_params (self, #(#params),*) -> #enum_type #where_clause { - #enum_ident::#var_ident(#inner) + pub fn #fn_name #generic_params (self, #fn_params) -> #struct_ty #where_clause { + #struct_ident { #fields } } - } -} -fn default_init_field(field: &FieldDef) -> bool { - let ident = field.ident().expect("expected named field"); - matches!( - (ident.to_string().as_str(), field.typ.raw()), - ("scope_id", "Cell>") - | ("symbol_id", "Cell>") - | ("reference_id", "Cell>") - ) + ///@@line_break + #[doc = #alloc_doc1] + #[doc = ""] + #[doc = #alloc_doc2] + #params_docs + #[inline] + pub fn #alloc_fn_name #generic_params (self, #fn_params) -> Box<'a, #struct_ty> #where_clause { + Box::new_in(self.#fn_name(#(#args),*), self.allocator) + } + } } -/// Generate builder function for struct. -/// -/// Generates functions: -/// 1. to create owned object. -/// 2. to create boxed object. +/// Get params for builder method for struct. /// -/// If type has default fields (`scope_id`, `symbol_id`, `reference_id`), also generates functions: +/// Also generate generic params and where clause for the method. /// -/// 3. to create owned object with provided `ScopeId` / `SymbolId` / `ReferenceId`. -/// 4. to create boxed object with provided `ScopeId` / `SymbolId` / `ReferenceId`. 
-fn generate_struct_builder_fn(ty: &StructDef, schema: &Schema) -> TokenStream { - let ident = ty.ident(); - let as_type = ty.to_type(); - let ty_name = fn_ident_name(&ty.name); - let fn_name = struct_builder_name(&ty_name, false); - let alloc_fn_name = struct_builder_name(&ty_name, true); - - let params_incl_defaults = get_struct_params(ty, schema); - let (generic_params, where_clause) = get_generic_params(¶ms_incl_defaults); - +/// ``` +/// // ↓↓↓↓ generic params +/// pub fn foo(self, span: Span, type_parameters: T1) -> Foo<'a> +/// where T1: IntoIn<'a, Option>>> {} +/// // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ where clause +/// ``` +fn get_struct_params<'s>( + struct_def: &'s StructDef, + schema: &'s Schema, +) -> ( + Vec>, // Params + TokenStream, // Generic params + TokenStream, // `where` clause + bool, // Has default fields +) { + // Only a single `Atom` or `&str` generic supported at present + let mut has_atom_generic = false; + let mut has_str_generic = false; + let mut generic_count = 0u32; let mut has_default_fields = false; - let mut params = vec![]; - let mut fn_params_incl_defaults = vec![]; - let mut default_field_names = vec![]; - let mut default_field_type_names = vec![]; - let mut fields = vec![]; - let mut fields_incl_defaults = vec![]; - let mut args = vec![]; - let mut args_incl_defaults = vec![]; - - for param in ¶ms_incl_defaults { - let mut field = if param.into_in { - let ident = ¶m.ident; - quote!(#ident: #ident.into_in(self.allocator)) - } else { - param.ident.to_token_stream() - }; - - if param.is_default && !has_default_fields { - has_default_fields = true; - fn_params_incl_defaults = params.iter().map(Param::to_token_stream).collect(); - fields_incl_defaults.clone_from(&fields); - args_incl_defaults.clone_from(&args); - } - if param.is_default { - let field_ident = ¶m.ident; - field = quote!(#field_ident: Default::default()); - - let field_name = field_ident.to_string(); - let field_type_name = match 
field_name.as_str() { - "scope_id" => "ScopeId", - "symbol_id" => "SymbolId", - "reference_id" => "ReferenceId", - _ => unreachable!(), - }; - let field_type_ident = format_ident!("{field_type_name}"); - fn_params_incl_defaults.push(quote!(#field_ident: #field_type_ident)); - fields_incl_defaults.push(quote!( #field_ident: Cell::new(Some(#field_ident)) )); + let mut generics = vec![]; - default_field_names.push(field_name); - default_field_type_names.push(field_type_name); - } else { - params.push(param.clone()); - args.push(param.ident.clone()); - - if has_default_fields { - fn_params_incl_defaults.push(param.to_token_stream()); - fields_incl_defaults.push(field.clone()); - } - } - - if has_default_fields { - args_incl_defaults.push(param.ident.clone()); - } + let params = struct_def + .fields + .iter() + .map(|field| { + let type_def = field.type_def(schema); + let ty = type_def.ty(schema); - fields.push(field); - } - - let article = article_for(ident.to_string()); - let create_docs = |fn_name, alloc_fn_name, params, extra| { - let fn_docs = DocComment::new(format!("Build {article} [`{ident}`]{extra}.")) - .with_description(format!("If you want the built node to be allocated in the memory arena, use [`AstBuilder::{alloc_fn_name}`] instead.")) - .with_params(params); - - let alloc_docs = DocComment::new(format!("Build {article} [`{ident}`]{extra}, and store it in the memory arena.")) - .with_description(format!("Returns a [`Box`] containing the newly-allocated node. 
If you want a stack-allocated node, use [`AstBuilder::{fn_name}`] instead.")) - .with_params(params); + let is_default = SEMANTIC_ID_TYPES.contains(&type_def.innermost_type(schema).name()); + if is_default { + has_default_fields = true; + }; - (fn_docs, alloc_docs) - }; + let generic_ident = match type_def { + TypeDef::Primitive(primitive_def) => match primitive_def.name() { + "Atom" if !has_atom_generic => { + has_atom_generic = true; + Some(format_ident!("A")) + } + "&str" if !has_str_generic => { + has_str_generic = true; + Some(format_ident!("S")) + } + _ => None, + }, + TypeDef::Box(_) => { + generic_count += 1; + Some(format_ident!("T{generic_count}")) + } + TypeDef::Option(option_def) if option_def.inner_type(schema).is_box() => { + generic_count += 1; + Some(format_ident!("T{generic_count}")) + } + _ => None, + }; + let has_generic_param = generic_ident.is_some(); + + let fn_param_ty = if is_default { + assert!(!has_generic_param); + type_def.innermost_type(schema).ty(schema) + } else if let Some(generic_ident) = generic_ident { + let where_clause_part = quote!( #generic_ident: IntoIn<'a, #ty> ); + let generic_ty = quote!( #generic_ident ); + generics.push((generic_ident, where_clause_part)); + generic_ty + } else { + ty + }; - let (fn_docs, alloc_docs) = create_docs(&fn_name, &alloc_fn_name, ¶ms, ""); + let field_ident = field.ident(); + let fn_param = quote!( #field_ident: #fn_param_ty ); - let mut output = quote! 
{ - ///@@line_break - #fn_docs - #[inline] - pub fn #fn_name #generic_params (self, #(#params),*) -> #as_type #where_clause { - #ident { #(#fields),* } - } + Param { field, ident: field_ident, fn_param, is_default, has_generic_param } + }) + .collect(); - ///@@line_break - #alloc_docs - #[inline] - pub fn #alloc_fn_name #generic_params (self, #(#params),*) -> Box<'a, #as_type> #where_clause { - Box::new_in(self.#fn_name(#(#args),*), self.allocator) - } + let (generic_params, where_clause) = if generics.is_empty() { + (quote!(), quote!()) + } else { + let generic_params = generics.iter().map(|(generic_ident, _)| generic_ident); + let generic_params = quote!( <#(#generic_params),*> ); + let where_clause = generics.iter().map(|(_, where_clause_part)| where_clause_part); + let where_clause = quote!( where #(#where_clause),* ); + (generic_params, where_clause) }; - if has_default_fields { - let fn_name = format_ident!("{ty_name}_with_{}", default_field_names.join("_and_")); - let alloc_fn_name = format_ident!("alloc_{fn_name}"); - - let with = format!(" with `{}`", default_field_type_names.iter().join("` and `")); - let (fn_docs, alloc_docs) = - create_docs(&fn_name, &alloc_fn_name, &params_incl_defaults, &with); - - output = quote! { - #output + (params, generic_params, where_clause, has_default_fields) +} - ///@@line_break - #fn_docs - #[inline] - pub fn #fn_name #generic_params (self, #(#fn_params_incl_defaults),*) -> #as_type #where_clause { - #ident { #(#fields_incl_defaults),* } - } +/// Get function params and fields for a struct builder method. +/// +/// Omit default fields from function params if `include_default_fields == false`.
+/// +/// ``` +/// // ↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓ function params +/// pub fn foo(span: Span, bar: Bar<'a>) -> Foo<'a> { +/// Foo { span, bar } +/// // ^^^^^^^^^ fields +/// } +/// ``` +fn get_struct_fn_params_and_fields( + params: &[Param], + include_default_fields: bool, +) -> (/* function params */ TokenStream, /* fields */ TokenStream) { + let mut fields = vec![]; + let fn_params = params.iter().filter_map(|param| { + let param_ident = &param.ident; - ///@@line_break - #alloc_docs - #[inline] - pub fn #alloc_fn_name #generic_params (self, #(#fn_params_incl_defaults),*) -> Box<'a, #as_type> #where_clause { - Box::new_in(self.#fn_name(#(#args_incl_defaults),*), self.allocator) + if param.is_default { + if include_default_fields { + fields.push(quote!( #param_ident: Cell::new(Some(#param_ident)) )); + return Some(&param.fn_param); } - }; - } - output -} + fields.push(quote!( #param_ident: Default::default() )); + return None; + } -// TODO: remove me -#[expect(dead_code)] -#[derive(Clone, Debug)] -struct Param { - is_default: bool, - analysis: TypeAnalysis, - ident: Ident, - ty: Type, - generic: Option<(/* predicate */ TokenStream, /* param name */ TokenStream)>, - into_in: bool, - docs: Vec, -} + let field = if param.has_generic_param { + quote!( #param_ident: #param_ident.into_in(self.allocator) ) + } else { + quote!( #param_ident ) + }; -impl Param { - fn is_default(&self) -> bool { - self.is_default - } + fields.push(field); - fn not_default(&self) -> bool { - !self.is_default() - } -} + Some(&param.fn_param) + }); -impl ToTokens for Param { - fn to_tokens(&self, tokens: &mut TokenStream) { - let ident = &self.ident; - let ty = &self.ty; - tokens.extend(quote!(#ident: #ty)); - } + let fn_params = quote!( #(#fn_params),* ); + let fields = quote!( #(#fields),* ); + (fn_params, fields) } -/// Represents a rusdoc comment that will be added to a generated function, -/// struct, etc.
-/// -/// [`DocComment`] implements [`ToTokens`], so you can use it in a [`quote!`] -/// block as normal. -/// -/// ```ignore -/// let docs = DocComment::new("This is a summary") -/// .with_description("This is a longer description"); -/// -/// let my_function = quote! { -/// #doc -/// fn my_function() { -/// } -/// } -/// ``` +/// Generate builder methods for an enum. /// -/// This generates comments in the following format: -/// -/// ```md -///

-/// -/// -/// -/// ## Parameters -/// - param1: some docs -/// - param2 -/// ``` -/// -/// 1. [`summary`] is a single-line overview about the thing being documented. -/// 2. [`description`] is a longer-form description that can span multiple -/// lines. It will be split into paragraphs for you. -/// 3. [`parameters`] is a bulleted list of function parameters. Documentation -/// for them can be extracted from struct fields and enums. This really only applies to functions. -/// -/// Each section only appears if there is content for it. Only [`summary`] is required. -/// -/// [`summary`]: DocComment::summary -/// [`description`]: DocComment::description -/// [`parameters`]: DocComment::params -/// -#[derive(Debug)] -struct DocComment<'p> { - /// Single-line summary. Put at the top of the comment. - summary: Cow<'static, str>, - /// Zero or more description paragraphs. - description: Vec>, - /// Function parameters, if applicable. Will be used to create a parameter - /// section that looks like this: - /// - /// ```md - /// ## Parameters - /// - first_param: some docs taken from the [`Param`] - /// - second_param - /// ``` - params: &'p [Param], +/// Generates a builder method for every variant of the enum (not including inherited variants). +fn generate_builder_methods_for_enum(enum_def: &EnumDef, schema: &Schema) -> TokenStream { + enum_def + .variants + .iter() + .map(|variant| generate_builder_method_for_enum_variant(enum_def, variant, schema)) + .collect() } -impl<'p> DocComment<'p> { - pub fn new(summary: S) -> Self - where - S: Into>, - { - Self { summary: Self::maybe_add_space(summary.into()), description: vec![], params: &[] } - } - - /// Add a longer-form description to the doc comment. - pub fn with_description(mut self, description: S) -> Self - where - S: Into>, - { - self.description = vec![Self::maybe_add_space(description.into())]; - self +/// Generate builder method for an enum variant. 
+#[expect(clippy::similar_names)] +fn generate_builder_method_for_enum_variant( + enum_def: &EnumDef, + variant: &VariantDef, + schema: &Schema, +) -> TokenStream { + let mut variant_type = variant.field_type(schema).unwrap(); + let mut is_boxed = false; + if let TypeDef::Box(box_def) = variant_type { + variant_type = box_def.inner_type(schema); + is_boxed = true; } + let TypeDef::Struct(variant_type) = variant_type else { panic!("Unsupported!") }; - /// Add a description section made up of multiple lines. - /// - /// Each line will be turned into its own paragraph. - // TODO: remove me - #[expect(dead_code)] - pub fn with_description_lines(mut self, description: L) -> Self - where - S: Into>, - L: IntoIterator, - { - self.description = - description.into_iter().map(Into::into).map(Self::maybe_add_space).collect(); - self + let (mut params, generic_params, where_clause, has_default_fields) = + get_struct_params(variant_type, schema); + if has_default_fields { + params.retain(|param| !param.is_default); } - /// Add a section documenting function parameters. 
- pub fn with_params(mut self, params: &'p Vec) -> Self { - self.params = params.as_slice(); - self + let fn_params = params.iter().map(|param| &param.fn_param); + let args = params.iter().map(|param| &param.ident); + + let enum_ident = enum_def.ident(); + let enum_ty = enum_def.ty(schema); + let fn_name = enum_variant_builder_name(enum_def, variant, schema); + let variant_ident = variant.ident(); + let inner_builder_name = struct_builder_name(&variant_type.snake_name(), is_boxed); + + // Generate doc comments + let enum_name = enum_def.name(); + let article_enum = article_for(enum_name); + let variant_name = variant.ident(); + let fn_doc1 = format!(" Build {article_enum} [`{enum_name}::{variant_name}`]."); + let mut fn_docs = quote!( #[doc = #fn_doc1] ); + if is_boxed { + let variant_type_name = variant_type.name(); + let article_variant = article_for(variant_type_name); + let fn_doc2 = format!( + " This node contains {article_variant} [`{variant_type_name}`] that will be stored in the memory arena." + ); + fn_docs.extend(quote!( #[doc = ""] #[doc = #fn_doc2] )); } + let params_docs = generate_doc_comment_for_params(&params); - /// Add a leading space to a doc comment line if it doesn't already have one. - /// This makes it easier to read, since the comment won't be directly next - /// to the `///`. - fn maybe_add_space(s: Cow<'static, str>) -> Cow<'static, str> { - if s.is_empty() || s.starts_with(' ') { - s - } else { - Cow::Owned(format!(" {s}")) + quote! { + ///@@line_break + #fn_docs + #params_docs + #[inline] + pub fn #fn_name #generic_params(self, #(#fn_params),*) -> #enum_ty #where_clause { + #enum_ident::#variant_ident(self.#inner_builder_name(#(#args),*)) } } } -/// Get the correct article (a/an) that should precede a `word`. +/// Get name of struct builder method. /// -/// # Panics -/// Panics if `word` is empty.
-fn article_for>(word: S) -> &'static str { - match word.as_ref().chars().next().unwrap().to_ascii_lowercase() { - 'a' | 'e' | 'i' | 'o' | 'u' => "an", - _ => "a", +/// If `does_alloc == true`, prepends `alloc_` to start of name. +fn struct_builder_name(snake_name: &str, does_alloc: bool) -> Ident { + if does_alloc { + format_ident!("alloc_{snake_name}") + } else if is_reserved_name(snake_name) { + format_ident!("{snake_name}_") + } else { + format_ident!("{snake_name}") } } -impl ToTokens for DocComment<'_> { - fn to_tokens(&self, tokens: &mut TokenStream) { - let summary = &self.summary; - tokens.extend(quote!( #[doc = #summary])); - - // print description - for line in &self.description { - // extra newline needed to create a new paragraph - tokens.extend(quote!( #[doc = ""])); - tokens.extend(quote!( #[doc = #line])); - } +/// Get name of enum variant builder method. +fn enum_variant_builder_name(enum_def: &EnumDef, variant: &VariantDef, schema: &Schema) -> Ident { + let enum_name = enum_def.snake_name(); - // print docs for function parameters - if !self.params.is_empty() { - tokens.extend(quote!( #[doc = ""])); - tokens.extend(quote!( #[doc = " ## Parameters"])); - for param in self.params { - let docs = param.docs.first(); - let docs = match docs { - Some(docs) => { - format!(" * `{}`: {}", param.ident, docs.trim()) - } - None if param.ident == "span" => { - " * `span`: The [`Span`] covering this node".to_string() - } - None => { - format!(" * `{}`", param.ident) - } - }; - tokens.extend(quote!(#[doc = #docs])); - } - } + // TODO: `let variant_name = variant.snake_name();` would be better + let mut variant_type = variant.field_type(schema).unwrap(); + if let TypeDef::Box(box_def) = variant_type { + variant_type = box_def.inner_type(schema); } + let variant_name = variant_type.snake_name(); + + let variant_name = if variant_name.len() > enum_name.len() + && variant_name.ends_with(&enum_name) + && variant_name.as_bytes()[variant_name.len() - enum_name.len() - 
1] == b'_' + { + // Replace `xxx_yyy_xxx` with `xxx_yyy` + &variant_name[..variant_name.len() - enum_name.len() - 1] + } else if enum_name.starts_with("ts_") && variant_name.starts_with("ts_") { + // Replace `ts_xxx_ts_yyy` with `ts_xxx_yyy` + &variant_name[3..] + } else { + &variant_name + }; + + format_ident!("{enum_name}_{variant_name}") } -fn get_generic_params( - params: &[Param], -) -> (/* generic params */ Option, /* where clause */ Option) { - let params = params.iter().filter(|it| it.generic.is_some()).collect_vec(); +/// Generate doc comment for function params. +fn generate_doc_comment_for_params(params: &[Param]) -> TokenStream { if params.is_empty() { - return Default::default(); + return quote!(); } - let len = params.len(); - let (predicates, params) = params.into_iter().fold( - (Vec::with_capacity(len), Vec::with_capacity(len)), - |mut acc, it| { - let generic = - it.generic.as_ref().expect("non-generics should be filtered out at this point."); - acc.0.push(&generic.0); - acc.1.push(&generic.1); - acc - }, - ); - (Some(quote!(<#(#params),*>)), Some(quote!(where #(#predicates),*))) + let lines = params.iter().map(|param| { + let field = param.field; + // TODO: `field.name()` would be better. + let field_ident = field.ident(); + let field_comment = if let Some(field_comment) = field.doc_comment.as_deref() { + format!(" * `{field_ident}`: {field_comment}") + } else if field.name() == "span" { + " * `span`: The [`Span`] covering this node".to_string() + } else { + format!(" * `{field_ident}`") + }; + quote!( #[doc = #field_comment] ) + }); + + quote! { + /// + /// ## Parameters + #(#lines)* + } } -// TODO: currently doesn't support multiple `Atom` or `&'a str` params. 
-fn get_struct_params(struct_: &StructDef, schema: &Schema) -> Vec { - // generic param postfix - let mut t_count = 0; - let mut t_param = move || { - t_count += 1; - format_ident!("T{t_count}").to_token_stream() - }; - struct_.fields.iter().fold(Vec::new(), |mut acc, field| { - let analysis = field.typ.analysis(); - let type_def = field.typ.transparent_type_id().and_then(|id| schema.get(id)); - let (interface_typ, generic_typ) = match (&analysis.wrapper, type_def) { - (TypeWrapper::Box, Some(def)) => { - let t = t_param(); - let typ = def.to_type(); - (Some(parse_quote!(#t)), Some((quote!(#t: IntoIn<'a, Box<'a, #typ>>), t))) - } - (TypeWrapper::OptBox, Some(def)) => { - let t = t_param(); - let typ = def.to_type(); - (Some(parse_quote!(#t)), Some((quote!(#t: IntoIn<'a, Option>>), t))) - } - (TypeWrapper::Ref, None) if field.typ.is_str_slice() => { - let t = format_ident!("S").to_token_stream(); - (Some(parse_quote!(#t)), Some((quote!(#t: IntoIn<'a, &'a str>), t))) - } - (TypeWrapper::None, None) if field.typ.name().inner_name() == "Atom" => { - let t = format_ident!("A").to_token_stream(); - (Some(parse_quote!(#t)), Some((quote!(#t: IntoIn<'a, Atom<'a>>), t))) - } - _ => (None, None), - }; - let ty = interface_typ.unwrap_or_else(|| field.typ.to_type()); - acc.push(Param { - is_default: default_init_field(field), - analysis: analysis.clone(), - ident: field.ident().expect("expected named ident! on struct"), - ty, - into_in: generic_typ.is_some(), - generic: generic_typ, - docs: field.docs.clone(), - }); - acc - }) +/// Get the correct article ("a" / "an") that should precede a word in a doc comment. 
+fn article_for(word: &str) -> &'static str { + match word.as_bytes().first().map(u8::to_ascii_uppercase) { + Some(b'A' | b'E' | b'I' | b'O' | b'U') => "an", + _ => "a", + } } diff --git a/tasks/ast_tools/src/generators/ast_kind.rs b/tasks/ast_tools/src/generators/ast_kind.rs index 9f6a8d1341132..1b44eadd35203 100644 --- a/tasks/ast_tools/src/generators/ast_kind.rs +++ b/tasks/ast_tools/src/generators/ast_kind.rs @@ -1,22 +1,29 @@ -use convert_case::{Case, Casing}; -use itertools::Itertools; +//! Generator of code related to `AstKind`. +//! +//! * `AstType` type definition. +//! * `AstKind` type definition. +//! * `AstKind::ty` method. +//! * `AstKind::as_*` methods. +//! * `GetSpan` impl for `AstKind`. +//! +//! Variants of `AstKind` and `AstType` are not created for types listed in `BLACK_LIST` below. + use proc_macro2::Span; use quote::{format_ident, quote}; -use syn::{parse_quote, Arm, ImplItemFn, LitInt}; +use syn::LitInt; use crate::{ output::{output_path, Output}, - schema::{GetIdent, Schema, ToType}, - Generator, + schema::{Def, Schema, TypeDef}, + Codegen, Generator, }; use super::define_generator; -pub struct AstKindGenerator; - -define_generator!(AstKindGenerator); - -pub const BLACK_LIST: [&str; 62] = [ +/// Types to omit creating an `AstKind` for. +/// +/// Apart from this list every type with `#[ast(visit)]` attr gets an `AstKind`. 
+const BLACK_LIST: [&str; 62] = [ "Span", "Expression", "ObjectPropertyKind", @@ -81,115 +88,140 @@ pub const BLACK_LIST: [&str; 62] = [ "JSXSpreadChild", ]; -impl Generator for AstKindGenerator { - fn generate(&mut self, schema: &Schema) -> Output { - let have_kinds = schema - .defs - .iter() - .filter(|def| { - let is_visitable = def.is_visitable(); - let is_blacklisted = BLACK_LIST.contains(&def.name()); - is_visitable && !is_blacklisted - }) - .map(|def| { - let ident = def.ident(); - let typ = def.to_type(); - (ident, typ) - }) - .collect_vec(); - - let (types, kinds): (Vec<_>, Vec<_>) = have_kinds - .iter() - .enumerate() - .map(|(index, (ident, typ))| { - let index = u8::try_from(index).unwrap(); - let index = LitInt::new(&index.to_string(), Span::call_site()); - let type_variant = quote!( #ident = #index ); - let kind_variant = quote!( #ident(&'a #typ) = AstType::#ident as u8 ); - (type_variant, kind_variant) - }) - .unzip(); - - let span_matches: Vec = have_kinds - .iter() - .map(|(ident, _)| parse_quote!(Self :: #ident(it) => it.span())) - .collect_vec(); - - let as_ast_kind_impls: Vec = have_kinds - .iter() - .map(|(ident, typ)| { - let snake_case_name = - format_ident!("as_{}", ident.to_string().to_case(Case::Snake)); - parse_quote!( - ///@@line_break - #[inline] - pub fn #snake_case_name(self) -> Option<&'a #typ> { - if let Self::#ident(v) = self { - Some(v) - } else { - None - } - } - ) - }) - .collect_vec(); - - Output::Rust { - path: output_path(crate::AST_CRATE, "ast_kind.rs"), - tokens: quote! { - #![allow(missing_docs)] ///@ FIXME (in ast_tools/src/generators/ast_kind.rs) +/// Generator for `AstKind`, `AstType`, and related code. +pub struct AstKindGenerator; - ///@@line_break - use std::ptr; +define_generator!(AstKindGenerator); - ///@@line_break - use oxc_span::{GetSpan, Span}; +impl Generator for AstKindGenerator { + /// Set `has_kind` for structs and enums which are visited, and not on blacklist. 
+ fn prepare(&self, schema: &mut Schema) { + // Set `has_kind` to `true` for all visited types + for type_def in &mut schema.types { + match type_def { + TypeDef::Struct(struct_def) => { + struct_def.kind.has_kind = struct_def.visit.is_visited; + } + TypeDef::Enum(enum_def) => { + enum_def.kind.has_kind = enum_def.visit.is_visited; + } + _ => {} + } + } - ///@@line_break - use crate::ast::*; + // Set `has_kind` to `false` for types on blacklist + for type_name in BLACK_LIST { + let type_def = schema.type_by_name_mut(type_name); + match type_def { + TypeDef::Struct(struct_def) => struct_def.kind.has_kind = false, + TypeDef::Enum(enum_def) => enum_def.kind.has_kind = false, + _ => panic!( + "Type which is not a struct or enum on `AstKind` blacklist: `{}`", + type_def.name() + ), + } + } + } + /// Generate `AstKind` etc definitions. + fn generate(&self, schema: &Schema, _codegen: &Codegen) -> Output { + let mut type_variants = quote!(); + let mut kind_variants = quote!(); + let mut span_match_arms = quote!(); + let mut as_methods = quote!(); + + let mut next_index = 0usize; + for type_def in &schema.types { + let has_kind = match type_def { + TypeDef::Struct(struct_def) => struct_def.kind.has_kind, + TypeDef::Enum(enum_def) => enum_def.kind.has_kind, + _ => false, + }; + if !has_kind { + continue; + } + + let type_ident = type_def.ident(); + let type_ty = type_def.ty(schema); + + let index = u8::try_from(next_index).unwrap(); + let index = LitInt::new(&index.to_string(), Span::call_site()); + type_variants.extend(quote!( #type_ident = #index, )); + kind_variants.extend(quote!( #type_ident(&'a #type_ty) = AstType::#type_ident as u8, )); + + span_match_arms.extend(quote!( Self::#type_ident(it) => it.span(), )); + + let as_method_name = format_ident!("as_{}", type_def.snake_name()); + as_methods.extend(quote! 
{ ///@@line_break - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - #[repr(u8)] - pub enum AstType { - #(#types),*, + #[inline] + pub fn #as_method_name(self) -> Option<&'a #type_ty> { + if let Self::#type_ident(v) = self { + Some(v) + } else { + None + } } + }); - ///@@line_break - /// Untyped AST Node Kind - #[derive(Debug, Clone, Copy)] - #[repr(C, u8)] - pub enum AstKind<'a> { - #(#kinds),*, - } + next_index += 1; + } - ///@@line_break - impl AstKind<'_> { - /// Get the [`AstType`] of an [`AstKind`]. - #[inline] - pub fn ty(&self) -> AstType { - ///@ SAFETY: `AstKind` is `#[repr(C, u8)]`, so discriminant is stored in first byte, - ///@ and it's valid to read it. - ///@ `AstType` is also `#[repr(u8)]` and `AstKind` and `AstType` both have the same - ///@ discriminants, so it's valid to read `AstKind`'s discriminant as `AstType`. - unsafe { *ptr::from_ref(self).cast::().as_ref().unwrap_unchecked() } - } + let output = quote! { + #![allow(missing_docs)] ///@ FIXME (in ast_tools/src/generators/ast_kind.rs) + + ///@@line_break + use std::ptr; + + ///@@line_break + use oxc_span::{GetSpan, Span}; + + ///@@line_break + use crate::ast::*; + + ///@@line_break + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + #[repr(u8)] + pub enum AstType { + #type_variants + } + + ///@@line_break + /// Untyped AST Node Kind + #[derive(Debug, Clone, Copy)] + #[repr(C, u8)] + pub enum AstKind<'a> { + #kind_variants + } + + ///@@line_break + impl AstKind<'_> { + /// Get the [`AstType`] of an [`AstKind`]. + #[inline] + pub fn ty(&self) -> AstType { + ///@ SAFETY: `AstKind` is `#[repr(C, u8)]`, so discriminant is stored in first byte, + ///@ and it's valid to read it. + ///@ `AstType` is also `#[repr(u8)]` and `AstKind` and `AstType` both have the same + ///@ discriminants, so it's valid to read `AstKind`'s discriminant as `AstType`. 
+ unsafe { *ptr::from_ref(self).cast::().as_ref().unwrap_unchecked() } } + } - ///@@line_break - impl GetSpan for AstKind<'_> { - fn span(&self) -> Span { - match self { - #(#span_matches),*, - } + ///@@line_break + impl GetSpan for AstKind<'_> { + fn span(&self) -> Span { + match self { + #span_match_arms } } + } - ///@@line_break - impl<'a> AstKind<'a> { - #(#as_ast_kind_impls)* - } - }, - } + ///@@line_break + impl<'a> AstKind<'a> { + #as_methods + } + }; + + Output::Rust { path: output_path(crate::AST_CRATE, "ast_kind.rs"), tokens: output } } } diff --git a/tasks/ast_tools/src/generators/get_id.rs b/tasks/ast_tools/src/generators/get_id.rs index e487f8c01dcbe..b2920b1c8fb3c 100644 --- a/tasks/ast_tools/src/generators/get_id.rs +++ b/tasks/ast_tools/src/generators/get_id.rs @@ -1,5 +1,5 @@ -//! Generator for ID getter/setter methods on all types with `scope_id`, `symbol_id`, `reference_id` -//! fields. +//! Generator for ID getter/setter methods on all structs with semantic ID fields +//! (`scope_id`, `symbol_id`, `reference_id`). //! //! e.g. Generates `scope_id` and `set_scope_id` methods on all types with a `scope_id` field. @@ -8,20 +8,24 @@ use quote::{format_ident, quote}; use crate::{ output::{output_path, Output}, - schema::{Schema, TypeDef}, - util::ToIdent, - Generator, + schema::{Def, Schema, TypeDef}, + Codegen, Generator, }; use super::define_generator; +/// Semantic ID types. +/// We generate builder methods both with and without these fields for types which include any of them. +const SEMANTIC_ID_TYPES: [&str; 3] = ["ScopeId", "SymbolId", "ReferenceId"]; + +/// Generator for methods to get/set semantic IDs on structs which have them. 
pub struct GetIdGenerator; define_generator!(GetIdGenerator); impl Generator for GetIdGenerator { - fn generate(&mut self, schema: &Schema) -> Output { - let impls = schema.defs.iter().filter_map(generate_for_type); + fn generate(&self, schema: &Schema, _codegen: &Codegen) -> Output { + let impls = schema.types.iter().filter_map(|type_def| generate_for_type(type_def, schema)); let output = quote! { use oxc_syntax::{reference::ReferenceId, scope::ScopeId, symbol::SymbolId}; @@ -36,29 +40,29 @@ impl Generator for GetIdGenerator { } } -fn generate_for_type(def: &TypeDef) -> Option { - let TypeDef::Struct(def) = def else { return None }; +fn generate_for_type(type_def: &TypeDef, schema: &Schema) -> Option { + let TypeDef::Struct(struct_def) = type_def else { return None }; - let struct_name = def.name.as_str(); + let struct_name = struct_def.name(); - let methods = def + let methods = struct_def .fields .iter() .filter_map(|field| { - let field_ident = field.ident().expect("expected named field"); - let field_name = field_ident.to_string(); - - let type_name = match (field_name.as_str(), field.typ.raw()) { - ("scope_id", "Cell>") => "ScopeId", - ("symbol_id", "Cell>") => "SymbolId", - ("reference_id", "Cell>") => "ReferenceId", - _ => return None, - }; - let type_ident = type_name.to_ident(); + let field_type = field.type_def(schema); + let inner_type = field_type.as_cell()?.inner_type(schema).as_option()?.inner_type(schema); + let inner_type_name = inner_type.name(); + if !SEMANTIC_ID_TYPES.contains(&inner_type_name) { + return None; + } + + let field_name = field.name(); + let field_ident = field.ident(); + let inner_type_ident = inner_type.ident(); // Generate getter method - let get_doc1 = format!(" Get [`{type_name}`] of [`{struct_name}`]."); - let get_doc2 = format!(" Only use this method on a post-semantic AST where [`{type_name}`]s are always defined."); + let get_doc1 = format!(" Get [`{inner_type_name}`] of [`{struct_name}`]."); + let get_doc2 = format!(" 
Only use this method on a post-semantic AST where [`{inner_type_name}`]s are always defined."); let get_doc3 = format!(" Panics if `{field_name}` is [`None`]."); let get_method = quote! { @@ -69,18 +73,18 @@ fn generate_for_type(def: &TypeDef) -> Option { /// # Panics #[doc = #get_doc3] #[inline] - pub fn #field_ident(&self) -> #type_ident { + pub fn #field_ident(&self) -> #inner_type_ident { self.#field_ident.get().unwrap() } }; // Generate setter method let set_method_ident = format_ident!("set_{field_name}"); - let set_doc = format!(" Set [`{type_name}`] of [`{struct_name}`]."); + let set_doc = format!(" Set [`{inner_type_name}`] of [`{struct_name}`]."); let set_method = quote! { #[doc = #set_doc] #[inline] - pub fn #set_method_ident(&self, #field_ident: #type_ident) { + pub fn #set_method_ident(&self, #field_ident: #inner_type_ident) { self.#field_ident.set(Some(#field_ident)); } }; @@ -93,19 +97,17 @@ fn generate_for_type(def: &TypeDef) -> Option { #set_method }) }) - .collect::>(); + .collect::(); if methods.is_empty() { return None; } - let struct_name_ident = struct_name.to_ident(); - let lifetime = if def.has_lifetime { quote!(<'_>) } else { TokenStream::default() }; - + let struct_ty = struct_def.ty_anon(schema); Some(quote! 
{ ///@@line_break - impl #struct_name_ident #lifetime { - #(#methods)* + impl #struct_ty { + #methods } }) } diff --git a/tasks/ast_tools/src/generators/mod.rs b/tasks/ast_tools/src/generators/mod.rs index fac408d2ff805..75b3e20085e97 100644 --- a/tasks/ast_tools/src/generators/mod.rs +++ b/tasks/ast_tools/src/generators/mod.rs @@ -1,4 +1,8 @@ -use crate::{output::Output, Result, Schema}; +use crate::{ + output::Output, + parse::attr::{attr_positions, AttrLocation, AttrPart, AttrPositions}, + Codegen, Result, Runner, Schema, +}; mod assert_layouts; mod ast_builder; @@ -12,37 +16,119 @@ pub use ast_builder::AstBuilderGenerator; pub use ast_kind::AstKindGenerator; pub use get_id::GetIdGenerator; pub use typescript::TypescriptGenerator; -pub use visit::{VisitGenerator, VisitMutGenerator}; +pub use visit::VisitGenerator; -pub trait Generator { - // Methods defined by implementer +/// Trait to define a generator. +pub trait Generator: Runner { + // Methods which can/must be defined by implementer. - fn generate(&mut self, schema: &Schema) -> Output; + /// Attributes that this generator uses. + /// + /// If this [`Generator`] handles any attributes, override this method to return the details of where + /// those attributes can legally be used. + /// + /// [`parse_attr`] will be called with any attributes on structs/enums matching these patterns. + /// + /// e.g.: + /// + /// ```ignore + /// fn attrs(&self) -> &[(&'static str, AttrPositions)] { + /// &[("clone_in", AttrPositions::StructField)] + /// } + /// ``` + /// + /// ```ignore + /// fn attrs(&self) -> &[(&'static str, AttrPositions)] { + /// &[ + /// ("visit", attr_positions!(AstAttr | StructField | EnumVariant)), + /// ("scope", attr_positions!(Struct | Enum | StructField)), + /// ] + /// } + /// ``` + /// + /// [`parse_attr`]: Generator::parse_attr + fn attrs(&self) -> &[(&'static str, AttrPositions)] { + &[] + } + + /// Parse an attribute part and record information from it on type definition. 
+ /// + /// `parse_attr` will only be called with attributes which this [`Generator`] has registered + /// its ownership of by returning their details from [`attrs`] method. + /// + /// * `attr_name` is name of the attribute. + /// * `location` is the location where the attribute appears (e.g. on a struct field). + /// * `part` contains the details of this part of the attribute. + /// + /// e.g.: + /// + /// ``` + /// #[ast(visit)] + /// #[estree(rename = "FooFoo")] + /// struct Foo { + /// #[estree(skip, rename = "Blah")] + /// #[span] + /// blip: Bar, + /// } + /// ``` + /// + /// `parse_attr` will be called 5 times, with arguments: + /// + /// * `"visit", AttrLocation::StructAstAttr(struct_def), AttrPart::None` + /// * `"estree", AttrLocation::Struct(struct_def), AttrPart::String("rename", "FooFoo")` + /// * `"estree", AttrLocation::StructField(struct_def, 0), AttrPart::Tag("skip")` + /// * `"estree", AttrLocation::StructField(struct_def, 0), AttrPart::String("rename", "Blah")` + /// * `"span", AttrLocation::StructField(struct_def, 0), AttrPart::None` + /// + /// [`attrs`]: Generator::attrs + #[expect(unused_variables)] + fn parse_attr( + &self, + attr_name: &str, + location: AttrLocation<'_>, + part: AttrPart<'_>, + ) -> Result<()> { + Ok(()) + } + + /// Prepare for generation, modifying schema. + /// + /// Runs before any `generate` or `derive` method runs. + #[expect(unused_variables)] + fn prepare(&self, schema: &mut Schema) {} - // Standard methods + /// Generate single output. + #[expect(unused_variables, clippy::unimplemented)] + fn generate(&self, schema: &Schema, codegen: &Codegen) -> Output { + unimplemented!() + } - fn output(&mut self, schema: &Schema) -> Result> { - Ok(vec![self.generate(schema)]) + /// Generate multiple outputs. + fn generate_many(&self, schema: &Schema, codegen: &Codegen) -> Vec { + vec![self.generate(schema, codegen)] } } +/// Macro to implement [`Runner`] for a [`Generator`]. +/// +/// Must be used on every [`Generator`].
+/// +/// # Example +/// ``` +/// struct AssertLayouts; +/// define_generator!(AssertLayouts); +/// ``` macro_rules! define_generator { ($ident:ident $($lifetime:lifetime)?) => { const _: () = { use $crate::{ - codegen::Runner, + codegen::{Codegen, Runner}, output::Output, schema::Schema, Result, }; impl $($lifetime)? Runner for $ident $($lifetime)? { - type Context = Schema; - - fn verb(&self) -> &'static str { - "Generate" - } - fn name(&self) -> &'static str { stringify!($ident) } @@ -51,8 +137,8 @@ macro_rules! define_generator { file!() } - fn run(&mut self, schema: &Schema) -> Result> { - self.output(schema) + fn run(&self, schema: &Schema, codegen: &Codegen) -> Result> { + Ok(self.generate_many(schema, codegen)) } } }; diff --git a/tasks/ast_tools/src/generators/typescript.rs b/tasks/ast_tools/src/generators/typescript.rs index ff4c67afd2173..eb2bf6a88d5f9 100644 --- a/tasks/ast_tools/src/generators/typescript.rs +++ b/tasks/ast_tools/src/generators/typescript.rs @@ -1,39 +1,74 @@ -use convert_case::{Case, Casing}; +//! Generator for TypeScript type definitions for all AST types. + +use std::borrow::Cow; + use itertools::Itertools; -use rustc_hash::{FxHashMap, FxHashSet}; use crate::{ - output::Output, - schema::{ - serialize::{enum_variant_name, get_always_flatten_structs, get_type_tag}, - EnumDef, FieldDef, GetIdent, Schema, StructDef, TypeDef, TypeName, + derives::estree::{ + get_fieldless_variant_value, get_struct_field_name, should_add_type_field_to_struct, + should_flatten_field, }, - Generator, TypeId, + output::Output, + schema::{Def, EnumDef, FieldDef, Schema, StructDef, TypeDef}, + Codegen, Generator, Result, TYPESCRIPT_DEFINITIONS_PATH, }; -use super::define_generator; +use super::{attr_positions, define_generator, AttrLocation, AttrPart, AttrPositions}; const CUSTOM_TYPESCRIPT: &str = include_str!("../../../../crates/oxc_ast/custom_types.d.ts"); +/// Generator for TypeScript type definitions. 
pub struct TypescriptGenerator; define_generator!(TypescriptGenerator); impl Generator for TypescriptGenerator { - fn generate(&mut self, schema: &Schema) -> Output { - let mut code = String::new(); + /// Register that accept `#[ts]` attr on struct fields and enum variants. + fn attrs(&self) -> &[(&'static str, AttrPositions)] { + &[("ts", attr_positions!(StructField | EnumVariant))] + } - let always_flatten_structs = get_always_flatten_structs(schema); + /// Parse `#[ts]` on struct field or enum variant. + fn parse_attr(&self, _attr_name: &str, location: AttrLocation, part: AttrPart) -> Result<()> { + // No need to check attr name is `ts`, because that's the only attribute this derive handles. + if !matches!(part, AttrPart::None) { + return Err(()); + } - for def in &schema.defs { - if !def.generates_derive("ESTree") { + // Location can only be `StructField` or `EnumVariant` + match location { + AttrLocation::StructField(struct_def, field_index) => { + struct_def.fields[field_index].estree.is_ts = true; + } + AttrLocation::EnumVariant(enum_def, variant_index) => { + enum_def.variants[variant_index].estree.is_ts = true; + } + _ => unreachable!(), + } + + Ok(()) + } + + /// Generate Typescript type definitions for all AST types. 
+ fn generate(&self, schema: &Schema, codegen: &Codegen) -> Output { + let estree_derive_id = codegen.get_derive_id_by_name("ESTree"); + + let mut code = String::new(); + for type_def in &schema.types { + if !type_def.generates_derive(estree_derive_id) { continue; } - let ts_type_def = match def { - TypeDef::Struct(it) => Some(typescript_struct(it, &always_flatten_structs)), - TypeDef::Enum(it) => typescript_enum(it), + + let ts_type_def = match type_def { + TypeDef::Struct(struct_def) => generate_ts_type_def_for_struct(struct_def, schema), + TypeDef::Enum(enum_def) => { + let ts_type_def = generate_ts_type_def_for_enum(enum_def, schema); + let Some(ts_type_def) = ts_type_def else { continue }; + ts_type_def + } + _ => unreachable!(), }; - let Some(ts_type_def) = ts_type_def else { continue }; code.push_str(&ts_type_def); code.push_str("\n\n"); @@ -41,138 +76,135 @@ impl Generator for TypescriptGenerator { code.push_str(CUSTOM_TYPESCRIPT); - Output::Javascript { path: format!("{}/types.d.ts", crate::TYPESCRIPT_PACKAGE), code } + Output::Javascript { path: TYPESCRIPT_DEFINITIONS_PATH.to_string(), code } } } -// Untagged enums: `type Expression = BooleanLiteral | NullLiteral` -// Tagged enums: `type PropertyKind = 'init' | 'get' | 'set'` -fn typescript_enum(def: &EnumDef) -> Option { - if def.markers.estree.custom_ts_def { - return None; - } - - let is_untagged = def.all_variants().all(|var| var.fields.len() == 1); - - let union = if is_untagged { - def.all_variants().map(|var| type_to_string(var.fields[0].typ.name())).join(" | ") - } else { - def.all_variants().map(|var| format!("'{}'", enum_variant_name(var, def))).join(" | ") - }; - let ident = def.ident(); - Some(format!("export type {ident} = {union};")) -} - -fn typescript_struct(def: &StructDef, always_flatten_structs: &FxHashSet) -> String { - let ident = def.ident(); - let mut fields = String::new(); +/// Generate Typescript type definition for a struct. 
+fn generate_ts_type_def_for_struct(struct_def: &StructDef, schema: &Schema) -> String { + let type_name = struct_def.name(); + let mut fields_str = String::new(); let mut extends = vec![]; - if let Some(type_tag) = get_type_tag(def) { - fields.push_str(&format!("\n\ttype: '{type_tag}';")); + if should_add_type_field_to_struct(struct_def) { + let type_name = struct_def.estree.rename.as_deref().unwrap_or_else(|| struct_def.name()); + fields_str.push_str(&format!("\n\ttype: '{type_name}';")); } - let mut append_to: FxHashMap = FxHashMap::default(); - - // Scan through to find all append_to fields - for field in &def.fields { - let Some(parent) = field.markers.derive_attributes.estree.append_to.as_ref() else { - continue; - }; - assert!( - append_to.insert(parent.clone(), field).is_none(), - "Duplicate append_to target (on {ident})" - ); - } - - for field in &def.fields { - if field.markers.derive_attributes.estree.skip - || field.markers.derive_attributes.estree.append_to.is_some() - { - continue; - } - let mut ty = match &field.markers.derive_attributes.estree.typescript_type { - Some(ty) => ty.clone(), - None => type_to_string(field.typ.name()), - }; - - let always_flatten = match field.typ.type_id() { - Some(id) => always_flatten_structs.contains(&id), - None => false, - }; - - if always_flatten || field.markers.derive_attributes.estree.flatten { - extends.push(ty); + let mut output_as_type = false; + for field in &struct_def.fields { + if field.estree.skip { continue; } - let ident = field.ident().unwrap(); - if let Some(append_after) = append_to.get(&ident.to_string()) { - let ts_type = &append_after.markers.derive_attributes.estree.typescript_type; - let after_type = if let Some(ty) = ts_type { - ty.clone() - } else { - let typ = append_after.typ.name(); - if let TypeName::Opt(inner) = typ { - type_to_string(inner) - } else { - panic!( - "expected field labeled with append_to to be Option<...>, but found {typ}" - ); + let field_type_name = if let 
Some(append_field_index) = field.estree.append_field_index { + let appended_field = struct_def.fields[append_field_index].type_def(schema); + let appended_field = appended_field.as_option().unwrap(); + let appended_type_name = ts_type_name(appended_field.inner_type(schema), schema); + + let field_type = field.type_def(schema); + let (vec_def, is_option) = match field_type { + TypeDef::Vec(vec_def) => (vec_def, false), + TypeDef::Option(option_def) => { + let vec_def = option_def.inner_type(schema).as_vec().unwrap(); + (vec_def, true) } + _ => panic!( + "Can only append a field to a `Vec` or `Option>`: `{}::{}`", + type_name, + field.name() + ), }; + let inner_type_name = ts_type_name(vec_def.inner_type(schema), schema); - if let Some(inner) = ty.strip_prefix("Array<") { - ty = format!("Array<{after_type} | {inner}"); - } else { - panic!("expected append_to target to be a Vec, but found {ty}"); + // TODO: Reverse these two + let mut field_type_name = format!("Array<{appended_type_name} | {inner_type_name}>"); + if is_option { + field_type_name.push_str(" | null"); } + Cow::Owned(field_type_name) + } else { + get_field_type_name(field, schema) + }; + + if should_flatten_field(field, schema) { + if !output_as_type && field_type_name.contains('|') { + output_as_type = true; + } + extends.push(field_type_name); + continue; } - let name = match &field.markers.derive_attributes.estree.rename { - Some(rename) => rename.to_string(), - None => field.name.clone().unwrap().to_case(Case::Camel), - }; + let field_camel_name = get_struct_field_name(field); + fields_str.push_str(&format!("\n\t{field_camel_name}: {field_type_name};")); + } - fields.push_str(&format!("\n\t{name}: {ty};")); + if let Some(add_ts) = struct_def.estree.add_ts.as_deref() { + fields_str.push_str(&format!("\n\t{add_ts};")); } - let extends_union = extends.iter().any(|it| it.contains('|')); + if extends.is_empty() { + format!("export interface {type_name} {{{fields_str}\n}}") + } else if output_as_type { + 
format!("export type {type_name} = ({{{fields_str}\n}}) & {};", extends.join(" & ")) + } else { + format!("export interface {type_name} extends {} {{{fields_str}\n}}", extends.join(", ")) + } +} - let body = if let Some(extra_ts) = def.markers.estree.as_ref().and_then(|e| e.add_ts.as_ref()) { - format!("{{{fields}\n\t{extra_ts}\n}}") +/// Generate Typescript type definition for an enum. +fn generate_ts_type_def_for_enum(enum_def: &EnumDef, schema: &Schema) -> Option { + if enum_def.estree.custom_ts_def { + return None; + } + + let union = if enum_def.is_fieldless() { + enum_def + .all_variants(schema) + .map(|variant| format!("'{}'", get_fieldless_variant_value(enum_def, variant))) + .join(" | ") } else { - format!("{{{fields}\n}}") + enum_def + .all_variants(schema) + .map(|variant| ts_type_name(variant.field_type(schema).unwrap(), schema)) + .join(" | ") }; - if extends_union { - let extends = - if extends.is_empty() { String::new() } else { format!(" & {}", extends.join(" & ")) }; - format!("export type {ident} = ({body}){extends};") - } else { - let extends = if extends.is_empty() { - String::new() - } else { - format!(" extends {}", extends.join(", ")) - }; - format!("export interface {ident}{extends} {body}") - } + let enum_name = enum_def.name(); + Some(format!("export type {enum_name} = {union};")) } -fn type_to_string(ty: &TypeName) -> String { - match ty { - TypeName::Ident(ident) => match ident.as_str() { - "f64" | "f32" | "usize" | "u64" | "u32" | "u16" | "u8" | "i64" | "i32" | "i16" - | "i8" => "number", +/// Get TS type name for a type. 
+fn ts_type_name<'s>(type_def: &'s TypeDef, schema: &'s Schema) -> Cow<'s, str> { + match type_def { + TypeDef::Struct(struct_def) => Cow::Borrowed(struct_def.name()), + TypeDef::Enum(enum_def) => Cow::Borrowed(enum_def.name()), + TypeDef::Primitive(primitive_def) => Cow::Borrowed(match primitive_def.name() { + #[rustfmt::skip] + "u8" | "u16" | "u32" | "u64" | "u128" | "usize" + | "i8" | "i16" | "i32" | "i64" | "i128" | "isize" + | "f32" | "f64" => "number", "bool" => "boolean", - "str" | "String" | "Atom" | "CompactStr" => "string", - ty => ty, + "&str" | "Atom" => "string", + name => name, + }), + TypeDef::Option(option_def) => { + Cow::Owned(format!("{} | null", ts_type_name(option_def.inner_type(schema), schema))) } - .to_string(), - TypeName::Vec(type_name) => format!("Array<{}>", type_to_string(type_name)), - TypeName::Box(type_name) | TypeName::Ref(type_name) | TypeName::Complex(type_name) => { - type_to_string(type_name) + TypeDef::Vec(vec_def) => { + Cow::Owned(format!("Array<{}>", ts_type_name(vec_def.inner_type(schema), schema))) } - TypeName::Opt(type_name) => format!("{} | null", type_to_string(type_name)), + TypeDef::Box(box_def) => ts_type_name(box_def.inner_type(schema), schema), + TypeDef::Cell(cell_def) => ts_type_name(cell_def.inner_type(schema), schema), + } +} + +/// Get type name for a field. +fn get_field_type_name<'s>(field: &'s FieldDef, schema: &'s Schema) -> Cow<'s, str> { + if let Some(ts_type) = field.estree.ts_type.as_deref() { + Cow::Borrowed(ts_type) + } else { + let field_type = field.type_def(schema); + ts_type_name(field_type, schema) } } diff --git a/tasks/ast_tools/src/generators/visit.rs b/tasks/ast_tools/src/generators/visit.rs index 787aaf11e3ada..8b1008db67db7 100644 --- a/tasks/ast_tools/src/generators/visit.rs +++ b/tasks/ast_tools/src/generators/visit.rs @@ -1,74 +1,223 @@ -use std::borrow::Cow; +//! Generator for `Visit` and `VisitMut` traits. 
-use convert_case::{Case, Casing}; -use itertools::Itertools; +use cow_utils::CowUtils; +use oxc_index::IndexVec; use proc_macro2::TokenStream; use quote::{format_ident, quote, ToTokens}; -use rustc_hash::FxHashMap; -use syn::{parse_quote, Ident}; +use syn::{parse_str, punctuated::Punctuated, Expr, Ident, Meta, MetaList, Token}; use crate::{ - generators::ast_kind::BLACK_LIST as KIND_BLACK_LIST, - markers::VisitArg, output::{output_path, Output}, - schema::{EnumDef, GetIdent, Schema, StructDef, ToType, TypeDef}, - util::{StrExt, TokenStreamExt, TypeWrapper}, - Generator, + parse::convert_expr_to_string, + schema::{ + extensions::visit::Scope, Def, EnumDef, FieldDef, OptionDef, Schema, StructDef, TypeDef, + TypeId, VecDef, + }, + utils::create_ident_tokens, + Codegen, Generator, Result, AST_CRATE, }; -use super::define_generator; +use super::{attr_positions, define_generator, AttrLocation, AttrPart, AttrPositions}; +/// Generator for `Visit` and `VisitMut` traits. pub struct VisitGenerator; define_generator!(VisitGenerator); impl Generator for VisitGenerator { - fn generate(&mut self, schema: &Schema) -> Output { - Output::Rust { - path: output_path(crate::AST_CRATE, "visit.rs"), - tokens: generate_visit(false, schema), + /// Register that accept: + /// * `#[visit]` attr on struct fields or enum variants. + /// * `#[ast(visit)]` on structs or enums. + /// * `#[scope]` on structs or struct fields. + fn attrs(&self) -> &[(&'static str, AttrPositions)] { + &[ + ("visit", attr_positions!(AstAttr | StructField | EnumVariant)), + ("scope", attr_positions!(Struct | StructField)), + ] + } + + /// Parse `#[visit]`, `#[scope]` and `#[ast(visit)]` attrs. + fn parse_attr(&self, attr_name: &str, location: AttrLocation, part: AttrPart) -> Result<()> { + match attr_name { + "visit" => parse_visit_attr(location, part), + "scope" => parse_scope_attr(location, part), + _ => unreachable!(), } } -} -pub struct VisitMutGenerator; + /// Generate `Visit` and `VisitMut` traits. 
+ fn generate_many(&self, schema: &Schema, _codegen: &Codegen) -> Vec { + let (visit_output, visit_mut_output) = generate_outputs(schema); + + let visit_output = + Output::Rust { path: output_path(AST_CRATE, "visit.rs"), tokens: visit_output }; + let visit_mut_output = + Output::Rust { path: output_path(AST_CRATE, "visit_mut.rs"), tokens: visit_mut_output }; + + vec![visit_output, visit_mut_output] + } +} -define_generator!(VisitMutGenerator); +/// Parse `#[visit]` or `#[ast(visit)]` attr. +fn parse_visit_attr(location: AttrLocation, part: AttrPart) -> Result<()> { + match (part, location) { + // `#[ast(visit)]` on struct + (AttrPart::None, AttrLocation::StructAstAttr(struct_def)) => { + struct_def.visit.is_visited = true; + } + // `#[ast(visit)]` on enum + (AttrPart::None, AttrLocation::EnumAstAttr(enum_def)) => { + enum_def.visit.is_visited = true; + } + // `#[visit(args(flags = ...))]` on struct field or enum variant + (AttrPart::List("args", meta_list), location) => { + // Parse args as a list of `x = expr` parts + let metas = meta_list + .parse_args_with(Punctuated::::parse_terminated) + .map_err(|_| ())?; + let mut args = vec![]; + for meta in metas { + if let Meta::NameValue(name_value) = meta { + let arg_name = name_value.path.get_ident().ok_or(())?.to_string(); + let arg_value = convert_expr_to_string(&name_value.value); + args.push((arg_name, arg_value)); + } else { + return Err(()); + } + } + if args.is_empty() { + return Err(()); + } -impl Generator for VisitMutGenerator { - fn generate(&mut self, schema: &Schema) -> Output { - Output::Rust { - path: output_path(crate::AST_CRATE, "visit_mut.rs"), - tokens: generate_visit(true, schema), + match location { + AttrLocation::StructField(struct_def, field_index) => { + struct_def.fields[field_index].visit.visit_args = Some(args); + } + AttrLocation::EnumVariant(enum_def, variant_index) => { + enum_def.variants[variant_index].visit.visit_args = Some(args); + } + _ => return Err(()), + } } + _ => return 
Err(()), } + + Ok(()) } -fn generate_visit(is_mut: bool, schema: &Schema) -> TokenStream { - let (visits, walks) = VisitBuilder::new(schema, is_mut).build(); +/// Parse `#[scope]` attr. +fn parse_scope_attr(location: AttrLocation, part: AttrPart) -> Result<()> { + fn get_or_create_scope(struct_def: &mut StructDef) -> Result<&mut Scope> { + if !struct_def.visit.is_visited { + return Err(()); + } - let walk_mod = if is_mut { quote!(walk_mut) } else { quote!(walk) }; - let trait_name = if is_mut { quote!(VisitMut) } else { quote!(Visit) }; - let ast_kind_type = if is_mut { quote!(AstType) } else { quote!(AstKind) }; - let ast_kind_life = if is_mut { TokenStream::default() } else { quote!(<'a>) }; + Ok(struct_def.visit.scope.get_or_insert_with(|| Scope { + enter_before_index: 0, + exit_before_index: struct_def.fields.len(), + flags: "ScopeFlags::empty()".to_string(), + strict_if: None, + })) + } - let may_alloc = if is_mut { - TokenStream::default() - } else { - quote! { - ///@@line_break - #[inline] - fn alloc(&self, t: &T) -> &'a T { - ///@ SAFETY: - ///@ This should be safe as long as `src` is an reference from the allocator. - ///@ But honestly, I'm not really sure if this is safe. 
- unsafe { - std::mem::transmute(t) - } + fn parse_list(meta_list: &MetaList) -> Result { + let exprs = meta_list + .parse_args_with(Punctuated::::parse_terminated) + .map_err(|_| ())?; + if exprs.len() == 1 { + Ok(exprs.first().unwrap().to_token_stream().to_string()) + } else { + Err(()) + } + } + + match (part, location) { + // `#[scope]` on struct + (AttrPart::None, AttrLocation::Struct(struct_def)) => { + get_or_create_scope(struct_def)?; + } + // `#[scope(flags(...))` on struct + (AttrPart::List("flags", meta_list), AttrLocation::Struct(struct_def)) => { + // TODO: Make syntax `#[scope(flags = ...)]`, so can use `AttrPart::String` instead of parsing here + let scope = get_or_create_scope(struct_def)?; + scope.flags = parse_list(meta_list)?; + } + // `#[scope(strict_if(...))` on struct + (AttrPart::List("strict_if", meta_list), AttrLocation::Struct(struct_def)) => { + // TODO: Make syntax `#[scope(strict_if = ...)]`, so can use `AttrPart::String` instead of parsing here + let scope = get_or_create_scope(struct_def)?; + scope.strict_if = Some(parse_list(meta_list)?); + } + // `#[scope(enter_before)]` on struct field + (AttrPart::Tag("enter_before"), AttrLocation::StructField(struct_def, field_index)) => { + let scope = struct_def.visit.scope.as_mut().ok_or(())?; + scope.enter_before_index = field_index; + } + // `#[scope(exit_before)]` on struct field + (AttrPart::Tag("exit_before"), AttrLocation::StructField(struct_def, field_index)) => { + let scope = struct_def.visit.scope.as_mut().ok_or(())?; + scope.exit_before_index = field_index; + } + _ => return Err(()), + } + + Ok(()) +} + +/// Generate outputs for `Visit` and `VisitMut`. +fn generate_outputs(schema: &Schema) -> (/* Visit */ TokenStream, /* VisitMut */ TokenStream) { + // Generate `visit_*` methods and `walk_*` functions for both `Visit` and `VisitMut` + let mut builder = VisitBuilder::new(schema); + builder.generate(); + let VisitBuilder { visit_methods, walk_fns, visit_mut_methods, walk_mut_fns, .. 
} = builder; + + // Generate `Visit` trait + let alloc_fn = quote! { + ///@@line_break + #[inline] + fn alloc(&self, t: &T) -> &'a T { + ///@ SAFETY: + ///@ This should be safe as long as `src` is an reference from the allocator. + ///@ But honestly, I'm not really sure if this is safe. + unsafe { + std::mem::transmute(t) } } }; + let visit_output = generate_output( + &format_ident!("Visit"), + &visit_methods, + &walk_fns, + &format_ident!("walk"), + &alloc_fn, + &format_ident!("AstKind"), + "e!(AstKind<'a>), + ); + + // Generate `VisitMut` trait + let visit_mut_output = generate_output( + &format_ident!("VisitMut"), + &visit_mut_methods, + &walk_mut_fns, + &format_ident!("walk_mut"), + "e!(), + &format_ident!("AstType"), + "e!(AstType), + ); + (visit_output, visit_mut_output) +} + +/// Generate output for `Visit` or `VisitMut` trait. +fn generate_output( + trait_ident: &Ident, + visit_methods: &TokenStream, + walk_fns: &TokenStream, + walk_mod_ident: &Ident, + maybe_alloc: &TokenStream, + ast_kind_or_type_ident: &Ident, + ast_kind_or_type_ty: &TokenStream, +) -> TokenStream { quote! { //! Visitor Pattern //! 
@@ -95,18 +244,18 @@ fn generate_visit(is_mut: bool, schema: &Schema) -> TokenStream { ///@@line_break use crate::ast::*; - use crate::ast_kind::#ast_kind_type; + use crate::ast_kind::#ast_kind_or_type_ident; ///@@line_break - use #walk_mod::*; + use #walk_mod_ident::*; ///@@line_break /// Syntax tree traversal - pub trait #trait_name <'a>: Sized { + pub trait #trait_ident<'a>: Sized { #[inline] - fn enter_node(&mut self, kind: #ast_kind_type #ast_kind_life) {} + fn enter_node(&mut self, kind: #ast_kind_or_type_ty) {} #[inline] - fn leave_node(&mut self, kind: #ast_kind_type #ast_kind_life) {} + fn leave_node(&mut self, kind: #ast_kind_or_type_ty) {} ///@@line_break #[inline] @@ -114,461 +263,808 @@ fn generate_visit(is_mut: bool, schema: &Schema) -> TokenStream { #[inline] fn leave_scope(&mut self) {} - #may_alloc + #maybe_alloc - #(#visits)* + #visit_methods } ///@@line_break - pub mod #walk_mod { + pub mod #walk_mod_ident { use super::*; ///@@line_break - #(#walks)* + #walk_fns } } } -struct VisitBuilder<'a> { - schema: &'a Schema, - - is_mut: bool, - - visits: Vec, - walks: Vec, - cache: FxHashMap>; 2]>, +/// Generator of `visit_*` methods and `walk_*` functions for `Visit` and `VisitMut`. +/// +/// Generates these functions for all AST types recursively, starting with `Program`, +/// and recursively walking dependent types (e.g. types of struct fields for a struct) +/// until all types which are visited have had functions generated for them. +// +// TODO: `Vec`s have their own `TypeDef`s, so could simplify this by just looping through all `TypeDef`s. +// The only purpose of using recursion is to produce a certain order of visit methods in output, +// but the order isn't important. 
+struct VisitBuilder<'s> { + schema: &'s Schema, + /// `visit_*` methods for `Visit` + visit_methods: TokenStream, + /// `visit_*` methods for `VisitMut` + visit_mut_methods: TokenStream, + /// `walk_*` functions for `Visit` + walk_fns: TokenStream, + /// `walk_*` functions for `VisitMut` + walk_mut_fns: TokenStream, + /// List tracking which visitors are generated already. + /// `true` = generated already, `false` = not generated yet. + generated_list: IndexVec, } -impl<'a> VisitBuilder<'a> { - fn new(schema: &'a Schema, is_mut: bool) -> Self { - Self { schema, is_mut, visits: Vec::new(), walks: Vec::new(), cache: FxHashMap::default() } - } +impl<'s> VisitBuilder<'s> { + /// Create new [`VisitBuilder`]. + fn new(schema: &'s Schema) -> Self { + let generated_list = schema.types.iter().map(|_| false).collect(); - fn build(mut self) -> (/* visits */ Vec, /* walks */ Vec) { - let program = self - .schema - .defs - .iter() - .filter(|it| it.is_visitable()) - .find(|it| it.name() == "Program") - .expect("Couldn't find the `Program` type!"); + Self { + schema, + visit_methods: quote!(), + walk_fns: quote!(), + visit_mut_methods: quote!(), + walk_mut_fns: quote!(), + generated_list, + } + } - self.get_visitor(program, false); - (self.visits, self.walks) + /// Generate `visit_*` methods and `walk_*` functions for `Visit` and `VisitMut`. + /// + /// After calling this method, [`VisitBuilder`] contains all `visit_*` methods and `walk_*` functions + /// in `visit_methods` etc fields. + fn generate(&mut self) { + let program_type = self.schema.type_by_name("Program"); + self.generate_visitor(program_type); } - fn with_ref_pat(&self, tk: T) -> TokenStream - where - T: ToTokens, - { - if self.is_mut { - quote!(&mut #tk) - } else { - quote!(&#tk) + /// Generate `visit_*` methods and `walk_*` functions for a type. + /// + /// Also generates methods/functions for child types. 
+ fn generate_visitor(&mut self, type_def: &TypeDef) { + // Exit if visitor already generated + let type_id = type_def.id(); + if self.generated_list[type_id] { + return; } - } + self.generated_list[type_id] = true; - fn kind_type(&self, ident: &Ident) -> TokenStream { - if self.is_mut { - quote!(AstType::#ident) - } else { - quote!(AstKind::#ident(visitor.alloc(it))) + match type_def { + TypeDef::Struct(struct_def) => { + if struct_def.visit.is_visited { + self.generate_struct_visitor(struct_def); + } + } + TypeDef::Enum(enum_def) => { + if enum_def.visit.is_visited { + self.generate_enum_visitor(enum_def); + } + } + TypeDef::Vec(vec_def) => { + self.generate_vec_visitor(vec_def); + } + TypeDef::Option(option_def) => { + self.generate_visitor(option_def.inner_type(self.schema)); + } + TypeDef::Box(box_def) => { + self.generate_visitor(box_def.inner_type(self.schema)); + } + TypeDef::Primitive(_) | TypeDef::Cell(_) => { + // No-op. Primitives and `Cell`s are not visited. + } } } - fn get_visitor(&mut self, def: &TypeDef, collection: bool) -> Cow<'a, Ident> { - let cache_ix = usize::from(collection); - let (ident, as_type) = { - debug_assert!(def.is_visitable(), "{def:?}"); + /// Generate `visit_*` methods and `walk_*` functions for a struct. + /// + /// Also generates functions for types of struct fields. + fn generate_struct_visitor(&mut self, struct_def: &StructDef) { + // Generate visit methods + let struct_ty = struct_def.ty(self.schema); + let type_snake_name = struct_def.snake_name(); + let visit_fn_ident = format_ident!("visit_{type_snake_name}"); + let walk_fn_ident = format_ident!("walk_{type_snake_name}"); - let ident = def.ident(); - let as_type = def.to_type(); + // TODO: Don't hard-code this. Represent it in an attr in AST type definition instead. 
+ let (extra_params, extra_args) = if struct_def.name() == "Function" { + (quote!( , flags: ScopeFlags ), quote!( , flags )) + } else { + (quote!(), quote!()) + }; - (ident, if collection { parse_quote!(Vec<'a, #as_type>) } else { as_type }) + let gen_visit_fn = |reference| { + quote! { + ///@@line_break + #[inline] + fn #visit_fn_ident(&mut self, it: #reference #struct_ty #extra_params) { + #walk_fn_ident(self, it #extra_args); + } + } }; + self.visit_methods.extend(gen_visit_fn(quote!( & ))); + self.visit_mut_methods.extend(gen_visit_fn(quote!( &mut ))); + + // Generate walk functions - // is it already generated? - if let Some(cached) = self.cache.get(&ident) { - if let Some(cached) = &cached[cache_ix] { - return Cow::clone(cached); + // Generate `enter_node` and `leave_node` calls (if this struct has an `AstKind`) + let struct_ident = struct_def.ident(); + let has_kind = struct_def.kind.has_kind; + let (enter_node, leave_node) = + generate_enter_and_leave_node(&struct_ident, has_kind, false); + let (enter_node_mut, leave_node_mut) = + generate_enter_and_leave_node(&struct_ident, has_kind, true); + + // Generate `enter_scope` and `leave_scope` calls (if this struct has a scope). + // They will be inserted before the relevant fields. + let (mut scope_entry, mut scope_exit) = if let Some(scope) = &struct_def.visit.scope { + let mut flags = parse_str::(&scope.flags).unwrap().to_token_stream(); + if let Some(strict_if) = &scope.strict_if { + let strict_if = parse_str::(&strict_if.cow_replace("self", "it")).unwrap(); + flags = quote! 
{{ + let mut flags = #flags; + if #strict_if { + flags |= ScopeFlags::StrictMode; + } + flags + }} } - } + let enter_scope = quote!( visitor.enter_scope(#flags, &it.scope_id); ); + let scope_entry = (scope.enter_before_index, enter_scope); - let ident_snake = { - let it = ident.to_string().to_case(Case::Snake); - let it = if collection { - // edge case for `Vec` to avoid conflicts with `FormalParameters` - // which both would generate the same name: `visit_formal_parameters`. - // and edge case for `Vec` to avoid conflicts with - // `TSImportAttributes` which both would generate the same name: `visit_formal_parameters`. - if matches!(it.as_str(), "formal_parameter" | "ts_import_attribute") { - let mut it = it; - it.push_str("_list"); - it - } else { - it.to_plural() - } - } else { - it - }; - format_ident!("{it}") - }; + let leave_scope = quote!( visitor.leave_scope(); ); + let scope_exit = (scope.exit_before_index, leave_scope); - let as_param_type = self.with_ref_pat(&as_type); - let (extra_params, extra_args) = if ident == "Function" { - (quote!(, flags: ScopeFlags,), quote!(, flags)) + (Some(scope_entry), Some(scope_exit)) } else { - (TokenStream::default(), TokenStream::default()) + (None, None) }; - let visit_name = { - let visit_name = format_ident!("visit_{}", ident_snake); - if !self.cache.contains_key(&ident) { - debug_assert!(self.cache.insert(ident.clone(), [None, None]).is_none()); - } - let cached = self.cache.get_mut(&ident).unwrap(); - assert!(cached[cache_ix].replace(Cow::Owned(visit_name)).is_none()); - Cow::clone(cached[cache_ix].as_ref().unwrap()) - }; + // Generate `visit_*` calls for struct fields + let mut field_visits_count = 0usize; + let (mut field_visits, mut field_visits_mut): (TokenStream, TokenStream) = struct_def + .fields + .iter() + .enumerate() + .filter_map(|(field_index, field)| { + let (visit, visit_mut) = self.generate_struct_field_visit( + field, + field_index, + &mut scope_entry, + &mut scope_exit, + )?; - let walk_name = 
format_ident!("walk_{}", ident_snake); + field_visits_count += 1; + + Some((visit, visit_mut)) + }) + .unzip(); + + // If didn't enter or exit scope already, enter/exit after last field + if let Some((_, enter_scope)) = scope_entry { + field_visits.extend(enter_scope.clone()); + field_visits_mut.extend(enter_scope); + } + if let Some((_, leave_scope)) = scope_exit { + field_visits.extend(leave_scope.clone()); + field_visits_mut.extend(leave_scope); + } - self.visits.push(quote! { + // `#[inline]` if there are 5 or less fields visited + // TODO: Is this ideal? + let maybe_inline_attr = + if field_visits_count <= 5 { quote!( #[inline] ) } else { quote!() }; + + self.walk_fns.extend(quote! { ///@@line_break - #[inline] - fn #visit_name (&mut self, it: #as_param_type #extra_params) { - #walk_name(self, it #extra_args); + #maybe_inline_attr + pub fn #walk_fn_ident<'a, V: Visit<'a>>(visitor: &mut V, it: &#struct_ty #extra_params) { + #enter_node + #field_visits + #leave_node } }); + self.walk_mut_fns.extend(quote! { + ///@@line_break + #maybe_inline_attr + pub fn #walk_fn_ident<'a, V: VisitMut<'a>>(visitor: &mut V, it: &mut #struct_ty #extra_params) { + #enter_node_mut + #field_visits_mut + #leave_node_mut + } + }); + + // Generate visitors for field types + for field in &struct_def.fields { + self.generate_visitor(field.type_def(self.schema)); + } + } - // We push an empty walk first, because we evaluate - and generate - each walk as we go, - // This would let us to maintain the order of first visit. - let this_walker = self.walks.len(); - self.walks.push(TokenStream::default()); + /// Generate visitor calls for a struct field. + /// + /// e.g. `visitor.visit_span(&it.span);`. + /// + /// Also inserts `enter_scope` / `leave_scope` calls before the visit call if needed. 
+ fn generate_struct_field_visit( + &self, + field: &FieldDef, + field_index: usize, + scope_entry: &mut Option<(usize, TokenStream)>, + scope_exit: &mut Option<(usize, TokenStream)>, + ) -> Option<(/* visit */ TokenStream, /* visit_mut */ TokenStream)> { + // Generate `visit_*` method call for struct field + let field_type = field.type_def(self.schema); + let field_ident = field.ident(); + let (mut visit, mut visit_mut) = self.generate_visit_type( + field_type, + Target::Property(quote!( it.#field_ident )), + field.visit.visit_args.as_ref(), + &field_ident, + true, + )?; - let (walk_body, may_inline) = if collection { - let singular_visit = self.get_visitor(def, false); - ( - quote! { - for el in it { - visitor.#singular_visit(el); - } - }, - true, - ) + // Insert `enter_scope` / `leave_scope` call, if scope needs to be entered/exited before this field. + // + // We handle exiting scope first, to create correct output if entering and exiting on same field. + // The `if` block for entering scope prepends `enter_scope` call *before* whatever it's passed. + // If both entering and exiting, that means `enter_scope` is inserted before `leave_scope`. + if let Some((exit_index, _)) = scope_exit { + if *exit_index <= field_index { + let (_, leave_scope) = scope_exit.take().unwrap(); + visit = quote!( #leave_scope #visit ); + visit_mut = quote!( #leave_scope #visit_mut ); + } + } + + if let Some((enter_index, _)) = scope_entry { + if *enter_index <= field_index { + let (_, enter_scope) = scope_entry.take().unwrap(); + visit = quote!( #enter_scope #visit ); + visit_mut = quote!( #enter_scope #visit_mut ); + } + } + + Some((visit, visit_mut)) + } + + /// Generate visitor calls for a type. + /// + /// e.g.: + /// * `visitor.visit_span(&it.span)` + /// * `if let Some(span) = &it.span { visitor.visit_span(span); }`. + /// + /// Returns `None` if this type is not visited. + /// + /// * `target` is the expression for the type, represented by a [`Target`]. + /// e.g. 
`it.span` in first example above, or `span` in the 2nd. + /// + /// * `visit_args` contains details of any extra arguments to be passed to visitor. + /// Parsed from `#[visit(args(flags = ScopeFlags::Function))]` attr on struct field / enum variant. + /// + /// * `field_ident` is [`Ident`] for the field. + /// Is used in output for `Option`s. e.g. `span` in `if let Some(span) = ...`. + /// + /// * `trailing_semicolon` indicates if a semicolon postfix is needed. + /// This is `true` for struct fields, `false` for enum variants. + /// + /// [`Ident`]: struct@Ident + fn generate_visit_type( + &self, + type_def: &TypeDef, + target: Target, + visit_args: Option<&Vec<(String, String)>>, + field_ident: &TokenStream, + trailing_semicolon: bool, + ) -> Option<(/* visit */ TokenStream, /* visit_mut */ TokenStream)> { + match type_def { + TypeDef::Struct(_) | TypeDef::Enum(_) => Self::generate_visit_struct_or_enum( + type_def, + target, + visit_args, + trailing_semicolon, + ), + TypeDef::Option(option_def) => { + self.generate_visit_option(option_def, target, visit_args, field_ident) + } + TypeDef::Box(box_def) => { + // `Box`es can be treated as transparent, as auto-deref handles it + self.generate_visit_type( + box_def.inner_type(self.schema), + target, + visit_args, + field_ident, + trailing_semicolon, + ) + } + TypeDef::Vec(vec_def) => { + self.generate_visit_vec(vec_def, target, visit_args, trailing_semicolon) + } + // Primitives and `Cell`s are not visited + TypeDef::Primitive(_) | TypeDef::Cell(_) => None, + } + } + + /// Generate visitor calls for a struct or enum. + /// + /// e.g. `visitor.visit_span(&it.span)` + /// + /// Returns `None` if this type is not visited. + /// + /// See comment on [`Self::generate_visit_type`] for details of parameters. 
+ fn generate_visit_struct_or_enum( + type_def: &TypeDef, + target: Target, + visit_args: Option<&Vec<(String, String)>>, + trailing_semicolon: bool, + ) -> Option<(/* visit */ TokenStream, /* visit_mut */ TokenStream)> { + if !is_visited(type_def) { + return None; + } + + let visit_fn_ident = format_ident!("visit_{}", type_def.snake_name()); + Some(Self::generate_visit_with_visit_args( + &visit_fn_ident, + target, + visit_args, + trailing_semicolon, + )) + } + + /// Generate visitor calls with specified visitor function name. + /// + /// Usually generates `visitor.visit_whatever(target)`, but also handles additional arguments to visitor. + /// e.g. if `visit_args` was parsed from `#[visit(args(flags = ScopeFlags::Function))]`, generates: + /// + /// ```ignore + /// { + /// let flags = ScopeFlags::Function; + /// visitor.visit_whatever(target, flags) + /// } + /// ``` + /// + /// See comment on [`Self::generate_visit_type`] for details of other parameters. + fn generate_visit_with_visit_args( + visit_fn_ident: &Ident, + target: Target, + visit_args: Option<&Vec<(String, String)>>, + trailing_semicolon: bool, + ) -> (/* visit */ TokenStream, /* visit_mut */ TokenStream) { + let (target_ref, target_mut) = target.generate_refs(); + + // Get extra function params for visit args. + // e.g. if attr on struct field/enum variant is `#[visit(args(x = something, y = something_else))]`, + // `extra_params` is `, x, y`. 
+ let extra_params = if let Some(args) = visit_args { + let arg_params = args.iter().map(|(arg_name, _)| format_ident!("{arg_name}")); + quote!( , #(#arg_params),* ) } else { - match def { - TypeDef::Enum(enum_) => self.generate_enum_walk(enum_), - TypeDef::Struct(struct_) => self.generate_struct_walk(struct_), + quote!() + }; + + let gen_visit = |target| { + let mut visit = quote!( visitor.#visit_fn_ident(#target #extra_params) ); + if trailing_semicolon { + visit.extend(quote!(;)); } + + let Some(visit_args) = visit_args else { return visit }; + + // Wrap a visit call with `let` statements for visit args. + // e.g. if attr on struct field/enum variant is `#[visit(args(x = something, y = something_else))]`, + // then output `{ let x = something; let y = something_else; visitor.visit_thing(it, x, y) }`. + let let_args = visit_args.iter().map(|(arg_name, arg_value)| { + let arg_ident = format_ident!("{arg_name}"); + let arg_value = parse_str::(&arg_value.cow_replace("self", "it")).unwrap(); + quote!( let #arg_ident = #arg_value; ) + }); + quote! {{ + #(#let_args)* + #visit + }} }; + (gen_visit(target_ref), gen_visit(target_mut)) + } - let visit_trait = if self.is_mut { quote!(VisitMut) } else { quote!(Visit) }; - let may_inline = if may_inline { Some(quote!(#[inline])) } else { None }; + /// Generate visitor calls for an `Option`. + /// + /// e.g.: + /// ```ignore + /// if let Some(span) = &it.span { + /// visitor.visit_span(span); + /// } + /// ``` + /// + /// Returns `None` if inner type is not visited. + /// + /// See comment on [`Self::generate_visit_type`] for details of parameters. 
+ fn generate_visit_option( + &self, + option_def: &OptionDef, + target: Target, + visit_args: Option<&Vec<(String, String)>>, + field_ident: &TokenStream, + ) -> Option<(/* visit */ TokenStream, /* visit_mut */ TokenStream)> { + let inner_type = option_def.inner_type(self.schema); + let (inner_visit, inner_visit_mut) = self.generate_visit_type( + inner_type, + Target::Reference(field_ident.clone()), + visit_args, + field_ident, + true, + )?; + let (target_ref, target_mut) = target.generate_refs(); - // replace the placeholder walker with the actual one! - self.walks[this_walker] = quote! { - ///@@line_break - #may_inline - pub fn #walk_name <'a, V: #visit_trait<'a>>(visitor: &mut V, it: #as_param_type #extra_params) { - #walk_body + let gen_visit = |inner_visit, target| { + quote! { + if let Some(#field_ident) = #target { + #inner_visit + } } }; - - visit_name + Some((gen_visit(inner_visit, target_ref), gen_visit(inner_visit_mut, target_mut))) } - fn generate_enum_walk(&mut self, enum_: &EnumDef) -> (TokenStream, /* inline */ bool) { - let ident = enum_.ident(); - let mut non_exhaustive = false; - let variants_matches = enum_ - .variants - .iter() - .filter(|var| { - if var.markers.visit.ignore { - // We are ignoring some variants so the match is no longer exhaustive. - non_exhaustive = true; - false - } else { - true + /// Generate visitor calls for a `Vec`. + /// + /// If `Vec` has its own visitor (it does when inner type is a struct or enum which is visited), + /// generates a call to that visitor e.g. `visitor.visit_statements(&it.statements)`. 
+ /// + /// Otherwise, generates code to loop through the `Vec`'s elements and call the inner type's visitor: + /// + /// ```ignore + /// for statements in it.statements.iter() { + /// visitor.visit_statement(statements); + /// } + /// ``` + /// + /// If inner type is an option, adds `.flatten()`: + /// + /// ```ignore + /// for statements in it.statements.iter().flatten() { + /// visitor.visit_statement(statements); + /// } + /// ``` + /// + /// Returns `None` if inner type is not visited. + /// + /// See comment on [`Self::generate_visit_type`] for details of parameters. + fn generate_visit_vec( + &self, + vec_def: &VecDef, + target: Target, + visit_args: Option<&Vec<(String, String)>>, + trailing_semicolon: bool, + ) -> Option<(/* visit */ TokenStream, /* visit_mut */ TokenStream)> { + let mut inner_type = vec_def.inner_type(self.schema); + if is_visited(inner_type) { + // Inner type is a struct or enum which is visited. This `Vec` has own visitor. + let visit_fn_ident = format_ident!("visit_{}", plural(inner_type.snake_name())); + return Some(Self::generate_visit_with_visit_args( + &visit_fn_ident, + target, + visit_args, + trailing_semicolon, + )); + } + + // Flatten any `Option`s with `.flatten()` on the iterator. + // Treat any `Box`es as transparent - auto-deref means we can ignore them. 
+ let mut maybe_flatten = quote!(); + loop { + match inner_type { + TypeDef::Option(option_def) => { + inner_type = option_def.inner_type(self.schema); + maybe_flatten.extend(quote!( .flatten() )); } - }) - .filter_map(|var| { - let typ = var - .fields - .iter() - .exactly_one() - .map(|f| &f.typ) - .map_err(|_| "We only support visited enum nodes with exactly one field!") - .unwrap(); - let variant_name = &var.ident(); - let type_id = typ.transparent_type_id()?; - let def = self.schema.get(type_id)?; - let is_visitable = def.is_visitable(); - if is_visitable { - let visit = self.get_visitor(def, false); - let (args_def, args) = var - .markers - .visit - .visit_args - .clone() - .unwrap_or_default() - .into_iter() - .fold((Vec::new(), Vec::new()), Self::visit_args_fold); - let body = quote!(visitor.#visit(it #(#args)*)); - let body = if args_def.is_empty() { - body - } else { - // if we have args wrap the result in a block to prevent ident clashes. - quote! {{ - #(#args_def)* - #body - }} - }; - Some(quote!(#ident::#variant_name(it) => #body)) - } else { - None + TypeDef::Box(box_def) => { + inner_type = box_def.inner_type(self.schema); } - }) - .collect_vec(); - - let inherit_matches = enum_.inherits.iter().filter_map(|it| { - let super_ = &it.super_; - let type_name = super_.name().as_name().unwrap().to_string(); - let def = super_.type_id().and_then(|id| self.schema.get(id))?; - if def.is_visitable() { - let snake_name = type_name.to_case(Case::Snake); - let match_macro = format_ident!("match_{snake_name}"); - let match_macro = quote!(#match_macro!(#ident)); - let to_child = if self.is_mut { - format_ident!("to_{snake_name}_mut") - } else { - format_ident!("to_{snake_name}") - }; - let visit = self.get_visitor(def, false); - Some(quote!(#match_macro => visitor.#visit(it.#to_child()))) - } else { - None + _ => break, } - }); + } - let matches = variants_matches.into_iter().chain(inherit_matches).collect_vec(); + // This `Vec` does not have it's own visitor. 
Loop through elements and visit each in turn. + let (inner_visit, inner_visit_mut) = self.generate_visit_type( + inner_type, + Target::Reference(create_ident_tokens("el")), + visit_args, + &create_ident_tokens("it"), + true, + )?; - let with_node_events = |tk| { - if KIND_BLACK_LIST.contains(&ident.to_string().as_str()) { - let comment = format!( - "@ No `{}` for this type", - if self.is_mut { "AstType" } else { "AstKind" } - ); - quote! { - #![doc = #comment] - #tk - } - } else { - let kind = self.kind_type(&ident); - quote! { - let kind = #kind; - visitor.enter_node(kind); - #tk - visitor.leave_node(kind); + let target = target.into_tokens(); + + let gen_visit = |inner_visit, iter_method| { + let iter_method_ident = format_ident!("{iter_method}"); + quote! { + for el in #target.#iter_method_ident() #maybe_flatten { + #inner_visit } } }; - let non_exhaustive = if non_exhaustive { Some(quote!(,_ => {})) } else { None }; - ( - with_node_events(quote!(match it { #(#matches),* #non_exhaustive })), - // inline if there are 5 or less match cases - matches.len() <= 5, - ) + let visit = gen_visit(inner_visit, "iter"); + let visit_mut = gen_visit(inner_visit_mut, "iter_mut"); + Some((visit, visit_mut)) } - fn generate_struct_walk(&mut self, struct_: &StructDef) -> (TokenStream, /* inline */ bool) { - let ident = struct_.ident(); - let scope_events = - struct_.markers.scope.as_ref().map_or_else(Default::default, |markers| { - let flags = markers - .flags - .as_ref() - .map_or_else(|| quote!(ScopeFlags::empty()), ToTokens::to_token_stream); - let flags = if let Some(strict_if) = &markers.strict_if { - let strict_if = - strict_if.to_token_stream().replace_ident("self", &format_ident!("it")); - quote! 
{{ - let mut flags = #flags; - if #strict_if { - flags |= ScopeFlags::StrictMode; - } - flags - }} - } else { - flags - }; - let enter = quote!(visitor.enter_scope(#flags, &it.scope_id);); - let leave = quote!(visitor.leave_scope();); - (enter, leave) - }); + /// Generate `visit_*` methods and `walk_*` functions for an enum. + /// + /// Also generates functions for types of enum variants. + fn generate_enum_visitor(&mut self, enum_def: &EnumDef) { + // Generate visit methods + let enum_ty = enum_def.ty(self.schema); + let type_snake_name = enum_def.snake_name(); + let visit_fn_ident = format_ident!("visit_{type_snake_name}"); + let walk_fn_ident = format_ident!("walk_{type_snake_name}"); - let node_events = if KIND_BLACK_LIST.contains(&ident.to_string().as_str()) { - let comment = - format!("@ No `{}` for this type", if self.is_mut { "AstType" } else { "AstKind" }); - (quote!(#![doc = #comment]), TokenStream::default()) - } else { - let kind = self.kind_type(&ident); - ( - quote! { - let kind = #kind; - visitor.enter_node(kind); - }, - quote!(visitor.leave_node(kind);), - ) + let gen_visit = |reference| { + quote! 
{ + ///@@line_break + #[inline] + fn #visit_fn_ident(&mut self, it: #reference #enum_ty) { + #walk_fn_ident(self, it); + } + } }; + self.visit_methods.extend(gen_visit(quote!( & ))); + self.visit_mut_methods.extend(gen_visit(quote!( &mut ))); - let mut enter_scope_at = 0; - let mut exit_scope_at: Option = None; - let mut enter_node_at = 0; - let fields_visits: Vec = struct_ - .fields + // Generate walk functions + let enum_ident = enum_def.ident(); + let has_kind = enum_def.kind.has_kind; + let (enter_node, leave_node) = generate_enter_and_leave_node(&enum_ident, has_kind, false); + let (enter_node_mut, leave_node_mut) = + generate_enter_and_leave_node(&enum_ident, has_kind, true); + + let mut match_arm_count = 0usize; + let (variant_match_arms, variant_match_arms_mut): (TokenStream, TokenStream) = enum_def + .variants .iter() - .enumerate() - .filter_map(|(ix, field)| { - let analysis = field.typ.analysis(); - let def = field.typ.transparent_type_id().and_then(|id| self.schema.get(id))?; - if !def.is_visitable() { - return None; - } - let typ_wrapper = &analysis.wrapper; - let markers = &field.markers; - let visit_args = markers.visit.visit_args.clone(); - - let have_enter_scope = markers.scope.enter_before; - let have_exit_scope = markers.scope.exit_before; - let have_enter_node = markers.visit.enter_before; - - let (args_def, args) = visit_args - .map(|it| it.into_iter().fold((Vec::new(), Vec::new()), Self::visit_args_fold)) - .unwrap_or_default(); - let visit = self.get_visitor( - def, - matches!( - typ_wrapper, - TypeWrapper::Vec | TypeWrapper::VecBox | TypeWrapper::OptVec - ), + .filter_map(|variant| { + let variant_type = variant.field_type(self.schema)?; + let (visit, visit_mut) = self.generate_visit_type( + variant_type, + Target::Reference(create_ident_tokens("it")), + variant.visit.visit_args.as_ref(), + &create_ident_tokens("it"), + false, + )?; + + match_arm_count += 1; + + let variant_ident = variant.ident(); + let match_pattern = quote!( 
#enum_ident::#variant_ident(it) ); + let match_arm = quote!( #match_pattern => #visit, ); + let match_arm_mut = quote!( #match_pattern => #visit_mut, ); + Some((match_arm, match_arm_mut)) + }) + .unzip(); + + let (inherits_match_arms, inherits_match_arms_mut): (TokenStream, TokenStream) = enum_def + .inherits_types(self.schema) + .map(|inherits_type| { + assert!( + is_visited(inherits_type), + "When an enum inherits variants from another enum and the inheritor is visited, \ + the inherited enum must also be visited: `{}`", + enum_def.name() ); - let name = field.ident().expect("expected named fields!"); - let borrowed_field = self.with_ref_pat(quote!(it.#name)); - let mut result = match typ_wrapper { - TypeWrapper::Opt | TypeWrapper::OptBox | TypeWrapper::OptVec => quote! { - if let Some(#name) = #borrowed_field { - visitor.#visit(#name #(#args)*); - } - }, - TypeWrapper::VecOpt => { - let iter = if self.is_mut { quote!(iter_mut) } else { quote!(iter) }; - quote! { - for el in it.#name.#iter().flatten() { - visitor.#visit(el #(#args)*); - } - } - } - _ => quote! { - visitor.#visit(#borrowed_field #(#args)*); - }, + + match_arm_count += 1; + + let inherits_snake_name = inherits_type.snake_name(); + let match_ident = format_ident!("match_{inherits_snake_name}"); + let inner_visit_fn_ident = format_ident!("visit_{inherits_snake_name}"); + + let to_fn_ident = format_ident!("to_{inherits_snake_name}"); + let match_arm = quote! { + #match_ident!(#enum_ident) => visitor.#inner_visit_fn_ident(it.#to_fn_ident()), + }; + + let to_fn_ident_mut = format_ident!("to_{inherits_snake_name}_mut"); + let match_arm_mut = quote! { + #match_ident!(#enum_ident) => visitor.#inner_visit_fn_ident(it.#to_fn_ident_mut()), }; - // This comes first because we would prefer the `enter_node` to be placed on top of `enter_scope` - if have_enter_scope { - assert_eq!(enter_scope_at, 0); - let scope_enter = &scope_events.0; - result = quote! 
{ - #scope_enter - #result - }; - enter_scope_at = ix; + (match_arm, match_arm_mut) + }) + .unzip(); + + // Add catch-all match arm if not all variants are visited + let catch_all_match_arm = + if match_arm_count < enum_def.variants.len() + enum_def.inherits.len() { + quote!( _ => {} ) + } else { + quote!() + }; + + // `#[inline]` if there are 5 or less match cases + // TODO: Is this ideal? + let maybe_inline_attr = if match_arm_count <= 5 { quote!( #[inline] ) } else { quote!() }; + + self.walk_fns.extend(quote! { + ///@@line_break + #maybe_inline_attr + pub fn #walk_fn_ident<'a, V: Visit<'a>>(visitor: &mut V, it: & #enum_ty) { + #enter_node + match it { + #variant_match_arms + #inherits_match_arms + #catch_all_match_arm } - if have_exit_scope { - assert!( - exit_scope_at.is_none(), - "Scopes cannot be exited more than once. Remove the extra `#[scope(exit_before)]` attribute(s)." - ); - let scope_exit = &scope_events.1; - result = quote! { - #scope_exit - #result - }; - exit_scope_at = Some(ix); + #leave_node + } + }); + self.walk_mut_fns.extend(quote! { + ///@@line_break + #maybe_inline_attr + pub fn #walk_fn_ident<'a, V: VisitMut<'a>>(visitor: &mut V, it: &mut #enum_ty) { + #enter_node_mut + match it { + #variant_match_arms_mut + #inherits_match_arms_mut + #catch_all_match_arm } + #leave_node_mut + } + }); - #[expect(unreachable_code)] - if have_enter_node { - // NOTE: this is disabled intentionally - unreachable!("`#[visit(enter_before)]` attribute is disabled!"); - assert_eq!(enter_node_at, 0); - let node_enter = &node_events.0; - result = quote! { - #node_enter - #result - }; - enter_node_at = ix; - } + // Generate visitors for variant types and inherited types + for variant in &enum_def.variants { + if let Some(variant_type) = variant.field_type(self.schema) { + self.generate_visitor(variant_type); + } + } - if args_def.is_empty() { - Some(result) - } else { - // if we have args wrap the result in a block to prevent ident clashes. - Some(quote! 
{{ - #(#args_def)* - #result - }}) + for inherits_type in enum_def.inherits_types(self.schema) { + self.generate_visitor(inherits_type); + } + } + + /// Generate `visit_*` methods and `walk_*` functions for a `Vec`. + /// + /// Also generates functions for inner type (`T` in `Vec`). + fn generate_vec_visitor(&mut self, vec_def: &VecDef) { + let inner_type = vec_def.inner_type(self.schema); + if is_visited(inner_type) { + // Generate visit methods + let vec_ty = vec_def.ty(self.schema); + let plural_snake_name = plural(inner_type.snake_name()); + let visit_fn_ident = format_ident!("visit_{plural_snake_name}"); + let walk_fn_ident = format_ident!("walk_{plural_snake_name}"); + + let gen_visit = |reference| { + quote! { + ///@@line_break + #[inline] + fn #visit_fn_ident(&mut self, it: #reference #vec_ty) { + #walk_fn_ident(self, it); + } } - }) - .collect(); - - let with_node_events = |body: TokenStream| match (node_events, enter_node_at) { - ((enter, leave), 0) => quote! { - #enter - #body - #leave - }, - ((_, leave), _) => quote! { - #body - #leave - }, - }; + }; + self.visit_methods.extend(gen_visit(quote!( & ))); + self.visit_mut_methods.extend(gen_visit(quote!( &mut ))); - let with_scope_events = - |body: TokenStream| match (scope_events, enter_scope_at, exit_scope_at) { - ((enter, leave), 0, None) => quote! { - #enter - #body - #leave - }, - ((_, leave), _, None) => quote! { - #body - #leave - }, - ((enter, _), 0, Some(_)) => quote! { - #enter - #body - }, - ((_, _), _, Some(_)) => quote! { - #body - }, + // Generate walk functions + let inner_visit_fn_ident = format_ident!("visit_{}", inner_type.snake_name()); + let gen_walk = |visit_trait_name, reference| { + let visit_trait_ident = format_ident!("{visit_trait_name}"); + quote! 
{ + ///@@line_break + #[inline] + pub fn #walk_fn_ident<'a, V: #visit_trait_ident<'a>>(visitor: &mut V, it: #reference #vec_ty) { + for el in it { + visitor.#inner_visit_fn_ident(el); + } + } + } }; + self.walk_fns.extend(gen_walk("Visit", quote!( & ))); + self.walk_mut_fns.extend(gen_walk("VisitMut", quote!( &mut ))); + } + + // Generate visitor for inner type + self.generate_visitor(inner_type); + } +} + +/// Target for a visit function call. +/// +/// * `Target::Reference` represents a variable which is already a reference. +/// e.g. `span` in `if let Some(span) = &it.span {}` +/// Does not need `&` / `&mut` prepended to it when using it. +/// * `Target::Property` represents an object property e.g. `it.span`. +/// Needs `&` / `&mut` prepended to it when using it in most circumstances. +enum Target { + Reference(TokenStream), + Property(TokenStream), +} - let body = with_node_events(with_scope_events(quote!(#(#fields_visits)*))); +impl Target { + /// Prepend target with `&` or `&mut` if required. + /// + /// * If this [`Target`] is already a reference, return just the identifier. + /// * Otherwise, return pair of refs - `&target` and `&mut target`. + fn generate_refs(self) -> (TokenStream, TokenStream) { + match self { + Self::Reference(ident) => (ident.clone(), ident), + Self::Property(prop) => (quote!( &#prop ), quote!( &mut #prop )), + } + } - // inline if there are 5 or less fields. - (body, fields_visits.len() <= 5) + /// Get this [`Target`] as token stream, without prepending `&` / `&mut`. + fn into_tokens(self) -> TokenStream { + match self { + Self::Reference(ident) => ident, + Self::Property(prop) => prop, + } } +} + +/// Generate code for `enter_node` and `leave_node`. +/// +/// If the type has no `AstKind`, returns a comment for enter, and empty token stream for exit. 
+fn generate_enter_and_leave_node( + type_ident: &Ident, + has_kind: bool, + is_mut: bool, +) -> (/* enter_node */ TokenStream, /* leave_node */ TokenStream) { + if has_kind { + let kind = if is_mut { + quote!( AstType::#type_ident ) + } else { + quote!( AstKind::#type_ident(visitor.alloc(it)) ) + }; + let enter_node = quote! { + let kind = #kind; + visitor.enter_node(kind); + }; + let leave_node = quote!( visitor.leave_node(kind); ); + (enter_node, leave_node) + } else { + let comment = + format!("@ No `{}` for this type", if is_mut { "AstType" } else { "AstKind" }); + (quote!( #![doc = #comment] ), quote!()) + } +} + +/// Get plural of a snake case name. +fn plural(mut name: String) -> String { + if matches!(name.as_str(), "formal_parameter" | "ts_import_attribute") { + // Edge case for `Vec` to avoid conflicts with `FormalParameters` + // which both would generate the same name: `visit_formal_parameters`. + // Same for `Vec` to avoid conflicts with `TSImportAttributes`. + // TODO: Don't hardcode this - check for clashing type names, or use an attr to supply plural name. + name.push_str("_list"); + } else if name.ends_with("child") { + name.push_str("ren"); + } else { + match name.as_bytes().last() { + Some(b's') => { + name.push_str("es"); + } + Some(b'y') => { + name.pop(); + name.push_str("ies"); + } + _ => name.push('s'), + } + } + name +} - fn visit_args_fold( - mut accumulator: (Vec, Vec), - arg: VisitArg, - ) -> (Vec, Vec) { - let VisitArg { ident: id, value: val } = arg; - let val = val.to_token_stream().replace_ident("self", &format_ident!("it")); - accumulator.0.push(quote!(let #id = #val;)); - accumulator.1.push(quote!(, #id)); - accumulator +/// Get if a type is visited. 
+fn is_visited(type_def: &TypeDef) -> bool { + match type_def { + TypeDef::Struct(struct_def) => struct_def.visit.is_visited, + TypeDef::Enum(enum_def) => enum_def.visit.is_visited, + _ => false, } } diff --git a/tasks/ast_tools/src/layout.rs b/tasks/ast_tools/src/layout.rs deleted file mode 100644 index 1c6a8a315fa67..0000000000000 --- a/tasks/ast_tools/src/layout.rs +++ /dev/null @@ -1,170 +0,0 @@ -#[derive(Debug, Default, Clone)] -pub enum Layout { - #[default] - Unknown, - Layout(KnownLayout), -} - -impl Layout { - pub const fn known(size: usize, align: usize, niches: u128) -> Self { - Self::Layout(KnownLayout { size, align, niches, offsets: None }) - } - - pub fn layout(self) -> Option { - if let Self::Layout(layout) = self { - Some(layout) - } else { - None - } - } -} - -impl From for Layout { - fn from(layout: KnownLayout) -> Self { - Self::Layout(layout) - } -} - -#[derive(Debug, Default, Clone)] -pub struct KnownLayout { - size: usize, - align: usize, - /// number of available niches - niches: u128, - offsets: Option>, -} - -impl KnownLayout { - pub const fn new(size: usize, align: usize, niches: u128) -> Self { - Self { size, align, niches, offsets: None } - } - - #[inline] - pub fn size(&self) -> usize { - self.size - } - - #[inline] - pub fn align(&self) -> usize { - self.align - } - - /// number of available niches - #[inline] - pub fn niches(&self) -> u128 { - self.niches - } - - #[expect(unused)] - #[inline] - pub fn offsets(&self) -> Option<&Vec> { - self.offsets.as_ref() - } - - pub fn with_offsets(mut self, offsets: Vec) -> Self { - self.offsets = Some(offsets); - self - } - - /// Panics - /// if doesn't have enough viable space and `can_resize` is false - pub fn consume_niches(&mut self, n: u128, can_resize: bool) { - if self.niches() >= n { - self.niches -= n; - } else if can_resize { - let align = self.align(); - self.size += align; - self.niches += max_val_of_bytes(align); - self.consume_niches(n, can_resize); - } else { - panic!("`{}` called 
on a layout without enough space.", stringify!(consume_niches)); - } - } - - pub fn unpack(self) -> (/* size */ usize, /* align */ usize, /* offsets */ Option>) { - let Self { size, align, offsets, .. } = self; - (size, align, offsets) - } -} - -impl Layout { - /// # Panics - /// If `T` has more than 8 niches. - pub const fn of() -> Self { - // TODO: find a better way of calculating this. - struct N1(Option); - struct N2(N1>); - struct N3(N1>); - struct N4(N1>); - struct N5(N1>); - struct N6(N1>); - struct N7(N1>); - struct N8(N1>); - - let size = size_of::(); - let align = align_of::(); - let niches = if size_of::>() > size { - 0 - } else if size_of::>() > size { - 1 - } else if size_of::>() > size { - 2 - } else if size_of::>() > size { - 3 - } else if size_of::>() > size { - 4 - } else if size_of::>() > size { - 5 - } else if size_of::>() > size { - 6 - } else if size_of::>() > size { - 7 - } else if size_of::>() == size { - 8 - } else { - panic!("`T` has more niches than what we can infer automatically"); - }; - - Self::known(size, align, niches) - } - - pub const fn zero() -> Self { - #[repr(C)] - struct Empty; - Self::of::() - } - - pub const fn ptr_32() -> Self { - Layout::known(4, 4, 0) - } - - pub const fn ptr_64() -> Self { - Layout::known(8, 8, 0) - } - - pub const fn wide_ptr_32() -> Self { - Layout::known(8, 4, 1) - } - - pub const fn wide_ptr_64() -> Self { - Layout::of::<&str>() - } - - pub fn is_unknown(&self) -> bool { - matches!(self, Self::Unknown) - } -} - -/// Returns the max valid number in a primitive with the size of `n` bytes. 
-/// Panics -/// For `n` bigger than `16`, Or if it's not a power of 2 number -fn max_val_of_bytes(n: usize) -> u128 { - match n { - 1 => u128::from(u8::MAX), - 2 => u128::from(u16::MAX), - 4 => u128::from(u32::MAX), - 8 => u128::from(u64::MAX), - 16 => u128::MAX, - _ => panic!("We do not support `n` bigger than 16 bytes."), - } -} diff --git a/tasks/ast_tools/src/logger.rs b/tasks/ast_tools/src/logger.rs index fafb163eb3057..be85004396134 100644 --- a/tasks/ast_tools/src/logger.rs +++ b/tasks/ast_tools/src/logger.rs @@ -2,14 +2,18 @@ use std::sync::OnceLock; static LOG: OnceLock = OnceLock::new(); -pub(super) fn quiet() -> Result<(), bool> { - LOG.set(false) +/// Disable logging. +pub(super) fn quiet() { + LOG.set(false).expect("Failed to disable logger"); } pub(super) fn __internal_log_enable() -> bool { *LOG.get_or_init(|| true) } +/// Log a message to stdout. +/// +/// Does not include a trailing newline. macro_rules! log { ($fmt:literal $(, $args:expr)*) => { if $crate::logger::__internal_log_enable() { @@ -20,6 +24,20 @@ macro_rules! log { } pub(crate) use log; +/// Log a message to stdout. +/// +/// Includes a trailing newline. +macro_rules! logln { + ($fmt:literal $(, $args:expr)*) => { + if $crate::logger::__internal_log_enable() { + println!($fmt$(, $args)*); + std::io::Write::flush(&mut std::io::stdout()).unwrap(); + } + } +} +pub(crate) use logln; + +/// Log "Success". macro_rules! log_success { () => { $crate::log!("Done!\n"); @@ -27,6 +45,7 @@ macro_rules! log_success { } pub(crate) use log_success; +/// Log "FAILED". macro_rules! log_failed { () => { $crate::log!("FAILED\n"); @@ -34,6 +53,7 @@ macro_rules! log_failed { } pub(crate) use log_failed; +/// Log a [`Result`]. macro_rules! 
log_result { ($result:expr) => { match &($result) { diff --git a/tasks/ast_tools/src/main.rs b/tasks/ast_tools/src/main.rs index 5a4ecff28abd9..9eb9656dcd308 100644 --- a/tasks/ast_tools/src/main.rs +++ b/tasks/ast_tools/src/main.rs @@ -1,35 +1,198 @@ -use std::{cell::RefCell, io::Read, path::PathBuf, rc::Rc}; +//! Generator of code related to AST. +//! +//! # Overview +//! +//! `oxc_ast_tools` is a framework for generating code related to the AST. +//! +//! There are 3 main elements to this crate: +//! +//! 1. [`Codegen`] - Contains data for running the code generation process. +//! 2. [`Schema`] - Schema of all AST types, and their inter-relations. +//! 3. [`Generator`]s and [`Derive`]s - Code generators which generate code, based on the [`Schema`]. +//! +//! AST types are annotated with custom attributes (e.g. `#[visit]`, `#[scope]`). +//! These attributes guide [`Generator`]s and [`Derive`]s to generate code appropriately. +//! +//! [`Derive`]s are executed for a type if the type definition is tagged `#[generate_derive(TraitName)]`. +//! +//! The rest of this documentation explains how the code generation works, and how to add a new +//! code generator to this crate. +//! +//! # When code generation happens, and where it goes +//! +//! Code generation can be triggered by running this crate: +//! +//! ```sh +//! cargo run -p oxc_ast_tools +//! ``` +//! +//! The generated code is checked into git. +//! +//! Code generation is *not* run automatically during compilation. This has 2 advantages: +//! 1. Code generation does not slow down compile times, unlike e.g. proc macros. +//! 2. Generated code can be viewed and navigated easily in an IDE, in the usual way. +//! +//! # Phases +//! +//! The codegen process proceeds in 5 phases: +//! +//! ### Phase 1: Load +//! +//! All the source files listed in [`SOURCE_PATHS`] are read, and parsed with [`syn`]. +//! +//! At this stage, only type names and other basic information about types is obtained. +//! 
Each type is assigned a [`TypeId`], and a mapping of type name to [`TypeId`] is built. +//! +//! This is the bare minimum required to link types up to each other in the next phase. +//! +//! ### Phase 2: Parse +//! +//! In this phase, [`syn`]'s ASTs for each type definition are parsed in full to generate +//! a [`TypeDef`] for each type. +//! +//! These [`TypeDef`]s contain all the info about each type: +//! +//! * [`StructDef`] contains info about the name and type of every field in the struct. +//! * [`EnumDef`] contains info about all the enum's variants. +//! +//! Additional "defs" are created for other known types which are encountered in the AST: +//! +//! * `Option`: [`OptionDef`] +//! * `Box`: [`BoxDef`] +//! * `Vec`: [`VecDef`] +//! * `Cell`: [`CellDef`] +//! * Primitive types: [`PrimitiveDef`] - e.g. `u32`, `&str` +//! * Special types: [`PrimitiveDef`] - e.g. `Atom` +//! +//! The types are linked up to each other, so that all struct fields ([`FieldDef`]s) contain +//! the [`TypeId`] of the type that field contains. Ditto enum variants ([`VariantDef`]s). +//! Container types e.g. [`VecDef`] contain the `TypeId` of the inner type (e.g. `T` in `Vec`). +//! +//! Custom attributes on types (e.g. `#[visit]`) are also parsed at this stage, in conjunction with +//! the [`Generator`]s and [`Derive`]s which define those attributes. +//! +//! The end result of this phase is the [`Schema`], which is the single source of truth about the AST. +//! +//! After this point, the types produced by `syn` are not used - all info about the AST is in +//! the [`Schema`], and everything from this point onwards works off the `Schema` only. +//! +//! ### Phase 3: Prepare +//! +//! [`Generator`]s and [`Derive`]s have already had a chance to input into the creation of the [`Schema`], +//! setting properties on [`StructDef`]s and [`EnumDef`] during parsing of custom attributes. +//! +//! However, at that point generators only had access to a single `StructDef` or `EnumDef` at a time. 
+//! +//! Now, in the prepare phase, generators can perform any modifications to the `Schema` that require +//! access to more than 1 [`TypeDef`] at the same time. They do this by implementing the +//! [`Generator::prepare`] or [`Derive::prepare`] method. +//! A good example of this is the [`AssertLayouts`] generator. +//! +//! At the end of this phase, the [`Schema`] is locked as read-only. +//! +//! ### Phase 4: Generate +//! +//! This is main code-generation phase. +//! +//! Each generator is run in parallel, and provided an immutable reference to [`Schema`] and [`Codegen`]. +//! +//! The difference between `Generator`s and `Derive`s is: +//! +//! * [`Generator`]s act on the entire AST in one go. They can generate 1 or more [`Output`]s, +//! which can be Rust code, JS code, or other types of output. +//! +//! * [`Derive`]s act on a single type at a time (though they also have access to the whole `Schema`). +//! [`Derive::derive`] should return a [`TokenStream`] containing an implementation of the trait +//! the `Derive` is for. `oxc_ast_tools` combines these into a single output file for each crate. +//! +//! [`Output`]s are converted to [`RawOutput`]s, which includes formatting the generated code +//! with `rustfmt` or `dprint`. +//! +//! ### Phase 5: Output +//! +//! All [`RawOutput`]s generated in previous phase are written to disk. +//! +//! # Generators and Derives +//! +//! [`Generator`]s and [`Derive`]s should keep "special case" logic written with the generator's code +//! to a minimum (and ideally not do it at all). +//! +//! Any info that the generator needs about how to treat each type should be recorded on the type +//! definition itself, with custom attributes e.g. `#[visit]`, `#[clone_in(default)]` - instead of +//! hard-coding those cases within the generator code itself. +//! +//! A generator defines attributes that it uses by implementing [`Generator::attrs`] / [`Derive::attrs`] +//! method. 
During parsing phase, [`Generator::parse_attr`] / [`Derive::parse_attr`] will be called +//! with details of where those attributes were found, and the generator can record that info in the +//! `Schema`. +//! +//! # Creating a new Generator or Derive +//! +//! ## [`Generator`] +//! * Add a file to `generators` directory in this crate e.g. `generators/picture.rs`. +//! * Add a reference to it to [`GENERATORS`] in `main.rs`. +//! +//! ## [`Derive`] +//! * Add a file to `derives` directory in this crate e.g. `derives/get_flaps.rs`. +//! * Add a reference to it to [`DERIVES`] in `main.rs`. +//! +//! ## Both +//! * If the generator needs to store extra info in the `Schema`, create a file in `schema/extensions` +//! directory e.g. `schema/extensions/picture.rs`. +//! * Import that file into `mod extensions` in `schema/mod.rs`. +//! * That file should define types for structs / enums / struct fields / enum variants, depending on +//! where the data needs to be stored. e.g. `PictureStruct`, `PictureEnumField`. +//! * Those types must implement `Default` and `Debug`. +//! * Add those types to [`StructDef`], [`EnumDef`], [`FieldDef`] and/or [`VariantDef`]. +//! * Implement [`Generator::attrs`] / [`Derive::attrs`] to declare the generator's custom attributes. +//! * Implement [`Generator::parse_attr`] / [`Derive::parse_attr`] to parse those attributes +//! and mutate the "extension" types in [`Schema`] as required. +//! * Add the attributes' names to the list on `ast_derive` in `crates/oxc_ast_macros/src/lib.rs`. +//! +//! #### Attributes +//! +//! `oxc_ast_tools` provides abstractions [`AttrLocation`] and [`AttrPart`] which assist with parsing +//! custom attributes, and are much simpler than `syn`'s types. +//! +//! [`TypeId`]: schema::TypeId +//! [`TypeDef`]: schema::TypeDef +//! [`StructDef`]: schema::StructDef +//! [`EnumDef`]: schema::EnumDef +//! [`OptionDef`]: schema::OptionDef +//! [`BoxDef`]: schema::BoxDef +//! [`VecDef`]: schema::VecDef +//! 
[`CellDef`]: schema::CellDef +//! [`PrimitiveDef`]: schema::PrimitiveDef +//! [`FieldDef`]: schema::FieldDef +//! [`VariantDef`]: schema::VariantDef +//! [`AssertLayouts`]: generators::AssertLayouts +//! [`TokenStream`]: proc_macro2::TokenStream +//! [`AttrLocation`]: parse::attr::AttrLocation +//! [`AttrPart`]: parse::attr::AttrPart + +use std::fmt::Write; use bpaf::{Bpaf, Parser}; -use codegen::{AstCodegen, AstCodegenResult}; -use itertools::Itertools; -use syn::parse_file; +use rayon::prelude::*; mod codegen; mod derives; mod generators; -mod layout; mod logger; -mod markers; mod output; -mod passes; -mod rust_ast; +mod parse; mod schema; -mod util; - -use derives::{ - DeriveCloneIn, DeriveContentEq, DeriveESTree, DeriveGetAddress, DeriveGetSpan, DeriveGetSpanMut, -}; -use generators::{ - AssertLayouts, AstBuilderGenerator, AstKindGenerator, Generator, GetIdGenerator, - TypescriptGenerator, VisitGenerator, VisitMutGenerator, -}; -use logger::{log, log_failed, log_result, log_success}; +mod utils; + +use codegen::{get_runners, Codegen, Runner}; +use derives::Derive; +use generators::Generator; +use logger::{log, log_failed, log_result, log_success, logln}; use output::{Output, RawOutput}; -use passes::{CalcLayout, Linker}; +use parse::parse_files; use schema::Schema; -use util::NormalizeError; +/// Paths to source files containing AST types static SOURCE_PATHS: &[&str] = &[ "crates/oxc_ast/src/ast/literal.rs", "crates/oxc_ast/src/ast/js.rs", @@ -43,94 +206,123 @@ static SOURCE_PATHS: &[&str] = &[ "crates/oxc_regular_expression/src/ast.rs", ]; +/// Path to `oxc_ast` crate const AST_CRATE: &str = "crates/oxc_ast"; -const TYPESCRIPT_PACKAGE: &str = "npm/oxc-types"; + +/// Path to write TS type definitions to +const TYPESCRIPT_DEFINITIONS_PATH: &str = "npm/oxc-types/types.d.ts"; + +/// Path to write CI filter list to const GITHUB_WATCH_LIST_PATH: &str = ".github/.generated_ast_watch_list.yml"; -const SCHEMA_PATH: &str = "schema.json"; -type Result = 
std::result::Result; -type TypeId = usize; +/// Derives (for use with `#[generate_derive]`) +const DERIVES: &[&(dyn Derive + Sync)] = &[ + &derives::DeriveCloneIn, + &derives::DeriveGetAddress, + &derives::DeriveGetSpan, + &derives::DeriveGetSpanMut, + &derives::DeriveContentEq, + &derives::DeriveESTree, +]; + +/// Code generators +const GENERATORS: &[&(dyn Generator + Sync)] = &[ + &generators::AssertLayouts, + &generators::AstKindGenerator, + &generators::AstBuilderGenerator, + &generators::GetIdGenerator, + &generators::VisitGenerator, + &generators::TypescriptGenerator, +]; +type Result = std::result::Result; + +/// CLI options. #[derive(Debug, Bpaf)] -pub struct CliOptions { - /// Runs all generators but won't write anything down. - #[bpaf(switch)] +struct CliOptions { + /// Run all generators but don't write to disk dry_run: bool, - /// Prints no logs. + /// Run all generators in series (useful when debugging) + serial: bool, + /// Print no logs quiet: bool, - /// Output JSON schema. 
- schema: bool, } -fn main() -> std::result::Result<(), Box> { - let cli_options = cli_options().run(); +fn main() { + // Parse CLI options + let options = cli_options().run(); - if cli_options.quiet { - logger::quiet().normalize_with("Failed to set logger to `quiet` mode.")?; + // Init logger + if options.quiet { + logger::quiet(); } - let AstCodegenResult { mut outputs, schema } = SOURCE_PATHS - .iter() - .fold(AstCodegen::default(), AstCodegen::add_file) - .pass(Linker) - .pass(CalcLayout) - .generate(DeriveCloneIn) - .generate(DeriveGetAddress) - .generate(DeriveGetSpan) - .generate(DeriveGetSpanMut) - .generate(DeriveContentEq) - .generate(DeriveESTree) - .generate(AssertLayouts) - .generate(AstKindGenerator) - .generate(AstBuilderGenerator) - .generate(GetIdGenerator) - .generate(VisitGenerator) - .generate(VisitMutGenerator) - .generate(TypescriptGenerator) - .run()?; - - outputs.push(generate_ci_filter(&outputs)); + // Parse inputs and generate `Schema` + let codegen = Codegen::new(); + let mut schema = parse_files(SOURCE_PATHS, &codegen); - if cli_options.schema { - outputs.push(generate_json_schema(&schema)?); + // Run `prepare` actions + let runners = get_runners(); + for runner in &runners { + runner.prepare(&mut schema); } - if !cli_options.dry_run { + // Run generators + let mut outputs = if options.serial { + // Run in series + let mut outputs = vec![]; + for runner in &runners { + outputs.extend(runner.run(&schema, &codegen)); + } + outputs + } else { + // Run in parallel + runners.par_iter().map(|runner| runner.run(&schema, &codegen)).reduce( + Vec::new, + |mut outputs, runner_outputs| { + outputs.extend(runner_outputs); + outputs + }, + ) + }; + + logln!("All Derives and Generators... 
Done!"); + + // Add CI filter file to outputs + outputs.sort_unstable_by(|o1, o2| o1.path.cmp(&o2.path)); + outputs.push(generate_ci_filter(&outputs)); + + // Write outputs to disk + if !options.dry_run { for output in outputs { - output.write_to_file()?; + output.write_to_file().unwrap(); } } - - Ok(()) } +/// Generate CI filter list file. +/// +/// This is used in `ast_changes` CI job to skip running `oxc_ast_tools` +/// unless relevant files have changed. +/// +/// List includes source files, generated files, and all files in `oxc_ast_tools` itself. fn generate_ci_filter(outputs: &[RawOutput]) -> RawOutput { log!("Generate CI filter... "); - let mut code = "src:\n".to_string(); - let mut push_item = |path: &str| code.push_str(format!(" - '{path}'\n").as_str()); - - for input in SOURCE_PATHS { - push_item(input); - } + let mut paths = SOURCE_PATHS + .iter() + .copied() + .chain(outputs.iter().map(|output| output.path.as_str())) + .chain(["tasks/ast_tools/src/**", GITHUB_WATCH_LIST_PATH]) + .collect::>(); + paths.sort_unstable(); - for output in outputs { - push_item(output.path.as_str()); + let mut code = "src:\n".to_string(); + for path in paths { + writeln!(&mut code, " - '{path}'").unwrap(); } - push_item("tasks/ast_tools/src/**"); - push_item(GITHUB_WATCH_LIST_PATH); - log_success!(); Output::Yaml { path: GITHUB_WATCH_LIST_PATH.to_string(), code }.into_raw(file!()) } - -fn generate_json_schema(schema: &Schema) -> Result { - log!("Generate JSON schema... 
"); - let result = serde_json::to_string_pretty(&schema.defs).normalize(); - log_result!(result); - let schema = result?; - let output = Output::Raw { path: SCHEMA_PATH.to_string(), code: schema }.into_raw(file!()); - Ok(output) -} diff --git a/tasks/ast_tools/src/markers.rs b/tasks/ast_tools/src/markers.rs deleted file mode 100644 index d5d8cff5e7f13..0000000000000 --- a/tasks/ast_tools/src/markers.rs +++ /dev/null @@ -1,476 +0,0 @@ -use proc_macro2::TokenStream; -use quote::ToTokens; -use serde::Serialize; -use syn::{ - ext::IdentExt, - parenthesized, - parse::{Parse, ParseStream}, - parse2, - punctuated::{self, Punctuated}, - spanned::Spanned, - token, Attribute, Expr, Ident, LitStr, Meta, MetaNameValue, Path, Token, -}; - -use crate::util::NormalizeError; - -/// A single visit argument passed via `#[visit(args(...))]` -#[derive(Debug, Clone)] -pub struct VisitArg { - pub ident: Ident, - pub value: Expr, -} - -impl Parse for VisitArg { - fn parse(input: ParseStream) -> Result { - let nv: MetaNameValue = input.parse()?; - Ok(Self { - ident: nv.path.get_ident().map_or_else( - || Err(syn::Error::new(nv.span(), "Invalid `visit_args` input!")), - |it| Ok(it.clone()), - )?, - value: nv.value, - }) - } -} - -/// A struct containing `#[visit(args(...))]` items -/// ^^^^^^^^^ -#[derive(Debug, Default, Clone)] -pub struct VisitArgs(Punctuated); - -impl IntoIterator for VisitArgs { - type IntoIter = punctuated::IntoIter; - type Item = VisitArg; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -impl Parse for VisitArgs { - fn parse(input: ParseStream) -> Result { - input.parse_terminated(VisitArg::parse, Token![,]).map(Self) - } -} - -/// A struct representing `#[visit(...)]` markers -#[derive(Default, Debug)] -pub struct VisitMarkers { - pub visit_args: Option, - pub enter_before: bool, - pub ignore: bool, -} - -/// A struct representing `#[scope(...)]` markers -#[derive(Default, Debug)] -pub struct ScopeMarkers { - /// `#[scope(enter_before)]` - 
pub enter_before: bool, - /// `#[scope(exit_before)]` - pub exit_before: bool, -} - -/// A struct representing all the helper attributes that might be used with `#[generate_derive(...)]` -#[derive(Debug, Default, Serialize)] -pub struct DeriveAttributes { - pub clone_in: CloneInAttribute, - pub estree: ESTreeFieldAttribute, -} - -/// A enum representing the value passed in `#[clone_in(...)]` derive helper attribute. -#[derive(Debug, Default, Serialize)] -pub enum CloneInAttribute { - #[default] - None, - Default, -} - -impl From<&Ident> for CloneInAttribute { - fn from(ident: &Ident) -> Self { - if ident == "default" { - Self::Default - } else { - panic!("Invalid argument used in `#[clone_in(...)]` attribute."); - } - } -} - -/// An enum representing the `#[estree(...)]` attributes that we implement for structs. -#[derive(Debug, Serialize, PartialEq, Eq)] -pub struct ESTreeStructAttribute { - pub tag_mode: Option, - pub always_flatten: bool, - pub via: Option, - pub add_ts: Option, -} - -#[derive(Debug, Serialize, PartialEq, Eq)] -pub enum ESTreeStructTagMode { - CustomSerialize, - NoType, - Type(String), -} - -impl Parse for ESTreeStructAttribute { - fn parse(input: ParseStream) -> Result { - let mut tag_mode = None; - let mut always_flatten = false; - let mut via = None; - let mut add_ts = None; - - loop { - let ident = input.call(Ident::parse_any).unwrap().to_string(); - match ident.as_str() { - "always_flatten" => { - if always_flatten { - panic!("Duplicate estree(always_flatten)"); - } else { - always_flatten = true; - } - } - "custom_serialize" => { - assert!( - tag_mode.replace(ESTreeStructTagMode::CustomSerialize).is_none(), - "Duplicate tag mode in #[estree(...)]" - ); - } - "no_type" => { - assert!( - tag_mode.replace(ESTreeStructTagMode::NoType).is_none(), - "Duplicate tag mode in #[estree(...)]" - ); - } - "rename" => { - input.parse::()?; - let value = input.parse::()?.value(); - assert!( - tag_mode.replace(ESTreeStructTagMode::Type(value)).is_none(), 
- "Duplicate tag mode in #[estree(...)]" - ); - } - "via" => { - input.parse::()?; - let value = input.parse::()?.to_token_stream().to_string(); - assert!(via.replace(value).is_none(), "Duplicate estree(via)"); - } - "add_ts" => { - input.parse::()?; - let value = input.parse::()?.value(); - assert!(add_ts.replace(value).is_none(), "Duplicate estree(add_ts)"); - } - arg => panic!("Unsupported #[estree(...)] argument: {arg}"), - } - let comma = input.peek(Token![,]); - if comma { - input.parse::().unwrap(); - } else { - break; - } - } - Ok(Self { tag_mode, always_flatten, via, add_ts }) - } -} - -/// A struct representing the `#[estree(...)]` attributes that we implement for enums. -#[derive(Debug, Serialize, Default)] -pub struct ESTreeEnumAttribute { - pub no_rename_variants: bool, - pub custom_ts_def: bool, -} - -impl Parse for ESTreeEnumAttribute { - fn parse(input: ParseStream) -> Result { - let mut no_rename_variants = false; - let mut custom_ts_def = false; - - loop { - let ident = input.call(Ident::parse_any).unwrap().to_string(); - match ident.as_str() { - "custom_ts_def" => { - if custom_ts_def { - panic!("Duplicate estree(custom_ts_def)"); - } else { - custom_ts_def = true; - } - } - "no_rename_variants" => { - if no_rename_variants { - panic!("Duplicate estree(no_rename_variants)"); - } else { - no_rename_variants = true; - } - } - arg => panic!("Unsupported #[estree(...)] argument: {arg}"), - } - let comma = input.peek(Token![,]); - if comma { - input.parse::().unwrap(); - } else { - break; - } - } - Ok(Self { no_rename_variants, custom_ts_def }) - } -} - -/// A struct representing the `#[estree(...)]` attributes that we implement for fields. 
-#[derive(Debug, Serialize, Default)] -pub struct ESTreeFieldAttribute { - pub flatten: bool, - pub skip: bool, - pub rename: Option, - pub typescript_type: Option, - pub append_to: Option, - pub via: Option, -} - -impl Parse for ESTreeFieldAttribute { - fn parse(input: ParseStream) -> Result { - let mut flatten = false; - let mut skip = false; - let mut rename = None; - let mut typescript_type = None; - let mut append_to = None; - let mut via = None; - - loop { - let ident = input.call(Ident::parse_any).unwrap().to_string(); - match ident.as_str() { - "rename" => { - input.parse::()?; - assert!( - rename.replace(input.parse::()?.value()).is_none(), - "Duplicate estree(rename)" - ); - } - "flatten" => { - if flatten { - panic!("Duplicate estree(flatten)"); - } else { - flatten = true; - } - } - "skip" => { - if skip { - panic!("Duplicate estree(skip)"); - } else { - skip = true; - } - } - "ts_type" => { - input.parse::()?; - assert!( - typescript_type.replace(input.parse::()?.value()).is_none(), - "Duplicate estree(ts_type)" - ); - } - "append_to" => { - input.parse::()?; - assert!( - append_to.replace(input.parse::()?.value()).is_none(), - "Duplicate estree(append_to)" - ); - } - "via" => { - input.parse::()?; - assert!( - via.replace(input.parse::()?.to_token_stream().to_string()).is_none(), - "Duplicate estree(with)" - ); - } - arg => panic!("Unsupported #[estree(...)] argument: {arg}"), - } - let comma = input.peek(Token![,]); - if comma { - input.parse::().unwrap(); - } else { - break; - } - } - Ok(Self { flatten, skip, rename, typescript_type, append_to, via }) - } -} - -/// A struct representing the `#[scope(...)]` attribute. 
-#[derive(Debug, Default)] -pub struct ScopeAttribute { - pub flags: Option, - pub strict_if: Option, -} - -impl Parse for ScopeAttribute { - fn parse(input: ParseStream) -> Result { - let parsed = input.parse_terminated(CommonAttribute::parse, Token![,])?; - Ok(parsed.into_iter().fold(Self::default(), |mut acc, CommonAttribute { ident, args }| { - let expr = parse2(args).expect("Invalid `#[scope]` input."); - match ident.to_string().as_str() { - "flags" => acc.flags = Some(expr), - "strict_if" => acc.strict_if = Some(expr), - _ => {} - } - acc - })) - } -} - -#[derive(Debug)] -struct CommonAttribute { - ident: Ident, - args: TokenStream, -} - -impl Parse for CommonAttribute { - fn parse(input: ParseStream) -> Result { - let ident = input.call(Ident::parse_any).unwrap(); - let args = - if input.peek(token::Paren) || input.peek(token::Bracket) || input.peek(token::Brace) { - let content; - parenthesized!(content in input); - content.parse()? - } else { - TokenStream::default() - }; - Ok(CommonAttribute { ident, args }) - } -} - -pub fn get_visit_markers<'a, I>(attrs: I) -> crate::Result -where - I: IntoIterator, -{ - #[expect(clippy::trivially_copy_pass_by_ref)] - fn predicate(it: &&Attribute) -> bool { - it.path().is_ident("visit") - } - - let mut iter = attrs.into_iter(); - let attr = iter.find(predicate); - debug_assert_eq!( - iter.find(predicate), - None, - "For now we only accept one `#[visit]` marker per field/variant, Please merge them together!" 
- ); - - attr.map_or_else( - || Ok(VisitMarkers::default()), - |attr| { - let mut visit_args = None; - let mut enter_before = false; - let mut ignore = false; - let nested = - attr.parse_args_with(Punctuated::::parse_terminated); - nested - .map(|nested| { - for com in nested { - if com.ident == "args" { - visit_args = Some(parse2(com.args).unwrap()); - } else if com.ident == "enter_before" { - enter_before = true; - } else if com.ident == "ignore" { - ignore = true; - } else { - panic!("Invalid `#[visit(...)]` input!") - } - } - }) - .map(|()| VisitMarkers { visit_args, enter_before, ignore }) - .normalize() - }, - ) -} - -pub fn get_scope_markers<'a, I>(attrs: I) -> crate::Result -where - I: IntoIterator, -{ - #[expect(clippy::trivially_copy_pass_by_ref)] - fn predicate(it: &&Attribute) -> bool { - it.path().is_ident("scope") - } - - let mut iter = attrs.into_iter(); - let attr = iter.find(predicate); - debug_assert_eq!( - iter.find(predicate), - None, - "For now we only accept one `#[scope]` marker per field/variant, Please merge them together!" - ); - - attr.map_or_else( - || Ok(ScopeMarkers::default()), - |attr| { - attr.parse_args_with(Ident::parse) - .map(|id| ScopeMarkers { - enter_before: id == "enter_before", - exit_before: id == "exit_before", - }) - .normalize() - }, - ) -} - -pub fn get_derive_attributes<'a, I>(attrs: I) -> crate::Result -where - I: IntoIterator, -{ - fn try_parse_clone_in(attr: &Attribute) -> crate::Result> { - if attr.path().is_ident("clone_in") { - let arg = attr.parse_args_with(Ident::parse).normalize()?; - Ok(Some(CloneInAttribute::from(&arg))) - } else { - Ok(None) - } - } - fn try_parse_estree(attr: &Attribute) -> crate::Result> { - if attr.path().is_ident("estree") { - let arg = attr.parse_args_with(ESTreeFieldAttribute::parse).normalize()?; - Ok(Some(arg)) - } else { - Ok(None) - } - } - let mut clone_in = None; - let mut estree = None; - for attr in attrs { - if let Some(attr) = try_parse_clone_in(attr)? 
{ - assert!(clone_in.replace(attr).is_none(), "Duplicate `#[clone_in(...)]` attribute."); - } - if let Some(attr) = try_parse_estree(attr)? { - assert!(estree.replace(attr).is_none(), "Duplicate `#[estree(...)]` attribute."); - } - } - Ok(DeriveAttributes { - clone_in: clone_in.unwrap_or_default(), - estree: estree.unwrap_or_default(), - }) -} - -pub fn get_scope_attribute<'a, I>(attrs: I) -> Option> -where - I: IntoIterator, -{ - let attr = attrs.into_iter().find(|it| it.path().is_ident("scope")); - attr.map(|attr| { - debug_assert!(attr.path().is_ident("scope")); - let result = if matches!(attr.meta, Meta::Path(_)) { - // empty `#[scope]`. - Ok(ScopeAttribute::default()) - } else { - attr.parse_args_with(ScopeAttribute::parse) - }; - - result.normalize() - }) -} - -pub fn get_estree_attribute<'a, T, I>(attrs: I) -> Option> -where - I: IntoIterator, - T: Parse, -{ - let attr = attrs.into_iter().find(|it| it.path().is_ident("estree")); - attr.map(|attr| { - debug_assert!(attr.path().is_ident("estree")); - attr.parse_args_with(T::parse).normalize() - }) -} diff --git a/tasks/ast_tools/src/output/javascript.rs b/tasks/ast_tools/src/output/javascript.rs index ab5e8e60314ef..1436f47104225 100644 --- a/tasks/ast_tools/src/output/javascript.rs +++ b/tasks/ast_tools/src/output/javascript.rs @@ -11,7 +11,7 @@ pub fn print_javascript(code: &str, generator_path: &str) -> String { format(&code) } -/// Format JS/TS code with dprint. +/// Format JS/TS code with `dprint`. fn format(source_text: &str) -> String { let mut dprint = Command::new("dprint") .stdin(Stdio::piped()) diff --git a/tasks/ast_tools/src/output/mod.rs b/tasks/ast_tools/src/output/mod.rs index 32a0145bf1ef4..d801655350279 100644 --- a/tasks/ast_tools/src/output/mod.rs +++ b/tasks/ast_tools/src/output/mod.rs @@ -23,7 +23,6 @@ pub fn output_path(krate: &str, path: &str) -> String { /// Add a generated file warning to top of file. 
fn add_header(code: &str, generator_path: &str, comment_start: &str) -> String { - // TODO: Add generation date, AST source hash, etc here. format!( "{comment_start} Auto-generated code, DO NOT EDIT DIRECTLY!\n\ {comment_start} To edit this generated file you have to edit `{generator_path}`\n\n\ @@ -34,6 +33,7 @@ fn add_header(code: &str, generator_path: &str, comment_start: &str) -> String { /// An output from codegen. /// /// Can be Rust, Javascript, or other formats. +#[expect(dead_code)] pub enum Output { Rust { path: String, tokens: TokenStream }, Javascript { path: String, code: String }, @@ -42,6 +42,9 @@ pub enum Output { } impl Output { + /// Convert [`Output`] to [`RawOutput`]. + /// + /// This involves printing and formatting the output. pub fn into_raw(self, generator_path: &str) -> RawOutput { let generator_path = generator_path.cow_replace('\\', "/"); @@ -66,7 +69,7 @@ impl Output { /// A raw output from codegen. /// -/// Content is formatted, and converted to bytes. +/// Content is formatted, and in byte array form, ready to write to file. #[derive(Debug)] pub struct RawOutput { pub path: String, @@ -74,7 +77,7 @@ pub struct RawOutput { } impl RawOutput { - /// Write output to file + /// Write [`RawOutput`] to file pub fn write_to_file(&self) -> io::Result<()> { log!("Write {}... ", &self.path); let result = write_to_file_impl(&self.content, &self.path); diff --git a/tasks/ast_tools/src/output/rust.rs b/tasks/ast_tools/src/output/rust.rs index 690838c6377c8..e8bd03e3afdc6 100644 --- a/tasks/ast_tools/src/output/rust.rs +++ b/tasks/ast_tools/src/output/rust.rs @@ -18,6 +18,9 @@ pub fn print_rust(tokens: TokenStream, generator_path: &str) -> String { rust_fmt(&code) } +/// Format Rust code with `rustfmt`. +/// +/// Does not format on disk - interfaces with `rustfmt` via stdin/stdout. 
fn rust_fmt(source_text: &str) -> String { let mut rustfmt = Command::new("rustfmt") .stdin(Stdio::piped()) @@ -45,15 +48,15 @@ fn rust_fmt(source_text: &str) -> String { /// `quote!` macro ignores plain comments, but we can use these to generate plain comments /// in generated code. /// +/// `//!@` form can be used to insert a line break in a position where `///@ ...` +/// is not valid syntax e.g. before an `#![allow(...)]`. +/// /// To dynamically generate a comment: /// ``` /// let comment = format!("@ NOTE: {} doesn't exist!", name); -/// quote!(#[doc = #comment]) -/// // or `quote!(#![doc = #comment])` +/// quote!( #[doc = #comment] ) +/// // or `quote!( #![doc = #comment] )` /// ``` -/// -/// `//!@@line_break` can be used to insert a line break in a position where `///@@line_break` -/// is not valid syntax e.g. before an `#![allow(...)]`. struct CommentReplacer; impl Replacer for CommentReplacer { diff --git a/tasks/ast_tools/src/parse/attr.rs b/tasks/ast_tools/src/parse/attr.rs new file mode 100644 index 0000000000000..1a08800e2a6be --- /dev/null +++ b/tasks/ast_tools/src/parse/attr.rs @@ -0,0 +1,141 @@ +use std::fmt::{self, Display}; + +use bitflags::bitflags; +use syn::MetaList; + +use crate::{ + codegen::{DeriveId, GeneratorId}, + schema::{Def, EnumDef, StructDef}, + DERIVES, GENERATORS, +}; + +/// Processor of an attribute - either a derive or a generator. +#[derive(Clone, Copy, Debug)] +pub enum AttrProcessor { + Derive(DeriveId), + Generator(GeneratorId), +} + +impl AttrProcessor { + /// Get name of this [`AttrProcessor`]. + pub fn name(self) -> &'static str { + match self { + Self::Derive(id) => DERIVES[id].trait_name(), + Self::Generator(id) => GENERATORS[id].name(), + } + } +} + +bitflags! { + /// Positions in which an attribute is legal. 
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub struct AttrPositions: u8 { + /// Attribute on a struct + const Struct = 1 << 0; + /// Attribute on an enum + const Enum = 1 << 1; + /// Attribute on a struct field + const StructField = 1 << 2; + /// Attribute on an enum variant + const EnumVariant = 1 << 3; + /// Part of `#[ast]` attr e.g. `visit` in `#[ast(visit)]` + const AstAttr = 1 << 4; + } +} + +/// Macro to combine multiple `AttrPositions` as a const. +/// +/// `attr_positions!(Struct | Enum)` is equivalent to `AttrPositions::Struct | AttrPositions::Enum`, +/// except it evaluates in const context. +/// +/// Useful for `Derive::attrs` and `Generator::attrs` methods, where a const is required. +macro_rules! attr_positions { + ($($positions:ident)|+) => { + const { + use $crate::parse::attr::AttrPositions; + AttrPositions::empty() $(.union(AttrPositions::$positions))+ + } + } +} +pub(crate) use attr_positions; + +/// Attribute location. +pub enum AttrLocation<'d> { + /// Attribute on a struct + Struct(&'d mut StructDef), + /// Attribute on an enum + Enum(&'d mut EnumDef), + /// Attribute on a struct field. + /// Comprises [`StructDef`] and field index. + StructField(&'d mut StructDef, usize), + /// Attribute on an enum variant. + /// Comprises [`EnumDef`] and variant index. + EnumVariant(&'d mut EnumDef, usize), + /// Part of `#[ast]` attr on a struct + StructAstAttr(&'d mut StructDef), + /// Part of `#[ast]` attr on an enum + EnumAstAttr(&'d mut EnumDef), +} + +impl AttrLocation<'_> { + /// Convert `&mut AttrLocation` to `AttrLocation`.
+ pub fn unpack(&mut self) -> AttrLocation { + match self { + AttrLocation::Struct(struct_def) => AttrLocation::Struct(struct_def), + AttrLocation::Enum(enum_def) => AttrLocation::Enum(enum_def), + AttrLocation::StructField(struct_def, field_index) => { + AttrLocation::StructField(struct_def, *field_index) + } + AttrLocation::EnumVariant(enum_def, variant_index) => { + AttrLocation::EnumVariant(enum_def, *variant_index) + } + AttrLocation::StructAstAttr(struct_def) => AttrLocation::StructAstAttr(struct_def), + AttrLocation::EnumAstAttr(enum_def) => AttrLocation::EnumAstAttr(enum_def), + } + } +} + +impl Display for AttrLocation<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AttrLocation::Struct(struct_def) | AttrLocation::StructAstAttr(struct_def) => { + f.write_str(struct_def.name()) + } + AttrLocation::Enum(enum_def) | AttrLocation::EnumAstAttr(enum_def) => { + f.write_str(enum_def.name()) + } + AttrLocation::StructField(struct_def, field_index) => { + write!(f, "{}::{}", struct_def.name(), struct_def.fields[*field_index].name()) + } + AttrLocation::EnumVariant(enum_def, variant_index) => { + write!(f, "{}::{}", enum_def.name(), enum_def.variants[*variant_index].name()) + } + } + } +} + +/// Part of an attribute. +/// +/// e.g.: +/// +/// * `#[span]` translates to a single `AttrPart::None`. +/// * `#[estree(skip)]` translates to a single `AttrPart::Tag("skip")`. +/// * `#[estree(skip, rename = "Foo", ts_type = "Foo | null")]` translates to 3 `AttrPart`s: +/// * `AttrPart::Tag("skip")` +/// * `AttrPart::String("rename", "Foo")` +/// * `AttrPart::String("ts_type", "Foo | null")` +#[derive(Debug)] +pub enum AttrPart<'p> { + /// No parts in attribute. + /// e.g. `#[ts]`. + None, + /// Named part. + /// e.g. `#[estree(skip)]`. + Tag(&'p str), + /// String part. + /// e.g. `#[estree(rename = "Foo")]` or `#[estree(via = crate::serialize::OptionVecDefault)]`. + String(&'p str, String), + /// List part. + /// e.g. 
`#[visit(args(flags = ScopeFlags::Function))]`. + List(&'p str, &'p MetaList), +} diff --git a/tasks/ast_tools/src/parse/load.rs b/tasks/ast_tools/src/parse/load.rs new file mode 100644 index 0000000000000..b1911d7b29011 --- /dev/null +++ b/tasks/ast_tools/src/parse/load.rs @@ -0,0 +1,141 @@ +use std::{fs, io::Read}; + +use indexmap::map::Entry; +use syn::{ + braced, + parse::{Parse, ParseBuffer}, + parse_file, + punctuated::Punctuated, + Attribute, Generics, Ident, Item, ItemEnum, ItemMacro, ItemStruct, Token, Variant, Visibility, + WhereClause, +}; + +use crate::schema::FileId; + +use super::{ + ident_name, + skeleton::{EnumSkeleton, Skeleton, StructSkeleton}, + FxIndexMap, +}; + +/// Load file and extract structs and enums with `#[ast]` attr. +/// +/// Only parses enough to get: +/// * Name of type. +/// * Inherits of enums wrapped in `inherit_variants!` macro. +/// +/// Inserts [`Skeleton`]s into `skeletons` and adds mappings from type name to type ID. +/// +/// This is the bare minimum to be able to "link up" types to each other in next pass. 
+pub fn load_file(file_id: FileId, file_path: &str, skeletons: &mut FxIndexMap) { + let mut file = fs::File::open(file_path).unwrap(); + let mut content = String::new(); + file.read_to_string(&mut content).unwrap(); + + let file = parse_file(content.as_str()).unwrap(); + + for item in file.items { + let (name, skeleton) = match item { + Item::Struct(item) => { + let Some(skeleton) = parse_struct(item, file_id) else { continue }; + (skeleton.name.clone(), Skeleton::Struct(skeleton)) + } + Item::Enum(item) => { + let Some(skeleton) = parse_enum(item, file_id) else { continue }; + (skeleton.name.clone(), Skeleton::Enum(skeleton)) + } + Item::Macro(item) => { + let Some(skeleton) = parse_macro(&item, file_id) else { continue }; + (skeleton.name.clone(), Skeleton::Enum(skeleton)) + } + _ => continue, + }; + + match skeletons.entry(name) { + Entry::Occupied(entry) => panic!("2 types with same name: {}", entry.key()), + Entry::Vacant(entry) => { + entry.insert(skeleton); + } + } + } +} + +fn parse_struct(item: ItemStruct, file_id: FileId) -> Option { + if !has_ast_attr(&item.attrs) { + return None; + } + + let name = ident_name(&item.ident); + Some(StructSkeleton { name, file_id, item }) +} + +fn parse_enum(item: ItemEnum, file_id: FileId) -> Option { + if !has_ast_attr(&item.attrs) { + return None; + } + + let name = ident_name(&item.ident); + Some(EnumSkeleton { name, file_id, item, inherits: vec![] }) +} + +fn parse_macro(item: &ItemMacro, file_id: FileId) -> Option { + if !item.mac.path.is_ident("inherit_variants") { + return None; + } + + let skeleton = item + .mac + .parse_body_with(|input: &ParseBuffer| { + // Because of `@inherit`s we can't use the actual `ItemEnum` parse. + // This closure is similar to how `ItemEnum` parser works, with the exception + // of how we approach parsing variants. + // First we try to parse as a `Variant`. If that fails, we try parsing as `@inherits`. + // We raise an error only if both of these fail. 
+ let attrs = input.call(Attribute::parse_outer)?; + + let vis = input.parse::()?; + let enum_token = input.parse::()?; + let ident = input.parse::()?; + let generics = input.parse::()?; + + let name = ident_name(&ident); + + let where_clause = input.parse::>()?; + assert!(where_clause.is_none(), "Types with `where` clauses are not supported"); + + assert!( + has_ast_attr(&attrs), + "Enum in `inherit_variants!` macro must have `#[ast]` attr: {name}", + ); + + let content; + let brace_token = braced!(content in input); + let mut variants = Punctuated::new(); + let mut inherits = vec![]; + while !content.is_empty() { + if let Ok(variant) = Variant::parse(&content) { + variants.push_value(variant); + let punct = content.parse()?; + variants.push_punct(punct); + } else if content.parse::().is_ok() + && content.parse::().is_ok_and(|id| id == "inherit") + { + let inherit_ident = content.parse::().expect("Invalid `@inherits`"); + inherits.push(ident_name(&inherit_ident)); + } else { + panic!("Invalid `inherit_variants!` macro usage"); + } + } + + let item = ItemEnum { attrs, vis, enum_token, ident, generics, brace_token, variants }; + Ok(EnumSkeleton { name, file_id, item, inherits }) + }) + .expect("Failed to parse contents of `inherit_variants!` macro"); + + Some(skeleton) +} + +/// Returns `true` if type has an `#[ast]` attribute on it. +fn has_ast_attr(attrs: &[Attribute]) -> bool { + attrs.iter().any(|attr| attr.path().is_ident("ast")) +} diff --git a/tasks/ast_tools/src/parse/mod.rs b/tasks/ast_tools/src/parse/mod.rs new file mode 100644 index 0000000000000..aa200463e8e95 --- /dev/null +++ b/tasks/ast_tools/src/parse/mod.rs @@ -0,0 +1,114 @@ +//! Create schema of types from Rust source files. +//! +//! Parsing proceeds in 2 phases: +//! +//! 1. Parse Rust source files and build list of all type definitions. +//! 2. Parse all the types into [`TypeDef`]s and link them to each other, to form the [`Schema`]. +//! +//! ## Phase 1 +//! +//! 
1st phase involves minimal parsing, just enough to identify structs/enums with `#[ast]` attr, +//! and to get names of types. +//! +//! Each type gets assigned a [`TypeId`], and is represented by a [`Skeleton`]. +//! An indexed hash map is built, mapping type names to their [`TypeId`]s and [`Skeleton`]s. +//! +//! ## Phase 2 +//! +//! 2nd phase involves full parsing of each type, and linking types to each other. +//! +//! A [`TypeDef`] is generated for each type. The `IndexVec<TypeId, TypeDef>` that is created is indexed +//! by [`TypeId`] - same order of entries as the `FxIndexMap` from phase 1. +//! +//! `parse_attr` method is called on [`Derive`]s and [`Generator`]s which handle attributes, +//! for the derive/generator to parse the attribute and update the [`TypeDef`] accordingly. +//! +//! [`TypeDef`]s are also created for other types which are found within the type definitions: +//! +//! * Primitives (e.g. `f64`, `&str`). +//! * Known types (`Vec`, `Box`, `Option`, `Cell`). +//! * Special cases (`Atom`, `RegExpFlags`, `ScopeId`, `SymbolId`, `ReferenceId`). +//! +//! Each [`TypeDef`] contains a [`FileId`], indicating which file the type was defined in. +//! +//! Note: Individual [`TypeDef`]s are created for every different `Vec`, `Box`, `Option` and `Cell`. +//! i.e. There are separate [`TypeDef`]s for `Vec<Expression>` and `Vec<Statement>`, +//! not a single [`TypeDef`] for `Vec`. +//! +//! ## Schema +//! +//! [`Schema`] contains all the [`TypeDef`]s and [`File`]s. +//! +//! * `types: IndexVec<TypeId, TypeDef>` is indexed by [`TypeId`]. +//! * `files: IndexVec<FileId, File>` is indexed by [`FileId`]. +//! +//! [`TypeId`]: crate::schema::TypeId +//! [`TypeDef`]: crate::schema::TypeDef +//! [`Derive`]: crate::Derive +//! 
[`Generator`]: crate::Generator + +use indexmap::{IndexMap, IndexSet}; +use oxc_index::IndexVec; +use rustc_hash::FxBuildHasher; +use syn::Ident; + +use crate::{ + log, log_success, + schema::Derives, + schema::{File, FileId, Schema}, + Codegen, +}; + +pub mod attr; +mod load; +#[expect(clippy::module_inception)] +mod parse; +mod skeleton; +use load::load_file; +pub use parse::convert_expr_to_string; +use parse::parse; +use skeleton::Skeleton; + +type FxIndexMap = IndexMap; +type FxIndexSet = IndexSet; + +/// Analyse the files with provided paths, and generate a [`Schema`]. +pub fn parse_files(file_paths: &[&str], codegen: &Codegen) -> Schema { + // Load files and populate `Vec` of skeletons + mapping from type name to `TypeId`. + // `TypeId` is index into `skeletons`. + let mut skeletons = FxIndexMap::default(); + + let files = file_paths + .iter() + .enumerate() + .map(|(file_id, &file_path)| { + let file_id = FileId::from_usize(file_id); + analyse_file(file_id, file_path, &mut skeletons) + }) + .collect::>(); + + // Convert skeletons into schema + parse(skeletons, files, codegen) +} + +/// Analyse file with provided path and add types to `skeletons`. +/// +/// Returns a [`File`]. +fn analyse_file( + file_id: FileId, + file_path: &str, + skeletons: &mut FxIndexMap, +) -> File { + log!("Load {file_path}... "); + load_file(file_id, file_path, skeletons); + log_success!(); + + File::new(file_path) +} + +/// Convert [`Ident`] to `String`, removing `r#` from start. 
+/// +/// [`Ident`]: struct@Ident +fn ident_name(ident: &Ident) -> String { + ident.to_string().trim_start_matches("r#").to_string() +} diff --git a/tasks/ast_tools/src/parse/parse.rs b/tasks/ast_tools/src/parse/parse.rs new file mode 100644 index 0000000000000..9f2627086f940 --- /dev/null +++ b/tasks/ast_tools/src/parse/parse.rs @@ -0,0 +1,798 @@ +use oxc_index::IndexVec; +use quote::ToTokens; +use rustc_hash::FxHashMap; +use syn::{ + punctuated::Punctuated, AttrStyle, Attribute, Expr, ExprLit, Field, Fields, GenericArgument, + Generics, Ident, ItemEnum, ItemStruct, Lit, Meta, PathArguments, PathSegment, Token, Type, + TypePath, TypeReference, Variant, Visibility as SynVisibility, +}; + +use crate::{ + codegen::Codegen, + schema::{ + BoxDef, CellDef, Def, EnumDef, FieldDef, File, FileId, OptionDef, PrimitiveDef, Schema, + StructDef, TypeDef, TypeId, VariantDef, VecDef, Visibility, + }, + Result, DERIVES, GENERATORS, +}; + +use super::{ + attr::{AttrLocation, AttrPart, AttrPositions, AttrProcessor}, + ident_name, + skeleton::{EnumSkeleton, Skeleton, StructSkeleton}, + Derives, FxIndexMap, FxIndexSet, +}; + +/// Parse [`Skeleton`]s into [`TypeDef`]s. +pub fn parse( + skeletons: FxIndexMap, + files: IndexVec, + codegen: &Codegen, +) -> Schema { + // Split `skeletons` into a `IndexSet` (type names) and `IndexVec` (skeletons) + let (type_names, skeletons_vec) = skeletons.into_iter().unzip(); + + let parser = Parser::new(type_names, files, codegen); + parser.parse_all(skeletons_vec) +} + +/// Types parser. +struct Parser<'c> { + /// Index hash set indexed by type ID, containing type names + type_names: FxIndexSet, + /// Source files + files: IndexVec, + /// Reference to `CodeGen` + codegen: &'c Codegen, + /// Extra types which don't have type definitions in the source files + /// e.g. primitives (`u8` etc), `Option`s, `Box`es, `Vec`s, `Cell`s + extra_types: Vec, + // These `FxHashMap`s: + // * Key: Inner type's `TypeId`. 
+ // * Value: Outer type's (`Option`/`Box`/`Vec`/`Cell`) `TypeId`. + // i.e. if `Expression` has ID 1, and `Option<Expression>` has ID 2, then key is 1 and value is 2 + // in `options` hash map. + options: FxHashMap, + boxes: FxHashMap, + vecs: FxHashMap, + cells: FxHashMap, +} + +impl<'c> Parser<'c> { + /// Create [`Parser`]. + fn new( + type_names: FxIndexSet, + files: IndexVec, + codegen: &'c Codegen, + ) -> Self { + Self { + type_names, + files, + codegen, + extra_types: vec![], + options: FxHashMap::default(), + boxes: FxHashMap::default(), + vecs: FxHashMap::default(), + cells: FxHashMap::default(), + } + } + + /// Parse all [`Skeleton`]s into [`TypeDef`]s and return [`Schema`]. + fn parse_all(mut self, skeletons: IndexVec) -> Schema { + let mut types = skeletons + .into_iter_enumerated() + .map(|(type_id, skeleton)| self.parse_type(type_id, skeleton)) + .collect::>(); + types.extend(self.extra_types); + + let type_names = self + .type_names + .into_iter() + .enumerate() + .map(|(type_id, type_name)| (type_name, TypeId::from_usize(type_id))) + .collect(); + + Schema { types, type_names, files: self.files } + } + + /// Get [`TypeId`] for type name. 
+ fn type_id(&mut self, name: &str) -> TypeId { + // Get type ID if already known + if let Some(type_id) = self.type_names.get_index_of(name) { + return TypeId::from_usize(type_id); + } + + // Generate new type for known primitives/special cases + let primitive = |name| TypeDef::Primitive(PrimitiveDef::new(name)); + + let type_def = match name { + "bool" => primitive("bool"), + "u8" => primitive("u8"), + "u16" => primitive("u16"), + "u32" => primitive("u32"), + "u64" => primitive("u64"), + "u128" => primitive("u128"), + "usize" => primitive("usize"), + "i8" => primitive("i8"), + "i16" => primitive("i16"), + "i32" => primitive("i32"), + "i64" => primitive("i64"), + "i128" => primitive("i128"), + "isize" => primitive("isize"), + "f32" => primitive("f32"), + "f64" => primitive("f64"), + "&str" => primitive("&str"), + "Atom" => primitive("Atom"), + "ScopeId" => primitive("ScopeId"), + "SymbolId" => primitive("SymbolId"), + "ReferenceId" => primitive("ReferenceId"), + // TODO: Remove the need for this by adding + // `#[cfg_attr(target_pointer_width = "64", repr(align(8)))]` to all AST types + "PointerAlign" => primitive("PointerAlign"), + // Cannot be parsed normally as is defined inside `bitflags!` macro. + // TODO: Find a way to encode this in the actual file. + // e.g. `#[ast(alias_for(RegExpFlags))] struct RegExpFlagsAlias(u8);` + "RegExpFlags" => TypeDef::Struct(StructDef::new( + TypeId::DUMMY, + "RegExpFlags".to_string(), + false, + self.get_file_id("oxc_ast", "::ast::literal"), + Derives::none(), + vec![FieldDef::new( + "inner".to_string(), + self.type_id("u8"), + Visibility::Private, + None, + )], + )), + _ => panic!("Unknown type: {name}"), + }; + self.create_new_type(type_def) + } + + /// Get type name for a [`TypeId`]. + fn type_name(&mut self, type_id: TypeId) -> &str { + &self.type_names[type_id.index()] + } + + /// Create a new type definition. 
+ fn create_new_type(&mut self, mut type_def: TypeDef) -> TypeId { + let type_id = TypeId::from_usize(self.type_names.len()); + + match &mut type_def { + TypeDef::Struct(def) => def.id = type_id, + TypeDef::Enum(def) => def.id = type_id, + TypeDef::Primitive(def) => def.id = type_id, + TypeDef::Option(def) => def.id = type_id, + TypeDef::Box(def) => def.id = type_id, + TypeDef::Vec(def) => def.id = type_id, + TypeDef::Cell(def) => def.id = type_id, + } + + let was_inserted = self.type_names.insert(type_def.name().to_string()); + assert!(was_inserted); + + self.extra_types.push(type_def); + + type_id + } + + /// Get [`FileId`] for file with provided crate and import path. + fn get_file_id(&self, krate: &str, import_path: &str) -> FileId { + let file_and_id = self + .files + .iter_enumerated() + .find(|(_, file)| file.krate() == krate && file.import_path() == import_path); + match file_and_id { + Some((file_id, _)) => file_id, + None => panic!("Could not find file with import path: {import_path}"), + } + } + + /// Parse [`Skeleton`] to yield a [`TypeDef`]. + fn parse_type(&mut self, type_id: TypeId, skeleton: Skeleton) -> TypeDef { + match skeleton { + Skeleton::Struct(skeleton) => self.parse_struct(type_id, skeleton), + Skeleton::Enum(skeleton) => self.parse_enum(type_id, skeleton), + } + } + + /// Parse [`StructSkeleton`] to yield a [`TypeDef`]. 
+ fn parse_struct(&mut self, type_id: TypeId, skeleton: StructSkeleton) -> TypeDef { + let StructSkeleton { name, item, file_id } = skeleton; + let has_lifetime = check_generics(&item.generics, &name); + let fields = self.parse_fields(&item.fields); + let generated_derives = self.get_generated_derives(&item.attrs, &name); + let mut type_def = TypeDef::Struct(StructDef::new( + type_id, + name, + has_lifetime, + file_id, + generated_derives, + fields, + )); + + // Parse attrs on type and fields + self.parse_type_attrs(&mut type_def, &item.attrs); + self.parse_field_attrs(&mut type_def, &item, generated_derives); + + type_def + } + + /// Parse attributes on struct's fields with parsers provided by [`Derive`]s and [`Generator`]s. + /// + /// [`Derive`]: crate::Derive + /// [`Generator`]: crate::Generator + fn parse_field_attrs( + &self, + type_def: &mut TypeDef, + item: &ItemStruct, + generated_derives: Derives, + ) { + let struct_def = type_def.as_struct_mut().unwrap(); + for (field_index, field) in item.fields.iter().enumerate() { + for attr in &field.attrs { + if !matches!(attr.style, AttrStyle::Outer) { + continue; + } + let Some(attr_ident) = attr.path().get_ident() else { continue }; + let attr_name = ident_name(attr_ident); + + if let Some((processor, positions)) = self.codegen.attr_processor(&attr_name) { + // Check attribute is legal in this position + // and has the relevant trait `#[generate_derive]`-ed on it + check_attr_position( + positions, + AttrPositions::StructField, + struct_def.name(), + &attr_name, + "struct field", + ); + check_attr_is_derived( + processor, + generated_derives, + struct_def.name(), + &attr_name, + ); + + let location = AttrLocation::StructField(struct_def, field_index); + let result = process_attr(processor, &attr_name, location, &attr.meta); + assert!( + result.is_ok(), + "Invalid use of `#[{attr_name}]` on `{}::{}` struct field", + struct_def.name(), + struct_def.fields[field_index].name() + ); + } + } + } + } + + /// Parse 
[`EnumSkeleton`] to yield a [`TypeDef`]. + fn parse_enum(&mut self, type_id: TypeId, skeleton: EnumSkeleton) -> TypeDef { + let EnumSkeleton { name, item, inherits, file_id } = skeleton; + let has_lifetime = check_generics(&item.generics, &name); + let variants = item.variants.iter().map(|variant| self.parse_variant(variant)).collect(); + let inherits = inherits.into_iter().map(|name| self.type_id(&name)).collect(); + let generated_derives = self.get_generated_derives(&item.attrs, &name); + let mut type_def = TypeDef::Enum(EnumDef::new( + type_id, + name, + has_lifetime, + file_id, + generated_derives, + variants, + inherits, + )); + + // Parse attrs on type and variants + self.parse_type_attrs(&mut type_def, &item.attrs); + self.parse_variant_attrs(&mut type_def, &item, generated_derives); + + type_def + } + + /// Parse attributes on enum's variants with parsers provided by [`Derive`]s and [`Generator`]s. + /// + /// [`Derive`]: crate::Derive + /// [`Generator`]: crate::Generator + fn parse_variant_attrs( + &self, + type_def: &mut TypeDef, + item: &ItemEnum, + generated_derives: Derives, + ) { + let enum_def = type_def.as_enum_mut().unwrap(); + for (variant_index, variant) in item.variants.iter().enumerate() { + for attr in &variant.attrs { + if !matches!(attr.style, AttrStyle::Outer) { + continue; + } + let Some(attr_ident) = attr.path().get_ident() else { continue }; + let attr_name = ident_name(attr_ident); + + if let Some((processor, positions)) = self.codegen.attr_processor(&attr_name) { + // Check attribute is legal in this position + // and has the relevant trait `#[generate_derive]`-ed on it + check_attr_position( + positions, + AttrPositions::EnumVariant, + enum_def.name(), + &attr_name, + "enum variant", + ); + check_attr_is_derived( + processor, + generated_derives, + enum_def.name(), + &attr_name, + ); + + let location = AttrLocation::EnumVariant(enum_def, variant_index); + let result = process_attr(processor, &attr_name, location, &attr.meta); + 
assert!( + result.is_ok(), + "Invalid use of `#[{attr_name}]` on `{}::{}` enum variant", + enum_def.name(), + enum_def.variants[variant_index].name(), + ); + } + } + } + } + + /// Parse struct fields to [`FieldDef`]s. + /// + /// [`Vec`]: FieldDef + fn parse_fields(&mut self, fields: &Fields) -> Vec { + fields.iter().enumerate().map(|(index, field)| self.parse_field(field, index)).collect() + } + + /// Parse struct field to [`FieldDef`]. + fn parse_field(&mut self, field: &Field, index: usize) -> FieldDef { + let name = match field.ident.as_ref() { + Some(ident) => ident_name(ident), + None => index.to_string(), + }; + + let ty = &field.ty; + let type_id = self + .parse_type_name(ty) + .unwrap_or_else(|| panic!("Cannot parse type reference: {}", ty.to_token_stream())); + let visibility = match &field.vis { + SynVisibility::Public(_) => Visibility::Public, + SynVisibility::Restricted(_) => Visibility::Restricted, + SynVisibility::Inherited => Visibility::Private, + }; + + // Get doc comment + let mut doc_comment = None; + for attr in &field.attrs { + if let Meta::NameValue(name_value) = &attr.meta { + if name_value.path.is_ident("doc") { + if let Expr::Lit(expr_lit) = &name_value.value { + if let Lit::Str(lit_str) = &expr_lit.lit { + doc_comment = Some(lit_str.value().trim().to_string()); + break; + } + } + } + } + } + + FieldDef::new(name, type_id, visibility, doc_comment) + } + + /// Parse enum variant to [`VariantDef`]. 
+ fn parse_variant(&mut self, variant: &Variant) -> VariantDef { + let name = ident_name(&variant.ident); + + let field_type_id = if variant.fields.is_empty() { + None + } else { + assert!( + variant.fields.len() == 1, + "Only enum variants with a single field are supported: {name}" + ); + let field = variant.fields.iter().next().unwrap(); + let type_id = self.parse_type_name(&field.ty).unwrap_or_else(|| { + panic!("Cannot parse type reference: {}", field.ty.to_token_stream()) + }); + Some(type_id) + }; + + let discriminant = { + let Some((_, discriminant)) = variant.discriminant.as_ref() else { + panic!("All enum variants must have explicit discriminants: {name}"); + }; + let Expr::Lit(ExprLit { lit: Lit::Int(lit), .. }) = discriminant else { + panic!("Invalid enum discriminant {discriminant:?} on {name}"); + }; + let Ok(discriminant) = lit.base10_parse() else { + panic!("Invalid base10 enum discriminant {discriminant:?} on {name}"); + }; + discriminant + }; + + VariantDef::new(name, field_type_id, discriminant) + } + + /// Resolve type name to its [`TypeId`]. 
+ fn parse_type_name(&mut self, ty: &Type) -> Option { + match ty { + Type::Path(type_path) => self.parse_type_path(type_path), + Type::Reference(type_ref) => self.parse_type_reference(type_ref), + _ => None, + } + } + + fn parse_type_path(&mut self, type_path: &TypePath) -> Option { + let segment = type_path_segment(type_path)?; + let name = ident_name(&segment.ident); + match &segment.arguments { + PathArguments::None => Some(self.type_id(&name)), + PathArguments::Parenthesized(_) => None, + PathArguments::AngleBracketed(angled) => { + // Get first arg, skipping over lifetime arg + let mut args = angled.args.iter(); + let arg = match args.next().unwrap() { + GenericArgument::Lifetime(_) => args.next(), + arg => Some(arg), + }; + + if let Some(arg) = arg { + self.parse_complex_type_path(&name, arg) + } else { + Some(self.type_id(&name)) + } + } + } + } + + fn parse_complex_type_path( + &mut self, + wrapper_name: &str, + arg: &GenericArgument, + ) -> Option { + let GenericArgument::Type(ty) = arg else { return None }; + + let inner_type_id = self.parse_type_name(ty)?; + + let type_id = match wrapper_name { + "Option" => self.options.get(&inner_type_id).copied().unwrap_or_else(|| { + let name = format!("Option<{}>", self.type_name(inner_type_id)); + let type_def = TypeDef::Option(OptionDef::new(name, inner_type_id)); + let type_id = self.create_new_type(type_def); + self.options.insert(inner_type_id, type_id); + type_id + }), + "Box" => self.boxes.get(&inner_type_id).copied().unwrap_or_else(|| { + let name = format!("Box<{}>", self.type_name(inner_type_id)); + let type_def = TypeDef::Box(BoxDef::new(name, inner_type_id)); + let type_id = self.create_new_type(type_def); + self.boxes.insert(inner_type_id, type_id); + type_id + }), + "Vec" => self.vecs.get(&inner_type_id).copied().unwrap_or_else(|| { + let name = format!("Vec<{}>", self.type_name(inner_type_id)); + let type_def = TypeDef::Vec(VecDef::new(name, inner_type_id)); + let type_id = 
self.create_new_type(type_def); + self.vecs.insert(inner_type_id, type_id); + type_id + }), + "Cell" => self.cells.get(&inner_type_id).copied().unwrap_or_else(|| { + let name = format!("Cell<{}>", self.type_name(inner_type_id)); + let type_def = TypeDef::Cell(CellDef::new(name, inner_type_id)); + let type_id = self.create_new_type(type_def); + self.cells.insert(inner_type_id, type_id); + type_id + }), + _ => return None, + }; + Some(type_id) + } + + fn parse_type_reference(&mut self, type_ref: &TypeReference) -> Option { + if type_ref.mutability.is_some() { + return None; + } + let Type::Path(type_path) = &*type_ref.elem else { return None }; + let segment = type_path_segment(type_path)?; + if segment.ident != "str" || segment.arguments != PathArguments::None { + return None; + } + Some(self.type_id("&str")) + } + + /// Parse attributes on struct or enum with parsers provided by [`Derive`]s and [`Generator`]s. + /// + /// [`Derive`]: crate::Derive + /// [`Generator`]: crate::Generator + fn parse_type_attrs(&mut self, type_def: &mut TypeDef, attrs: &[Attribute]) { + for attr in attrs { + if !matches!(attr.style, AttrStyle::Outer) { + continue; + } + let Some(attr_ident) = attr.path().get_ident() else { continue }; + let attr_name = ident_name(attr_ident); + + if attr_name == "ast" { + self.parse_ast_attr(type_def, attr); + continue; + } + + if let Some((processor, positions)) = self.codegen.attr_processor(&attr_name) { + // Check attribute is legal in this position + match type_def { + TypeDef::Struct(struct_def) => { + check_attr_position( + positions, + AttrPositions::Struct, + struct_def.name(), + &attr_name, + "struct", + ); + } + TypeDef::Enum(enum_def) => { + check_attr_position( + positions, + AttrPositions::Enum, + enum_def.name(), + &attr_name, + "enum", + ); + } + _ => unreachable!(), + } + + // Check this type has the relevant trait `#[generate_derive]`-ed on it + check_attr_is_derived( + processor, + type_def.generated_derives(), + type_def.name(), + 
&attr_name, + ); + + let location = match type_def { + TypeDef::Struct(struct_def) => AttrLocation::Struct(struct_def), + TypeDef::Enum(enum_def) => AttrLocation::Enum(enum_def), + _ => unreachable!(), + }; + let result = process_attr(processor, &attr_name, location, &attr.meta); + assert!( + result.is_ok(), + "Invalid use of `#[{attr_name}]` on `{}` type", + type_def.name() + ); + } + } + } + + /// Parse `#[ast]` attribute parts on struct or enum with parsers provided by [`Derive`]s + /// and [`Generator`]s. + /// + /// e.g. `#[ast(visit)]` + /// + /// [`Derive`]: crate::Derive + /// [`Generator`]: crate::Generator + fn parse_ast_attr(&mut self, type_def: &mut TypeDef, attr: &Attribute) { + let parts = match &attr.meta { + Meta::Path(_) => return, + Meta::List(meta_list) => meta_list + .parse_args_with(Punctuated::::parse_terminated) + .map_err(|_| ()), + Meta::NameValue(_) => Err(()), + }; + let Ok(parts) = parts else { + panic!("Unable to parse `#[ast]` attribute on `{}` type", type_def.name()); + }; + + for meta in &parts { + let attr_name = meta.path().get_ident().unwrap().to_string(); + if let Some((processor, positions)) = self.codegen.attr_processor(&attr_name) { + // Check attribute is legal in this position + // and has the relevant trait `#[generate_derive]`-ed on it + check_attr_position( + positions, + AttrPositions::AstAttr, + type_def.name(), + &attr_name, + "`#[ast]` attr", + ); + check_attr_is_derived( + processor, + type_def.generated_derives(), + type_def.name(), + &attr_name, + ); + + let location = match type_def { + TypeDef::Struct(struct_def) => AttrLocation::StructAstAttr(struct_def), + TypeDef::Enum(enum_def) => AttrLocation::EnumAstAttr(enum_def), + _ => unreachable!(), + }; + let result = process_attr(processor, &attr_name, location, meta); + assert!( + result.is_ok(), + "Invalid use of `#[ast({attr_name})]` on `{}` type", + type_def.name() + ); + } else { + panic!("Unknown attribute `#[ast({attr_name})]` on `{}` type", type_def.name()); 
+ } + } + } + + /// Get derives which are generated with `#[generate_derive(...)]` attrs. + fn get_generated_derives(&self, attrs: &[Attribute], type_name: &str) -> Derives { + let mut derives = Derives::none(); + for attr in attrs { + if attr.path().is_ident("generate_derive") { + let args = attr.parse_args_with(Punctuated::::parse_terminated); + let Ok(args) = args else { + panic!("Unable to parse `#[generate_derive]` on `{type_name}` type"); + }; + for arg in args { + let derive_id = self.codegen.get_derive_id_by_name(&ident_name(&arg)); + derives.add(derive_id); + } + } + } + + derives + } +} + +/// Check generics. +/// +/// Return `true` if type has a lifetime. +/// +/// # Panics +/// Panics if type has type params, const params, or more than one lifetime. +fn check_generics(generics: &Generics, name: &str) -> bool { + assert!( + generics.type_params().next().is_none(), + "Types with generic type params are not supported: {name}" + ); + assert!( + generics.const_params().next().is_none(), + "Types with generic const params are not supported: {name}" + ); + + match generics.lifetimes().count() { + 0 => false, + 1 => true, + _ => panic!("Types with more than 1 lifetime are not supported: {name}"), + } +} + +/// Get first segment from `TypePath`. +/// +/// Returns `None` if has `qself` or leading colon, or if more than 1 segment. +fn type_path_segment(type_path: &TypePath) -> Option<&PathSegment> { + if type_path.qself.is_some() || type_path.path.leading_colon.is_some() { + return None; + } + + let segments = &type_path.path.segments; + if segments.len() != 1 { + return None; + } + segments.first() +} + +/// Process attribute with a processor (derive or generator). 
+fn process_attr( + processor: AttrProcessor, + attr_name: &str, + mut location: AttrLocation, + meta: &Meta, +) -> Result<()> { + match meta { + Meta::Path(_) => process_attr_part(processor, attr_name, location, AttrPart::None), + Meta::List(meta_list) => { + let parts = meta_list + .parse_args_with(Punctuated::::parse_terminated) + .map_err(|_| ())?; + for meta in parts { + match &meta { + Meta::Path(path) => { + let part_name = path.get_ident().ok_or(())?.to_string(); + process_attr_part( + processor, + attr_name, + location.unpack(), + AttrPart::Tag(&part_name), + )?; + } + Meta::List(meta_list) => { + let part_name = meta_list.path.get_ident().ok_or(())?.to_string(); + process_attr_part( + processor, + attr_name, + location.unpack(), + AttrPart::List(&part_name, meta_list), + )?; + } + Meta::NameValue(name_value) => { + let part_name = name_value.path.get_ident().ok_or(())?.to_string(); + let str = convert_expr_to_string(&name_value.value); + process_attr_part( + processor, + attr_name, + location.unpack(), + AttrPart::String(&part_name, str), + )?; + } + }; + } + Ok(()) + } + Meta::NameValue(_) => Err(()), + } +} + +/// Convert an [`Expr`] to a string. +/// +/// If the `Expr` is a string literal, get the value of the string. +/// Otherwise print the `Expr` as a string. +/// +/// This function is also used in `Visit` generator. +pub fn convert_expr_to_string(expr: &Expr) -> String { + if let Expr::Lit(ExprLit { lit: Lit::Str(s), .. 
}) = expr { + s.value() + } else { + expr.to_token_stream().to_string() + } +} + +fn process_attr_part( + processor: AttrProcessor, + attr_name: &str, + location: AttrLocation, + part: AttrPart, +) -> Result<()> { + match processor { + AttrProcessor::Derive(derive_id) => { + DERIVES[derive_id].parse_attr(attr_name, location, part) + } + AttrProcessor::Generator(generator_id) => { + GENERATORS[generator_id].parse_attr(attr_name, location, part) + } + } +} + +/// If attribute is processed by a derive, check that trait is derived on the type. +fn check_attr_is_derived( + processor: AttrProcessor, + generated_derives: Derives, + type_name: &str, + attr_name: &str, +) { + let AttrProcessor::Derive(derive_id) = processor else { return }; + if generated_derives.has(derive_id) { + return; + } + + let trait_name = DERIVES[derive_id].trait_name(); + panic!( + "`{type_name}` type has `#[{attr_name}]` attribute, but `{trait_name}` trait \ + that handles `#[{attr_name}]` is not derived on `{type_name}`.\n\ + Expected `#[generate_derive({trait_name})]` to be present." + ); +} + +/// Check attribute is in a legal position. +fn check_attr_position( + expected_positions: AttrPositions, + found_in_position: AttrPositions, + type_name: &str, + attr_name: &str, + position_debug_str: &str, +) { + assert!( + expected_positions.contains(found_in_position), + "`{type_name}` type has `#[{attr_name}]` attribute on a {position_debug_str}, \ + but `#[{attr_name}]` is not legal in this position." + ); +} diff --git a/tasks/ast_tools/src/parse/skeleton.rs b/tasks/ast_tools/src/parse/skeleton.rs new file mode 100644 index 0000000000000..5379f7ac64dee --- /dev/null +++ b/tasks/ast_tools/src/parse/skeleton.rs @@ -0,0 +1,31 @@ +use syn::{ItemEnum, ItemStruct}; + +use crate::schema::FileId; + +/// "Skeleton" parsed from type definition in source file. 
+/// +/// Contains only very basic information - type name, [`syn`]'s parsed AST for the type, +/// [`FileId`] of the file this type is defined in, and names of any enums this enum inherits. +/// +/// [`Skeleton`]s are created in first parsing pass, and contain the bare minimum required +/// to be able to link up the types in the 2nd pass. +#[derive(Debug)] +pub enum Skeleton { + Struct(StructSkeleton), + Enum(EnumSkeleton), +} + +#[derive(Debug)] +pub struct StructSkeleton { + pub name: String, + pub file_id: FileId, + pub item: ItemStruct, +} + +#[derive(Debug)] +pub struct EnumSkeleton { + pub name: String, + pub file_id: FileId, + pub item: ItemEnum, + pub inherits: Vec, +} diff --git a/tasks/ast_tools/src/passes/calc_layout.rs b/tasks/ast_tools/src/passes/calc_layout.rs deleted file mode 100644 index d59f4198c17ef..0000000000000 --- a/tasks/ast_tools/src/passes/calc_layout.rs +++ /dev/null @@ -1,306 +0,0 @@ -use std::cmp::max; - -use cow_utils::CowUtils; -use itertools::Itertools; -use lazy_static::lazy_static; -use quote::ToTokens; -use rustc_hash::FxHashMap; -use syn::Type; - -use crate::{ - codegen::EarlyCtx, - layout::{KnownLayout, Layout}, - rust_ast::{AstRef, AstType, Enum, Struct}, - util::{NormalizeError, TypeAnalysis, TypeExt, TypeWrapper}, - Result, -}; - -use super::{define_pass, Pass}; - -/// We use compiler to infer 64bit type layouts. 
-#[cfg(not(target_pointer_width = "64"))] -compile_error!("This module only supports 64bit architectures."); - -type WellKnown = FxHashMap<&'static str, PlatformLayout>; - -pub struct CalcLayout; - -define_pass!(CalcLayout); - -impl Pass for CalcLayout { - fn each(&mut self, ty: &mut AstType, ctx: &EarlyCtx) -> crate::Result { - calc_layout(ty, ctx) - } -} - -#[derive(Debug, Clone)] -struct PlatformLayout(/* 64 */ Layout, /* 32 */ Layout); - -impl PlatformLayout { - const UNKNOWN: Self = Self(Layout::Unknown, Layout::Unknown); - - const fn zero() -> Self { - Self(Layout::zero(), Layout::zero()) - } - - const fn ptr() -> Self { - Self(Layout::ptr_64(), Layout::ptr_32()) - } - - const fn wide_ptr() -> Self { - Self(Layout::wide_ptr_64(), Layout::wide_ptr_32()) - } - - pub const fn of() -> Self { - Self(Layout::of::(), Layout::of::()) - } - - /// Return `true` if either of platform layouts is unknown. - fn is_unknown(&self) -> bool { - self.0.is_unknown() || self.1.is_unknown() - } -} - -impl From<(Layout, Layout)> for PlatformLayout { - fn from((x64, x32): (Layout, Layout)) -> Self { - Self(x64, x32) - } -} - -/// Calculates the layout of `ty` by mutating it. -/// Returns `false` if the layout is unknown at this point. 
-pub fn calc_layout(ty: &mut AstType, ctx: &EarlyCtx) -> Result { - let unknown_layout = ty - .layout_32() - .and_then(|x32| ty.layout_64().map(|x64| PlatformLayout(x64, x32))) - .is_ok_and(|pl| pl.is_unknown()); - let layout = match ty { - AstType::Enum(enum_) if unknown_layout => calc_enum_layout(enum_, ctx), - AstType::Struct(struct_) if unknown_layout => calc_struct_layout(struct_, ctx), - _ => return Ok(true), - }?; - if layout.is_unknown() { - Ok(false) - } else { - let PlatformLayout(x64, x32) = layout; - ty.set_layout(x64, x32)?; - Ok(true) - } -} - -fn calc_enum_layout(ty: &mut Enum, ctx: &EarlyCtx) -> Result { - struct SizeAlign { - size: usize, - align: usize, - } - - // Get max size and align of variants - let mut size_align_64 = SizeAlign { size: 0, align: 1 }; - let mut size_align_32 = SizeAlign { size: 0, align: 1 }; - - for variant in &ty.item.variants { - if variant.fields.is_empty() { - continue; - } - - let field = variant.fields.iter().exactly_one().normalize().unwrap(); - let typ = field.ty.analyze(ctx); - let PlatformLayout(variant_layout_64, variant_layout_32) = calc_type_layout(&typ, ctx)?; - - let variant_layout_64 = variant_layout_64.layout().unwrap(); - size_align_64.size = max(size_align_64.size, variant_layout_64.size()); - size_align_64.align = max(size_align_64.align, variant_layout_64.align()); - - let variant_layout_32 = variant_layout_32.layout().unwrap(); - size_align_32.size = max(size_align_32.size, variant_layout_32.size()); - size_align_32.align = max(size_align_32.align, variant_layout_32.align()); - } - - // Round up size to largest variant alignment. - // Largest variant is not necessarily the most highly aligned e.g. `enum { A([u8; 50]), B(u64) }` - size_align_64.size = size_align_64.size.next_multiple_of(size_align_64.align); - size_align_32.size = size_align_32.size.next_multiple_of(size_align_32.align); - - // Add discriminant. 
- // All enums are `#[repr(C, u8)]` (fieldful) or `#[repr(u8)]` (fieldless), so disriminant is 1 byte. - // But padding is inserted to align all payloads to largest alignment of any variant. - size_align_64.size += size_align_64.align; - size_align_32.size += size_align_32.align; - - // Variant payloads are not relevant in niche calculation for `#[repr(u8)]` / `#[repr(C, u8)]` enums. - // The niche optimization for Option-like enums is disabled by `#[repr(u8)]`. - // https://doc.rust-lang.org/nightly/nomicon/other-reprs.html#repru-repri - // So number of niches only depends on the number of discriminants. - // TODO: This isn't quite correct. Number of niches depends only on how many unused discriminant - // values at *start* or *end* of range. - // https://github.com/oxc-project/oxc/pull/5774#pullrequestreview-2306334340 - let niches = (256 - ty.item.variants.len()) as u128; - - let layout_64 = KnownLayout::new(size_align_64.size, size_align_64.align, niches); - let layout_32 = KnownLayout::new(size_align_32.size, size_align_32.align, niches); - Ok(PlatformLayout(Layout::from(layout_64), Layout::from(layout_32))) -} - -fn calc_struct_layout(ty: &mut Struct, ctx: &EarlyCtx) -> Result { - fn collect_field_layouts(ty: &Struct, ctx: &EarlyCtx) -> Result> { - if ty.item.fields.is_empty() { - Ok(vec![PlatformLayout::zero()]) - } else { - ty.item - .fields - .iter() - .map(|field| { - let typ = field.ty.analyze(ctx); - calc_type_layout(&typ, ctx) - }) - .collect() - } - } - - fn with_padding( - layouts: &[KnownLayout], - ) -> std::result::Result { - let layouts = layouts.iter().enumerate(); - let mut offsets = vec![0; layouts.len()]; - let mut output = std::alloc::Layout::from_size_align(0, 1)?; - let mut niches = 0; - for (ix, layout) in layouts { - let (new_layout, offset) = output - .extend(std::alloc::Layout::from_size_align(layout.size(), layout.align())?)?; - output = new_layout; - niches += layout.niches(); - offsets[ix] = offset; - } - let output = 
output.pad_to_align(); - Ok(KnownLayout::new(output.size(), output.align(), niches).with_offsets(offsets)) - } - - let layouts = collect_field_layouts(ty, ctx)?; - - if layouts.iter().any(PlatformLayout::is_unknown) { - return Ok(PlatformLayout::UNKNOWN); - } - - let (layouts_x64, layouts_x32): (Vec, Vec) = layouts - .into_iter() - .map(|PlatformLayout(x64, x32)| { - x64.layout().and_then(|x64| x32.layout().map(|x32| (x64, x32))) - }) - .collect::>() - .expect("already checked."); - - let x32 = with_padding(&layouts_x32).normalize()?; - let x64 = with_padding(&layouts_x64).normalize()?; - - Ok(PlatformLayout(Layout::from(x64), Layout::from(x32))) -} - -fn calc_type_layout(ty: &TypeAnalysis, ctx: &EarlyCtx) -> Result { - fn is_slice(ty: &TypeAnalysis) -> bool { - if let Type::Reference(typ) = &ty.typ { - // TODO: support for &[T] slices. - typ.elem.get_ident().as_ident().is_some_and(|id| id == "str") - } else { - false - } - } - - fn try_fold_option(layout: Layout) -> Layout { - let Layout::Layout(mut known) = layout else { return layout }; - // option needs only one niche, We allow resizing in case there isn't enough space. - known.consume_niches(1, true); - Layout::Layout(known) - } - - let get_layout = |ast_ref: Option<&AstRef>| -> Result { - let result = if let Some(ast_ref) = &ast_ref { - if calc_layout(&mut ast_ref.borrow_mut(), ctx)? { - ast_ref.borrow().layouts().map(PlatformLayout::from)? 
- } else { - PlatformLayout::UNKNOWN - } - } else if let Some(well_known) = - WELL_KNOWN.get(ty.typ.get_ident().inner_ident().to_string().as_str()) - { - well_known.clone() - } else { - let Type::Path(typ) = &ty.typ else { - panic!(); - }; - - let typ = typ.path.segments.first().unwrap().to_token_stream().to_string(); - let typ = &*typ.cow_replace(' ', ""); - - if let Some(typ) = WELL_KNOWN.get(typ) { - typ.clone() - } else { - panic!("Unsupported type: {:#?}", ty.typ.to_token_stream().to_string()) - } - }; - Ok(result) - }; - - let layout = match ty.wrapper { - TypeWrapper::Vec | TypeWrapper::VecBox | TypeWrapper::VecOpt => { - WELL_KNOWN[stringify!(Vec)].clone() - } - TypeWrapper::OptVec => { - let mut pl = WELL_KNOWN[stringify!(Vec)].clone(); - // preconsume one niche for option - if let Layout::Layout(layout) = &mut pl.0 { - layout.consume_niches(1, true); - } - if let Layout::Layout(layout) = &mut pl.1 { - layout.consume_niches(1, true); - } - pl - } - TypeWrapper::Ref if is_slice(ty) => PlatformLayout::wide_ptr(), - TypeWrapper::Ref | TypeWrapper::Box | TypeWrapper::OptBox => PlatformLayout::ptr(), - TypeWrapper::None => get_layout(ty.type_id.map(|id| ctx.ast_ref(id)).as_ref())?, - TypeWrapper::Opt => { - let PlatformLayout(x64, x32) = - get_layout(ty.type_id.map(|id| ctx.ast_ref(id)).as_ref())?; - PlatformLayout(try_fold_option(x64), try_fold_option(x32)) - } - TypeWrapper::Complex => { - let PlatformLayout(x64, x32) = - get_layout(ty.type_id.map(|id| ctx.ast_ref(id)).as_ref())?; - PlatformLayout(x64, x32) - } - }; - Ok(layout) -} - -lazy_static! 
{ - static ref WELL_KNOWN: WellKnown = FxHashMap::from_iter([ - // Primitives - ("char", PlatformLayout::of::()), - ("bool", PlatformLayout::of::()), - ("u8", PlatformLayout::of::()), - ("i8", PlatformLayout::of::()), - ("u16", PlatformLayout::of::()), - ("i16", PlatformLayout::of::()), - ("u32", PlatformLayout::of::()), - ("i32", PlatformLayout::of::()), - ("f32", PlatformLayout::of::()), - ("u64", PlatformLayout::of::()), - ("i64", PlatformLayout::of::()), - ("f64", PlatformLayout::of::()), - ("usize", PlatformLayout::ptr()), - ("isize", PlatformLayout::ptr()), - // Well known types - // TODO: Generate const assertions for these in `oxc_ast` crate - ("Atom", PlatformLayout::wide_ptr()), - // External Bumpalo type - ("Vec", PlatformLayout(Layout::known(32, 8, 1), Layout::known(16, 4, 1))), - // Unsupported: We don't analyze `Cell` types - ("Cell>", PlatformLayout::of::()), - ("Cell>", PlatformLayout::of::()), - ("Cell>", PlatformLayout::of::()), - // Unsupported: this is a `bitflags` generated type, we don't expand macros - ("RegExpFlags", PlatformLayout::of::()), - // `PointerAlign` is a field of `Span`. ZST with pointer alignment. 
- ("PointerAlign", PlatformLayout(Layout::known(0, 8, 0), Layout::known(0, 4, 0))), - ]); -} diff --git a/tasks/ast_tools/src/passes/linker.rs b/tasks/ast_tools/src/passes/linker.rs deleted file mode 100644 index 247c52cfa1124..0000000000000 --- a/tasks/ast_tools/src/passes/linker.rs +++ /dev/null @@ -1,91 +0,0 @@ -use std::borrow::Cow; - -use syn::parse_quote; - -use crate::{codegen::EarlyCtx, rust_ast::Inherit, util::NormalizeError}; - -use super::{define_pass, AstType, Pass, Result}; - -pub trait Unresolved { - fn unresolved(&self) -> bool; - - // TODO: remove me - #[expect(dead_code)] - fn resolved(&self) -> bool { - !self.unresolved() - } -} - -impl Unresolved for Inherit { - fn unresolved(&self) -> bool { - matches!(self, Self::Unlinked(_)) - } -} - -impl Unresolved for Vec { - fn unresolved(&self) -> bool { - self.iter().any(Unresolved::unresolved) - } -} - -pub struct Linker; - -define_pass!(Linker); - -impl Pass for Linker { - /// # Panics - /// On invalid inheritance. - fn each(&mut self, ty: &mut AstType, ctx: &EarlyCtx) -> crate::Result { - // Exit early if it isn't an enum, We only link to resolve enum inheritance! - let AstType::Enum(ty) = ty else { - return Ok(true); - }; - - // Exit early if there is this enum doesn't use enum inheritance - if ty.meta.inherits.is_empty() { - return Ok(true); - } - - let inherits = ty - .meta - .inherits - .drain(..) - .map(|it| match &it { - Inherit::Unlinked(sup) => { - let linkee = ctx - .find(&Cow::Owned(sup.to_string())) - .normalize_with(format!("Unknown type {sup:?}"))?; - let linkee = linkee.borrow(); - let inherit_value = format!(r#""{}""#, linkee.ident().unwrap()); - let variants = match &*linkee { - AstType::Enum(enum_) => { - if enum_.meta.inherits.unresolved() { - return Ok(Err(it)); - } - enum_.item.variants.clone().into_iter().map(|mut v| { - v.attrs = vec![parse_quote!(#[inherit = #inherit_value])]; - v - }) - } - _ => { - panic!( - "invalid inheritance, you can only inherit from enums and in enums." 
- ) - } - }; - ty.item.variants.extend(variants.clone()); - Ok(Ok(Inherit::Linked { - super_: linkee.as_type().unwrap(), - variants: variants.collect(), - })) - } - Inherit::Linked { .. } => Ok(Ok(it)), - }) - .collect::>>>()?; - let unresolved = inherits.iter().any(std::result::Result::is_err); - - ty.meta.inherits = inherits.into_iter().map(|it| it.unwrap_or_else(|it| it)).collect(); - - Ok(!unresolved) - } -} diff --git a/tasks/ast_tools/src/passes/mod.rs b/tasks/ast_tools/src/passes/mod.rs deleted file mode 100644 index a4a6036b3227c..0000000000000 --- a/tasks/ast_tools/src/passes/mod.rs +++ /dev/null @@ -1,70 +0,0 @@ -use std::collections::VecDeque; - -use crate::{codegen::EarlyCtx, output::Output, rust_ast::AstType, Result}; - -mod calc_layout; -mod linker; -pub use calc_layout::CalcLayout; -pub use linker::Linker; - -pub trait Pass { - // Methods defined by implementer - - /// Run on each type. - /// Returns `false` if can't resolve. - fn each(&mut self, ty: &mut AstType, ctx: &EarlyCtx) -> Result; - - // Standard methods - - /// Run pass. - fn output(&mut self, ctx: &EarlyCtx) -> Result> { - // We sort by `TypeId`, so we have the same ordering as it's written in Rust source - let mut unresolved = ctx.chronological_idents().collect::>(); - - while let Some(next) = unresolved.pop_back() { - let next_id = ctx.type_id(next).unwrap(); - - let ast_ref = ctx.ast_ref(next_id); - let val = &mut ast_ref.borrow_mut(); - - if !self.each(val, ctx)? { - unresolved.push_front(next); - } - } - Ok(vec![]) - } -} - -macro_rules! define_pass { - ($ident:ident $($lifetime:lifetime)?) => { - const _: () = { - use $crate::{ - codegen::{EarlyCtx, Runner}, - output::Output, - Result, - }; - - impl $($lifetime)? Runner for $ident $($lifetime)? 
{ - type Context = EarlyCtx; - - fn verb(&self) -> &'static str { - "Run pass" - } - - fn name(&self) -> &'static str { - stringify!($ident) - } - - fn file_path(&self) -> &'static str { - file!() - } - - fn run(&mut self, ctx: &Self::Context) -> Result> { - self.output(ctx) - } - } - }; - }; -} - -pub(crate) use define_pass; diff --git a/tasks/ast_tools/src/rust_ast.rs b/tasks/ast_tools/src/rust_ast.rs deleted file mode 100644 index ea0279b916bb4..0000000000000 --- a/tasks/ast_tools/src/rust_ast.rs +++ /dev/null @@ -1,451 +0,0 @@ -use syn::{ - braced, - parse::{Parse, ParseBuffer}, - parse_quote, - punctuated::Punctuated, - Attribute, Generics, Ident, Item, ItemEnum, ItemMacro, ItemStruct, Meta, Path, Token, Type, - Variant, Visibility, -}; - -use crate::{ - layout::Layout, - parse_file, - util::{unexpanded_macro_err, NormalizeError}, - Itertools, PathBuf, Rc, Read, RefCell, Result, -}; - -pub type AstRef = Rc>; - -#[derive(Debug, Clone)] -pub enum Inherit { - Unlinked(String), - Linked { super_: Type, variants: Punctuated }, -} - -impl From for Inherit { - fn from(ident: Ident) -> Self { - Self::Unlinked(ident.to_string()) - } -} - -#[derive(Debug, Clone)] -pub struct EnumMeta { - pub inherits: Vec, - pub layout_32: Layout, - pub layout_64: Layout, - pub is_visitable: bool, - pub ast: bool, - pub module_path: String, -} - -impl EnumMeta { - fn new(module_path: String) -> Self { - Self { - inherits: Vec::default(), - layout_32: Layout::default(), - layout_64: Layout::default(), - is_visitable: false, - ast: false, - module_path, - } - } -} - -#[derive(Debug)] -pub struct Enum { - pub item: ItemEnum, - pub meta: EnumMeta, -} - -impl Enum { - pub fn with_meta(item: ItemEnum, meta: EnumMeta) -> Self { - Self { item, meta } - } - - pub fn ident(&self) -> &Ident { - &self.item.ident - } - - pub fn as_type(&self) -> Type { - let ident = self.ident(); - let generics = &self.item.generics; - parse_quote!(#ident #generics) - } -} - -/// Placeholder for now! 
-#[derive(Debug, Clone)] -pub struct StructMeta { - pub layout_32: Layout, - pub layout_64: Layout, - pub is_visitable: bool, - pub ast: bool, - pub module_path: String, -} - -impl StructMeta { - fn new(module_path: String) -> Self { - Self { - layout_32: Layout::default(), - layout_64: Layout::default(), - is_visitable: false, - ast: false, - module_path, - } - } -} - -#[derive(Debug)] -pub struct Struct { - pub item: ItemStruct, - pub meta: StructMeta, -} - -impl Struct { - pub fn with_meta(item: ItemStruct, meta: StructMeta) -> Self { - Self { item, meta } - } - - pub fn ident(&self) -> &Ident { - &self.item.ident - } - - pub fn as_type(&self) -> Type { - let ident = self.ident(); - let generics = &self.item.generics; - parse_quote!(#ident #generics) - } -} - -#[derive(Debug)] -pub struct Macro { - pub item: ItemMacro, - pub meta: MacroMeta, -} - -impl Macro { - pub fn with_meta(item: ItemMacro, meta: MacroMeta) -> Self { - Self { item, meta } - } -} - -#[derive(Debug)] -pub struct MacroMeta { - pub module_path: String, -} - -impl MacroMeta { - fn new(module_path: String) -> Self { - Self { module_path } - } -} - -#[derive(Debug)] -pub enum AstType { - Enum(Enum), - Struct(Struct), - - // we need this to expand `inherit` macro calls. 
- Macro(Macro), -} - -impl AstType { - fn new(item: Item, module_path: String) -> Result { - match item { - Item::Enum(it) => Ok(AstType::Enum(Enum::with_meta(it, EnumMeta::new(module_path)))), - Item::Struct(it) => { - Ok(AstType::Struct(Struct::with_meta(it, StructMeta::new(module_path)))) - } - Item::Macro(it) => { - Ok(AstType::Macro(Macro::with_meta(it, MacroMeta::new(module_path)))) - } - _ => Err(String::from("Unsupported Item!")), - } - } - - pub fn ident(&self) -> Option<&Ident> { - match self { - AstType::Enum(ty) => Some(ty.ident()), - AstType::Struct(ty) => Some(ty.ident()), - AstType::Macro(ty) => ty.item.ident.as_ref(), - } - } - - pub fn as_type(&self) -> Option { - match self { - AstType::Enum(it) => Some(it.as_type()), - AstType::Struct(it) => Some(it.as_type()), - AstType::Macro(_) => None, - } - } - - #[expect(unused)] - pub fn is_visitable(&self) -> bool { - match self { - AstType::Enum(it) => it.meta.is_visitable, - AstType::Struct(it) => it.meta.is_visitable, - AstType::Macro(_) => false, - } - } - - pub fn set_visitable(&mut self, value: bool) -> Result<()> { - match self { - AstType::Enum(enum_) => { - debug_assert!(enum_.meta.ast, "only AST types can be visitable!"); - enum_.meta.is_visitable = value; - } - AstType::Struct(struct_) => { - debug_assert!(struct_.meta.ast, "only AST types can be visitable!"); - struct_.meta.is_visitable = value; - } - AstType::Macro(macro_) => return Err(unexpanded_macro_err(¯o_.item)), - }; - Ok(()) - } - - pub fn set_ast(&mut self, value: bool) -> Result<()> { - match self { - AstType::Enum(it) => it.meta.ast = value, - AstType::Struct(it) => it.meta.ast = value, - AstType::Macro(it) => return Err(unexpanded_macro_err(&it.item)), - } - Ok(()) - } - - pub fn layout_32(&self) -> Result { - match self { - AstType::Enum(it) => Ok(it.meta.layout_32.clone()), - AstType::Struct(it) => Ok(it.meta.layout_32.clone()), - AstType::Macro(it) => Err(unexpanded_macro_err(&it.item)), - } - } - - pub fn layout_64(&self) -> 
Result { - match self { - AstType::Enum(it) => Ok(it.meta.layout_64.clone()), - AstType::Struct(it) => Ok(it.meta.layout_64.clone()), - AstType::Macro(it) => Err(unexpanded_macro_err(&it.item)), - } - } - - pub fn layouts(&self) -> Result<(/* 64 */ Layout, /* 32 */ Layout)> { - self.layout_64().and_then(|x64| self.layout_32().map(|x32| (x64, x32))) - } - - pub fn set_layout(&mut self, layout_64: Layout, layout_32: Layout) -> Result<()> { - match self { - AstType::Enum(enum_) => { - enum_.meta.layout_32 = layout_32; - enum_.meta.layout_64 = layout_64; - } - AstType::Struct(struct_) => { - struct_.meta.layout_32 = layout_32; - struct_.meta.layout_64 = layout_64; - } - AstType::Macro(macro_) => return Err(unexpanded_macro_err(¯o_.item)), - } - Ok(()) - } - - pub fn module_path(&self) -> String { - match self { - AstType::Enum(it) => it.meta.module_path.clone(), - AstType::Struct(it) => it.meta.module_path.clone(), - AstType::Macro(it) => it.meta.module_path.clone(), - } - } -} - -const LOAD_ERROR: &str = "should be loaded by now!"; - -#[derive(Debug)] -pub struct Module { - pub file_path: PathBuf, - pub path: String, - pub items: Vec, - pub loaded: bool, -} - -impl Module { - /// Expects a file path to a rust source file in the `crates` directory. 
- pub fn with_path(file_path: PathBuf) -> Self { - let path = { - let no_ext = file_path.with_extension(""); - let string = no_ext.to_string_lossy(); - let mut parts = string.split('/'); - assert_eq!(parts.next(), Some("crates")); - let krate = parts.next().unwrap(); - assert_eq!(parts.next(), Some("src")); - let mut parts = [krate].into_iter().chain(parts); - parts.join("::") - }; - Self { file_path, path, items: Vec::new(), loaded: false } - } - - pub fn load(mut self) -> Result { - assert!(!self.loaded, "can't load twice!"); - let mut file = std::fs::File::open(&self.file_path).normalize().map_err(|err| { - format!("Error reading file: {}, reason: {}", &self.file_path.to_string_lossy(), err) - })?; - let mut content = String::new(); - file.read_to_string(&mut content).normalize()?; - let file = parse_file(content.as_str()).normalize()?; - self.items = file - .items - .into_iter() - .filter(|it| match it { - Item::Enum(_) | Item::Struct(_) => true, - // These contain enums with inheritance - Item::Macro(m) if m.mac.path.is_ident("inherit_variants") => true, - _ => false, - }) - .map(|it| AstType::new(it, self.path.clone())) - .map_ok(|it| Rc::new(RefCell::new(it))) - .collect::>()?; - self.loaded = true; - Ok(self) - } - - /// Expand `inherit_variants` macros to their inner enum. - /// This would also populate `inherits` field of `EnumMeta` types. - pub fn expand(self) -> Result { - if !self.loaded { - return Err(String::from(LOAD_ERROR)); - } - - self.items.iter().try_for_each(expand)?; - Ok(self) - } - - /// Fills the Meta types. 
- pub fn analyze(self) -> Result { - if !self.loaded { - return Err(String::from(LOAD_ERROR)); - } - - self.items.iter().try_for_each(analyze)?; - Ok(self) - } -} - -pub fn expand(ast_ref: &AstRef) -> Result<()> { - let to_replace = match &*ast_ref.borrow() { - ast_ref @ AstType::Macro(mac) => { - let (enum_, inherits) = mac - .item - .mac - .parse_body_with(|input: &ParseBuffer| { - // Because of `@inherit`s we can't use the actual `ItemEnum` parse, - // This closure is similar to how `ItemEnum` parser works, With the exception - // of how we approach our variants, First we try to parse a variant out of our - // tokens if we fail we try parsing the inheritance, And we would raise an - // error only if both of these fail. - let attrs = input.call(Attribute::parse_outer)?; - let vis = input.parse::()?; - let enum_token = input.parse::()?; - let ident = input.parse::()?; - let generics = input.parse::()?; - let (where_clause, brace_token, variants, inherits) = { - let where_clause = input.parse()?; - - let content; - let brace = braced!(content in input); - let mut variants = Punctuated::new(); - let mut inherits = Vec::::new(); - while !content.is_empty() { - if let Ok(variant) = Variant::parse(&content) { - variants.push_value(variant); - let punct = content.parse()?; - variants.push_punct(punct); - } else if content.parse::().is_ok() - && content.parse::().is_ok_and(|id| id == "inherit") - { - inherits.push(content.parse::()?); - } else { - panic!("Invalid inherit_variants usage!"); - } - } - - (where_clause, brace, variants, inherits) - }; - Ok(( - ItemEnum { - attrs, - vis, - enum_token, - ident, - generics: Generics { where_clause, ..generics }, - brace_token, - variants, - }, - inherits, - )) - }) - .normalize()?; - Some(AstType::Enum(Enum::with_meta( - enum_, - EnumMeta { - inherits: inherits.into_iter().map(Into::into).collect(), - ..EnumMeta::new(ast_ref.module_path()) - }, - ))) - } - _ => None, - }; - - if let Some(to_replace) = to_replace { - 
*ast_ref.borrow_mut() = to_replace; - } - - Ok(()) -} - -pub fn analyze(ast_ref: &AstRef) -> Result<()> { - enum AstAttr { - None, - Mark, - Visit, - } - let ast_attr = match &*ast_ref.borrow() { - AstType::Enum(Enum { item: ItemEnum { attrs, .. }, .. }) - | AstType::Struct(Struct { item: ItemStruct { attrs, .. }, .. }) => { - let attr = attrs.iter().find(|attr| attr.path().is_ident("ast")); - let attr = match attr { - Some(Attribute { meta: Meta::Path(_), .. }) => AstAttr::Mark, - Some(attr @ Attribute { meta: Meta::List(_), .. }) => { - // TODO: support for punctuated list of arguments here if needed! - let args = attr.parse_args::().normalize()?; - if args.is_ident("visit") { - AstAttr::Visit - } else { - AstAttr::Mark - } - } - Some(_) => return Err(String::from("Invalid arguments in the `ast` attribute!")), - None => AstAttr::None, - }; - Some(attr) - } - AstType::Macro(_) => None, - }; - - match ast_attr { - Some(AstAttr::Visit) => { - ast_ref.borrow_mut().set_ast(true)?; - ast_ref.borrow_mut().set_visitable(true)?; - } - Some(AstAttr::Mark) => { - // AST without visit! 
- ast_ref.borrow_mut().set_ast(true)?; - } - Some(AstAttr::None) => { - return Err(format!( - "All `enums` and `structs` defined in the source of truth should be marked with an `#[ast]` attribute(missing `#[ast]` on '{:?}')", - ast_ref.borrow().ident() - )); - } - None => { /* unrelated items like `use`, `type` and `macro` definitions */ } - } - - Ok(()) -} diff --git a/tasks/ast_tools/src/schema/defs.rs b/tasks/ast_tools/src/schema/defs.rs deleted file mode 100644 index 433d5f05be2f0..0000000000000 --- a/tasks/ast_tools/src/schema/defs.rs +++ /dev/null @@ -1,264 +0,0 @@ -use serde::Serialize; -use syn::Ident; - -use crate::{ - markers::{ - DeriveAttributes, ESTreeEnumAttribute, ESTreeStructAttribute, ScopeAttribute, ScopeMarkers, - VisitMarkers, - }, - util::{ToIdent, TypeAnalysis, TypeWrapper}, - TypeId, -}; - -use super::TypeName; - -#[derive(Debug, Serialize)] -#[serde(untagged)] -#[expect(clippy::large_enum_variant)] -pub enum TypeDef { - Struct(StructDef), - Enum(EnumDef), -} - -impl TypeDef { - pub fn name(&self) -> &str { - match self { - TypeDef::Struct(def) => &def.name, - TypeDef::Enum(def) => &def.name, - } - } - - pub fn is_visitable(&self) -> bool { - match self { - TypeDef::Struct(def) => def.is_visitable, - TypeDef::Enum(def) => def.is_visitable, - } - } - - pub fn generated_derives(&self) -> &Vec { - match self { - TypeDef::Struct(def) => &def.generated_derives, - TypeDef::Enum(def) => &def.generated_derives, - } - } - - pub fn generates_derive(&self, derive: &str) -> bool { - let generated_derives = self.generated_derives(); - generated_derives.iter().any(|it| it == derive) - } - - pub fn module_path(&self) -> &str { - match self { - TypeDef::Struct(def) => &def.module_path, - TypeDef::Enum(def) => &def.module_path, - } - } -} - -#[derive(Debug, Serialize)] -#[serde(tag = "type", rename = "struct", rename_all = "camelCase")] -pub struct StructDef { - pub id: TypeId, - pub name: String, - #[serde(skip)] - pub is_visitable: bool, - pub fields: Vec, 
- #[serde(skip)] - pub has_lifetime: bool, - pub size_64: usize, - pub align_64: usize, - pub offsets_64: Option>, - pub size_32: usize, - pub align_32: usize, - pub offsets_32: Option>, - #[serde(skip)] - pub generated_derives: Vec, - #[serde(skip)] - pub markers: StructOuterMarkers, - #[serde(skip)] - pub module_path: String, -} - -#[derive(Debug, Serialize)] -#[serde(tag = "type", rename = "enum", rename_all = "camelCase")] -pub struct EnumDef { - pub id: TypeId, - pub name: String, - pub is_visitable: bool, - pub variants: Vec, - /// For `@inherits` inherited enum variants - pub inherits: Vec, - pub has_lifetime: bool, - pub size_64: usize, - pub align_64: usize, - pub offsets_64: Option>, - pub size_32: usize, - pub align_32: usize, - pub offsets_32: Option>, - pub generated_derives: Vec, - #[serde(skip)] - pub module_path: String, - #[serde(skip)] - pub markers: EnumOuterMarkers, -} - -impl EnumDef { - /// Returns an iterator that would first walk all "real" variants and moves onto inherited ones - /// based on the inheritance order. - pub fn all_variants(&self) -> impl Iterator { - self.variants.iter().chain(self.inherits.iter().flat_map(|it| it.variants.iter())) - } - - /// Are all the variants in this enum unit? 
- /// Example: - /// ``` - /// enum E { A, B, C, D } - /// - /// ``` - /// - pub fn is_unit(&self) -> bool { - self.all_variants().all(VariantDef::is_unit) - } -} - -#[derive(Debug, Serialize)] -pub struct VariantDef { - pub name: String, - pub fields: Vec, - pub discriminant: u8, - #[serde(skip)] - pub markers: InnerMarkers, -} - -impl VariantDef { - pub fn ident(&self) -> Ident { - self.name.to_ident() - } - - pub fn is_unit(&self) -> bool { - self.fields.is_empty() - } -} - -#[derive(Debug, Serialize)] -pub struct InheritDef { - #[serde(rename = "super")] - pub super_: TypeRef, - pub variants: Vec, -} - -#[derive(Debug, Serialize)] -pub struct FieldDef { - /// `None` if unnamed - pub name: Option, - #[serde(skip)] - pub vis: Visibility, - #[serde(rename = "type")] - pub typ: TypeRef, - #[serde(skip)] - pub markers: InnerMarkers, - #[serde(skip)] - pub docs: Vec, -} - -#[derive(Debug, Serialize)] -#[serde(rename_all = "camelCase")] -pub enum Visibility { - None, - Pub, - /// rest of the restricted visibilities - Rest, -} - -impl Visibility { - pub fn is_pub(&self) -> bool { - matches!(self, Self::Pub) - } -} - -impl From<&syn::Visibility> for Visibility { - fn from(vis: &syn::Visibility) -> Self { - match vis { - syn::Visibility::Public(_) => Self::Pub, - syn::Visibility::Inherited => Self::None, - syn::Visibility::Restricted(_) => Self::Rest, - } - } -} - -impl FieldDef { - pub fn ident(&self) -> Option { - self.name.as_ref().map(ToIdent::to_ident) - } -} - -#[derive(Debug, Serialize)] -pub struct TypeRef { - #[serde(skip)] - pub(super) id: Option, - pub(super) name: TypeName, - - #[serde(rename = "id")] - pub(super) transparent_id: Option, - - #[serde(skip)] - pub(super) raw: String, - #[serde(skip)] - pub(super) analysis: TypeAnalysis, -} - -impl TypeRef { - /// It is `None` for foreign types. 
- #[inline] - pub fn type_id(&self) -> Option { - self.id - } - - /// Reflects the inner most type id of `Adt1>>` - #[inline] - pub fn transparent_type_id(&self) -> Option { - self.transparent_id - } - - /// Reflects the inner type id of `Box` - #[inline] - pub fn name(&self) -> &TypeName { - &self.name - } - - #[inline] - pub fn analysis(&self) -> &TypeAnalysis { - &self.analysis - } - - #[inline] - pub fn raw(&self) -> &str { - &self.raw - } - - pub fn is_str_slice(&self) -> bool { - matches!(self.analysis().wrapper, TypeWrapper::Ref if self.name.inner_name() == "str") - } -} - -#[derive(Debug)] -pub struct StructOuterMarkers { - pub scope: Option, - pub estree: Option, -} - -#[derive(Debug)] -pub struct EnumOuterMarkers { - pub estree: ESTreeEnumAttribute, -} - -#[derive(Debug, Serialize)] -pub struct InnerMarkers { - /// marker that hints to fold span in here - pub span: bool, - pub derive_attributes: DeriveAttributes, - #[serde(skip)] - pub visit: VisitMarkers, - #[serde(skip)] - pub scope: ScopeMarkers, -} diff --git a/tasks/ast_tools/src/schema/defs/box.rs b/tasks/ast_tools/src/schema/defs/box.rs new file mode 100644 index 0000000000000..f2ac651de39be --- /dev/null +++ b/tasks/ast_tools/src/schema/defs/box.rs @@ -0,0 +1,81 @@ +use proc_macro2::TokenStream; +use quote::quote; + +use super::{extensions::layout::Layout, Def, Derives, FileId, Schema, TypeDef, TypeId}; + +/// Type definition for a `Box`. +#[derive(Debug)] +pub struct BoxDef { + pub id: TypeId, + pub name: String, + pub inner_type_id: TypeId, + pub layout: Layout, +} + +impl BoxDef { + /// Create new [`BoxDef`]. + pub fn new(name: String, inner_type_id: TypeId) -> Self { + Self { id: TypeId::DUMMY, name, inner_type_id, layout: Layout::default() } + } + + /// Get inner type. + /// + /// This is the direct inner type e.g. `Box<Option<Expression>>` -> `Option<Expression>`. + /// Use [`innermost_type`] method if you want `Expression` in this example.
+ /// + /// [`innermost_type`]: Self::innermost_type + pub fn inner_type<'s>(&self, schema: &'s Schema) -> &'s TypeDef { + &schema.types[self.inner_type_id] + } +} + +impl Def for BoxDef { + /// Get [`TypeId`] for type. + fn id(&self) -> TypeId { + self.id + } + + /// Get type name. + fn name(&self) -> &str { + &self.name + } + + /// Get [`FileId`] of file containing definition of this type. + /// + /// `Box`es are not defined in a file, so returns `None`. + fn file_id(&self) -> Option { + None + } + + /// Get all traits which have derives generated for this type. + /// + /// `Box`es never have any generated derives. + fn generated_derives(&self) -> Derives { + Derives::none() + } + + /// Get if type has a lifetime. + #[expect(unused_variables)] + fn has_lifetime(&self, schema: &Schema) -> bool { + true + } + + /// Get type signature (including lifetimes). + /// Lifetimes are anonymous (`'_`) if `anon` is true. + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream { + let inner_ty = self.inner_type(schema).ty_with_lifetime(schema, anon); + let lifetime = if anon { quote!( '_ ) } else { quote!( 'a ) }; + quote!( Box<#lifetime, #inner_ty> ) + } + + /// Get inner type, if type has one. + /// + /// All `Box`es have an inner type, so better to use [`inner_type`] or [`innermost_type`] methods, + /// which don't return an `Option`. + /// + /// [`inner_type`]: Self::inner_type + /// [`innermost_type`]: Self::innermost_type + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + Some(self.inner_type(schema)) + } +} diff --git a/tasks/ast_tools/src/schema/defs/cell.rs b/tasks/ast_tools/src/schema/defs/cell.rs new file mode 100644 index 0000000000000..85406b3a08e2a --- /dev/null +++ b/tasks/ast_tools/src/schema/defs/cell.rs @@ -0,0 +1,79 @@ +use proc_macro2::TokenStream; +use quote::quote; + +use super::{extensions::layout::Layout, Def, Derives, FileId, Schema, TypeDef, TypeId}; + +/// Type definition for a `Cell`. 
+#[derive(Debug)] +pub struct CellDef { + pub id: TypeId, + pub name: String, + pub inner_type_id: TypeId, + pub layout: Layout, +} + +impl CellDef { + /// Create new [`CellDef`]. + pub fn new(name: String, inner_type_id: TypeId) -> Self { + Self { id: TypeId::DUMMY, name, inner_type_id, layout: Layout::default() } + } + + /// Get inner type. + /// + /// This is the direct inner type e.g. `Cell<Option<ScopeId>>` -> `Option<ScopeId>`. + /// Use [`innermost_type`] method if you want `ScopeId` in this example. + /// + /// [`innermost_type`]: Self::innermost_type + pub fn inner_type<'s>(&self, schema: &'s Schema) -> &'s TypeDef { + &schema.types[self.inner_type_id] + } +} + +impl Def for CellDef { + /// Get [`TypeId`] for type. + fn id(&self) -> TypeId { + self.id + } + + /// Get type name. + fn name(&self) -> &str { + &self.name + } + + /// Get [`FileId`] of file containing definition of this type. + /// + /// `Cell`s are not defined in a file, so returns `None`. + fn file_id(&self) -> Option { + None + } + + /// Get all traits which have derives generated for this type. + /// + /// `Cell`s never have any generated derives. + fn generated_derives(&self) -> Derives { + Derives::none() + } + + /// Get if type has a lifetime. + fn has_lifetime(&self, schema: &Schema) -> bool { + self.inner_type(schema).has_lifetime(schema) + } + + /// Get type signature (including lifetimes). + /// Lifetimes are anonymous (`'_`) if `anon` is true. + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream { + let inner_ty = self.inner_type(schema).ty_with_lifetime(schema, anon); + quote!( Cell<#inner_ty> ) + } + + /// Get inner type, if type has one. + /// + /// All `Cell`s have an inner type, so better to use [`inner_type`] or [`innermost_type`] methods, + /// which don't return an `Option`.
+ /// + /// [`inner_type`]: Self::inner_type + /// [`innermost_type`]: Self::innermost_type + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + Some(self.inner_type(schema)) + } +} diff --git a/tasks/ast_tools/src/schema/defs/enum.rs b/tasks/ast_tools/src/schema/defs/enum.rs new file mode 100644 index 0000000000000..9f8b41cc9e668 --- /dev/null +++ b/tasks/ast_tools/src/schema/defs/enum.rs @@ -0,0 +1,240 @@ +use std::{iter::FusedIterator, ops::Range}; + +use convert_case::{Case, Casing}; +use proc_macro2::TokenStream; +use quote::quote; +use syn::Ident; + +use crate::utils::create_ident; + +use super::{ + extensions::{ + estree::{ESTreeEnum, ESTreeEnumVariant}, + kind::Kind, + layout::Layout, + visit::{VisitEnum, VisitFieldOrVariant}, + }, + Def, Derives, FileId, Schema, TypeDef, TypeId, +}; + +pub type Discriminant = u8; + +/// Type definition for an enum. +#[derive(Debug)] +pub struct EnumDef { + pub id: TypeId, + pub name: String, + pub has_lifetime: bool, + pub file_id: FileId, + pub generated_derives: Derives, + pub variants: Vec, + /// For `@inherits` inherited enum variants + pub inherits: Vec, + pub visit: VisitEnum, + pub kind: Kind, + pub layout: Layout, + pub estree: ESTreeEnum, +} + +impl EnumDef { + /// Create new [`EnumDef`]. + pub fn new( + id: TypeId, + name: String, + has_lifetime: bool, + file_id: FileId, + generated_derives: Derives, + variants: Vec, + inherits: Vec, + ) -> Self { + Self { + id, + name, + has_lifetime, + file_id, + generated_derives, + variants, + inherits, + visit: VisitEnum::default(), + kind: Kind::default(), + layout: Layout::default(), + estree: ESTreeEnum::default(), + } + } + + /// Get iterator over all enum's variants (including inherited) + pub fn all_variants<'s>(&'s self, schema: &'s Schema) -> AllVariantsIter<'s> { + AllVariantsIter::new(self, schema) + } + + /// Get iterator over the types which this enum inherits variants from (the types listed in `inherits`).
+ pub fn inherits_types<'s>(&'s self, schema: &'s Schema) -> impl Iterator { + self.inherits.iter().map(|&type_id| &schema.types[type_id]) + } + + /// Get whether all variants are fieldless. + pub fn is_fieldless(&self) -> bool { + // All AST enums are `#[repr(C, u8)]` or `#[repr(u8)]`. + // Such enums must have at least 1 variant, so only way can have size 1 + // is if all variants are fieldless. + self.layout.layout_64.size == 1 + } + + /// Get iterator over variant indexes. + /// + /// Only includes own variant, not inherited. + pub fn variant_indices(&self) -> Range { + 0..self.variants.len() + } + + /// Get iterator over inherits indexes. + pub fn inherits_indices(&self) -> Range { + 0..self.inherits.len() + } +} + +impl Def for EnumDef { + /// Get [`TypeId`] for type. + fn id(&self) -> TypeId { + self.id + } + + /// Get type name. + fn name(&self) -> &str { + &self.name + } + + /// Get [`FileId`] of file containing definition of this type. + fn file_id(&self) -> Option { + Some(self.file_id) + } + + /// Get all traits which have derives generated for this type. + fn generated_derives(&self) -> Derives { + self.generated_derives + } + + /// Get if type has a lifetime. + #[expect(unused_variables)] + fn has_lifetime(&self, schema: &Schema) -> bool { + self.has_lifetime + } + + /// Get type signature (including lifetime). + /// Lifetime is anonymous (`'_`) if `anon` is true. + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream { + let ident = self.ident(); + let lifetime = self.lifetime_maybe_anon(schema, anon); + quote!( #ident #lifetime ) + } + + /// Get inner type, if type has one. + /// + /// Enums don't have a single inner type, so returns `None`. 
+ #[expect(unused_variables)] + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + None + } +} + +#[derive(Debug)] +pub struct VariantDef { + pub name: String, + pub field_type_id: Option, + pub discriminant: Discriminant, + pub visit: VisitFieldOrVariant, + pub estree: ESTreeEnumVariant, +} + +impl VariantDef { + /// Create new [`VariantDef`]. + pub fn new(name: String, field_type_id: Option, discriminant: Discriminant) -> Self { + Self { + name, + field_type_id, + discriminant, + visit: VisitFieldOrVariant::default(), + estree: ESTreeEnumVariant::default(), + } + } + + /// Get variant name. + pub fn name(&self) -> &str { + &self.name + } + + /// Get variant name in camel case. + pub fn camel_name(&self) -> String { + self.name().to_case(Case::Camel) + } + + /// Get variant name as an [`Ident`]. + /// + /// [`Ident`]: struct@Ident + pub fn ident(&self) -> Ident { + create_ident(self.name()) + } + + /// Get variant's field type. + /// + /// Returns `None` if variant is fieldless. + pub fn field_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + self.field_type_id.map(|type_id| &schema.types[type_id]) + } + + /// Returns `true` if variant is fieldless. + /// + /// e.g. `enum Foo { Bar, Qux(u64) }` + /// `Bar` variant is fieldless, `Qux` variant is not. + pub fn is_fieldless(&self) -> bool { + self.field_type_id.is_none() + } +} + +/// Iterator over all variants of an enum (including inherited). +pub struct AllVariantsIter<'s> { + schema: &'s Schema, + variants_iter: std::slice::Iter<'s, VariantDef>, + inherits_iter: std::slice::Iter<'s, TypeId>, + inner_iter: Option>>, +} + +impl<'s> AllVariantsIter<'s> { + /// Create new [`AllVariantsIter`]. 
+ fn new(enum_def: &'s EnumDef, schema: &'s Schema) -> Self { + let variants_iter = enum_def.variants.iter(); + let inherits_iter = enum_def.inherits.iter(); + Self { schema, variants_iter, inherits_iter, inner_iter: None } + } +} + +impl<'s> Iterator for AllVariantsIter<'s> { + type Item = &'s VariantDef; + + fn next(&mut self) -> Option { + // Yield own variants first + if let Some(variant) = self.variants_iter.next() { + return Some(variant); + } + + // Yield from inner iterator (iterating over inherited type's variants) + if let Some(inner_iter) = &mut self.inner_iter { + if let Some(variant) = inner_iter.next() { + return Some(variant); + } + self.inner_iter = None; + } + + // No current inner iterator. Start iterating over next inherited type. + if let Some(&inherits_type_id) = self.inherits_iter.next() { + let inherited = self.schema.enum_def(inherits_type_id); + let inner_iter = inherited.all_variants(self.schema); + self.inner_iter = Some(Box::new(inner_iter)); + Some(self.inner_iter.as_mut().unwrap().next().unwrap()) + } else { + None + } + } +} + +impl FusedIterator for AllVariantsIter<'_> {} diff --git a/tasks/ast_tools/src/schema/defs/mod.rs b/tasks/ast_tools/src/schema/defs/mod.rs new file mode 100644 index 0000000000000..b243540b1ce07 --- /dev/null +++ b/tasks/ast_tools/src/schema/defs/mod.rs @@ -0,0 +1,127 @@ +use convert_case::{Case, Casing}; +use proc_macro2::TokenStream; +use quote::quote; +use syn::Ident; + +use crate::{codegen::DeriveId, utils::create_ident, Schema}; + +use super::{extensions, Derives, FileId, TypeId}; + +mod r#box; +mod cell; +mod r#enum; +mod option; +mod primitive; +mod r#struct; +mod r#type; +mod vec; +pub use cell::CellDef; +pub use option::OptionDef; +pub use primitive::PrimitiveDef; +pub use r#box::BoxDef; +pub use r#enum::{Discriminant, EnumDef, VariantDef}; +pub use r#struct::{FieldDef, StructDef, Visibility}; +pub use r#type::TypeDef; +pub use vec::VecDef; + +/// Trait for type defs. 
+pub trait Def { + /// Get [`TypeId`] for type. + fn id(&self) -> TypeId; + + /// Get type name. + fn name(&self) -> &str; + + /// Get [`FileId`] of file containing definition of this type. + fn file_id(&self) -> Option; + + /// Get all traits which have derives generated for this type. + fn generated_derives(&self) -> Derives; + + /// Get whether a derive is generated for this type. + fn generates_derive(&self, derive_id: DeriveId) -> bool { + self.generated_derives().has(derive_id) + } + + /// Get if type has a lifetime. + fn has_lifetime(&self, schema: &Schema) -> bool; + + /// Get type name in snake case. + fn snake_name(&self) -> String { + self.name().to_case(Case::Snake) + } + + /// Get type name as an [`Ident`]. + /// + /// [`Ident`]: struct@Ident + fn ident(&self) -> Ident { + create_ident(self.name()) + } + + /// Get type signature (including lifetimes). + fn ty(&self, schema: &Schema) -> TokenStream { + self.ty_with_lifetime(schema, false) + } + + /// Get type signature (including anonymous lifetimes). + fn ty_anon(&self, schema: &Schema) -> TokenStream { + self.ty_with_lifetime(schema, true) + } + + /// Get type signature (including lifetimes). + /// Lifetimes are anonymous (`'_`) if `anon` is true. + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream; + + /// Get lifetime (if type has one). + /// Lifetime is anonymous (`'_`) if `anon` is true. + fn lifetime_maybe_anon(&self, schema: &Schema, anon: bool) -> TokenStream { + if anon { + self.lifetime_anon(schema) + } else { + self.lifetime(schema) + } + } + + /// Get lifetime (if type has one). + fn lifetime(&self, schema: &Schema) -> TokenStream { + if self.has_lifetime(schema) { + quote!( <'a> ) + } else { + quote!() + } + } + + /// Get anonymous lifetime (if type has one). + fn lifetime_anon(&self, schema: &Schema) -> TokenStream { + if self.has_lifetime(schema) { + quote!( <'_> ) + } else { + quote!() + } + } + + /// Get inner type, if type has one. 
+ /// + /// This is the direct inner type e.g. `Cell<Option<ScopeId>>` -> `Option<ScopeId>`. + /// Use [`innermost_type`] method if you want `ScopeId` in this example. + /// + /// Returns `None` for types which don't have a single inner type (structs, enums, and primitives). + /// + /// [`innermost_type`]: Def::innermost_type + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef>; + + /// Get innermost type. + /// + /// e.g. `ScopeId` in `Cell<Option<ScopeId>>`. + /// + /// Use [`inner_type`] method if you want the direct inner type (`Option<ScopeId>` in this example). + /// + /// [`inner_type`]: Def::maybe_inner_type + fn innermost_type<'s>(&self, schema: &'s Schema) -> &'s TypeDef { + if let Some(inner_type) = self.maybe_inner_type(schema) { + inner_type.innermost_type(schema) + } else { + &schema.types[self.id()] + } + } +} diff --git a/tasks/ast_tools/src/schema/defs/option.rs b/tasks/ast_tools/src/schema/defs/option.rs new file mode 100644 index 0000000000000..b6eec61a5a55e --- /dev/null +++ b/tasks/ast_tools/src/schema/defs/option.rs @@ -0,0 +1,79 @@ +use proc_macro2::TokenStream; +use quote::quote; + +use super::{extensions::layout::Layout, Def, Derives, FileId, Schema, TypeDef, TypeId}; + +/// Type definition for an `Option`. +#[derive(Debug)] +pub struct OptionDef { + pub id: TypeId, + pub name: String, + pub inner_type_id: TypeId, + pub layout: Layout, +} + +impl OptionDef { + /// Create new [`OptionDef`]. + pub fn new(name: String, inner_type_id: TypeId) -> Self { + Self { id: TypeId::DUMMY, name, inner_type_id, layout: Layout::default() } + } + + /// Get inner type. + /// + /// This is the direct inner type e.g. `Option<Box<FunctionBody>>` -> `Box<FunctionBody>`. + /// Use [`innermost_type`] method if you want `FunctionBody` in this example. + /// + /// [`innermost_type`]: Self::innermost_type + pub fn inner_type<'s>(&self, schema: &'s Schema) -> &'s TypeDef { + &schema.types[self.inner_type_id] + } +} + +impl Def for OptionDef { + /// Get [`TypeId`] for type.
+ fn id(&self) -> TypeId { + self.id + } + + /// Get type name. + fn name(&self) -> &str { + &self.name + } + + /// Get [`FileId`] of file containing definition of this type. + /// + /// `Option`s are not defined in a file, so returns `None`. + fn file_id(&self) -> Option { + None + } + + /// Get all traits which have derives generated for this type. + /// + /// `Option`s never have any generated derives. + fn generated_derives(&self) -> Derives { + Derives::none() + } + + /// Get if type has a lifetime. + fn has_lifetime(&self, schema: &Schema) -> bool { + self.inner_type(schema).has_lifetime(schema) + } + + /// Get type signature (including lifetimes). + /// Lifetimes are anonymous (`'_`) if `anon` is true. + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream { + let inner_ty = self.inner_type(schema).ty_with_lifetime(schema, anon); + quote!( Option<#inner_ty> ) + } + + /// Get inner type, if type has one. + /// + /// All `Option`s have an inner type, so better to use [`inner_type`] or [`innermost_type`] methods, + /// which don't return an `Option`. + /// + /// [`inner_type`]: Self::inner_type + /// [`innermost_type`]: Self::innermost_type + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + Some(self.inner_type(schema)) + } +} diff --git a/tasks/ast_tools/src/schema/defs/primitive.rs b/tasks/ast_tools/src/schema/defs/primitive.rs new file mode 100644 index 0000000000000..ad832e16d5e5f --- /dev/null +++ b/tasks/ast_tools/src/schema/defs/primitive.rs @@ -0,0 +1,89 @@ +use proc_macro2::TokenStream; +use quote::quote; + +use super::{extensions::layout::Layout, Def, Derives, FileId, Schema, TypeDef, TypeId}; + +/// Type definition for a primitive type. +/// +/// Includes: +/// * Built-ins e.g. `u8`, `&str`. +/// * Special Oxc types e.g. `ScopeId`, `Atom`.
+#[derive(Debug)] +pub struct PrimitiveDef { + pub id: TypeId, + pub name: &'static str, + pub layout: Layout, +} + +impl PrimitiveDef { + /// Create new [`PrimitiveDef`]. + pub fn new(name: &'static str) -> Self { + Self { id: TypeId::DUMMY, name, layout: Layout::default() } + } +} + +impl Def for PrimitiveDef { + /// Get [`TypeId`] for type. + fn id(&self) -> TypeId { + self.id + } + + /// Get type name. + fn name(&self) -> &str { + self.name + } + + /// Get [`FileId`] of file containing definition of this type. + /// + /// Primitives are not defined in a file, so returns `None`. + fn file_id(&self) -> Option { + None + } + + /// Get all traits which have derives generated for this type. + /// + /// Primitives never have any generated derives. + fn generated_derives(&self) -> Derives { + Derives::none() + } + + /// Get if type has a lifetime. + #[expect(unused_variables)] + fn has_lifetime(&self, schema: &Schema) -> bool { + self.name() == "&str" || self.name() == "Atom" + } + + /// Get type signature (including lifetimes). + /// Lifetime is anonymous (`'_`) if `anon` is true. + #[expect(unused_variables)] + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream { + match self.name() { + "&str" => { + if anon { + quote!(&str) + } else { + quote!(&'a str) + } + } + "Atom" => { + if anon { + quote!(Atom<'_>) + } else { + quote!(Atom<'a>) + } + } + _ => { + let ident = self.ident(); + quote!( #ident ) + } + } + } + + /// Get inner type, if type has one. + /// + /// Primitives don't have an inner type, so returns `None`. 
+ #[expect(unused_variables)] + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + None + } +} diff --git a/tasks/ast_tools/src/schema/defs/struct.rs b/tasks/ast_tools/src/schema/defs/struct.rs new file mode 100644 index 0000000000000..19d13ec495880 --- /dev/null +++ b/tasks/ast_tools/src/schema/defs/struct.rs @@ -0,0 +1,174 @@ +use std::ops::Range; + +use convert_case::{Case, Casing}; +use proc_macro2::TokenStream; +use quote::quote; + +use crate::utils::create_ident_tokens; + +use super::{ + extensions::{ + clone_in::CloneInStructField, + estree::{ESTreeStruct, ESTreeStructField}, + kind::Kind, + layout::{Layout, Offset}, + span::SpanStruct, + visit::{VisitFieldOrVariant, VisitStruct}, + }, + Def, Derives, FileId, Schema, TypeDef, TypeId, +}; + +/// Type definition for a struct. +#[derive(Debug)] +pub struct StructDef { + pub id: TypeId, + pub name: String, + pub has_lifetime: bool, + pub file_id: FileId, + pub generated_derives: Derives, + pub fields: Vec, + pub visit: VisitStruct, + pub kind: Kind, + pub layout: Layout, + pub span: SpanStruct, + pub estree: ESTreeStruct, +} + +impl StructDef { + /// Create new [`StructDef`]. + pub fn new( + id: TypeId, + name: String, + has_lifetime: bool, + file_id: FileId, + generated_derives: Derives, + fields: Vec, + ) -> Self { + Self { + id, + name, + has_lifetime, + file_id, + generated_derives, + fields, + visit: VisitStruct::default(), + kind: Kind::default(), + layout: Layout::default(), + span: SpanStruct::default(), + estree: ESTreeStruct::default(), + } + } + + /// Get iterator over field indexes. + pub fn field_indices(&self) -> Range { + 0..self.fields.len() + } +} + +impl Def for StructDef { + /// Get [`TypeId`] for type. + fn id(&self) -> TypeId { + self.id + } + + /// Get type name. + fn name(&self) -> &str { + &self.name + } + + /// Get [`FileId`] of file containing definition of this type. 
+ fn file_id(&self) -> Option { + Some(self.file_id) + } + + /// Get all traits which have derives generated for this type. + fn generated_derives(&self) -> Derives { + self.generated_derives + } + + /// Get if type has a lifetime. + #[expect(unused_variables)] + fn has_lifetime(&self, schema: &Schema) -> bool { + self.has_lifetime + } + + /// Get type signature (including lifetime). + /// Lifetime is anonymous (`'_`) if `anon` is true. + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream { + let ident = self.ident(); + let lifetime = self.lifetime_maybe_anon(schema, anon); + quote!( #ident #lifetime ) + } + + /// Get inner type, if type has one. + /// + /// Structs don't have a single inner type, so returns `None`. + #[expect(unused_variables)] + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + None + } +} + +#[derive(Debug)] +pub struct FieldDef { + pub name: String, + pub type_id: TypeId, + pub visibility: Visibility, + pub doc_comment: Option, + pub visit: VisitFieldOrVariant, + pub offset: Offset, + pub clone_in: CloneInStructField, + pub estree: ESTreeStructField, +} + +impl FieldDef { + /// Create new [`FieldDef`]. + pub fn new( + name: String, + type_id: TypeId, + visibility: Visibility, + doc_comment: Option, + ) -> Self { + Self { + name, + type_id, + visibility, + doc_comment, + visit: VisitFieldOrVariant::default(), + offset: Offset::default(), + clone_in: CloneInStructField::default(), + estree: ESTreeStructField::default(), + } + } + + /// Get field name. + pub fn name(&self) -> &str { + &self.name + } + + /// Get field name in camel case. + pub fn camel_name(&self) -> String { + self.name().to_case(Case::Camel) + } + + /// Get field name as an identifier. + /// + /// This is a [`TokenStream`] not `Ident`, to handle unnamed fields where field name is e.g. `0`. + pub fn ident(&self) -> TokenStream { + create_ident_tokens(self.name()) + } + + /// Get field type. 
+ pub fn type_def<'s>(&self, schema: &'s Schema) -> &'s TypeDef { + &schema.types[self.type_id] + } +} + +/// Visibility of a struct field. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Visibility { + Public, + /// `pub(crate)` or `pub(super)` + Restricted, + Private, +} diff --git a/tasks/ast_tools/src/schema/defs/type.rs b/tasks/ast_tools/src/schema/defs/type.rs new file mode 100644 index 0000000000000..035f2344d807a --- /dev/null +++ b/tasks/ast_tools/src/schema/defs/type.rs @@ -0,0 +1,255 @@ +use proc_macro2::TokenStream; + +use super::{ + BoxDef, CellDef, Def, Derives, EnumDef, FileId, OptionDef, PrimitiveDef, Schema, StructDef, + TypeId, VecDef, +}; + +/// Type definition for a type. +#[derive(Debug)] +pub enum TypeDef { + Struct(StructDef), + Enum(EnumDef), + Primitive(PrimitiveDef), + Option(OptionDef), + Box(BoxDef), + Vec(VecDef), + Cell(CellDef), +} + +impl Def for TypeDef { + /// Get [`TypeId`] for type. + fn id(&self) -> TypeId { + match self { + TypeDef::Struct(def) => def.id(), + TypeDef::Enum(def) => def.id(), + TypeDef::Primitive(def) => def.id(), + TypeDef::Option(def) => def.id(), + TypeDef::Box(def) => def.id(), + TypeDef::Vec(def) => def.id(), + TypeDef::Cell(def) => def.id(), + } + } + + /// Get type name. + fn name(&self) -> &str { + match self { + TypeDef::Struct(def) => def.name(), + TypeDef::Enum(def) => def.name(), + TypeDef::Primitive(def) => def.name(), + TypeDef::Option(def) => def.name(), + TypeDef::Box(def) => def.name(), + TypeDef::Vec(def) => def.name(), + TypeDef::Cell(def) => def.name(), + } + } + + /// Get [`FileId`] of file containing definition of this type. + /// + /// Returns `None` if type is not defined in a file (e.g. primitives). 
+ fn file_id(&self) -> Option { + match self { + TypeDef::Struct(def) => def.file_id(), + TypeDef::Enum(def) => def.file_id(), + TypeDef::Primitive(def) => def.file_id(), + TypeDef::Option(def) => def.file_id(), + TypeDef::Box(def) => def.file_id(), + TypeDef::Vec(def) => def.file_id(), + TypeDef::Cell(def) => def.file_id(), + } + } + + /// Get all traits which have derives generated for this type. + fn generated_derives(&self) -> Derives { + match self { + TypeDef::Struct(def) => def.generated_derives(), + TypeDef::Enum(def) => def.generated_derives(), + TypeDef::Primitive(def) => def.generated_derives(), + TypeDef::Option(def) => def.generated_derives(), + TypeDef::Box(def) => def.generated_derives(), + TypeDef::Vec(def) => def.generated_derives(), + TypeDef::Cell(def) => def.generated_derives(), + } + } + + /// Get if type has a lifetime. + fn has_lifetime(&self, schema: &Schema) -> bool { + match self { + TypeDef::Struct(def) => def.has_lifetime(schema), + TypeDef::Enum(def) => def.has_lifetime(schema), + TypeDef::Primitive(def) => def.has_lifetime(schema), + TypeDef::Option(def) => def.has_lifetime(schema), + TypeDef::Box(def) => def.has_lifetime(schema), + TypeDef::Vec(def) => def.has_lifetime(schema), + TypeDef::Cell(def) => def.has_lifetime(schema), + } + } + + /// Get type signature (including lifetimes, which are anonymous (`'_`) if `anon` is true). + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream { + match self { + TypeDef::Struct(def) => def.ty_with_lifetime(schema, anon), + TypeDef::Enum(def) => def.ty_with_lifetime(schema, anon), + TypeDef::Primitive(def) => def.ty_with_lifetime(schema, anon), + TypeDef::Option(def) => def.ty_with_lifetime(schema, anon), + TypeDef::Box(def) => def.ty_with_lifetime(schema, anon), + TypeDef::Vec(def) => def.ty_with_lifetime(schema, anon), + TypeDef::Cell(def) => def.ty_with_lifetime(schema, anon), + } + } + + /// Get inner type, if type has one. + /// + /// This is the direct inner type e.g. `Cell<Option<ScopeId>>` -> `Option<ScopeId>`.
+ /// Use [`innermost_type`] method if you want `ScopeId` in this example. + /// + /// Returns `None` for types which don't have a single inner type (structs, enums, and primitives). + /// + /// [`innermost_type`]: Self::innermost_type + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + match self { + TypeDef::Struct(def) => def.maybe_inner_type(schema), + TypeDef::Enum(def) => def.maybe_inner_type(schema), + TypeDef::Primitive(def) => def.maybe_inner_type(schema), + TypeDef::Option(def) => def.maybe_inner_type(schema), + TypeDef::Box(def) => def.maybe_inner_type(schema), + TypeDef::Vec(def) => def.maybe_inner_type(schema), + TypeDef::Cell(def) => def.maybe_inner_type(schema), + } + } +} + +/// `is_*` / `as_*` / `as_*_mut` methods. +impl TypeDef { + #[expect(dead_code)] + pub fn is_struct(&self) -> bool { + matches!(self, Self::Struct(_)) + } + + pub fn as_struct(&self) -> Option<&StructDef> { + match self { + Self::Struct(def) => Some(def), + _ => None, + } + } + + pub fn as_struct_mut(&mut self) -> Option<&mut StructDef> { + match self { + Self::Struct(def) => Some(def), + _ => None, + } + } + + #[expect(dead_code)] + pub fn is_enum(&self) -> bool { + matches!(self, Self::Enum(_)) + } + + pub fn as_enum(&self) -> Option<&EnumDef> { + match self { + Self::Enum(def) => Some(def), + _ => None, + } + } + + pub fn as_enum_mut(&mut self) -> Option<&mut EnumDef> { + match self { + Self::Enum(def) => Some(def), + _ => None, + } + } + + #[expect(dead_code)] + pub fn is_primitive(&self) -> bool { + matches!(self, Self::Primitive(_)) + } + + pub fn as_primitive(&self) -> Option<&PrimitiveDef> { + match self { + Self::Primitive(def) => Some(def), + _ => None, + } + } + + pub fn as_primitive_mut(&mut self) -> Option<&mut PrimitiveDef> { + match self { + Self::Primitive(def) => Some(def), + _ => None, + } + } + + #[expect(dead_code)] + pub fn is_option(&self) -> bool { + matches!(self, Self::Option(_)) + } + + pub fn as_option(&self) -> 
Option<&OptionDef> { + match self { + Self::Option(def) => Some(def), + _ => None, + } + } + + pub fn as_option_mut(&mut self) -> Option<&mut OptionDef> { + match self { + Self::Option(def) => Some(def), + _ => None, + } + } + + pub fn is_box(&self) -> bool { + matches!(self, Self::Box(_)) + } + + pub fn as_box(&self) -> Option<&BoxDef> { + match self { + Self::Box(def) => Some(def), + _ => None, + } + } + + pub fn as_box_mut(&mut self) -> Option<&mut BoxDef> { + match self { + Self::Box(def) => Some(def), + _ => None, + } + } + + #[expect(dead_code)] + pub fn is_vec(&self) -> bool { + matches!(self, Self::Vec(_)) + } + + pub fn as_vec(&self) -> Option<&VecDef> { + match self { + Self::Vec(def) => Some(def), + _ => None, + } + } + + pub fn as_vec_mut(&mut self) -> Option<&mut VecDef> { + match self { + Self::Vec(def) => Some(def), + _ => None, + } + } + + #[expect(dead_code)] + pub fn is_cell(&self) -> bool { + matches!(self, Self::Cell(_)) + } + + pub fn as_cell(&self) -> Option<&CellDef> { + match self { + Self::Cell(def) => Some(def), + _ => None, + } + } + + pub fn as_cell_mut(&mut self) -> Option<&mut CellDef> { + match self { + Self::Cell(def) => Some(def), + _ => None, + } + } +} diff --git a/tasks/ast_tools/src/schema/defs/vec.rs b/tasks/ast_tools/src/schema/defs/vec.rs new file mode 100644 index 0000000000000..08c0a1589fce1 --- /dev/null +++ b/tasks/ast_tools/src/schema/defs/vec.rs @@ -0,0 +1,81 @@ +use proc_macro2::TokenStream; +use quote::quote; + +use super::{extensions::layout::Layout, Def, Derives, FileId, Schema, TypeDef, TypeId}; + +/// Type definition for a `Vec`. +#[derive(Debug)] +pub struct VecDef { + pub id: TypeId, + pub name: String, + pub inner_type_id: TypeId, + pub layout: Layout, +} + +impl VecDef { + /// Create new [`VecDef`]. + pub fn new(name: String, inner_type_id: TypeId) -> Self { + Self { id: TypeId::DUMMY, name, inner_type_id, layout: Layout::default() } + } + + /// Get inner type. + /// + /// This is the direct inner type e.g. 
`Vec<Option<Expression>>` -> `Option<Expression>`. + /// Use [`innermost_type`] method if you want `Expression` in this example. + /// + /// [`innermost_type`]: Self::innermost_type + pub fn inner_type<'s>(&self, schema: &'s Schema) -> &'s TypeDef { + &schema.types[self.inner_type_id] + } +} + +impl Def for VecDef { + /// Get [`TypeId`] for type. + fn id(&self) -> TypeId { + self.id + } + + /// Get type name. + fn name(&self) -> &str { + &self.name + } + + /// Get [`FileId`] of file containing definition of this type. + /// + /// `Vec`s are not defined in a file, so returns `None`. + fn file_id(&self) -> Option { + None + } + + /// Get all traits which have derives generated for this type. + /// + /// `Vec`s never have any generated derives. + fn generated_derives(&self) -> Derives { + Derives::none() + } + + /// Get if type has a lifetime. + #[expect(unused_variables)] + fn has_lifetime(&self, schema: &Schema) -> bool { + true + } + + /// Get type signature (including lifetimes). + /// Lifetimes are anonymous (`'_`) if `anon` is true. + fn ty_with_lifetime(&self, schema: &Schema, anon: bool) -> TokenStream { + let inner_ty = self.inner_type(schema).ty_with_lifetime(schema, anon); + let lifetime = if anon { quote!( '_ ) } else { quote!( 'a ) }; + quote!( Vec<#lifetime, #inner_ty> ) + } + + /// Get inner type, if type has one. + /// + /// All `Vec`s have an inner type, so better to use [`inner_type`] or [`innermost_type`] methods, + /// which don't return an `Option`.
+ /// + /// [`inner_type`]: Self::inner_type + /// [`innermost_type`]: Self::innermost_type + fn maybe_inner_type<'s>(&self, schema: &'s Schema) -> Option<&'s TypeDef> { + Some(self.inner_type(schema)) + } +} diff --git a/tasks/ast_tools/src/schema/derives.rs b/tasks/ast_tools/src/schema/derives.rs new file mode 100644 index 0000000000000..54fb9add324a8 --- /dev/null +++ b/tasks/ast_tools/src/schema/derives.rs @@ -0,0 +1,135 @@ +use std::{ + fmt::{self, Debug}, + iter::FusedIterator, +}; + +use crate::{codegen::DeriveId, DERIVES}; + +/// Number of bytes required for bit set which can represent all [`DeriveId`]s. +const NUM_BYTES: usize = (DERIVES.len() + 7) / 8; + +/// Bit set with a bit for each [`DeriveId`]. +#[derive(Clone, Copy)] +pub struct Derives([u8; NUM_BYTES]); + +impl Derives { + /// Create empty [`Derives`] with no bits set. + /// i.e. the type doesn't derive any traits generated by this crate. + pub const fn none() -> Self { + Self([0; NUM_BYTES]) + } + + /// Create [`Derives`] with all bits set. + /// i.e. the type derives all traits generated by this crate. + pub const fn all() -> Self { + let mut out = Self::none(); + let mut index = 0; + while index < DERIVES.len() { + out = out.with(index); + index += 1; + } + out + } + + /// Returns `true` if the type derives the derive with provided [`DeriveId`]. + pub const fn has(self, id: DeriveId) -> bool { + let (byte_index, mask) = Self::byte_index_and_mask(id); + (self.0[byte_index] & mask) != 0 + } + + /// Set bit for provided [`DeriveId`], and return a new [`Derives`]. + /// i.e. the type derives this trait. + pub const fn with(mut self, id: DeriveId) -> Self { + let (byte_index, mask) = Self::byte_index_and_mask(id); + self.0[byte_index] |= mask; + self + } + + /// Unset bit for provided [`DeriveId`], and return a new [`Derives`]. + /// i.e. the type does not derive this trait. 
+ pub const fn without(mut self, id: DeriveId) -> Self { + let (byte_index, mask) = Self::byte_index_and_mask(id); + self.0[byte_index] &= !mask; + self + } + + /// Set bit for provided [`DeriveId`] on this [`Derives`]. + /// i.e. the type derives this trait. + pub fn add(&mut self, id: DeriveId) { + *self = self.with(id); + } + + /// Unset bit for provided [`DeriveId`] on this [`Derives`]. + /// i.e. the type does not derive this trait. + pub fn remove(&mut self, id: DeriveId) { + *self = self.without(id); + } + + /// Get byte index and mask for a [`DeriveId`]. + /// (internal method) + const fn byte_index_and_mask(id: DeriveId) -> (usize, u8) { + (id / 8, 1u8 << (id & 7)) + } +} + +impl IntoIterator for Derives { + type Item = DeriveId; + type IntoIter = DerivesIter; + + fn into_iter(self) -> DerivesIter { + DerivesIter::new(self) + } +} + +impl IntoIterator for &Derives { + type Item = DeriveId; + type IntoIter = DerivesIter; + + fn into_iter(self) -> DerivesIter { + DerivesIter::new(*self) + } +} + +/// Iterator over the [`DeriveId`]s which this [`Derives`] does derive, +/// in ascending order of [`DeriveId`]. +/// +/// i.e. yields the [`DeriveId`]s for traits which a type derives. 
+pub struct DerivesIter { + derives: Derives, + next_id: DeriveId, +} + +impl DerivesIter { + fn new(derives: Derives) -> Self { + Self { derives, next_id: 0 } + } +} + +impl Iterator for DerivesIter { + type Item = DeriveId; + + fn next(&mut self) -> Option { + while self.next_id < DERIVES.len() { + let id = self.next_id; + self.next_id += 1; + + if self.derives.has(id) { + return Some(id); + } + } + + None + } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(DERIVES.len() - self.next_id)) + } +} + +impl FusedIterator for DerivesIter {} + +impl Debug for Derives { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.into_iter().map(|id| DERIVES[id].trait_name())).finish() + } +} diff --git a/tasks/ast_tools/src/schema/extensions/clone_in.rs b/tasks/ast_tools/src/schema/extensions/clone_in.rs new file mode 100644 index 0000000000000..2d2d2f9f3278c --- /dev/null +++ b/tasks/ast_tools/src/schema/extensions/clone_in.rs @@ -0,0 +1,6 @@ +/// Details for `CloneIn` derive on a struct field. +#[derive(Default, Debug)] +pub struct CloneInStructField { + /// `true` if field should be filled with default value when cloning + pub is_default: bool, +} diff --git a/tasks/ast_tools/src/schema/extensions/estree.rs b/tasks/ast_tools/src/schema/extensions/estree.rs new file mode 100644 index 0000000000000..199d1c69ee308 --- /dev/null +++ b/tasks/ast_tools/src/schema/extensions/estree.rs @@ -0,0 +1,37 @@ +/// Configuration for ESTree generator on a struct. +#[derive(Default, Debug)] +pub struct ESTreeStruct { + pub rename: Option, + pub via: Option, + pub add_ts: Option, + pub always_flatten: bool, + pub no_type: bool, + pub custom_serialize: bool, +} + +/// Configuration for ESTree generator on an enum. +#[derive(Default, Debug)] +pub struct ESTreeEnum { + pub no_rename_variants: bool, + pub custom_ts_def: bool, +} + +/// Configuration for ESTree generator on a struct field. 
+#[derive(Default, Debug)] +pub struct ESTreeStructField { + pub rename: Option, + pub via: Option, + pub ts_type: Option, + /// Field index of field to append to this one + pub append_field_index: Option, + pub skip: bool, + pub flatten: bool, + pub is_ts: bool, +} + +/// Configuration for ESTree generator on an enum variant. +#[derive(Default, Debug)] +pub struct ESTreeEnumVariant { + pub rename: Option, + pub is_ts: bool, +} diff --git a/tasks/ast_tools/src/schema/extensions/kind.rs b/tasks/ast_tools/src/schema/extensions/kind.rs new file mode 100644 index 0000000000000..df785047ce5d2 --- /dev/null +++ b/tasks/ast_tools/src/schema/extensions/kind.rs @@ -0,0 +1,6 @@ +/// Details for `AstKind` generator on a struct or enum. +#[derive(Default, Debug)] +pub struct Kind { + /// `true` if struct / enum has an `AstKind` + pub has_kind: bool, +} diff --git a/tasks/ast_tools/src/schema/extensions/layout.rs b/tasks/ast_tools/src/schema/extensions/layout.rs new file mode 100644 index 0000000000000..0457832b77af6 --- /dev/null +++ b/tasks/ast_tools/src/schema/extensions/layout.rs @@ -0,0 +1,92 @@ +use std::mem::{align_of, size_of}; + +/// The layout of a type. +#[derive(Clone, Default, Debug)] +pub struct Layout { + /// Layout on 64-bit platforms + pub layout_64: PlatformLayout, + /// Layout on 32-bit platforms + pub layout_32: PlatformLayout, +} + +impl Layout { + /// Create [`Layout`] from a Rust type. + pub fn from_type() -> Self { + Self::from_size_align( + u32::try_from(size_of::()).unwrap(), + u32::try_from(align_of::()).unwrap(), + ) + } + + /// Create [`Layout`] from `size` and `align` pair, with no niche. + /// + /// Layout is same for both 64-bit and 32-bit platforms. + pub fn from_size_align(size: u32, align: u32) -> Self { + Self { + layout_64: PlatformLayout::from_size_align(size, align), + layout_32: PlatformLayout::from_size_align(size, align), + } + } + + /// Create [`Layout`] from `size` and `align` pair, and [`Niche`]. 
+ /// + /// Layout is same for both 64-bit and 32-bit platforms. + pub fn from_size_align_niche(size: u32, align: u32, niche: Niche) -> Self { + Self { + layout_64: PlatformLayout::from_size_align_niche(size, align, niche.clone()), + layout_32: PlatformLayout::from_size_align_niche(size, align, niche), + } + } +} + +/// The layout of a type on a specific platform type (64 bit or 32 bit). +#[derive(Clone, Default, Debug)] +pub struct PlatformLayout { + pub size: u32, + pub align: u32, + pub niche: Option, +} + +impl PlatformLayout { + /// Create [`PlatformLayout`] from `size` and `align` pair, with no niche. + pub fn from_size_align(size: u32, align: u32) -> Self { + Self { size, align, niche: None } + } + + /// Create [`PlatformLayout`] from `size` and `align` pair, and [`Niche`]. + pub fn from_size_align_niche(size: u32, align: u32, niche: Niche) -> Self { + Self { size, align, niche: Some(niche) } + } +} + +/// Niche that a type has. +#[derive(Clone, Debug)] +pub struct Niche { + /// Byte offset of the niche from start of type + pub offset: u32, + /// Size of the niche in bytes + #[expect(dead_code)] + pub size: u32, + /// `true` if niche is at start of range (e.g. 0..3). + /// `false` if niche is at end of range (e.g. 2..255). + #[expect(dead_code)] + pub is_range_start: bool, + /// Number of niche values in the niche (e.g. 1 for `&str`, 254 for `bool`) + pub count: u32, +} + +impl Niche { + /// Create new [`Niche`]. + pub fn new(offset: u32, size: u32, is_range_start: bool, count: u32) -> Self { + Self { offset, size, is_range_start, count } + } +} + +/// Offset of a struct field. 
+#[derive(Clone, Default, Debug)] +pub struct Offset { + /// Offset in bytes on 64-bit platforms + pub offset_64: u32, + /// Offset in bytes on 32-bit platforms + pub offset_32: u32, +} diff --git a/tasks/ast_tools/src/schema/extensions/span.rs b/tasks/ast_tools/src/schema/extensions/span.rs new file mode 100644 index 0000000000000..31a484a539b54 --- /dev/null +++ b/tasks/ast_tools/src/schema/extensions/span.rs @@ -0,0 +1,6 @@ +/// Details of span on a struct. +#[derive(Default, Debug)] +pub struct SpanStruct { + /// Field index that span for struct is obtained from + pub span_field_index: Option, +} diff --git a/tasks/ast_tools/src/schema/extensions/visit.rs b/tasks/ast_tools/src/schema/extensions/visit.rs new file mode 100644 index 0000000000000..ffad1910caea5 --- /dev/null +++ b/tasks/ast_tools/src/schema/extensions/visit.rs @@ -0,0 +1,34 @@ +/// Details of visiting on a struct. +#[derive(Default, Debug)] +pub struct VisitStruct { + pub is_visited: bool, + pub scope: Option, +} + +/// Details of visiting on an enum. +#[derive(Default, Debug)] +pub struct VisitEnum { + pub is_visited: bool, +} + +/// Details of visiting on a struct field or enum variant. +#[derive(Default, Debug)] +pub struct VisitFieldOrVariant { + pub visit_args: Option>, +} + +/// Details of scope on a struct. +#[derive(Debug)] +pub struct Scope { + /// Field index before which scope is entered + pub enter_before_index: usize, + /// Field index before which scope is exited. + /// If scope is exited after last field, this is `struct_def.fields.len()`. + pub exit_before_index: usize, + /// Scope flags for the scope. + /// Stored as a string which should be parsed as an expression. + pub flags: String, + /// Conditions in which scope is strict mode. + /// Stored as a string which should be parsed as an expression. 
+ pub strict_if: Option, +} diff --git a/tasks/ast_tools/src/schema/file.rs b/tasks/ast_tools/src/schema/file.rs new file mode 100644 index 0000000000000..d9970d3a75cf6 --- /dev/null +++ b/tasks/ast_tools/src/schema/file.rs @@ -0,0 +1,63 @@ +use itertools::Itertools; + +/// A Rust source file. +#[derive(Debug)] +pub struct File { + /// Crate file is in e.g. `oxc_ast` + pub krate: String, + /// Import path excluding crate e.g. `::ast::js` + pub import_path: String, +} + +impl File { + /// Create new [`File`] from a source path. + pub fn new(file_path: &str) -> Self { + // Convert file path to crate and import path. + // `crates/oxc_ast/src/ast/js.rs` -> `oxc_ast`, `::ast::js`. + // `crates/oxc_span/src/source_type/mod.rs` -> `oxc_span`, `::source_type`. + // `crates/oxc_syntax/src/lib.rs` -> `oxc_syntax`, ``. + let path = file_path.trim_end_matches(".rs").trim_end_matches("/mod"); + + let mut parts = path.split('/'); + assert_eq!(parts.next(), Some("crates")); + let krate = parts.next().unwrap().to_string(); + assert_eq!(parts.next(), Some("src")); + + let mut import_path = format!("::{}", parts.join("::")); + if import_path == "::lib" { + import_path = String::new(); + } + + Self { krate, import_path } + } + + /// Get name of crate this [`File`] is in. + pub fn krate(&self) -> &str { + &self.krate + } + + /// Get import path for this [`File`]. 
+ pub fn import_path(&self) -> &str { + &self.import_path + } +} + +#[cfg(test)] +mod test { + use super::File; + + #[test] + fn test_file_new() { + let cases = [ + ("crates/oxc_ast/src/ast/js.rs", "oxc_ast", "::ast::js"), + ("crates/oxc_span/src/source_type/mod.rs", "oxc_span", "::source_type"), + ("crates/oxc_syntax/src/lib.rs", "oxc_syntax", ""), + ]; + + for (file_path, krate, import_path) in cases { + let file = File::new(file_path); + assert_eq!(file.krate(), krate); + assert_eq!(file.import_path(), import_path); + } + } +} diff --git a/tasks/ast_tools/src/schema/get_generics.rs b/tasks/ast_tools/src/schema/get_generics.rs deleted file mode 100644 index 797d6e1ed5fd3..0000000000000 --- a/tasks/ast_tools/src/schema/get_generics.rs +++ /dev/null @@ -1,38 +0,0 @@ -use syn::{parse_quote, Generics}; - -use super::defs::{EnumDef, StructDef, TypeDef}; - -pub trait GetGenerics { - fn has_lifetime(&self) -> bool { - false - } - - fn generics(&self) -> Option { - if self.has_lifetime() { - Some(parse_quote!(<'a>)) - } else { - None - } - } -} - -impl GetGenerics for TypeDef { - fn has_lifetime(&self) -> bool { - match self { - TypeDef::Struct(def) => def.has_lifetime(), - TypeDef::Enum(def) => def.has_lifetime(), - } - } -} - -impl GetGenerics for StructDef { - fn has_lifetime(&self) -> bool { - self.has_lifetime - } -} - -impl GetGenerics for EnumDef { - fn has_lifetime(&self) -> bool { - self.has_lifetime - } -} diff --git a/tasks/ast_tools/src/schema/get_ident.rs b/tasks/ast_tools/src/schema/get_ident.rs deleted file mode 100644 index 1f1f5f8769085..0000000000000 --- a/tasks/ast_tools/src/schema/get_ident.rs +++ /dev/null @@ -1,30 +0,0 @@ -use syn::Ident; - -use crate::util::ToIdent; - -use super::defs::{EnumDef, StructDef, TypeDef}; - -pub trait GetIdent { - fn ident(&self) -> Ident; -} - -impl GetIdent for TypeDef { - fn ident(&self) -> Ident { - match self { - TypeDef::Struct(def) => def.ident(), - TypeDef::Enum(def) => def.ident(), - } - } -} - -impl GetIdent 
for StructDef { - fn ident(&self) -> Ident { - self.name.to_ident() - } -} - -impl GetIdent for EnumDef { - fn ident(&self) -> Ident { - self.name.to_ident() - } -} diff --git a/tasks/ast_tools/src/schema/mod.rs b/tasks/ast_tools/src/schema/mod.rs index cfc3dc30ba08d..68165be23735c 100644 --- a/tasks/ast_tools/src/schema/mod.rs +++ b/tasks/ast_tools/src/schema/mod.rs @@ -1,339 +1,185 @@ -use std::fmt; - -use quote::ToTokens; -use rustc_hash::FxHashSet; +use oxc_index::{define_index_type, IndexVec}; +use rustc_hash::FxHashMap; +// Have to import this even though don't use it, due to a bug in `define_index_type!` macro +#[expect(unused_imports)] use serde::Serialize; -use syn::{ - punctuated::Punctuated, Attribute, Expr, ExprLit, Field, Ident, Lit, Meta, MetaNameValue, - Token, Type, Variant, -}; - -use crate::{ - codegen::EarlyCtx, - layout::KnownLayout, - markers::{ - get_derive_attributes, get_estree_attribute, get_scope_attribute, get_scope_markers, - get_visit_markers, - }, - rust_ast as rust, - util::{unexpanded_macro_err, TypeExt}, - Result, TypeId, -}; mod defs; -mod get_generics; -mod get_ident; -pub mod serialize; -mod to_type; +mod derives; +mod file; pub use defs::*; -pub use get_generics::GetGenerics; -pub use get_ident::GetIdent; -pub use to_type::ToType; +pub use derives::Derives; +pub use file::File; -#[derive(Debug, Serialize)] -pub enum TypeName { - Ident(String), - Vec(Box), - Box(Box), - Opt(Box), - Ref(Box), - /// We bailed on detecting wrapper - Complex(Box), +/// Extensions to schema for specific derives / generators +pub mod extensions { + pub mod clone_in; + pub mod estree; + pub mod kind; + pub mod layout; + pub mod span; + pub mod visit; } -impl TypeName { - pub fn inner_name(&self) -> &str { - match self { - Self::Ident(it) => it, - Self::Complex(it) | Self::Vec(it) | Self::Box(it) | Self::Opt(it) | Self::Ref(it) => { - it.inner_name() - } - } - } - - pub fn as_name(&self) -> Option<&str> { - if let Self::Ident(it) = self { - Some(it) - } 
else { - None - } - } - - /// First identifier of multi-part type - /// - /// `Adt` - /// ^^^ - /// - /// Panics - /// - /// When `self` is `TypeName::Ref` or `TypeName::Complex`. - /// - pub fn first_ident(&self) -> &str { - match self { - Self::Ident(it) => it.as_str(), - Self::Vec(_) => "Vec", - Self::Box(_) => "Box", - Self::Opt(_) => "Option", - Self::Ref(_) | Self::Complex(_) => panic!(), - } - } +define_index_type! { + /// ID of type in the AST + pub struct TypeId = u32; } -impl<'a> From> for TypeName { - fn from(it: crate::util::TypeIdentResult<'a>) -> Self { - use crate::util::TypeIdentResult; - match it { - TypeIdentResult::Ident(it) => Self::Ident(it.to_string()), - TypeIdentResult::Vec(it) => Self::Vec(Box::new(Self::from(*it))), - TypeIdentResult::Box(it) => Self::Box(Box::new(Self::from(*it))), - TypeIdentResult::Option(it) => Self::Opt(Box::new(Self::from(*it))), - TypeIdentResult::Reference(it) => Self::Ref(Box::new(Self::from(*it))), - TypeIdentResult::Complex(it) => Self::Complex(Box::new(Self::from(*it))), - } - } +impl TypeId { + pub const DUMMY: Self = Self::from_raw_unchecked(0); } -impl fmt::Display for TypeName { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Ident(it) => write!(f, "{it}"), - Self::Vec(it) => write!(f, "Vec<{it}>"), - Self::Box(it) => write!(f, "Box<{it}>"), - Self::Opt(it) => write!(f, "Option<{it}>"), - Self::Ref(it) => write!(f, "&{it}"), - Self::Complex(it) => write!(f, "{it}"), - } - } + +define_index_type! { + /// ID of source file + pub struct FileId = u32; } -#[derive(Debug, Default, serde::Serialize)] +/// Schema of all AST types. 
+#[derive(Debug)] pub struct Schema { - pub defs: Vec, + /// Type definitions + pub types: IndexVec, + /// Mapping from type name to [`TypeId`] + pub type_names: FxHashMap, + /// Source files + pub files: IndexVec, } impl Schema { - pub fn get(&self, id: TypeId) -> Option<&TypeDef> { - self.defs.get(id) + /// Get reference to [`TypeDef`] for a type name. + /// + /// # Panics + /// Panics if no type with supplied name. + pub fn type_by_name(&self, name: &str) -> &TypeDef { + let type_id = self.type_names[name]; + &self.types[type_id] } -} - -fn parse_struct_outer_markers(attrs: &Vec) -> Result { - Ok(StructOuterMarkers { - scope: get_scope_attribute(attrs).transpose()?, - estree: get_estree_attribute(attrs).transpose()?, - }) -} - -fn parse_enum_outer_markers(attrs: &Vec) -> Result { - Ok(EnumOuterMarkers { estree: get_estree_attribute(attrs).transpose()?.unwrap_or_default() }) -} - -fn parse_inner_markers(attrs: &Vec) -> Result { - Ok(InnerMarkers { - span: attrs.iter().any(|a| a.path().is_ident("span")), - visit: get_visit_markers(attrs)?, - scope: get_scope_markers(attrs)?, - derive_attributes: get_derive_attributes(attrs)?, - }) -} - -// lower `AstType` to `TypeDef`. -pub fn lower_ast_types(ctx: &EarlyCtx) -> Schema { - let defs = ctx - .mods() - .borrow() - .iter() - .flat_map(|it| &it.items) - .map(|it| lower_ast_type(&it.borrow(), ctx)) - .collect(); - Schema { defs } -} -fn lower_ast_type(ty: &rust::AstType, ctx: &EarlyCtx) -> TypeDef { - match ty { - rust::AstType::Enum(it) => TypeDef::Enum(lower_ast_enum(it, ctx)), - rust::AstType::Struct(it) => TypeDef::Struct(lower_ast_struct(it, ctx)), - rust::AstType::Macro(it) => panic!("{}", unexpanded_macro_err(&it.item)), + /// Get mutable reference to [`TypeDef`] for a type name. + /// + /// # Panics + /// Panics if no type with supplied name. 
+ pub fn type_by_name_mut(&mut self, name: &str) -> &mut TypeDef { + let type_id = self.type_names[name]; + &mut self.types[type_id] } } -fn lower_ast_enum(it @ rust::Enum { item, meta }: &rust::Enum, ctx: &EarlyCtx) -> EnumDef { - let (size_64, align_64, offsets_64) = meta - .layout_64 - .clone() - .layout() - .map_or_else(|| panic!("Uncalculated layout on {}!", item.ident), KnownLayout::unpack); - let (size_32, align_32, offsets_32) = meta - .layout_32 - .clone() - .layout() - .map_or_else(|| panic!("Uncalculated layout on {}!", item.ident), KnownLayout::unpack); - EnumDef { - id: ctx.type_id(&it.ident().to_string()).unwrap(), - name: it.ident().to_string(), - is_visitable: meta.is_visitable, - variants: item - .variants - .iter() - .filter(|it| !it.attrs.iter().any(|it| it.path().is_ident("inherit"))) - .map(|var| lower_variant(var, || it.ident().to_string(), ctx)) - .collect(), - inherits: meta.inherits.iter().map(|it| lower_inherit(it, ctx)).collect(), - has_lifetime: item.generics.lifetimes().count() > 0, - - size_64, - align_64, - offsets_64, - size_32, - align_32, - offsets_32, - - markers: parse_enum_outer_markers(&item.attrs).unwrap(), - generated_derives: parse_generate_derive(&item.attrs), - - module_path: meta.module_path.clone(), +/// Methods for getting a specific type def (e.g. [`StructDef`]) for a [`TypeId`]. +/// +/// These methods are useful in `Generator::prepare` / `Derive::prepare` where +/// you have to deal in [`TypeId`]s, to work around borrow-checker restrictions. +impl Schema { + /// Get reference to [`StructDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a struct. 
+ pub fn struct_def(&self, type_id: TypeId) -> &StructDef { + self.types[type_id].as_struct().unwrap() } -} -fn lower_ast_struct(it @ rust::Struct { item, meta }: &rust::Struct, ctx: &EarlyCtx) -> StructDef { - // If the struct contains a `span` field, it must be the first field for consistency, and also - // small performance improvement from byte ordering. - if item - .fields - .iter() - .map(|field| field.ident.as_ref().unwrap().to_string()) - .position(|ident| ident == "span") - .filter(|i| *i != 0) - .is_some() - { - panic!("First field of `{}` must be `span`.", it.item.ident); + /// Get mutable reference to [`StructDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a struct. + pub fn struct_def_mut(&mut self, type_id: TypeId) -> &mut StructDef { + self.types[type_id].as_struct_mut().unwrap() } - let (size_64, align_64, offsets_64) = meta - .layout_64 - .clone() - .layout() - .map_or_else(|| panic!("Uncalculated layout on {}!", item.ident), KnownLayout::unpack); - let (size_32, align_32, offsets_32) = meta - .layout_32 - .clone() - .layout() - .map_or_else(|| panic!("Uncalculated layout on {}!", item.ident), KnownLayout::unpack); - StructDef { - id: ctx.type_id(&it.ident().to_string()).unwrap(), - name: it.ident().to_string(), - is_visitable: meta.is_visitable, - fields: item.fields.iter().map(|fi| lower_field(fi, ctx)).collect(), - has_lifetime: item.generics.lifetimes().count() > 0, + /// Get reference to [`EnumDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not an enum. + pub fn enum_def(&self, type_id: TypeId) -> &EnumDef { + self.types[type_id].as_enum().unwrap() + } - size_64, - align_64, - offsets_64, - size_32, - align_32, - offsets_32, + /// Get mutable reference to [`EnumDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not an enum. 
+ pub fn enum_def_mut(&mut self, type_id: TypeId) -> &mut EnumDef { + self.types[type_id].as_enum_mut().unwrap() + } - markers: parse_struct_outer_markers(&item.attrs).unwrap(), - generated_derives: parse_generate_derive(&item.attrs), + /// Get reference to [`PrimitiveDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a primitive. + pub fn primitive_def(&self, type_id: TypeId) -> &PrimitiveDef { + self.types[type_id].as_primitive().unwrap() + } - module_path: meta.module_path.clone(), + /// Get mutable reference to [`PrimitiveDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a primitive. + pub fn primitive_def_mut(&mut self, type_id: TypeId) -> &mut PrimitiveDef { + self.types[type_id].as_primitive_mut().unwrap() } -} -fn lower_variant(variant: &Variant, enum_dbg_name: F, ctx: &EarlyCtx) -> VariantDef -where - F: Fn() -> String, -{ - VariantDef { - name: variant.ident.to_string(), - discriminant: variant.discriminant.as_ref().map_or_else( - || panic!("expected explicit enum discriminants on {}", enum_dbg_name()), - |(_, disc)| match disc { - Expr::Lit(ExprLit { lit: Lit::Int(lit), .. }) => { - lit.base10_parse().expect("invalid base10 enum discriminant") - } - _ => panic!("invalid enum discriminant {:?} on {}", disc, enum_dbg_name()), - }, - ), - fields: variant.fields.iter().map(|fi| lower_field(fi, ctx)).collect(), - markers: parse_inner_markers(&variant.attrs).unwrap(), + /// Get reference to [`OptionDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not an `Option`. 
+ pub fn option_def(&self, type_id: TypeId) -> &OptionDef { + self.types[type_id].as_option().unwrap() } -} -fn lower_inherit(inherit: &rust::Inherit, ctx: &EarlyCtx) -> InheritDef { - match inherit { - rust::Inherit::Linked { super_, variants } => InheritDef { - super_: create_type_ref(super_, ctx), - variants: variants - .iter() - .map(|var| lower_variant(var, || super_.get_ident().inner_ident().to_string(), ctx)) - .collect(), - }, - rust::Inherit::Unlinked(_) => { - panic!("`Unlinked` inherits can't be converted to a valid `InheritDef`!") - } + /// Get mutable reference to [`OptionDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not an `Option`. + pub fn option_def_mut(&mut self, type_id: TypeId) -> &mut OptionDef { + self.types[type_id].as_option_mut().unwrap() } -} -fn lower_field(field: &Field, ctx: &EarlyCtx) -> FieldDef { - FieldDef { - name: field - .ident - .as_ref() - .map(|ident| ident.to_string().trim_start_matches("r#").to_string()), - vis: Visibility::from(&field.vis), - typ: create_type_ref(&field.ty, ctx), - markers: parse_inner_markers(&field.attrs).unwrap(), - docs: get_docs(&field.attrs), + /// Get reference to [`BoxDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a `Box`. + pub fn box_def(&self, type_id: TypeId) -> &BoxDef { + self.types[type_id].as_box().unwrap() } -} -fn create_type_ref(ty: &Type, ctx: &EarlyCtx) -> TypeRef { - let ident = ty.get_ident(); - let id = ident.as_ident().and_then(|id| ctx.type_id(&id.to_string())); - let transparent_id = ctx.type_id(&ident.inner_ident().to_string()); - #[expect(clippy::disallowed_methods)] - let raw = ty.to_token_stream().to_string().replace(' ', ""); - TypeRef { - id, - transparent_id, - raw, - name: TypeName::from(ty.get_ident()), - analysis: ty.analyze(ctx), + /// Get mutable reference to [`BoxDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a `Box`. 
+ pub fn box_def_mut(&mut self, type_id: TypeId) -> &mut BoxDef { + self.types[type_id].as_box_mut().unwrap() } -} -fn get_docs(attrs: &[Attribute]) -> Vec { - attrs - .iter() - .filter_map(|attr| { - if let Meta::NameValue(MetaNameValue { path, value: Expr::Lit(lit), .. }) = &attr.meta { - if !path.is_ident("doc") { - return None; - } - match &lit.lit { - Lit::Str(lit) => Some(lit.value().trim().to_string()), - _ => None, - } - } else { - None - } - }) - .collect() -} + /// Get reference to [`VecDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a `Vec`. + pub fn vec_def(&self, type_id: TypeId) -> &VecDef { + self.types[type_id].as_vec().unwrap() + } -fn parse_generate_derive(attrs: &[Attribute]) -> Vec { - let mut derives = FxHashSet::default(); - for attr in attrs { - if !attr.path().is_ident("generate_derive") { - continue; - } + /// Get mutable reference to [`VecDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a `Vec`. + pub fn vec_def_mut(&mut self, type_id: TypeId) -> &mut VecDef { + self.types[type_id].as_vec_mut().unwrap() + } - let args: Punctuated = - attr.parse_args_with(Punctuated::parse_terminated).unwrap(); + /// Get reference to [`CellDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a `Cell`. + pub fn cell_def(&self, type_id: TypeId) -> &CellDef { + self.types[type_id].as_cell().unwrap() + } - for arg in args { - derives.insert(arg.to_string()); - } + /// Get mutable reference to [`CellDef`] for a [`TypeId`]. + /// + /// # Panics + /// Panics if type [`TypeId`] refers to is not a `Cell`. 
+ pub fn cell_def_mut(&mut self, type_id: TypeId) -> &mut CellDef { + self.types[type_id].as_cell_mut().unwrap() } - Vec::from_iter(derives) } diff --git a/tasks/ast_tools/src/schema/serialize.rs b/tasks/ast_tools/src/schema/serialize.rs deleted file mode 100644 index 8f83219c0207c..0000000000000 --- a/tasks/ast_tools/src/schema/serialize.rs +++ /dev/null @@ -1,49 +0,0 @@ -use convert_case::{Case, Casing}; -use rustc_hash::FxHashSet; - -use crate::{markers::ESTreeStructTagMode, schema::GetIdent, Schema, TypeId}; - -use super::{EnumDef, StructDef, TypeDef, VariantDef}; - -pub fn enum_variant_name(var: &VariantDef, enm: &EnumDef) -> String { - match var.markers.derive_attributes.estree.rename.as_ref() { - Some(rename) => rename.to_string(), - None => { - if enm.markers.estree.no_rename_variants { - var.ident().to_string() - } else { - var.ident().to_string().to_case(Case::Camel) - } - } - } -} - -pub fn get_type_tag(def: &StructDef) -> Option { - let tag_mode = def.markers.estree.as_ref().and_then(|e| e.tag_mode.as_ref()); - match tag_mode { - Some(ESTreeStructTagMode::NoType) => None, - Some(ESTreeStructTagMode::Type(type_name)) => Some(type_name.clone()), - Some(ESTreeStructTagMode::CustomSerialize) | None => { - let has_type_field = - def.fields.iter().any(|f| matches!(f.name.as_deref(), Some("type"))); - if has_type_field { - None - } else { - Some(def.ident().to_string()) - } - } - } -} - -/// Returns a HashSet of structs that have the #[estree(always_flatten)] attribute. 
-pub fn get_always_flatten_structs(schema: &Schema) -> FxHashSet { - let mut set = FxHashSet::default(); - for def in &schema.defs { - if let TypeDef::Struct(def) = def { - if def.markers.estree.as_ref().is_some_and(|e| e.always_flatten) { - set.insert(def.id); - } - } - } - set -} diff --git a/tasks/ast_tools/src/schema/to_type.rs b/tasks/ast_tools/src/schema/to_type.rs deleted file mode 100644 index 14ddedf01abc5..0000000000000 --- a/tasks/ast_tools/src/schema/to_type.rs +++ /dev/null @@ -1,65 +0,0 @@ -use proc_macro2::TokenStream; -use quote::ToTokens; -use syn::{parse_quote, parse_str, Type}; - -use super::{ - defs::{EnumDef, StructDef, TypeDef, TypeRef}, - GetGenerics, GetIdent, -}; - -pub trait ToType { - fn to_type(&self) -> Type; - fn to_type_elide(&self) -> Type; - fn to_elided_type(&self) -> Type; - fn to_type_with_explicit_generics(&self, generics: TokenStream) -> Type; -} - -impl ToType for TypeRef { - fn to_type(&self) -> Type { - parse_str(self.raw()).unwrap() - } - - fn to_type_elide(&self) -> Type { - self.to_type_with_explicit_generics(proc_macro2::TokenStream::default()) - } - - fn to_elided_type(&self) -> Type { - self.to_type_with_explicit_generics(parse_quote! {<'_>}) - } - - fn to_type_with_explicit_generics(&self, generics: proc_macro2::TokenStream) -> Type { - let ident = self.name().first_ident(); - parse_quote!(#ident #generics) - } -} - -macro_rules! impl_to_type { - ($($ty:ty,)+) => ( - $( - impl ToType for $ty { - fn to_type(&self) -> Type { - self.to_type_with_explicit_generics(self.generics().to_token_stream()) - } - - fn to_type_elide(&self) -> Type { - self.to_type_with_explicit_generics(TokenStream::default()) - } - - fn to_elided_type(&self) -> Type { - self.to_type_with_explicit_generics(parse_quote! {<'_>}) - } - - fn to_type_with_explicit_generics(&self, generics: TokenStream) -> Type { - let name = self.ident(); - parse_quote!(#name #generics) - } - } - )+ - ) -} - -impl_to_type! 
{ - TypeDef, - EnumDef, - StructDef, -} diff --git a/tasks/ast_tools/src/util.rs b/tasks/ast_tools/src/util.rs deleted file mode 100644 index c8aa31bbe164e..0000000000000 --- a/tasks/ast_tools/src/util.rs +++ /dev/null @@ -1,319 +0,0 @@ -use itertools::Itertools; -use proc_macro2::{Group, TokenStream, TokenTree}; -use quote::{format_ident, ToTokens}; -use serde::Serialize; -use syn::{spanned::Spanned, GenericArgument, Ident, ItemMacro, PathArguments, Type, TypePath}; - -use crate::{codegen::EarlyCtx, TypeId}; - -pub trait NormalizeError { - fn normalize(self) -> crate::Result; - fn normalize_with(self, err: E) -> crate::Result - where - E: ToString; -} - -impl NormalizeError for Result -where - E: ToString, -{ - fn normalize(self) -> crate::Result { - self.map_err(|e| e.to_string()) - } - - fn normalize_with(self, err: U) -> crate::Result - where - U: ToString, - { - self.map_err(|_| err.to_string()) - } -} - -impl NormalizeError for Option { - fn normalize(self) -> crate::Result { - self.normalize_with(String::default()) - } - - fn normalize_with(self, err: E) -> crate::Result - where - E: ToString, - { - self.map_or_else(|| Err(err.to_string()), |r| Ok(r)) - } -} - -pub trait TokenStreamExt { - fn replace_ident(self, needle: &str, replace: &Ident) -> TokenStream; -} - -pub trait TypeExt { - fn get_ident(&self) -> TypeIdentResult; - fn analyze(&self, ctx: &EarlyCtx) -> TypeAnalysis; -} - -pub trait StrExt: AsRef { - /// Dead simple, just adds either `s` or `es` based on the last character. - /// doesn't handle things like `sh`, `x`, `z`, etc. It also creates wrong results when the word - /// ends with `y` but there is a preceding vowl similar to `toys`, - /// It WILL output the WRONG result `toies`! - /// As an edge case would output `children` for the input `child`. 
- fn to_plural(self) -> String; -} - -pub trait ToIdent { - fn to_ident(&self) -> Ident; -} - -#[derive(Debug)] -pub enum TypeIdentResult<'a> { - Ident(&'a Ident), - Vec(Box>), - Box(Box>), - Option(Box>), - Reference(Box>), - /// We bailed on detecting wrapper - Complex(Box>), -} - -impl<'a> TypeIdentResult<'a> { - fn boxed(inner: Self) -> Self { - Self::Box(Box::new(inner)) - } - - fn vec(inner: Self) -> Self { - Self::Vec(Box::new(inner)) - } - - fn option(inner: Self) -> Self { - Self::Option(Box::new(inner)) - } - - fn complex(inner: Self) -> Self { - Self::Complex(Box::new(inner)) - } - - fn reference(inner: Self) -> Self { - Self::Reference(Box::new(inner)) - } - - pub fn inner_ident(&self) -> &'a Ident { - match self { - Self::Ident(it) => it, - Self::Complex(it) - | Self::Vec(it) - | Self::Box(it) - | Self::Option(it) - | Self::Reference(it) => it.inner_ident(), - } - } - - pub fn as_ident(&self) -> Option<&'a Ident> { - if let Self::Ident(it) = self { - Some(it) - } else { - None - } - } -} - -#[derive(Debug, PartialEq, Clone, Serialize)] -pub enum TypeWrapper { - None, - Box, - Vec, - Opt, - #[expect(dead_code)] - VecBox, - VecOpt, - OptBox, - OptVec, - Ref, - /// We bailed on detecting the type wrapper - Complex, -} - -#[derive(Debug, Clone, Serialize)] -pub struct TypeAnalysis { - pub type_id: Option, - pub wrapper: TypeWrapper, - // pub name: String, - #[serde(skip)] - pub typ: Type, -} - -impl TypeExt for Type { - fn get_ident(&self) -> TypeIdentResult { - match self { - Type::Path(TypePath { path, .. 
}) => { - let seg1 = path.segments.first().unwrap(); - match &seg1.arguments { - PathArguments::None => TypeIdentResult::Ident(&seg1.ident), - PathArguments::AngleBracketed(it) => { - let args = &it.args.iter().collect_vec(); - assert!(args.len() < 3, "Max path arguments here is 2, eg `Box<'a, Adt>`"); - if let Some(second) = args.get(1) { - let GenericArgument::Type(second) = second else { panic!() }; - let inner = second.get_ident(); - if seg1.ident == "Box" { - TypeIdentResult::boxed(inner) - } else if seg1.ident == "Vec" { - TypeIdentResult::vec(inner) - } else { - panic!(); - } - } else { - match args.first() { - Some(GenericArgument::Type(it)) => { - let inner = it.get_ident(); - if seg1.ident == "Option" { - TypeIdentResult::option(inner) - } else { - TypeIdentResult::complex(inner) - } - } - Some(GenericArgument::Lifetime(_)) => { - TypeIdentResult::Ident(&seg1.ident) - } - _ => panic!("unsupported type!"), - } - } - } - PathArguments::Parenthesized(_) => { - panic!("Parenthesized path arguments aren't supported!") - } - } - } - Type::Reference(typ) => TypeIdentResult::reference(typ.elem.get_ident()), - _ => panic!("Unsupported type."), - } - } - - fn analyze(&self, ctx: &EarlyCtx) -> TypeAnalysis { - fn analyze<'a>(res: &'a TypeIdentResult) -> Option<(&'a Ident, TypeWrapper)> { - let mut wrapper = TypeWrapper::None; - let ident = match res { - TypeIdentResult::Ident(inner) => inner, - TypeIdentResult::Complex(inner) => { - wrapper = TypeWrapper::Complex; - let (inner, _) = analyze(inner)?; - inner - } - TypeIdentResult::Box(inner) => { - wrapper = TypeWrapper::Box; - let (inner, inner_kind) = analyze(inner)?; - assert!(inner_kind == TypeWrapper::None,); - inner - } - TypeIdentResult::Vec(inner) => { - wrapper = TypeWrapper::Vec; - let (inner, inner_kind) = analyze(inner)?; - if inner_kind == TypeWrapper::Opt { - wrapper = TypeWrapper::VecOpt; - } else if inner_kind != TypeWrapper::None { - panic!(); - } - inner - } - TypeIdentResult::Option(inner) => { - 
wrapper = TypeWrapper::Opt; - let (inner, inner_kind) = analyze(inner)?; - if inner_kind == TypeWrapper::Vec { - wrapper = TypeWrapper::OptVec; - } else if inner_kind == TypeWrapper::Box { - wrapper = TypeWrapper::OptBox; - } else if inner_kind != TypeWrapper::None { - panic!(); - } - inner - } - TypeIdentResult::Reference(_) => return None, - }; - Some((ident, wrapper)) - } - let type_ident = self.get_ident(); - let Some((type_ident, wrapper)) = analyze(&type_ident) else { - return TypeAnalysis { type_id: None, wrapper: TypeWrapper::Ref, typ: self.clone() }; - }; - - let type_id = ctx.type_id(&type_ident.to_string()); - TypeAnalysis { type_id, wrapper, typ: self.clone() } - } -} - -impl> StrExt for T { - fn to_plural(self) -> String { - let txt = self.as_ref(); - if txt.is_empty() { - return String::default(); - } - - let mut txt = txt.to_string(); - if txt.ends_with("child") { - txt.push_str("ren"); - } else { - match txt.chars().last() { - Some('s') => { - txt.push_str("es"); - } - Some('y') => { - txt.pop(); - txt.push_str("ies"); - } - _ => txt.push('s'), - } - } - txt - } -} - -impl TokenStreamExt for TokenStream { - fn replace_ident(self, needle: &str, replace: &Ident) -> TokenStream { - self.into_iter() - .map(|it| match it { - TokenTree::Ident(ident) if ident == needle => replace.to_token_stream(), - TokenTree::Group(group) => { - Group::new(group.delimiter(), group.stream().replace_ident(needle, replace)) - .to_token_stream() - } - _ => it.to_token_stream(), - }) - .collect() - } -} - -// From https://doc.rust-lang.org/reference/keywords.html -#[rustfmt::skip] -static RESERVED_NAMES: &[&str] = &[ - // Strict keywords - "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", "for", "if", - "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return", "self", "Self", - "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", "while", "async", - "await", "dyn", - // Reserved keywords 
- "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized", - "virtual", "yield", "try", - // Weak keywords - "macro_rules", "union", // "dyn" also listed as a weak keyword, but is already on strict list -]; - -pub fn is_reserved_name(name: &str) -> bool { - RESERVED_NAMES.contains(&name) -} - -impl ToIdent for S -where - S: AsRef, -{ - fn to_ident(&self) -> Ident { - let name = self.as_ref(); - if is_reserved_name(name) { - format_ident!("r#{name}") - } else { - format_ident!("{name}") - } - } -} - -pub fn unexpanded_macro_err(mac: &ItemMacro) -> String { - format!("Unexpanded macro: {:?}:{:?}", mac.ident, mac.span()) -} diff --git a/tasks/ast_tools/src/utils.rs b/tasks/ast_tools/src/utils.rs new file mode 100644 index 0000000000000..df34715dc666f --- /dev/null +++ b/tasks/ast_tools/src/utils.rs @@ -0,0 +1,52 @@ +use proc_macro2::{Span, TokenStream}; +use quote::{format_ident, quote}; +use syn::{Ident, LitInt}; + +/// Reserved word in Rust. +/// From . +#[rustfmt::skip] +static RESERVED_NAMES: &[&str] = &[ + // Strict keywords + "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", "for", "if", + "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return", "self", "Self", + "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", "while", "async", + "await", "dyn", + // Reserved keywords + "abstract", "become", "box", "do", "final", "macro", "override", "priv", "typeof", "unsized", + "virtual", "yield", "try", + // Weak keywords + "macro_rules", "union", // "dyn" also listed as a weak keyword, but is already on strict list +]; + +/// Returns `true` if `name` is a reserved word in Rust. +pub fn is_reserved_name(name: &str) -> bool { + RESERVED_NAMES.contains(&name) +} + +/// Create an [`Ident`] from a string. +/// +/// If the name is a reserved word, it's prepended with `r#`. +/// e.g. `type` -> `r#type`. 
+/// +/// [`Ident`]: struct@Ident +pub fn create_ident(name: &str) -> Ident { + if is_reserved_name(name) { + format_ident!("r#{name}") + } else { + format_ident!("{name}") + } +} + +/// Create an identifier from a string. +/// +/// If the name is a reserved word, it's prepended with `r#`. +/// e.g. `type` -> `r#type`. +pub fn create_ident_tokens(name: &str) -> TokenStream { + if name.as_bytes().first().is_some_and(u8::is_ascii_digit) { + let lit = LitInt::new(name, Span::call_site()); + quote!(#lit) + } else { + let ident = create_ident(name); + quote!(#ident) + } +}