super-tiny-compiler.js

/**
 * TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH     HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
 * T:::::::::::::::::::::TH:::::::H     H:::::::HE::::::::::::::::::::E
 * T:::::::::::::::::::::TH:::::::H     H:::::::HE::::::::::::::::::::E
 * T:::::TT:::::::TT:::::THH::::::H     H::::::HHEE::::::EEEEEEEEE::::E
 * TTTTTT  T:::::T  TTTTTT  H:::::H     H:::::H    E:::::E       EEEEEE
 *         T:::::T          H:::::H     H:::::H    E:::::E
 *         T:::::T          H::::::HHHHH::::::H    E::::::EEEEEEEEEE
 *         T:::::T          H:::::::::::::::::H    E:::::::::::::::E
 *         T:::::T          H:::::::::::::::::H    E:::::::::::::::E
 *         T:::::T          H::::::HHHHH::::::H    E::::::EEEEEEEEEE
 *         T:::::T          H:::::H     H:::::H    E:::::E
 *         T:::::T          H:::::H     H:::::H    E:::::E       EEEEEE
 *       TT:::::::TT      HH::::::H     H::::::HHEE::::::EEEEEEEE:::::E
 *       T:::::::::T      H:::::::H     H:::::::HE::::::::::::::::::::E
 *       T:::::::::T      H:::::::H     H:::::::HE::::::::::::::::::::E
 *       TTTTTTTTTTT      HHHHHHHHH     HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
 *
 *    SSSSSSSSSSSSSSS UUUUUUUU     UUUUUUUUPPPPPPPPPPPPPPPPP   EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
 *  SS:::::::::::::::SU::::::U     U::::::UP::::::::::::::::P  E::::::::::::::::::::ER::::::::::::::::R
 * S:::::SSSSSS::::::SU::::::U     U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R
 * S:::::S     SSSSSSSUU:::::U     U:::::UUPP:::::P     P:::::PEE::::::EEEEEEEEE::::ERR:::::R     R:::::R
 * S:::::S             U:::::U     U:::::U   P::::P     P:::::P  E:::::E       EEEEEE  R::::R     R:::::R
 * S:::::S             U:::::U     U:::::U   P::::P     P:::::P  E:::::E               R::::R     R:::::R
 *  S::::SSSS          U:::::U     U:::::U   P::::PPPPPP:::::P   E::::::EEEEEEEEEE     R::::RRRRRR:::::R
 *   SS::::::SSSSS     U:::::U     U:::::U   P:::::::::::::PP    E:::::::::::::::E     R:::::::::::::RR
 *     SSS::::::::SS   U:::::U     U:::::U   P::::PPPPPPPPP      E:::::::::::::::E     R::::RRRRRR:::::R
 *        SSSSSS::::S  U:::::U     U:::::U   P::::P              E::::::EEEEEEEEEE     R::::R     R:::::R
 *             S:::::S U:::::U     U:::::U   P::::P              E:::::E               R::::R     R:::::R
 *             S:::::S U::::::U   U::::::U   P::::P              E:::::E       EEEEEE  R::::R     R:::::R
 * SSSSSSS     S:::::S U:::::::UUU:::::::U PP::::::PP          EE::::::EEEEEEEE:::::ERR:::::R     R:::::R
 * S::::::SSSSSS:::::S  UU:::::::::::::UU  P::::::::P          E::::::::::::::::::::ER::::::R     R:::::R
 * S:::::::::::::::SS     UU:::::::::UU    P::::::::P          E::::::::::::::::::::ER::::::R     R:::::R
 *  SSSSSSSSSSSSSSS         UUUUUUUUU      PPPPPPPPPP          EEEEEEEEEEEEEEEEEEEEEERRRRRRRR     RRRRRRR
 *
 * TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN        NNNNNNNNYYYYYYY       YYYYYYY
 * T:::::::::::::::::::::TI::::::::IN:::::::N       N::::::NY:::::Y       Y:::::Y
 * T:::::::::::::::::::::TI::::::::IN::::::::N      N::::::NY:::::Y       Y:::::Y
 * T:::::TT:::::::TT:::::TII::::::IIN:::::::::N     N::::::NY::::::Y     Y::::::Y
 * TTTTTT  T:::::T  TTTTTT  I::::I  N::::::::::N    N::::::NYYY:::::Y   Y:::::YYY
 *         T:::::T          I::::I  N:::::::::::N   N::::::N   Y:::::Y Y:::::Y
 *         T:::::T          I::::I  N:::::::N::::N  N::::::N    Y:::::Y:::::Y
 *         T:::::T          I::::I  N::::::N N::::N N::::::N     Y:::::::::Y
 *         T:::::T          I::::I  N::::::N  N::::N:::::::N      Y:::::::Y
 *         T:::::T          I::::I  N::::::N   N:::::::::::N       Y:::::Y
 *         T:::::T          I::::I  N::::::N    N::::::::::N       Y:::::Y
 *         T:::::T          I::::I  N::::::N     N:::::::::N       Y:::::Y
 *       TT:::::::TT      II::::::IIN::::::N      N::::::::N       Y:::::Y
 *       T:::::::::T      I::::::::IN::::::N       N:::::::N    YYYY:::::YYYY
 *       T:::::::::T      I::::::::IN::::::N        N::::::N    Y:::::::::::Y
 *       TTTTTTTTTTT      IIIIIIIIIINNNNNNNN         NNNNNNN    YYYYYYYYYYYYY
 *
 *         CCCCCCCCCCCCC     OOOOOOOOO     MMMMMMMM               MMMMMMMMPPPPPPPPPPPPPPPPP   IIIIIIIIIILLLLLLLLLLL             EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR
 *      CCC::::::::::::C   OO:::::::::OO   M:::::::M             M:::::::MP::::::::::::::::P  I::::::::IL:::::::::L             E::::::::::::::::::::ER::::::::::::::::R
 *    CC:::::::::::::::C OO:::::::::::::OO M::::::::M           M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L             E::::::::::::::::::::ER::::::RRRRRR:::::R
 *   C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M         M:::::::::MPP:::::P     P:::::PII::::::IILL:::::::LL             EE::::::EEEEEEEEE::::ERR:::::R     R:::::R
 *  C:::::C       CCCCCCO::::::O   O::::::OM::::::::::M       M::::::::::M  P::::P     P:::::P  I::::I    L:::::L                 E:::::E       EEEEEE  R::::R     R:::::R
 * C:::::C              O:::::O     O:::::OM:::::::::::M     M:::::::::::M  P::::P     P:::::P  I::::I    L:::::L                 E:::::E               R::::R     R:::::R
 * C:::::C              O:::::O     O:::::OM:::::::M::::M   M::::M:::::::M  P::::PPPPPP:::::P   I::::I    L:::::L                 E::::::EEEEEEEEEE     R::::RRRRRR:::::R
 * C:::::C              O:::::O     O:::::OM::::::M M::::M M::::M M::::::M  P:::::::::::::PP    I::::I    L:::::L                 E:::::::::::::::E     R:::::::::::::RR
 * C:::::C              O:::::O     O:::::OM::::::M  M::::M::::M  M::::::M  P::::PPPPPPPPP      I::::I    L:::::L                 E:::::::::::::::E     R::::RRRRRR:::::R
 * C:::::C              O:::::O     O:::::OM::::::M   M:::::::M   M::::::M  P::::P              I::::I    L:::::L                 E::::::EEEEEEEEEE     R::::R     R:::::R
 * C:::::C              O:::::O     O:::::OM::::::M    M:::::M    M::::::M  P::::P              I::::I    L:::::L                 E:::::E               R::::R     R:::::R
 *  C:::::C       CCCCCCO::::::O   O::::::OM::::::M     MMMMM     M::::::M  P::::P              I::::I    L:::::L         LLLLLL  E:::::E       EEEEEE  R::::R     R:::::R
 *   C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M               M::::::MPP::::::PP          II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R     R:::::R
 *    CC:::::::::::::::C OO:::::::::::::OO M::::::M               M::::::MP::::::::P          I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R     R:::::R
 *      CCC::::::::::::C   OO:::::::::OO   M::::::M               M::::::MP::::::::P          I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R     R:::::R
 *         CCCCCCCCCCCCC     OOOOOOOOO     MMMMMMMM               MMMMMMMMPPPPPPPPPP          IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR     RRRRRRR
 *
 * =======================================================================================================================================================================
 * =======================================================================================================================================================================
 * =======================================================================================================================================================================
 * =======================================================================================================================================================================
 */

/**

 * Today we're going to write a compiler together. But not just any compiler... A
   今天我们要写一个编译器. 不过不是普通的编译器...
   
 * super duper teeny tiny compiler! A compiler that is so small that if you
   是个超小的编译器! 这个编译器的代码很短, 
   
 * remove all the comments this file would only be ~200 lines of actual code.
   如果你把所有注释删掉, 代码才 200 行左右
   
 * We're going to compile some lisp-like function calls into some C-like
   我们要把 Lisp 语法的函数 编译成 C 语法的函数
   
 * function calls.

 * If you are not familiar with one or the other. I'll just give you a quick intro.
   如果你不熟悉这两种语言. 我快速介绍下
   
 * If we had two functions `add` and `subtract` they would be written like this:
   如果有两个函数 `加` 和 `减`  那么语法如下
 *
 *                  LISP                      C
 *
 *   2 + 2          (add 2 2)                 add(2, 2)
 *   4 - 2          (subtract 4 2)            subtract(4, 2)
 *   2 + (4 - 2)    (add 2 (subtract 4 2))    add(2, subtract(4, 2))
 *
 * Easy peezy right?
   很简单对吧?
 *
 * Well good, because this is exactly what we are going to compile. While this
 * is neither a complete LISP or C syntax, it will be enough of the syntax to
 * demonstrate many of the major pieces of a modern compiler.
   这就是今天我们要编译的东西. 虽然不是完整的 LIST 或 C 语法.
   但足以展示现代编译器中的主要模块了
 */

/**
 * Most compilers break down into three primary stages: Parsing, Transformation,
 * and Code Generation
   大部分编译器可以分成以下几个阶段: 分析(Parsing), 变形(Transformation), 和代码生成(Code Generation)
 *
 * 1. *Parsing* is taking raw code and turning it into a more abstract
 *    representation of the code.
      *分析* 是把要编译的代码 变成更抽象的表示
 *
 * 2. *Transformation* takes this abstract representation and manipulates to do
 *    whatever the compiler wants it to.
      *变形* 把上一步的抽象表示, 变换成想要的格式
 *
 * 3. *Code Generation* takes the transformed representation of the code and
 *    turns it into new code.
      *代码生成* 是把变形后的结果 变成新的代码
 */

/**
 * Parsing 分析
 * -------
 *
 * Parsing typically gets broken down into two phases: Lexical Analysis and
 * Syntactic Analysis.
   分析这个阶段一般分成2个大块: 
     1. 语法分析(Lexical Analysis) 
     2. 语义分析(Syntactic Analysis)
 *
 * 1. *Lexical Analysis* takes the raw code and splits it apart into these things
 *    called tokens by a thing called a tokenizer (or lexer).
   1. *语法分析* 把源代码切成一块一块叫 token 的东西, 完成这件事情的程序叫做 tokenizer(或者lexer)
 *
 *    Tokens are an array of tiny little objects that describe an isolated piece
 *    of the syntax. They could be numbers, labels, punctuation, operators,
 *    whatever.
      Tokens 是一块块不可分割的基本单元. token 可以是数字, 标签, 标点符号, 操作符等等.
 *
 * 2. *Syntactic Analysis* takes the tokens and reformats them into a
 *    representation that describes each part of the syntax and their relation
 *    to one another. This is known as an intermediate representation or
 *    Abstract Syntax Tree.
   2. *语义分析* 把前面折腾出来的一堆 token 变成一种抽象表示, 描述出这些 token 各自的关系.
       这种抽象表示叫做 中间表示(intermediate representation) 
       或者 抽象语法树(Abstract Syntax Tree)
 *
 *    An Abstract Syntax Tree, or AST for short, is a deeply nested object that
 *    represents code in a way that is both easy to work with and tells us a lot
 *    of information.
      抽象语法树 或者简称 AST, 是一个嵌套N层的对象, 
      这种表示代码的方式既容易处理, 也告诉了我们很多信息
 *
 * For the following syntax:
   对于下面的代码
   
 *   (add 2 (subtract 4 2))
 *
 * Tokens might look something like this:
   变成 token 数组后: 可以看到根据先后顺序把 类型 和 值 都判断出来了.
 * 
 *   [
 *     { type: 'paren',  value: '('        },
 *     { type: 'name',   value: 'add'      },
 *     { type: 'number', value: '2'        },
 *     { type: 'paren',  value: '('        },
 *     { type: 'name',   value: 'subtract' },
 *     { type: 'number', value: '4'        },
 *     { type: 'number', value: '2'        },
 *     { type: 'paren',  value: ')'        },
 *     { type: 'paren',  value: ')'        }
 *   ]
 *
 * And an Abstract Syntax Tree (AST) might look like this:
   token 数组变成 AST 后:
 *
 *   {
 *     type: 'Program',
 *     body: [{
 *       type: 'CallExpression',
 *       name: 'add',
 *       params: [{
 *         type: 'NumberLiteral',
 *         value: '2'
 *       }, {
 *         type: 'CallExpression',
 *         name: 'subtract',
 *         params: [{
 *           type: 'NumberLiteral',
 *           value: '4'
 *         }, {
 *           type: 'NumberLiteral',
 *           value: '2'
 *         }]
 *       }]
 *     }]
 *   }
 */

/**
 * Transformation 变形
 * --------------
 *
 * The next type of stage for a compiler is transformation. Again, this just
 * takes the AST from the last step and makes changes to it. It can manipulate
 * the AST in the same language or it can translate it into an entirely new
 * language.
   生成 AST 后, 下一步是变形
 *
 * Let’s look at how we would transform an AST.
   接下来看看具体怎么变形 AST 
 *
 * You might notice that our AST has elements within it that look very similar.
 * There are these objects with a type property. Each of these are known as an
 * AST Node. These nodes have defined properties on them that describe one
 * isolated part of the tree.
   你可能注意到了 AST 看起来蛮眼熟的, 
   它和 token 一样有 type 属性.
   然后的话, AST 里每一个都叫做 Node(节点).
     
 *
 * We can have a node for a "NumberLiteral":
   我们用类型为 NumberLiteral (数字字面量) 的节点来表示数字
 *
 *   {
 *     type: 'NumberLiteral',
 *     value: '2'
 *   }
 *
 * Or maybe a node for a "CallExpression":
   用类型为 CallExpression 的节点表示调用表达式(比如add和subtract)
 *
 *   {
 *     type: 'CallExpression',
 *     name: 'subtract',
 *     params: [...nested nodes go here...]
 *   }
 *
 * When transforming the AST we can manipulate nodes by
 * adding/removing/replacing properties, we can add new nodes, remove nodes, or
 * we could leave the existing AST alone and create an entirely new one based
 * on it.
   变形 AST 时我们可以给节点 添加/删除/替换 属性.
   或者干脆就添加节点, 删除节点。
   或是基于 AST 的信息，弄个全新的 AST，而不是去修改原来的 AST 
 *
 * Since we’re targeting a new language, we’re going to focus on creating an
 * entirely new AST that is specific to the target language.
   因为我们要编译成其他语言, 所以这里我们就新建一个用于目标语言的新 AST 
 *
 * Traversal 遍历
 * ---------
 *
 * In order to navigate through all of these nodes, we need to be able to
 * traverse through them. This traversal process goes to each node in the AST
 * depth-first.
   我们用深度优先(depth-first)方式遍历 AST 的各个节点来变形
 *
 *   {
 *     type: 'Program',
 *     body: [{
 *       type: 'CallExpression',
 *       name: 'add',
 *       params: [{
 *         type: 'NumberLiteral',
 *         value: '2'
 *       }, {
 *         type: 'CallExpression',
 *         name: 'subtract',
 *         params: [{
 *           type: 'NumberLiteral',
 *           value: '4'
 *         }, {
 *           type: 'NumberLiteral',
 *           value: '2'
 *         }]
 *       }]
 *     }]
 *   }
 *
 * So for the above AST we would go:
   对于以上的 AST, 深度优先这种遍历方式的访问顺序是: 
 *
 *   1. Program - Starting at the top level of the AST
 *   2. CallExpression (add) - Moving to the first element of the Program's body
 *   3. NumberLiteral (2) - Moving to the first element of CallExpression's params
 *   4. CallExpression (subtract) - Moving to the second element of CallExpression's params
 *   5. NumberLiteral (4) - Moving to the first element of CallExpression's params
 *   6. NumberLiteral (2) - Moving to the second element of CallExpression's params
 *
   上面这段没翻, 想必你也看得懂.
   简单说就是  add 2 subtract 4 2 这么按顺序访问
   
 * If we were manipulating this AST directly, instead of creating a separate AST,
 * we would likely introduce all sorts of abstractions here. But just visiting
 * each node in the tree is enough.
   如果我们要直接改这个 AST， 而不是创建一个新 AST,
   逻辑可能需要写的更复杂一些, 可能要多写些代码, 所以这里访问各个节点就好. 
 
 * The reason I use the word “visiting” is because there is this pattern of how
 * to represent operations on elements of an object structure.
   
 *
 * Visitors  访问者
 * --------
 *
 * The basic idea here is that we are going to create a “visitor” object that
 * has methods that will accept different node types.
   我们会创建一个"访问者"(visitor)对象
   它有方法去接收不同节点类型
 *
 *   var visitor = {
 *     NumberLiteral() {},
 *     CallExpression() {}
 *   };
 *
 * When we traverse our AST we will call the methods on this visitor whenever we
 * encounter a node of a matching type.
   当遍历 AST 时, Vistior 碰到不同节点会调用不同方法
 *
 * In order to make this useful we will also pass the node and a reference to
 * the parent node.
   为了方便创建新 AST, 我们把节点和父节点都传递过去
   
 *
 *   var visitor = {
 *     NumberLiteral(node, parent) {},
 *     CallExpression(node, parent) {}
 *   };
 */

/**
 * Code Generation 代码生成
 * ---------------
 *
 * The final phase of a compiler is code generation. Sometimes compilers will do
 * things that overlap with transformation, but for the most part code
 * generation just means take our AST and string-ify code back out.
   最后一个阶段是代码生成. 有时编译器会把"代码生成"这一步和"变形"混合着一起做.
   代码生成 就是把 AST 变成目标语言字符串
 *
 * Code generators work several different ways, some compilers will reuse the
 * tokens from earlier, others will have created a separate representation of
 * the code so that they can print node linearly, but from what I can tell most
 * will use the same AST we just created, which is what we’re going to focus on.
   代码生成有几种不同方式实现, 有些编译器会重用前面分析出来的 tokens,
   有些会创造代码的另一种表示方式, 然后就可以线性输出节点,
   但据我所知大部分编译器会用上一步的 AST.
 *
 * Effectively our code generator will know how to “print” all of the different
 * node types of the AST, and it will recursively call itself to print nested
 * nodes until everything is printed into one long string of code.
   我们的代码生成器知道怎么正确输出 AST 里的不同节点,并且它会递归调用自己.
   直到所有节点都输出完.
 */

/**
 * And that's it! That's all the different pieces of a compiler.
   现在各个部分都介绍完了.
 *
 * Now that isn’t to say every compiler looks exactly like I described here.
 * Compilers serve many different purposes, and they might need more steps than
 * I have detailed.
   注意不是每个编译器都和这里说的长得完全一样.
   编译器可以用来做很多事情, 有些事需要额外的步骤去实现.
 *
 * But now you should have a general high-level idea of what most compilers look
 * like.
   不过现在你大概了解编译器是怎么做的了.
 *
 * Now that I’ve explained all of this, you’re all good to go write your own
 * compilers right?
   现在我解释了各个部分, 你就自己写个编译器吧?
 *
 * Just kidding, that's what I'm here to help with :P
   开个玩笑, 下面我会演示怎么写.
 *
 * So let's begin...
   开始吧
 */

/**
 * ============================================================================
 *                                   (/^▽^)/
 *                                THE TOKENIZER!
 * ============================================================================
 */

/**
 * We're gonna start off with our first phase of parsing, lexical analysis, with
 * the tokenizer.
   我们要开始分析(parsing)的第一个阶段, 语法分析, 做这个事情的函数我们叫它 tokenizer
 *
 * We're just going to take our string of code and break it down into an array
 * of tokens.
   我们会把如下代码, 拆成一个 token 数组
 *
 *   (add 2 (subtract 4 2))   =>   [{ type: 'paren', value: '(' }, ...]
 */

// We start by accepting an input string of code, and we're gonna set up two
// things...
// tokenizer 接收字符串形式的源代码
function tokenizer(input) {

  // A `current` variable for tracking our position in the code like a cursor.
  // `current` 变量用于存当前处理到哪里了, 就像手指指着纸上我们处理到哪个位置了
  var current = 0;

  // And a `tokens` array for pushing our tokens to.
  // `tokens` 数组用于存所有的 token，待会我们会把 token 一个个 push 进去
  var tokens = [];

  // We start by creating a `while` loop where we are setting up our `current`
  // variable to be incremented as much as we want `inside` the loop.
  //
  // We do this because we may want to increment `current` many times within a
  // single loop because our tokens can be any length.
  // 我们用 while 来遍历源代码里的每个字符
  while (current < input.length) {

    // We're also going to store the `current` character in the `input`.
    // 把当前的单个字符拿到
    var char = input[current];

    // The first thing we want to check for is an open parenthesis. This will
    // later be used for `CallExpressions` but for now we only care about the
    // character.
    // 我们首先想处理的是左括号, 这个后面会用于 调用表达式(CallExpressions) 
    // 
    // We check to see if we have an open parenthesis:
    // 看看是不是左括号
    if (char === '(') {

      // If we do, we push a new token with the type `paren` and set the value
      // to an open parenthesis.
      // 如果是左括号, 就用个对象标上, type(类型): 'paren(括号)'  (值)value: '('
      // 然后 push 扔数组里
      tokens.push({
        type: 'paren',
        value: '('
      });

      // Then we increment `current`
      // 手指往后挪一位
      current++;

      // And we `continue` onto the next cycle of the loop.
      // 直接下一次循环
      continue;
    }

    // Next we're going to check for a closing parenthesis. We do the same exact
    // thing as before: Check for a closing parenthesis, add a new token,
    // increment `current`, and `continue`.
    // 和上面的左括号处理流程完全一样.  只不过这次是右括号
    if (char === ')') {
      tokens.push({
        type: 'paren',
        value: ')'
      });
      current++;
      continue;
    }

    // Moving on, we're now going to check for whitespace. This is interesting
    // because we care that whitespace exists to separate characters, but it
    // isn't actually important for us to store as a token. We would only throw
    // it out later.
    //
    // So here we're just going to test for existence and if it does exist we're
    // going to just `continue` on.
    // 继续, 我们现在来处理空格, 空格用于隔开字符, 但不用像括号一样当做 token 存起来.
    // 这里跳过不处理
    // 这里 /\s/ 这种写法叫正则表达式, 如果你第一次看到这种写法
    // 那么我解释下: 正则表达式是用来匹配字符串模式, 用途是判断一个字符串是不是电子邮件, 是不是电话号码, 
    // 或者是否达到指定位数(比如6位数字). 诸如此类的模式匹配. 
    // 有时候我们需要用正则表达式判断 yes 或 no. 比如是不是电子邮件
    // 有时候我们需要用正则表达式返回字符串里面的字符, 比如从 "av1355589" 里提取出数字 13555589
    var WHITESPACE = /\s/;
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }

    // The next type of token is a number. This is different than what we have
    // seen before because a number could be any number of characters and we
    // want to capture the entire sequence of characters as one token.
    //
    //   (add 123 456)
    //        ^^^ ^^^
    //        Only two separate tokens
    //
    // So we start this off when we encounter the first number in a sequence.
    // 下一种要处理的 token 是数字,  数字可以是任意长度, 比如 123(3位)  88990(5位)
    // 所以一旦碰到数字, 就看看下一个是不是数字, 如果是就把手指也移过去, 直到碰到不是数字的才停
    // 这样就拿到了一整串数字
    var NUMBERS = /[0-9]/;
    if (NUMBERS.test(char)) {

      // We're going to create a `value` string that we are going to push
      // characters to.
      // 用于存一整串数字, 如 123 或者 88990
      var value = '';

      // Then we're going to loop through each character in the sequence until
      // we encounter a character that is not a number, pushing each character
      // that is a number to our `value` and incrementing `current` as we go.
      // 一直循环, 直到碰到不是数字
      while (NUMBERS.test(char)) {
        value += char;
        char = input[++current];
      }

      // After that we push our `number` token to the `tokens` array.
      // 整个数字拿到手了就把这个 token 放起来
      tokens.push({
        type: 'number',
        value: value
      });

      // And we continue on.
      // 下一次循环
      continue;
    }

    // The last type of token will be a `name` token. This is a sequence of
    // letters instead of numbers, that are the names of functions in our lisp
    // syntax.
    //
    //   (add 2 4)
    //    ^^^
    //    Name token
    //
    // 最后这种 token 类型是 name(名字)
    // 逻辑和数字一样, 也是正则匹配
    var LETTERS = /[a-z]/i;
    if (LETTERS.test(char)) {
      var value = '';

      // Again we're just going to loop through all the letters pushing them to
      // a value.
      while (LETTERS.test(char)) {
        value += char;
        char = input[++current];
      }

      // And pushing that value as a token with the type `name` and continuing.
      tokens.push({
        type: 'name',
        value: value
      });

      continue;
    }

    // Finally if we have not matched a character by now, we're going to throw
    // an error and completely exit.
    // 如果一个字符在上面这几种类型里全匹配不到, 就会运行到这里(因为上面的 continue 一个都没碰到)
    // 那么我们就抛出错误
    throw new TypeError('I dont know what this character is: ' + char);
  }

  // Then at the end of our `tokenizer` we simply return the tokens array.
  // 最后把 token 数组返回
  return tokens;
}

/**
 * ============================================================================
 *                                 ヽ/❀o ل͜ o\ﾉ
 *                                THE PARSER!!!
 * ============================================================================
 */

/**
 * For our parser we're going to take our array of tokens and turn it into an
 * AST.
 *
 *   [{ type: 'paren', value: '(' }, ...]   =>   { type: 'Program', body: [...] }
 * 
 * 这一步, 我们要把 token 数组转换成 AST (Abstract Syntax Tree) 抽象语法树
 */

// Okay, so we define a `parser` function that accepts our array of `tokens`.
// 接收 token 数组参数
function parser(tokens) {

  // Again we keep a `current` variable that we will use as a cursor.
  // 我们这里又用 current 变量, 就像手指一样指着处理到哪里了
  var current = 0;

  // But this time we're going to use recursion instead of a `while` loop. So we
  // define a `walk` function.
  // 这一次不用 while 循环了. 用递归. 所以这里定义个 walk 函数, 待会它会调用自己
  function walk() {

    // Inside the walk function we start by grabbing the `current` token.
    // 拿到要处理的 token
    var token = tokens[current];

    // We're going to split each type of token off into a different code path,
    // starting off with `number` tokens.
    // 根据 token 的不同类型来处理 token
    //
    // We test to see if we have a `number` token.
    // 如果是数字
    if (token.type === 'number') {

      // If we have one, we'll increment `current`.
      // 先指向下一个要处理的
      current++;

      // And we'll return a new AST node called `NumberLiteral` and setting its
      // value to the value of our token.
      // 把这个 token 变成 node (对于数字, 这个 node 和原先的 token 其实没啥区别, 就是 type 不一样了)
      return {
        type: 'NumberLiteral',
        value: token.value
      };
    }

    // Next we're going to look for CallExpressions. We start this off when we
    // encounter an open parenthesis.
    // 接下来找 调用表达式(CallExpressions). 我们会先碰到左括号
    // 如果是括号 而且是 左括号
    if (
      token.type === 'paren' &&
      token.value === '('
    ) {
      
      // We'll increment `current` to skip the parenthesis since we don't care
      // about it in our AST.
      // current + 1 因为在 AST 里我们不关心括号
      token = tokens[++current];

      // We create a base node with the type `CallExpression`, and we're going
      // to set the name as the current token's value since the next token after
      // the open parenthesis is the name of the function.
      // 我们创建一个类型是 CallExpression 的节点
      // 值是函数名, 也就是紧跟左括号后面的 add 或 subtract 这样的名字
      var node = {
        type: 'CallExpression',
        name: token.value, // add or subtract, you get the idea
        params: []
      };

      // We increment `current` *again* to skip the name token.
      // 函数名处理了, 这里就 +1 跳过函数名.
      token = tokens[++current];

      // And now we want to loop through each token that will be the `params` of
      // our `CallExpression` until we encounter a closing parenthesis.
      // 现在我们想遍历 CallExpression 的参数, 直到碰到右括号
      //
      // Now this is where recursion comes in. Instead of trying to parse a
      // potentially infinitely nested set of nodes we're going to rely on
      // recursion to resolve things.
      // 现在就是递归登场的地方了. 因为不管有多少层嵌套都可以用递归解决.
      //
      // To explain this, let's take our Lisp code. You can see that the
      // parameters of the `add` are a number and a nested `CallExpression` that
      // includes its own numbers.
      // 举个例子, 如下是嵌套了一层, 先算里面那层, 再算外面, 但 subtract 里面还可以继续一直嵌套别的.
      // 所以递归是个好方案
      //
      //   (add 2 (subtract 4 2))
      //
      // You'll also notice that in our tokens array we have multiple closing
      // parenthesis.
      // 你应该注意到了 token 数组里有多个右括号
      //
      //   [
      //     { type: 'paren',  value: '('        },
      //     { type: 'name',   value: 'add'      },
      //     { type: 'number', value: '2'        },
      //     { type: 'paren',  value: '('        },
      //     { type: 'name',   value: 'subtract' },
      //     { type: 'number', value: '4'        },
      //     { type: 'number', value: '2'        },
      //     { type: 'paren',  value: ')'        }, <<< Closing parenthesis (右括号)
      //     { type: 'paren',  value: ')'        }  <<< Closing parenthesis (右括号)
      //   ]
      //
      // We're going to rely on the nested `walk` function to increment our
      // `current` variable past any nested `CallExpressions`.
      // 我们需要 walk 函数来移动我们的手指 (current变量) 来处理嵌套的 CallExpressions

      // So we create a `while` loop that will continue until it encounters a
      // token with a `type` of `'paren'` and a `value` of a closing
      // parenthesis.
      // 这里用 while 循环, 直到碰到右括号才退出.
      // 如果不是括号  token.type !== 'paren'
      // 或者是括号而且不是右括号  token.type === 'paren' && token.value !== ')'
      while (
        (token.type !== 'paren') ||
        (token.type === 'paren' && token.value !== ')')
      ) {
        // we'll call the `walk` function which will return a `node` and we'll
        // push it into our `node.params`.
        // 可以看到这里递归调用了 walk()
        // 当前节点的 params 数组属性 node.params 推入(push) 递归的结果( 也是个 node )
        node.params.push(walk());
        token = tokens[current]; 
        // 注意 current 是在 walk 函数外面的 不会每次都变0
        // 在 walk() 里面 current+1 之后
        // 这里就可以继续处理下一个 token
      }

      // Finally we will increment `current` one last time to skip the closing
      // parenthesis.
      // 跳过右括号 (跳出上面 while 循环的唯一可能就是碰到右括号)
      current++;

      // And return the node.
      // 并且返回节点(node)
      return node;
    }

    // Again, if we haven't recognized the token type by now we're going to
    // throw an error.
    // 同样, 认不出 token 类型就报错
    throw new TypeError(token.type);
  }// walk 函数结束

  // Now, we're going to create our AST which will have a root which is a
  // `Program` node.
  // 现在根据 token 数组创建 AST， AST 根节点类型是 "Program" 代表我们的整个程序
  var ast = {
    type: 'Program',
    body: []
  };

  // And we're going to kickstart our `walk` function, pushing nodes to our
  // `ast.body` array.
  //
  // 现在我们要调用前面定义的 walk 函数, 把节点 push 到 
  // ast.body 这个数组里
  // 
  // The reason we are doing this inside a loop is because our program can have
  // `CallExpressions` after one another instead of being nested.
  //
  // 这里用 while 循环, 是因为我们的程序可以多个 CallExpressions 并排, 不一定要嵌套
  //
  //   (add 2 2)(subtract 4 2)
  while (current < tokens.length) {
    ast.body.push(walk());
  }

  // At the end of our parser we'll return the AST.
  return ast;
  // 最后返回 AST
}

/**
 * ============================================================================
 *                                 ⌒(❀>◞౪◟<❀)⌒
 *                               THE TRAVERSER!!!
 * ============================================================================
 */

/**
  译者注:
    注意这里有点不一样了,
    前面  tokenizer 和 parser 都是单个函数就搞定
    
    这里是 traverser + transformer 搭配用, 2 个函数
    traverser 里又有2个函数, 分别是 traverseArray 和 traverseNode
    
    def traverser()
        def traverseArray()     # 定义函数
        def traverseNode()      # 定义函数
            traverseNode(a,b)   # 调函数
    
    def transformer()
        traverser(ast, visitor) # 调函数
        
    总体结构如上, 因为这个部分我看了半天没懂(主要是_context那里没懂, 后来弄懂了)
    这里整理下清晰些
        

 * So now we have our AST, and we want to be able to visit different nodes with
 * a visitor. We need to be able to call the methods on the visitor whenever we
 * encounter a node with a matching type.
 * 
 * 现在我们有 AST 了, 我们需要用 visitor 访问 AST 里的各个节点
 * 然后根据节点的不同类型, 调用 visitor 的不同方法(从而创建新 AST)
 *
 *   traverse(ast, {
 *     Program(node, parent) {
 *       // ...
 *     },
 *
 *     CallExpression(node, parent) {
 *       // ...
 *     },
 *
 *     NumberLiteral(node, parent) {
 *       // ...
 *     }
 *   });
 */

// So we define a traverser function which accepts an AST and a
// visitor. Inside we're going to define two functions...
// 我们定义个 traverser, 参数是 ast 和 visitor
function traverser(ast, visitor) {

  // A `traverseArray` function that will allow us to iterate over an array and
  // call the next function that we will define: `traverseNode`.
  // `traverseArray` 遍历数组, 并且对数组每个元素调用 traverseNode 这个函数
  // 访问顺序是 add, 2, subtract, 4, 2
  function traverseArray(array, parent) {
    array.forEach(function(child) {
      traverseNode(child, parent);
    });
  }

  // `traverseNode` will accept a `node` and its `parent` node. So that it can
  // pass both to our visitor methods.
  // `traverseNode` 接收一个节点 以及它的上级节点(parent node)
  // 这样它就可以把这俩参数传给 visitor 方法
  function traverseNode(node, parent) {

    // We start by testing for the existence of a method on the visitor with a
    // matching `type`.
    // 这部分根据节点类型, 执行 visitor 里对应方法
    var method = visitor[node.type];
    if (method) {
      method(node, parent);
    }

    // Next we are going to split things up by the current node type.
    // 这部分取属性继续遍历
    switch (node.type) {

      // We'll start with our top level `Program`. Since Program nodes have a
      // property named body that has an array of nodes, we will call
      // `traverseArray` to traverse down into them.
      //
      // (Remember that `traverseArray` will in turn call `traverseNode` so  we
      // are causing the tree to be traversed recursively)
      //
      // 我们从顶层节点, `Program` 开始.
      // 这个节点的 body 属性里是节点数组
      // 我们用 traverseArray 来遍历这个节点数组
      //
      // (记住 traverseArray 又会调用 traverseNode, 使得我们可以递归访问一整棵树)
      case 'Program':
        traverseArray(node.body, node);
        break;

      // Next we do the same with `CallExpressions` and traverse their `params`.
      // 如果节点是 CallExpressions 就 traverseArray 这个节点的 params
      case 'CallExpression':
        traverseArray(node.params, node);
        break;

      // In the case of `NumberLiterals` we don't have any child nodes to visit,
      // so we'll just break.
      // 如果是数字, 那就没有子节点要访问, 所以直接 break;
      case 'NumberLiteral':
        break;

      // And again, if we haven't recognized the node type then we'll throw an
      // error.
      // 如果认不出节点类型就报错
      default:
        throw new TypeError(node.type);
    }
  }

  // Finally we kickstart the traverser by calling `traverseNode` with our ast
  // with no `parent` because the top level of the AST doesn't have a parent.
  // 我们在这个 traverser 里面定义了 traverseArray 函数 和 traverseNode 函数
  // 现在要调用它了, 第二个参数是 null 是因为 ast 本身没有上级节点了
  traverseNode(ast, null);
}

/**
 * ============================================================================
 *                                   ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽
 *                              THE TRANSFORMER!!!
 * ============================================================================
 */
/**
 * Next up, the transformer. Our transformer is going to take the AST that we
 * have built and pass it to our traverser function with a visitor and will
 * create a new ast.
   接下来， transformer. 
   transformer 的用途就是变换 AST 结构
   
   我们会在 transformer 里定义个 visitor, visitor 碰到不同节点会调不同的方法
   
   transformer 把 AST 和 visitor 传给 traverser
   通过遍历所有节点(traverser负责遍历和看节点类型)，并且根据不同节点调用不同方法(visitor负责提供方法)
   最后我们会得到新的 AST
 *
 * ----------------------------------------------------------------------------
 *   Original AST                     |   Transformed AST
 * ----------------------------------------------------------------------------
 *   {                                |   {
 *     type: 'Program',               |     type: 'Program',
 *     body: [{                       |     body: [{
 *       type: 'CallExpression',      |       type: 'ExpressionStatement',
 *       name: 'add',                 |       expression: {
 *       params: [{                   |         type: 'CallExpression',
 *         type: 'NumberLiteral',     |         callee: {
 *         value: '2'                 |           type: 'Identifier',
 *       }, {                         |           name: 'add'
 *         type: 'CallExpression',    |         },
 *         name: 'subtract',          |         arguments: [{
 *         params: [{                 |           type: 'NumberLiteral',
 *           type: 'NumberLiteral',   |           value: '2'
 *           value: '4'               |         }, {
 *         }, {                       |           type: 'CallExpression',
 *           type: 'NumberLiteral',   |           callee: {
 *           value: '2'               |             type: 'Identifier',
 *         }]                         |             name: 'subtract'
 *       }]                           |           },
 *     }]                             |           arguments: [{
 *   }                                |             type: 'NumberLiteral',
 *                                    |             value: '4'
 * ---------------------------------- |           }, {
 *                                    |             type: 'NumberLiteral',
 *                                    |             value: '2'
 *                                    |           }]
 *  (sorry the other one is longer.)  |         }
 *                                    |       }
 *                                    |     }]
 *                                    |   }
 * ----------------------------------------------------------------------------
 */

// So we have our transformer function which will accept the lisp ast.
// 我们的 transformer 会接收 lisp 的 AST
function transformer(ast) {

  // We'll create a `newAst` which like our previous AST will have a program
  // node.
  // 新的 AST 和之前的 老 AST 一样也有个根节点叫 Program
  var newAst = {
    type: 'Program',
    body: []
  };

  // Next I'm going to cheat a little and create a bit of a hack. We're going to
  // use a property named `context` on our parent nodes that we're going to push
  // nodes to their parent's `context`. Normally you would have a better
  // abstraction than this, but for our purposes this keeps things simple.
  //
  //
  // Just take note that the context is a reference *from* the old ast *to* the
  // new ast.
  // 这行代码使得 旧 AST 的 _context 指向了 新 AST 的 body
  // 就是说你往 旧 AST 的 _context 里面 push 
  // 和 push 到 新 AST 的 body 是一回事
  // 这么写的话就方便些
  ast._context = newAst.body;

  // We'll start by calling the traverser function with our ast and a visitor.
  // 调 traverser, 第一个参数 ast 没啥说的, 第二个参数是 visitor
  traverser(ast, {

    // The first visitor method accepts `NumberLiterals`
    // 处理数字
    NumberLiteral: function(node, parent) {
      // We'll create a new node also named `NumberLiteral` that we will push to
      // the parent context.
      // 首先对于数字节点, parent 总是 CallExpression 这点先明确了.
      // 然后
      // 这里创建了个新节点, 然后 parent._haha.push 就会进上级 CallExpression 的 arguments 里了
      // 之所以管用请看下面 CallExpression 写了 node._haha = expression.arguments;
      parent._haha.push({
        type: 'NumberLiteral',
        value: node.value
      });
    },

    // Next up, `CallExpressions`.
    // 处理调用表达式 (CallExpressions)
    CallExpression: function(node, parent) {

      // We start creating a new node `CallExpression` with a nested
      // `Identifier`.
      // 创建个新节点, 把 name 先包一下.
      // 之前的 CallExpressions 就是 type,name,params 三个属性
      // 现在先把 name 包一层同时给个类型(Identifier), 然后 arguments 其实就是 params (含义一样,我不是说指向)
      var expression = {
        type: 'CallExpression',
        callee: {
          type: 'Identifier',
          name: node.name
        },
        arguments: []
      };

      // Next we're going to define a new context on the original
      // `CallExpression` node that will reference the `expression`'s arguments
      // so that we can push arguments.
      // 注意这里是给 CallExpression 节点自己, 定义一个 _haha ，和 ast._context 是没有任何关系的
      // 原作者这里写的是 node._context 很不便于理解. 所以这里是我改成 _haha 的, 避免混淆.
      // 我这里混淆了之后就没能理解, 看着 travser 和 transformer 脑子扰不过来, 怎么都搞不清新 AST 的怎么创建好的
      // 为啥只往旧 AST 里 push 就可以弄成那么复杂的树, 后来发现我理解错了, 这里的 _context 和上面的 ast._context 不是一回事
      // 这里这么一写, 上面那个数字节点的 parent._haha.push 才能成功, 也就等于 push 到 arguments 里去了
      node._haha = expression.arguments;

      // Then we're going to check if the parent node is a `CallExpression`.
      // If it is not...
      // 看看上级节点是不是 CallExpress, 不是就包多一层. 是就不包
      // 那么顶层的 CallExp 会被包(那个add)  而里面的 CallExp(subtract) 就不会被包
      if (parent.type !== 'CallExpression') {

        // We're going to wrap our `CallExpression` node with an
        // `ExpressionStatement`. We do this because the top level
        // `CallExpressions` in JavaScript are actually statements.
        // 包法: 用 ExpressionStatement 把 CallExpression 套里面
        expression = {
          type: 'ExpressionStatement',
          expression: expression
        };
        
        parent._context.push(expression); // 如果是 add 就会调用到这里
        // 结果就是 push 到 ast._context 里，从而就 push 到 newAST.body 里
      } else {
        parent._haha.push(expression); // 如果是 subtract 就会调用到这里
        // 结果就是挂载到上级 add 的 _haha 里, 从而 push 到上级 add 的 arguments 里
      }


    }
  });

  // At the end of our transformer function we'll return the new ast that we
  // just created.
  // 变形完了就返回新 AST 
  return newAst;
}

/**
 * ============================================================================
 *                               ヾ（〃＾∇＾）ﾉ♪
 *                            THE CODE GENERATOR!!!!
 * ============================================================================
 */

/**
 * Now let's move onto our last phase: The Code Generator.
 * 现在继续下一个阶段: 代码生成.
 *
 * Our code generator is going to recursively call itself to print each node in
 * the tree into one giant string.
 * 代码生成器会递归调用自己. 来输出树中的每个节点, 
 * 从而输出一长串字符串, 这个就是生成的代码了
 */

function codeGenerator(node) {

  // We'll break things down by the `type` of the `node`.
  // 根据节点类型来处理
  switch (node.type) {

    // If we have a `Program` node. We will map through each node in the `body`
    // and run them through the code generator and join them with a newline.
    // 对于并列语句如 (add 2 3)(subtract 4 5), 下面 join('\n') 会导致换行, 变成
    // add(2,3);
    // subtract(4,5);
    case 'Program':
      return node.body.map(codeGenerator)
        .join('\n');

    // For `ExpressionStatements` we'll call the code generator on the nested
    // expression and we'll add a semicolon...
    // 这里就只是加分号
    case 'ExpressionStatement':
      return (
        codeGenerator(node.expression) +
        ';' // << (...because we like to code the *correct* way)
      );

    // For `CallExpressions` we will print the `callee`, add an open
    // parenthesis, we'll map through each node in the `arguments` array and run
    // them through the code generator, joining them with a comma, and then
    // we'll add a closing parenthesis.
    // 重新折腾函数名, 括号和分号
    // 把 (add 2 3) 变成 add(2,3) 的就是这个部分
    case 'CallExpression':
      return (
        codeGenerator(node.callee) +
        '(' +
        node.arguments.map(codeGenerator)
          .join(', ') +
        ')'
      );

    // For `Identifiers` we'll just return the `node`'s name.
    // 对于函数名就直接返回名字, 不然就永远递归下去了, 要有基础案例(base case)才行
    case 'Identifier':
      return node.name;

    // For `NumberLiterals` we'll just return the `node`'s value.
    // 对于数字节点就返回数字, 同理
    case 'NumberLiteral':
      return node.value;

    // And if we haven't recognized the node, we'll throw an error.
    // 正常情况所有类型我们都处理到了，不会跑到这里，跑到这里说明有其他类型了
    // 我们不认得就报个错
    default:
      throw new TypeError(node.type);
  }
}

/**
 * ============================================================================
 *                                  (۶* ‘ヮ’)۶”
 *                         !!!!!!!!THE COMPILER!!!!!!!!
 * ============================================================================
 */
/**
 * FINALLY! We'll create our `compiler` function. Here we will link together
 * every part of the pipeline.
 *
 *   1. input  => tokenizer   => tokens
 *   2. tokens => parser      => ast
 *   3. ast    => transformer => newAst
 *   4. newAst => generator   => output
 *   
 *   终于要结束了! 我们定义个 compiler 函数, 把各个部分组合在一起
 *   这个函数接收源代码, 输出编译后的代码
 */

function compiler(input) {
  var tokens = tokenizer(input);
  var ast    = parser(tokens);
  var newAst = transformer(ast);
  var output = codeGenerator(newAst);
  return output;
}

/**
 * ============================================================================
 *                                   (๑˃̵ᴗ˂̵)و
 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!YOU MADE IT!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 * ============================================================================
 */

// Now I'm just exporting everything...
// 导出所有东西, 不然在 test.js 里 require 之后会拿不到这些函数, 调用函数会提示 xxx is not a function 错误
module.exports = {
  tokenizer: tokenizer,
  parser: parser,
  transformer: transformer,
  codeGenerator: codeGenerator,
  compiler: compiler
};