Merge PR coq#8764: Add parsing of decimal constants (e.g., 1.02e+01)

Reviewed-by: Zimmi48 Reviewed-by: ejgallego Ack-by: gares Ack-by: herbelin Ack-by: ppedrot Ack-by: proux01
ggonthier · Apr 4, 2019 · be6f3a6 · be6f3a6
2 parents 2e1aa5c + 6af420b
commit be6f3a6
Show file tree

Hide file tree

Showing 36 changed files with 529 additions and 183 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -72,6 +72,14 @@ Notations
 - New command `String Notation` to register string syntax for custom
   inductive types.
 
+- Numeral notations now parse decimal constants such as 1.02e+01 or
+  10.2. Parsers added for Q and R. This should be considered as an
+  experimental feature currently.
+  Note: in -- the rare -- case when such numeral notations were used
+  in a development along with Q or R, they may have to be removed or
+  deconflicted through explicit scope annotations (1.23%Q,
+  45.6%R,...).
+
 - Various bugs have been fixed (e.g. PR #9214 on removing spurious
   parentheses on abbreviations shortening a strict prefix of an application).
 

diff --git a/coqpp/coqpp_main.ml b/coqpp/coqpp_main.ml
@@ -225,7 +225,8 @@ function
 | "IDENT", s -> fprintf fmt "Tok.PIDENT (%a)" print_pat s
 | "PATTERNIDENT", s -> fprintf fmt "Tok.PPATTERNIDENT (%a)" print_pat s
 | "FIELD", s -> fprintf fmt "Tok.PFIELD (%a)" print_pat s
-| "INT", s -> fprintf fmt "Tok.PINT (%a)" print_pat s
+| "NUMERAL", None -> fprintf fmt "Tok.PNUMERAL None"
+| "NUMERAL", Some s -> fprintf fmt "Tok.PNUMERAL (Some (NumTok.int %a))" print_string s
 | "STRING", s -> fprintf fmt "Tok.PSTRING (%a)" print_pat s
 | "LEFTQMARK", None -> fprintf fmt "Tok.PLEFTQMARK"
 | "BULLET", s -> fprintf fmt "Tok.PBULLET (%a)" print_pat s
@@ -463,7 +464,10 @@ end =
 struct
 
 let terminal s =
-  let c = Printf.sprintf "Extend.Atoken (CLexer.terminal \"%s\")" s in
+  let p =
+    if s <> "" && s.[0] >= '0' && s.[0] <= '9' then "CLexer.terminal_numeral"
+    else "CLexer.terminal" in
+  let c = Printf.sprintf "Extend.Atoken (%s \"%s\")" p s in
   SymbQuote c
 
 let rec parse_symb self = function

diff --git a/dev/ci/user-overlays/08764-validsdp-master-parsing-decimal.sh b/dev/ci/user-overlays/08764-validsdp-master-parsing-decimal.sh
@@ -0,0 +1,18 @@
+if [ "$CI_PULL_REQUEST" = "8764" ] || [ "$CI_BRANCH" = "master-parsing-decimal" ]; then
+
+    ltac2_CI_REF=master-parsing-decimal
+    ltac2_CI_GITURL=https://github.com/proux01/ltac2
+
+    quickchick_CI_REF=master-parsing-decimal
+    quickchick_CI_GITURL=https://github.com/proux01/QuickChick
+
+    Corn_CI_REF=master-parsing-decimal
+    Corn_CI_GITURL=https://github.com/proux01/corn
+
+    HoTT_CI_REF=master-parsing-decimal
+    HoTT_CI_GITURL=https://github.com/proux01/HoTT
+
+    stdlib2_CI_REF=master-parsing-decimal
+    stdlib2_CI_GITURL=https://github.com/proux01/stdlib2
+
+fi
diff --git a/doc/sphinx/language/gallina-specification-language.rst b/doc/sphinx/language/gallina-specification-language.rst
@@ -74,14 +74,20 @@ Identifiers and access identifiers
   `.` (dot) without blank. They are used in the syntax of qualified
   identifiers.
 
-Natural numbers and integers
-  Numerals are sequences of digits. Integers are numerals optionally
-  preceded by a minus sign.
+Numerals
+  Numerals are sequences of digits with a potential fractional part
+  and exponent. Integers are numerals without fractional nor exponent
+  part and optionally preceded by a minus sign. Underscores ``_`` can
+  be used as comments in numerals.
 
   .. productionlist:: coq
      digit   : 0..9
      num     : `digit`…`digit`
      integer : [-]`num`
+     dot     : .
+     exp     : e | E
+     sign    : + | -
+     numeral : `num`[`dot` `num`][`exp`[`sign`]`num`]
 
 Strings
   Strings are delimited by ``"`` (double quote), and enclose a sequence of

diff --git a/doc/sphinx/user-extensions/syntax-extensions.rst b/doc/sphinx/user-extensions/syntax-extensions.rst
@@ -943,8 +943,8 @@ instance the infix symbol ``+``, can be used to denote distinct
 definitions of the additive operator. Depending on which interpretation
 scopes are currently open, the interpretation is different.
 Interpretation scopes can include an interpretation for numerals and
-strings. However, this is only made possible at the Objective Caml
-level.
+strings, either at the OCaml level or using :cmd:`Numeral Notation`
+or :cmd:`String Notation`.
 
 .. cmd:: Declare Scope @scope
 
@@ -1214,7 +1214,7 @@ Scopes` or :cmd:`Print Scope`.
 
 ``nat_scope``
   This scope includes the standard arithmetical operators and relations on type
-  nat. Positive numerals in this scope are mapped to their canonical
+  nat. Positive integer numerals in this scope are mapped to their canonical
   representent built from :g:`O` and :g:`S`. The scope is delimited by the key
   ``nat``, and bound to the type :g:`nat` (see above).
 
@@ -1238,20 +1238,19 @@ Scopes` or :cmd:`Print Scope`.
   This scope includes the standard arithmetical operators and relations on
   type :g:`Q` (rational numbers defined as fractions of an integer and a
   strictly positive integer modulo the equality of the numerator-
-  denominator cross-product). As for numerals, only 0 and 1 have an
-  interpretation in scope ``Q_scope`` (their interpretations are 0/1 and 1/1
-  respectively).
+  denominator cross-product) and comes with an interpretation for numerals
+  as closed terms of type :g:`Q`.
 
 ``Qc_scope``
   This scope includes the standard arithmetical operators and relations on the
   type :g:`Qc` of rational numbers defined as the type of irreducible
   fractions of an integer and a strictly positive integer.
 
-``real_scope``
+``R_scope``
   This scope includes the standard arithmetical operators and relations on
   type :g:`R` (axiomatic real numbers). It is delimited by the key ``R`` and comes
   with an interpretation for numerals using the :g:`IZR` morphism from binary
-  integer numbers to :g:`R`.
+  integer numbers to :g:`R` and :g:`Z.pow_pos` for potential exponent parts.
 
 ``bool_scope``
   This scope includes notations for the boolean operators. It is delimited by the
@@ -1458,6 +1457,8 @@ Numeral notations
      * :n:`Decimal.uint -> option @ident__1`
      * :n:`Z -> @ident__1`
      * :n:`Z -> option @ident__1`
+     * :n:`Decimal.decimal -> @ident__1`
+     * :n:`Decimal.decimal -> option @ident__1`
 
    And the printing function :n:`@ident__3` should have one of the
    following types:
@@ -1468,6 +1469,8 @@ Numeral notations
      * :n:`@ident__1 -> option Decimal.uint`
      * :n:`@ident__1 -> Z`
      * :n:`@ident__1 -> option Z`
+     * :n:`@ident__1 -> Decimal.decimal`
+     * :n:`@ident__1 -> option Decimal.decimal`
 
      When parsing, the application of the parsing function
      :n:`@ident__2` to the number will be fully reduced, and universes
@@ -1501,15 +1504,16 @@ Numeral notations
      The numeral notation registered for :token:`type` does not support
      the given numeral.  This error is given when the interpretation
      function returns :g:`None`, or if the interpretation is registered
-     for only non-negative integers, and the given numeral is negative.
+     only for integers or non-negative integers, and the given numeral
+     has a fractional or exponent part or is negative.
 
 
-   .. exn:: @ident should go from Decimal.int to @type or (option @type). Instead of Decimal.int, the types Decimal.uint or Z or Int63.int could be used (you may need to require BinNums or Decimal or Int63 first).
+   .. exn:: @ident should go from Decimal.int to @type or (option @type). Instead of Decimal.int, the types Decimal.uint or Z or Int63.int or Decimal.decimal could be used (you may need to require BinNums or Decimal or Int63 first).
 
      The parsing function given to the :cmd:`Numeral Notation`
      vernacular is not of the right type.
 
-   .. exn:: @ident should go from @type to Decimal.int or (option Decimal.int).  Instead of Decimal.int, the types Decimal.uint or Z or Int63.int could be used (you may need to require BinNums or Decimal or Int63 first).
+   .. exn:: @ident should go from @type to Decimal.int or (option Decimal.int).  Instead of Decimal.int, the types Decimal.uint or Z or Int63.int or Decimal.decimal could be used (you may need to require BinNums or Decimal or Int63 first).
 
      The printing function given to the :cmd:`Numeral Notation`
      vernacular is not of the right type.

diff --git a/interp/constrexpr.ml b/interp/constrexpr.ml
@@ -48,16 +48,26 @@ type abstraction_kind = AbsLambda | AbsPi
 
 type proj_flag = int option (** [Some n] = proj of the n-th visible argument *)
 
-(** Representation of integer literals that appear in Coq scripts.
-    We now use raw strings of digits in base 10 (big-endian), and a separate
-    sign flag. Note that this representation is not unique, due to possible
-    multiple leading zeros, and -0 = +0 *)
-
-type sign = bool
-type raw_natural_number = string
+(** Representation of decimal literals that appear in Coq scripts.
+   We now use raw strings following the format defined by
+   [NumTok.t] and a separate sign flag.
+
+   Note that this representation is not unique, due to possible
+   multiple leading or trailing zeros, and -0 = +0, for instances.
+   The reason to keep the numeral exactly as it was parsed is that
+   specific notations can be declared for specific numerals
+   (e.g. [Notation "0" := False], or [Notation "00" := (nil,nil)], or
+   [Notation "2e1" := ...]). Those notations, which override the
+   generic interpretation as numeral, use the same representation of
+   numeral using the Numeral constructor. So the latter should be able
+   to record the form of the numeral which exactly matches the
+   notation. *)
+
+type sign = SPlus | SMinus
+type raw_numeral = NumTok.t
 
 type prim_token =
-  | Numeral of raw_natural_number * sign
+  | Numeral of sign * raw_numeral
   | String of string
 
 type instance_expr = Glob_term.glob_level list

diff --git a/interp/constrexpr_ops.ml b/interp/constrexpr_ops.ml
@@ -50,13 +50,14 @@ let names_of_local_binders bl =
 (**********************************************************************)
 (* Functions on constr_expr *)
 
-(* Note: redundant Numeral representations such as -0 and +0 (or different
-   numbers of leading zeros) are considered different here. *)
+(* Note: redundant Numeral representations, such as -0 and +0 (and others),
+   are considered different here. *)
 
 let prim_token_eq t1 t2 = match t1, t2 with
-| Numeral (n1,s1), Numeral (n2,s2) -> String.equal n1 n2 && s1 == s2
+| Numeral (SPlus,n1), Numeral (SPlus,n2)
+| Numeral (SMinus,n1), Numeral (SMinus,n2) -> NumTok.equal n1 n2
 | String s1, String s2 -> String.equal s1 s2
-| _ -> false
+| (Numeral ((SPlus|SMinus),_) | String _), _ -> false
 
 let explicitation_eq ex1 ex2 = match ex1, ex2 with
 | ExplByPos (i1, id1), ExplByPos (i2, id2) ->

diff --git a/interp/constrextern.ml b/interp/constrextern.ml
@@ -318,29 +318,28 @@ let drop_implicits_in_patt cst nb_expl args =
 let destPrim = function { CAst.v = CPrim t } -> Some t | _ -> None
 let destPatPrim = function { CAst.v = CPatPrim t } -> Some t | _ -> None
 
-let is_number s =
-  let rec aux i =
-    Int.equal (String.length s) i ||
-    match s.[i] with '0'..'9' -> aux (i+1) | _ -> false
-  in aux 0
-
 let is_zero s =
   let rec aux i =
     Int.equal (String.length s) i || (s.[i] == '0' && aux (i+1))
   in aux 0
+let is_zero n = is_zero n.NumTok.int && is_zero n.NumTok.frac
 
 let make_notation_gen loc ntn mknot mkprim destprim l bl =
   match snd ntn,List.map destprim l with
     (* Special case to avoid writing "- 3" for e.g. (Z.opp 3) *)
-    | "- _", [Some (Numeral (p,true))] when not (is_zero p) ->
+    | "- _", [Some (Numeral (SPlus,p))] when not (is_zero p) ->
         assert (bl=[]);
         mknot (loc,ntn,([mknot (loc,(InConstrEntrySomeLevel,"( _ )"),l,[])]),[])
     | _ ->
 	match decompose_notation_key ntn, l with
-        | (InConstrEntrySomeLevel,[Terminal "-"; Terminal x]), [] when is_number x ->
-	   mkprim (loc, Numeral (x,false))
-        | (InConstrEntrySomeLevel,[Terminal x]), [] when is_number x ->
-	   mkprim (loc, Numeral (x,true))
+        | (InConstrEntrySomeLevel,[Terminal "-"; Terminal x]), [] ->
+           begin match NumTok.of_string x with
+           | Some n -> mkprim (loc, Numeral (SMinus,n))
+           | None -> mknot (loc,ntn,l,bl) end
+        | (InConstrEntrySomeLevel,[Terminal x]), [] ->
+           begin match NumTok.of_string x with
+           | Some n -> mkprim (loc, Numeral (SPlus,n))
+           | None -> mknot (loc,ntn,l,bl) end
         | _ -> mknot (loc,ntn,l,bl)
 
 let make_notation loc ntn (terms,termlists,binders,binderlists as subst) =
@@ -969,7 +968,7 @@ let rec extern inctx (custom,scopes as allscopes) vars r =
       CCast (sub_extern true scopes vars c,
              map_cast_type (extern_typ scopes vars) c')
   | GInt i ->
-     CPrim(Numeral (Uint63.to_string i,true))
+     CPrim(Numeral (SPlus, NumTok.int (Uint63.to_string i)))
 
   in insert_coercion coercion (CAst.make ?loc c)
 

diff --git a/interp/constrintern.ml b/interp/constrintern.ml
@@ -1487,8 +1487,9 @@ let alias_of als = match als.alias_ids with
 
 let is_zero s =
   let rec aux i =
-    Int.equal (String.length s) i || (s.[i] == '0' && aux (i+1))
+    Int.equal (String.length s) i || ((s.[i] == '0' || s.[i] == '_') && aux (i+1))
   in aux 0
+let is_zero n = is_zero n.NumTok.int && is_zero n.NumTok.frac
 
 let merge_subst s1 s2 = Id.Map.fold Id.Map.add s1 s2
 
@@ -1513,11 +1514,11 @@ let rec subst_pat_iterator y t = DAst.(map (function
   | RCPatOr pl -> RCPatOr (List.map (subst_pat_iterator y t) pl)))
 
 let is_non_zero c = match c with
-| { CAst.v = CPrim (Numeral (p, true)) } -> not (is_zero p)
+| { CAst.v = CPrim (Numeral (SPlus, p)) } -> not (is_zero p)
 | _ -> false
 
 let is_non_zero_pat c = match c with
-| { CAst.v = CPatPrim (Numeral (p, true)) } -> not (is_zero p)
+| { CAst.v = CPatPrim (Numeral (SPlus, p)) } -> not (is_zero p)
 | _ -> false
 
 let get_asymmetric_patterns = Goptions.declare_bool_option_and_ref
@@ -1628,8 +1629,8 @@ let drop_notations_pattern looked_for genv =
         let (argscs1,_) = find_remaining_scopes expl_pl pl g in
         DAst.make ?loc @@ RCPatCstr (g, List.map2 (in_pat_sc scopes) argscs1 expl_pl @ List.map (in_pat false scopes) pl, [])
     | CPatNotation ((InConstrEntrySomeLevel,"- _"),([a],[]),[]) when is_non_zero_pat a ->
-      let p = match a.CAst.v with CPatPrim (Numeral (p, _)) -> p | _ -> assert false in
-      let pat, _df = Notation.interp_prim_token_cases_pattern_expr ?loc (ensure_kind false loc) (Numeral (p,false)) scopes in
+      let p = match a.CAst.v with CPatPrim (Numeral (_, p)) -> p | _ -> assert false in
+      let pat, _df = Notation.interp_prim_token_cases_pattern_expr ?loc (ensure_kind false loc) (Numeral (SMinus,p)) scopes in
       rcp_of_glob scopes pat
     | CPatNotation ((InConstrEntrySomeLevel,"( _ )"),([a],[]),[]) ->
       in_pat top scopes a
@@ -1944,8 +1945,8 @@ let internalize globalenv env pattern_mode (_, ntnvars as lvar) c =
         GLetIn (na.CAst.v, inc1, int,
           intern (push_name_env ntnvars (impls_term_list inc1) env na) c2)
     | CNotation ((InConstrEntrySomeLevel,"- _"), ([a],[],[],[])) when is_non_zero a ->
-      let p = match a.CAst.v with CPrim (Numeral (p, _)) -> p | _ -> assert false in
-       intern env (CAst.make ?loc @@ CPrim (Numeral (p,false)))
+      let p = match a.CAst.v with CPrim (Numeral (_, p)) -> p | _ -> assert false in
+       intern env (CAst.make ?loc @@ CPrim (Numeral (SMinus,p)))
     | CNotation ((InConstrEntrySomeLevel,"( _ )"),([a],[],[],[])) -> intern env a
     | CNotation (ntn,args) ->
         intern_notation intern env ntnvars loc ntn args

diff --git a/interp/interp.mllib b/interp/interp.mllib
@@ -1,3 +1,4 @@
+NumTok
 Constrexpr
 Tactypes
 Stdarg