From a866a4e536fe7bab5f65d8a0ed9208bb201bfb3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Blanqui?=
 <fblanqui@users.noreply.github.com>
Date: Sat, 9 Dec 2023 18:25:51 +0100
Subject: [PATCH] rewrite: use a map to proof_content to use less memory (#74)

---
 CHANGES.md |  5 +++++
 NOTES.md   | 39 +++++++++++++++++++---------------
 README.md  | 33 ++++++++++++++---------------
 TODO.md    |  8 +++++--
 fusion.ml  |  8 ++++---
 main.ml    | 61 +++++++++++++++++++++++++++---------------------------
 6 files changed, 84 insertions(+), 70 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 9fe06e90..3f38a8d1 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -13,6 +13,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
   these two commands greatly reduce the size of generated proofs
 - command simp is equivalent to rewrite and purge
 - command dump-simp is equivalent to dump, pos, use and simp
+- allow simultaneous dumping
+
+### Modified
+
+- identifier renamings
 
 ## 0.0.1 (2023-11-22)
 
diff --git a/NOTES.md b/NOTES.md
index 66ee8b4d..8b592ff5 100644
--- a/NOTES.md
+++ b/NOTES.md
@@ -3,24 +3,29 @@ NOTES
 
 ## 06/12/23
 
-- 100/isoperimetric:
-  * dump 44m23s 153 Go 223 M steps +17166 named theorems (half of HOL-Light)
+Translation of `100/isoperimetric.ml`:
+  * dump 44m23s 153 Go 223 M steps +17166 named theorems (half of all HOL-Light named theorems)
   * pos 13m51s 1.9 Go
-  * use 13m54s 1 Go
-  * rewrite killed
-  * pos
-  * use
-  * purge
-  * dk
-  * dko
-
-- Logic/make:
-  * dump-simp 11m42s 21.2 M steps (83% unused including hol.ml) +1729 named theorems
-  * dk 1m13s dko 4m15s lp 42s v 12s vo 71m48s
-
-- Library/analysis:
-  * ocaml 1m10s
-  * dump-simp 3m58s 9 M steps +485 named theorems
+  * use 13m54s 1 Go 7% unused
+  * rewrite 1h17m - pos&use = 49m 14% simplifications 29% unused
+  * purge 21m31s 59% useless
+  * dg 100 15m5s
+  * too big to be translated to dk/lp
+
+Translation of `Multivariate/make.ml`:
+  * dump 35m52s 120 Go +16646 named theorems
+
+Translation of `Multivariate/make_upto_derivatives.ml`:
+  * dump 27m46s 91 Go + 14901 named theorems
+
+Translation of `Multivariate/make_upto_topology.ml`:
+  * dump 15m55s 48 Go +11987 named theorems 89 M steps
+  * pos 4m15s, use 4m18s 7% unused
+  * rewrite 24m43s -pos&use = 16m 14% simplifications 28% unused
+  * purge 3m37 58% useless, dg 100 4m55s
+  * too big to be translated to dk/lp
+
+## 27/11/23
 
 - results on new computer with 32 * 13th Gen Intel(R) Core(TM) i9-13950HX:
   * hol.ml dg 100 j 32: lp 48s v 39s vo 175m18s
diff --git a/README.md b/README.md
index 6ed6918e..6b875319 100644
--- a/README.md
+++ b/README.md
@@ -21,17 +21,16 @@ Calculus of Inductive Constructions.
 Results
 -------
 
-The HOL-Light base library `hol.ml` or the library `Logic/make.ml`
+The HOL-Light base library `hol.ml` and the library `Logic/make.ml`
 formalizing the metatheory of first-order logic can be exported and
-translated to Dedukti, Lambdapi and Coq in a few minutes. It is
-possible to export other theories as well. Below, you will find some
-data.
-
-However, it may take hours for Coq and require too much memory for
-Lambdapi to check the translated files.
-
-Moreover, while it is possible to translate all HOL-Light proofs to
-Coq, the translated proofs may not be directly usable by Coq users
+translated to Dedukti, Lambdapi and Coq in a few minutes. However, it
+may take hours for Coq and require too much memory for Lambdapi to
+check the translated files. Bigger libraries like
+`Multivariate/make.ml` requires too much memory for the moment. But we
+will fix this soon.
+
+Moreover, while it is possible to translate any HOL-Light proof to
+Coq, the translated theorem may not be directly usable by Coq users
 because HOL-Light types and functions may not be aligned with those of
 the Coq standard library yet. Currently, only the type of natural
 numbers and various functions on natural numbers have been aligned. We
@@ -340,20 +339,20 @@ Performance
 
 Performance on a machine with 32 processors i9-13950HX and 64 Go RAM:
 
-Dumping and translation of `Logic/make.ml` with `dg 32`:
-  * dump-simp 11m42s 21.2 M steps (83% unused including hol.ml) +1729 named theorems
-  * dk 1m13s dko 4m15s lp 42s v 12s vo 71m48s
+Translation of `Logic/make.ml` with `dg 32` (includes `Library/analysis`):
+  * dump-simp 11m42s 10 Go 21.2 M steps (83% unused including hol.ml) +1729 named theorems
+  * dk 1m13s dko 4m15s lp 42s v 12s vo 1h11ms
   
-Dumping and translation of `hol.ml` with `dg 100`:
+Translation of `hol.ml` with `dg 100`:
   * checking time without proof dumping: 1m14s
   * checking time with proof dumping: 1m44s (+40%)
   * dumped files size: 3 Go
   * number of named theorems: 2842
   * number of proof steps: 8.5 M (8% unused)
-  * simplification time: 2m06s
+  * simplification time: 1m22s
   * number of simplifications: 1.2 M (14%)
   * unused proof steps after simplification: 29%
-  * purge time: 12s
+  * purge time: 11s
   * unused proof steps after purge: 60%
 
 | rule         |  % |
@@ -407,7 +406,7 @@ Single-threaded translation to Dedukti:
   * type abbreviations: 348 Ko
   * term abbreviations: 590 Mo (42%)
 
-Dumping and translation of `arith.ml` with `dg 7`:
+Translation of `arith.ml` with `dg 7`:
   * proof dumping time: 11s 77 Mo 448 named theorems
   * number of proof steps: 302 K (9% unused)
   * prf simplification: 2s
diff --git a/TODO.md b/TODO.md
index 36b0056c..aa496b5f 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,6 +1,12 @@
 TODO
 ----
 
+- create a pos file for each part
+
+- get rid of use file? in pos files, set pos to -1 for unused theorems
+
+- proofs can be simplified in parallel by working on the proof of each named theorem separately
+
 - put nb_proofs in a separate file
 
 - add commands for patch/unpatch/add-links ?
@@ -14,8 +20,6 @@ TODO
 
 - add v file checking in ci
 
-- instrument excluded middle in class.ml
-
 - make export incremental (1 lp file for each ml file)
 
 - make or_elim more implicit ?
diff --git a/fusion.ml b/fusion.ml
index 4144cb58..ebfd6da6 100644
--- a/fusion.ml
+++ b/fusion.ml
@@ -111,6 +111,8 @@ module type Hol_kernel =
       val hyp : thm -> term list
       val concl : thm -> term
       val index_of : thm -> int
+      val content_of : proof -> proof_content
+      val change_content : proof -> proof_content -> proof
       (*REMOVE
       val REFL : term -> thm
       val TRANS : thm -> thm -> thm
@@ -157,7 +159,6 @@ module type Hol_kernel =
       (*REMOVE*)val the_term_constants : (string * hol_type) list ref
       (*REMOVE*)val the_axioms : thm list ref
       (*REMOVE*)val the_definitions : thm list ref
-      (*REMOVE*)val change_proof_content : proof -> proof_content -> proof
 end;;
 
 (* ------------------------------------------------------------------------- *)
@@ -212,7 +213,8 @@ module Hol : Hol_kernel = struct
 
   type proof = Proof of (thm * proof_content)
 
-  let change_proof_content p c = let Proof(th,_) = p in Proof(th,c)
+  let content_of (Proof(_,c)) = c
+  let change_content p c = let Proof(th,_) = p in Proof(th,c)
 
   let thm_index = ref (-1)
 
@@ -615,7 +617,7 @@ module Hol : Hol_kernel = struct
 
   let concl (Sequent(asl,c,_)) = c
 
-  let index_of(Sequent(_,_,k)) = k
+  let index_of (Sequent(_,_,k)) = k
 
 (* ------------------------------------------------------------------------- *)
 (* Basic equality properties; TRANS is derivable but included for efficiency *)
diff --git a/main.ml b/main.ml
index 43422c4c..62bf95e0 100644
--- a/main.ml
+++ b/main.ml
@@ -466,64 +466,63 @@ and command = function
      let n = ref 0 in
      (* map from theorem indexes to their new proofs *)
      let map = ref MapInt.empty in
-     let add i p = map := MapInt.add i p !map in
-     let proof_at j = try MapInt.find j !map with Not_found -> proof_at j in
-     let pc_at j = let Proof(_,c) = proof_at j in c in
+     let add i c = map := MapInt.add i c !map in
+     let pc_at j =
+       match MapInt.find_opt j !map with
+       | Some c -> c
+       | None -> content_of (proof_at j)
+     in
      (* simplification of proof p at index k *)
      let simp k p =
        let default() = output_value oc p in
-       let out pc =
-         let p = change_proof_content p pc in
-         incr n; add k p; output_value oc p
-       in
-       if Array.get !last_use k < 0 then
-         output_value oc p (*(change_proof_content p Ptruth)*)
-       else
-       let Proof(_,c) = p in
-       match c with
+       let l = Array.get !last_use k in
+       if l < 0 then default() else
+       let out c = incr n; add k c; output_value oc (change_content p c) in
+       begin match content_of p with
        | Ptrans(i,j) ->
-          let pi = proof_at i and pj = proof_at j in
-          let Proof(_,ci) = pi and Proof(_,cj) = pj in
+          let ci = pc_at i and cj = pc_at j in
           begin match ci, cj with
           | Prefl _, _ -> (* i:t=t j:t=u ==> k:t=u *) out cj
           | _, Prefl _ -> (* i:t=u j:u=u ==> k:t=u *) out ci
           | _ -> default()
           end
        | Psym i ->
-          let pi = proof_at i in
-          let Proof(_,ci) = pi in
-          begin
-            match ci with
-            | Prefl _ -> (* i:t=t ==> k:t=t *) out ci
-            | Psym j -> (* j:t=u ==> i:u=t ==> k:t=u *) out (pc_at j)
-            | _ -> default()
+          let ci = pc_at i in
+          begin match ci with
+          | Prefl _ -> (* i:t=t ==> k:t=t *) out ci
+          | Psym j -> (* j:t=u ==> i:u=t ==> k:t=u *) out (pc_at j)
+          | _ -> default()
           end
        | Pconjunct1 i ->
-          begin match proof_at i with
-          | Proof(_,Pconj(j,_)) -> (* j:p ==> i:p/\q ==> k:p *) out (pc_at j)
+          begin match pc_at i with
+          | Pconj(j,_) -> (* j:p ==> i:p/\q ==> k:p *) out (pc_at j)
           | _ -> default()
           end
        | Pconjunct2 i ->
-          begin match proof_at i with
-          | Proof(_,Pconj(_,j)) -> (* j:q ==> i:p/\q ==> k:q *) out (pc_at j)
+          begin match pc_at i with
+          | Pconj(_,j) -> (* j:q ==> i:p/\q ==> k:q *) out (pc_at j)
           | _ -> default()
           end
        | Pmkcomb(i,j) ->
-          begin match proof_at i with
-          | Proof(_,Prefl t) ->
-             begin match proof_at j with
-             | Proof(_,Prefl u) -> (* i:t=t j:u=u ==> k:tu=tu *)
+          begin match pc_at i with
+          | Prefl t ->
+             begin match pc_at j with
+             | Prefl u -> (* i:t=t j:u=u ==> k:tu=tu *)
                 out (Prefl(mk_comb(t,u)))
              | _ -> default()
              end
           | _ -> default()
           end
        | Peqmp(i,j) ->
-          begin match proof_at i with
-          | Proof(_,Prefl _) -> (* i:p=p j:p ==> k:p *) out (pc_at j)
+          begin match pc_at i with
+          | Prefl _ -> (* i:p=p j:p ==> k:p *) out (pc_at j)
           | _ -> default()
           end
        | _ -> default()
+       end;
+       (* we can empty the map since the proofs coming after a named
+          theorem cannot refer to proofs coming before it *)
+       if l = 0 then map := MapInt.empty
      in
      iter_proofs_at simp;
      close_in !Xproof.ic_prf;