diff --git a/pyproject.toml b/pyproject.toml
index 617b06dc..be069149 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ exclude = [
[project]
name = "lean-dojo"
-version = "1.4.5"
+version = "1.5.0rc"
authors = [
{ name="Kaiyu Yang", email="kaiyuy@caltech.edu" },
]
diff --git a/scripts/generate-benchmark-lean3.ipynb b/scripts/generate-benchmark-lean3.ipynb
index b76598d5..a1a3db36 100644
--- a/scripts/generate-benchmark-lean3.ipynb
+++ b/scripts/generate-benchmark-lean3.ipynb
@@ -21,7 +21,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 8,
"id": "cda71d78",
"metadata": {},
"outputs": [],
@@ -61,7 +61,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 9,
"id": "acdb4e64",
"metadata": {},
"outputs": [],
@@ -83,7 +83,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 10,
"id": "0d7bcf76",
"metadata": {},
"outputs": [],
@@ -123,7 +123,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 11,
"id": "a9e1fe70",
"metadata": {},
"outputs": [],
@@ -140,7 +140,6 @@
" # Figure out the number of theorems in train/val/test.\n",
" num_theorems = len(traced_theorems)\n",
" num_val_test = NUM_VAL + NUM_TEST\n",
- " num_train = num_theorems - num_val_test\n",
" theorems_val_test = set()\n",
"\n",
" # Map each premise to a list of theorems using it.\n",
@@ -179,7 +178,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 12,
"id": "8509a475",
"metadata": {},
"outputs": [],
@@ -206,20 +205,11 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 13,
"id": "ac984a99",
"metadata": {},
"outputs": [],
"source": [
- "def _get_file_path(traced_repo: TracedRepo, thm: TracedTheorem) -> str:\n",
- " if thm.repo == traced_repo.repo:\n",
- " # The theorem belongs to the traced repo itself.\n",
- " return str(thm.theorem.file_path)\n",
- " else:\n",
- " # The theorem belongs to one of the dependencies.\n",
- " return f\"{LEAN3_PACKAGES_DIR}/{thm.repo.name}/{thm.theorem.file_path}\"\n",
- "\n",
- "\n",
"def export_proofs(\n",
" traced_repo: TracedRepo, splits: Dict[SPLIT_STRATEGY, SPLIT], dst_path: Path\n",
") -> None:\n",
@@ -249,7 +239,7 @@
" {\n",
" \"url\": thm.repo.url,\n",
" \"commit\": thm.repo.commit,\n",
- " \"file_path\": _get_file_path(traced_repo, thm),\n",
+ " \"file_path\": str(thm.theorem.file_path),\n",
" \"full_name\": thm.theorem.full_name,\n",
" \"start\": list(thm.start),\n",
" \"end\": list(thm.end),\n",
@@ -283,7 +273,7 @@
" + \"\\n\"\n",
" )\n",
" logger.info(\n",
- " f\"{num_premises} theorems/definitions from {traced_repo.num_traced_files} files saved to {oup_path}\"\n",
+ " f\"{num_premises} theorems/definitions from {len(traced_repo.traced_files)} files saved to {oup_path}\"\n",
" )\n",
"\n",
"\n",
@@ -354,7 +344,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 14,
"id": "59eaebb3",
"metadata": {
"scrolled": true
@@ -364,113 +354,114 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "\u001b[32m2024-01-05 08:04:17.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m182\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/leanprover-community-mathlib-19c869efa56bbb8b500f2724c0b77261edbfa28c/mathlib\u001b[0m\n",
- "2024-01-05 08:04:19,851\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n",
- "100%|██████████| 3384/3384 [04:14<00:00, 13.30it/s] \n",
- "\u001b[32m2024-01-05 08:08:58.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_data\u001b[0m:\u001b[36m3\u001b[0m - \u001b[1m98734 theorems in total\u001b[0m\n",
- "\u001b[32m2024-01-05 08:08:58.886\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_randomly\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mSplitting the theorems randomly\u001b[0m\n",
- "\u001b[32m2024-01-05 08:08:58.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_by_premise\u001b[0m:\u001b[36m8\u001b[0m - \u001b[1mSplitting the theorems by premises\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.056\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.817\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.817\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.818\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.820\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.820\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.821\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.821\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.821\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.821\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.822\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.822\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.823\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.823\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.823\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.824\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:05.921\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.453\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.454\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.455\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.455\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.456\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.456\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.456\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:06.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:08.334\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:08.334\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:08.661\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:08.662\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:10.228\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:10.509\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:10.510\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:11.668\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:11.668\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:12.193\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:19.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m94734 theorems and 208949 tactics saved to ../leandojo_benchmark/random/train.json\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:20.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m2000 theorems and 4561 tactics saved to ../leandojo_benchmark/random/val.json\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:20.235\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:20.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m2000 theorems and 4266 tactics saved to ../leandojo_benchmark/random/test.json\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:24.907\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:24.908\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:24.974\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:24.975\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:25.415\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.436\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.438\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.456\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.461\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.461\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.461\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.462\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.462\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.462\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.463\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.463\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.464\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.482\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.483\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.484\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.484\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.485\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.485\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.485\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.486\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.486\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.487\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.487\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.488\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.488\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.488\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.489\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.489\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.489\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.490\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.490\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.490\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.491\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:26.491\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:33.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m94734 theorems and 191184 tactics saved to ../leandojo_benchmark/novel_premises/train.json\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:34.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m2000 theorems and 13321 tactics saved to ../leandojo_benchmark/novel_premises/val.json\u001b[0m\n",
- "\u001b[32m2024-01-05 08:09:35.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m2000 theorems and 13271 tactics saved to ../leandojo_benchmark/novel_premises/test.json\u001b[0m\n",
- "\u001b[32m2024-01-05 08:10:05.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m72\u001b[0m - \u001b[1m130283 theorems/definitions from 3384 files saved to ../leandojo_benchmark/corpus.jsonl\u001b[0m\n"
+ "\u001b[32m2024-01-08 12:50:44.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m193\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/leanprover-community-mathlib-19c869efa56bbb8b500f2724c0b77261edbfa28c/mathlib\u001b[0m\n",
+ "2024-01-08 12:50:49,468\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n",
+ "100%|██████████| 3384/3384 [05:19<00:00, 10.58it/s] \n",
+ "\u001b[32m2024-01-08 12:56:33.412\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_data\u001b[0m:\u001b[36m3\u001b[0m - \u001b[1m98734 theorems in total\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:33.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_randomly\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mSplitting the theorems randomly\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:33.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_by_premise\u001b[0m:\u001b[36m8\u001b[0m - \u001b[1mSplitting the theorems by premises\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:39.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m109\u001b[0m - \u001b[33m\u001b[1m../leandojo_benchmark already exists. Removing it now.\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.115\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.115\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.115\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.116\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.116\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.118\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.118\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.656\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:40.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:41.625\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:43.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:44.077\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:44.429\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:44.430\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.043\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.044\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.045\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.045\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.046\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.046\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.047\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.047\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.047\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.048\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.048\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.050\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.050\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:45.749\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:46.377\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:46.379\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:46.649\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:48.358\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:48.359\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:55.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m94734 theorems and 208460 tactics saved to ../leandojo_benchmark/random/train.json\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:55.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m2000 theorems and 4652 tactics saved to ../leandojo_benchmark/random/val.json\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:56:56.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m2000 theorems and 4664 tactics saved to ../leandojo_benchmark/random/test.json\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:00.367\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:00.369\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:00.442\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:00.443\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:00.710\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:01.998\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:01.999\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.000\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.000\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.000\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.001\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.001\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.002\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.002\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.003\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.003\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.003\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.004\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.004\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.005\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.005\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.006\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.006\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.038\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.040\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.104\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.105\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.107\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.107\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.108\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.108\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.108\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.109\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.109\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:02.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:09.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m94734 theorems and 191524 tactics saved to ../leandojo_benchmark/novel_premises/train.json\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:10.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m2000 theorems and 12678 tactics saved to ../leandojo_benchmark/novel_premises/val.json\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:11.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m2000 theorems and 13574 tactics saved to ../leandojo_benchmark/novel_premises/test.json\u001b[0m\n",
+ "\u001b[32m2024-01-08 12:57:44.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m63\u001b[0m - \u001b[1m130283 theorems/definitions from 3384 files saved to ../leandojo_benchmark/corpus.jsonl\u001b[0m\n"
]
}
],
@@ -524,7 +515,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 15,
"id": "b1b97c12",
"metadata": {},
"outputs": [
@@ -550,7 +541,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 16,
"id": "0eefe7b8",
"metadata": {},
"outputs": [
@@ -560,7 +551,7 @@
"dict_keys(['path', 'imports', 'premises'])"
]
},
- "execution_count": 10,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -582,20 +573,21 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 17,
"id": "05e1740f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "('src/data/polynomial/induction.lean',\n",
- " ['src/ring_theory/ideal/basic.lean',\n",
- " '_target/deps/lean/library/init/default.lean',\n",
- " 'src/data/polynomial/basic.lean'])"
+ "('src/linear_algebra/affine_space/affine_equiv.lean',\n",
+ " ['_target/deps/lean/library/init/default.lean',\n",
+ " 'src/linear_algebra/general_linear_group.lean',\n",
+ " 'src/linear_algebra/affine_space/affine_map.lean',\n",
+ " 'src/algebra/invertible.lean'])"
]
},
- "execution_count": 11,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -606,17 +598,17 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 18,
"id": "04e6e71f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "5"
+ "94"
]
},
- "execution_count": 12,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -635,21 +627,21 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 19,
"id": "c4b447b0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "{'full_name': 'polynomial.induction_on',\n",
- " 'code': '@[elab_as_eliminator] protected lemma induction_on {M : R[X] → Prop} (p : R[X])\\n (h_C : ∀a, M (C a))\\n (h_add : ∀p q, M p → M q → M (p + q))\\n (h_monomial : ∀(n : ℕ) (a : R), M (C a * X^n) → M (C a * X^(n+1))) :\\n M p',\n",
- " 'start': [34, 1],\n",
- " 'end': [51, 4],\n",
- " 'kind': 'theorem'}"
+ "{'full_name': 'affine_equiv',\n",
+ " 'code': \"@[nolint has_nonempty_instance]\\nstructure affine_equiv (k P₁ P₂ : Type*) {V₁ V₂ : Type*} [ring k]\\n [add_comm_group V₁] [module k V₁] [add_torsor V₁ P₁]\\n [add_comm_group V₂] [module k V₂] [add_torsor V₂ P₂] extends P₁ ≃ P₂ :=\\n(linear : V₁ ≃ₗ[k] V₂)\\n(map_vadd' : ∀ (p : P₁) (v : V₁), to_equiv (v +ᵥ p) = linear v +ᵥ to_equiv p)\",\n",
+ " 'start': [46, 1],\n",
+ " 'end': [51, 78],\n",
+ " 'kind': 'structure'}"
]
},
- "execution_count": 13,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -673,7 +665,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 20,
"id": "a59b38b6",
"metadata": {},
"outputs": [
@@ -683,7 +675,7 @@
"94734"
]
},
- "execution_count": 14,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -704,7 +696,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 21,
"id": "9dc67dd2",
"metadata": {},
"outputs": [
@@ -714,13 +706,13 @@
"dict_keys(['url', 'commit', 'file_path', 'full_name', 'start', 'end', 'traced_tactics'])"
]
},
- "execution_count": 15,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "for proof in proofs_train[::-1]:\n",
+ "for proof in proofs_train:\n",
" if proof[\"traced_tactics\"] != []:\n",
" break\n",
"proof.keys()"
@@ -728,20 +720,20 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 22,
"id": "cc2bc340",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "('https://github.com/leanprover-community/mathlib',\n",
- " '19c869efa56bbb8b500f2724c0b77261edbfa28c',\n",
- " 'src/data/rat/cast.lean',\n",
- " 'rat.cast_mk')"
+ "('https://github.com/leanprover-community/lean',\n",
+ " 'cce7990ea86a78bdb383e38ed7f9b5ba93c60ce0',\n",
+ " 'library/init/data/array/basic.lean',\n",
+ " 'd_array.of_beq_aux_eq_tt')"
]
},
- "execution_count": 16,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -760,17 +752,17 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 23,
"id": "00315e27",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "1"
+ "13"
]
},
- "execution_count": 17,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -789,29 +781,32 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 24,
"id": "52769d80",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "{'tactic': 'simp only [mk_eq_div, cast_div, cast_coe_int]',\n",
- " 'annotated_tactic': ['simp only [mk_eq_div, cast_div, cast_coe_int]',\n",
- " [{'full_name': 'rat.mk_eq_div',\n",
- " 'def_path': 'src/data/rat/defs.lean',\n",
- " 'def_pos': [535, 9]},\n",
- " {'full_name': 'rat.cast_div',\n",
- " 'def_path': 'src/data/rat/cast.lean',\n",
- " 'def_pos': [204, 28]},\n",
- " {'full_name': 'rat.cast_coe_int',\n",
- " 'def_path': 'src/data/rat/cast.lean',\n",
- " 'def_pos': [41, 28]}]],\n",
- " 'state_before': 'α : Type u_3,\\n_inst_1 : division_ring α,\\n_inst_2 : char_zero α,\\na b : ℤ\\n⊢ ↑(a /. b) = ↑a / ↑b',\n",
- " 'state_after': 'no goals'}"
+ "{'tactic': \"have h₂' : read a ⟨i, h₁⟩ = read b ⟨i, h₁⟩ ∧ d_array.beq_aux a b i _ = tt\",\n",
+ " 'annotated_tactic': [\"have h₂' : read a ⟨i, h₁⟩ = read b ⟨i, h₁⟩ ∧ d_array.beq_aux a b i _ = tt\",\n",
+ " [{'full_name': 'd_array.read',\n",
+ " 'def_path': '_target/deps/lean/library/init/data/array/basic.lean',\n",
+ " 'def_pos': [22, 5]},\n",
+ " {'full_name': 'd_array.read',\n",
+ " 'def_path': '_target/deps/lean/library/init/data/array/basic.lean',\n",
+ " 'def_pos': [22, 5]},\n",
+ " {'full_name': 'd_array.beq_aux',\n",
+ " 'def_path': '_target/deps/lean/library/init/data/array/basic.lean',\n",
+ " 'def_pos': [74, 15]},\n",
+ " {'full_name': 'bool.tt',\n",
+ " 'def_path': '_target/deps/lean/library/init/core.lean',\n",
+ " 'def_pos': [242, 11]}]],\n",
+ " 'state_before': \"n : ℕ,\\nα : fin n → Type u,\\n_inst_1 : Π (i : fin n), decidable_eq (α i),\\na b : d_array n α,\\nof_beq_aux_eq_tt :\\n ∀ (i : ℕ) (h : i ≤ n),\\n a.beq_aux b i h = tt → ∀ (j : ℕ) (h' : j < i), a.read ⟨j, _⟩ = b.read ⟨j, _⟩,\\ni : ℕ,\\nh₁ : i + 1 ≤ n,\\nh₂ : a.beq_aux b (i + 1) h₁ = tt,\\nj : ℕ,\\nh₃ : j < i + 1\\n⊢ a.read ⟨j, _⟩ = b.read ⟨j, _⟩\",\n",
+ " 'state_after': \"3 goals\\nn : ℕ,\\nα : fin n → Type u,\\n_inst_1 : Π (i : fin n), decidable_eq (α i),\\na b : d_array n α,\\nof_beq_aux_eq_tt :\\n ∀ (i : ℕ) (h : i ≤ n),\\n a.beq_aux b i h = tt → ∀ (j : ℕ) (h' : j < i), a.read ⟨j, _⟩ = b.read ⟨j, _⟩,\\ni : ℕ,\\nh₁ : i + 1 ≤ n,\\nh₂ : a.beq_aux b (i + 1) h₁ = tt,\\nj : ℕ,\\nh₃ : j < i + 1\\n⊢ a.read ⟨i, h₁⟩ = b.read ⟨i, h₁⟩ ∧ a.beq_aux b i ?m_1 = tt\\n\\nn : ℕ,\\nα : fin n → Type u,\\n_inst_1 : Π (i : fin n), decidable_eq (α i),\\na b : d_array n α,\\nof_beq_aux_eq_tt :\\n ∀ (i : ℕ) (h : i ≤ n),\\n a.beq_aux b i h = tt → ∀ (j : ℕ) (h' : j < i), a.read ⟨j, _⟩ = b.read ⟨j, _⟩,\\ni : ℕ,\\nh₁ : i + 1 ≤ n,\\nh₂ : a.beq_aux b (i + 1) h₁ = tt,\\nj : ℕ,\\nh₃ : j < i + 1,\\nh₂' : a.read ⟨i, h₁⟩ = b.read ⟨i, h₁⟩ ∧ a.beq_aux b i ?m_1 = tt\\n⊢ a.read ⟨j, _⟩ = b.read ⟨j, _⟩\\n\\nn : ℕ,\\nα : fin n → Type u,\\n_inst_1 : Π (i : fin n), decidable_eq (α i),\\na b : d_array n α,\\nof_beq_aux_eq_tt :\\n ∀ (i : ℕ) (h : i ≤ n),\\n a.beq_aux b i h = tt → ∀ (j : ℕ) (h' : j < i), a.read ⟨j, _⟩ = b.read ⟨j, _⟩,\\ni : ℕ,\\nh₁ : i + 1 ≤ n,\\nh₂ : a.beq_aux b (i + 1) h₁ = tt,\\nj : ℕ,\\nh₃ : j < i + 1\\n⊢ i ≤ n\"}"
]
},
- "execution_count": 19,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -840,7 +835,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 25,
"id": "bc457ce9",
"metadata": {},
"outputs": [
@@ -848,14 +843,14 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "\u001b[32m2024-01-05 08:18:03.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m182\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/facebookresearch-miniF2F-5271ddec788677c815cf818a06f368ef6498a106/miniF2F\u001b[0m\n",
- "2024-01-05 08:18:08,327\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n",
- "100%|██████████| 1159/1159 [02:23<00:00, 8.05it/s] \n",
- "\u001b[32m2024-01-05 08:20:47.523\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m118\u001b[0m - \u001b[33m\u001b[1m../leandojo_minif2f already exists. Removing it now.\u001b[0m\n",
- "\u001b[32m2024-01-05 08:20:47.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m244 theorems and 549 tactics saved to ../leandojo_minif2f/default/val.json\u001b[0m\n",
- "\u001b[32m2024-01-05 08:20:47.575\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
- "\u001b[32m2024-01-05 08:20:47.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m245 theorems and 781 tactics saved to ../leandojo_minif2f/default/test.json\u001b[0m\n",
- "\u001b[32m2024-01-05 08:21:02.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m72\u001b[0m - \u001b[1m67170 theorems/definitions from 1159 files saved to ../leandojo_minif2f/corpus.jsonl\u001b[0m\n"
+ "\u001b[32m2024-01-08 13:03:50.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m193\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/facebookresearch-miniF2F-5271ddec788677c815cf818a06f368ef6498a106/miniF2F\u001b[0m\n",
+ "2024-01-08 13:03:58,678\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n",
+ "100%|██████████| 1159/1159 [02:29<00:00, 7.77it/s] \n",
+ "\u001b[32m2024-01-08 13:06:43.405\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m109\u001b[0m - \u001b[33m\u001b[1m../leandojo_minif2f already exists. Removing it now.\u001b[0m\n",
+ "\u001b[32m2024-01-08 13:06:43.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m244 theorems and 549 tactics saved to ../leandojo_minif2f/default/val.json\u001b[0m\n",
+ "\u001b[32m2024-01-08 13:06:43.445\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n",
+ "\u001b[32m2024-01-08 13:06:43.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m245 theorems and 781 tactics saved to ../leandojo_minif2f/default/test.json\u001b[0m\n",
+ "\u001b[32m2024-01-08 13:06:58.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m63\u001b[0m - \u001b[1m67170 theorems/definitions from 1159 files saved to ../leandojo_minif2f/corpus.jsonl\u001b[0m\n"
]
}
],
@@ -884,7 +879,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 26,
"id": "12f436c7",
"metadata": {},
"outputs": [
@@ -892,12 +887,18 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "\u001b[32m2024-01-05 08:21:03.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m182\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/zhangir-azerbayev-ProofNet-e8645aa830ce17c33a8b8482a8195f0f97d6a74a/ProofNet\u001b[0m\n",
- "2024-01-05 08:21:09,772\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n",
- "100%|██████████| 1539/1539 [01:53<00:00, 13.51it/s] \n",
- "\u001b[32m2024-01-05 08:23:21.624\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m118\u001b[0m - \u001b[33m\u001b[1m../leandojo_proofnet already exists. Removing it now.\u001b[0m\n",
- "\u001b[32m2024-01-05 08:23:21.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m374 theorems and 460 tactics saved to ../leandojo_proofnet/default/test.json\u001b[0m\n",
- "\u001b[32m2024-01-05 08:23:38.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m72\u001b[0m - \u001b[1m82365 theorems/definitions from 1539 files saved to ../leandojo_proofnet/corpus.jsonl\u001b[0m\n"
+ "\u001b[32m2024-01-08 13:07:00.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m193\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/zhangir-azerbayev-ProofNet-e8645aa830ce17c33a8b8482a8195f0f97d6a74a/ProofNet\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2024-01-08 13:07:09,769\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n",
+ "100%|██████████| 1539/1539 [03:06<00:00, 8.25it/s] \n",
+ "\u001b[32m2024-01-08 13:10:34.216\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m109\u001b[0m - \u001b[33m\u001b[1m../leandojo_proofnet already exists. Removing it now.\u001b[0m\n",
+ "\u001b[32m2024-01-08 13:10:34.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m374 theorems and 460 tactics saved to ../leandojo_proofnet/default/test.json\u001b[0m\n",
+ "\u001b[32m2024-01-08 13:10:52.466\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m63\u001b[0m - \u001b[1m82365 theorems/definitions from 1539 files saved to ../leandojo_proofnet/corpus.jsonl\u001b[0m\n"
]
}
],
diff --git a/scripts/generate-benchmark-lean4.ipynb b/scripts/generate-benchmark-lean4.ipynb
index 7d14c396..4021a78f 100644
--- a/scripts/generate-benchmark-lean4.ipynb
+++ b/scripts/generate-benchmark-lean4.ipynb
@@ -275,7 +275,7 @@
" + \"\\n\"\n",
" )\n",
" logger.info(\n",
- " f\"{num_premises} theorems/definitions from {traced_repo.num_traced_files} files saved to {oup_path}\"\n",
+ " f\"{num_premises} theorems/definitions from {len(traced_repo.traced_files)} files saved to {oup_path}\"\n",
" )\n",
"\n",
"\n",
diff --git a/src/lean_dojo/constants.py b/src/lean_dojo/constants.py
index 26b64ed9..069ef8e7 100644
--- a/src/lean_dojo/constants.py
+++ b/src/lean_dojo/constants.py
@@ -15,7 +15,7 @@
load_dotenv()
-__version__ = "1.4.5"
+__version__ = "1.5.0rc"
logger.remove()
if "VERBOSE" in os.environ or "DEBUG" in os.environ:
diff --git a/src/lean_dojo/container.py b/src/lean_dojo/container.py
index f23e5fd7..d3d7c7b7 100644
--- a/src/lean_dojo/container.py
+++ b/src/lean_dojo/container.py
@@ -103,7 +103,7 @@ def _copy_file_or_dir(src: Path, dst: Path) -> None:
assert src.is_dir() and not src.is_relative_to(dst)
if dst.exists():
shutil.rmtree(dst)
- shutil.copytree(src, dst)
+ shutil.copytree(src, dst, symlinks=True)
class NativeContainer(Container):
diff --git a/src/lean_dojo/data_extraction/ExtractData.lean b/src/lean_dojo/data_extraction/ExtractData.lean
index ebf66042..03578e98 100644
--- a/src/lean_dojo/data_extraction/ExtractData.lean
+++ b/src/lean_dojo/data_extraction/ExtractData.lean
@@ -205,7 +205,10 @@ Convert the path of a *.lean file to its corresponding file (e.g., *.olean) in t
def toBuildDir (subDir : FilePath) (path : FilePath) (ext : String) : Option FilePath :=
let path' := (trim path).withExtension ext
match relativeTo path' $ packagesDir / "lean4/src" with
- | some p => packagesDir / "lean4/lib" / p
+ | some p =>
+ match relativeTo p "lean/lake" with
+ | some p' => packagesDir / "lean4/lib/lean" / p'
+ | none => packagesDir / "lean4/lib" / p
| none => match relativeTo path' packagesDir with
| some p =>
match p.components with
@@ -448,6 +451,7 @@ unsafe def processFile (path : FilePath) : IO Unit := do
continue
if found then
p := p ++ FilePath.pathSeparator.toString ++ c
+ p := p.replace "/lean4/src/lean/Lake" "/lean4/src/lean/lake/Lake"
assert! ← FilePath.mk p |>.pathExists
s := s ++ "\n" ++ p
@@ -464,16 +468,17 @@ open LeanDojo
/--
Whether a *.lean file should be traced.
-/
-def shouldProcess (path : FilePath) (noDep : Bool) : IO Bool := do
+def shouldProcess (path : FilePath) (noDeps : Bool) : IO Bool := do
if (← path.isDir) ∨ path.extension != "lean" then
return false
- if noDep ∧ Path.isRelativeTo path Path.packagesDir then
- return false
-
let cwd ← IO.currentDir
let some relativePath := Path.relativeTo path cwd |
throw $ IO.userError s!"Invalid path: {path}"
+
+ if noDeps ∧ Path.isRelativeTo relativePath Path.packagesDir then
+ return false
+
let some oleanPath := Path.toBuildDir "lib" relativePath "olean" |
throw $ IO.userError s!"Invalid path: {path}"
return ← oleanPath.pathExists
@@ -482,14 +487,14 @@ def shouldProcess (path : FilePath) (noDep : Bool) : IO Bool := do
/--
Trace all *.lean files in the current directory whose corresponding *.olean file exists.
-/
-def processAllFiles (noDep : Bool) : IO Unit := do
+def processAllFiles (noDeps : Bool) : IO Unit := do
let cwd ← IO.currentDir
assert! cwd.fileName != "lean4"
println! "Extracting data at {cwd}"
let mut tasks := #[]
for path in ← System.FilePath.walkDir cwd do
- if ← shouldProcess path noDep then
+ if ← shouldProcess path noDeps then
println! path
let t ← IO.asTask $ IO.Process.run
{cmd := "lake", args := #["env", "lean", "--run", "ExtractData.lean", path.toString]}
@@ -506,6 +511,6 @@ def processAllFiles (noDep : Bool) : IO Unit := do
unsafe def main (args : List String) : IO Unit := do
match args with
- | "nodep" :: _ => processAllFiles true
+ | "nodeps" :: _ => processAllFiles true
| path :: _ => processFile (← Path.toAbsolute ⟨path⟩)
| [] => processAllFiles false
diff --git a/src/lean_dojo/data_extraction/build_lean4_repo.py b/src/lean_dojo/data_extraction/build_lean4_repo.py
index eb41126b..1e11a732 100644
--- a/src/lean_dojo/data_extraction/build_lean4_repo.py
+++ b/src/lean_dojo/data_extraction/build_lean4_repo.py
@@ -4,8 +4,8 @@
"""
import os
import re
-import sys
import shutil
+import argparse
import itertools
import subprocess
from tqdm import tqdm
@@ -104,16 +104,23 @@ def get_lean_version() -> str:
return m["version"]
-def check_files() -> None:
+def check_files(packages_path: str, no_deps: bool) -> None:
"""Check if all *.lean files have been processed to produce *.ast.json and *.dep_paths files."""
cwd = Path.cwd()
jsons = {
- p.with_suffix("").with_suffix("") for p in cwd.glob("**/build/ir/**/*.ast.json")
+ p.with_suffix("").with_suffix("")
+ for p in cwd.glob("**/build/ir/**/*.ast.json")
+ if not no_deps or not p.is_relative_to(packages_path)
+ }
+ deps = {
+ p.with_suffix("")
+ for p in cwd.glob("**/build/ir/**/*.dep_paths")
+ if not no_deps or not p.is_relative_to(packages_path)
}
- deps = {p.with_suffix("") for p in cwd.glob("**/build/ir/**/*.dep_paths")}
oleans = {
Path(str(p.with_suffix("")).replace("/build/lib/", "/build/ir/"))
for p in cwd.glob("**/build/lib/**/*.olean")
+ if not no_deps or not p.is_relative_to(packages_path)
}
assert len(jsons) <= len(oleans) and len(deps) <= len(oleans)
missing_jsons = {p.with_suffix(".ast.json") for p in oleans - jsons}
@@ -123,7 +130,7 @@ def check_files() -> None:
logger.warning(f"Missing {p}")
-def is_new_version(v) -> bool:
+def is_new_version(v: str) -> bool:
"""Check if ``v`` is at least `4.3.0-rc2`."""
major, minor, patch = [int(_) for _ in v.split("-")[0].split(".")]
if major < 4 or (major == 4 and minor < 3):
@@ -143,12 +150,23 @@ def is_new_version(v) -> bool:
def main() -> None:
+ parser = argparse.ArgumentParser()
+ parser.add_argument("repo_name")
+ parser.add_argument("--no-deps", action="store_true")
+ args = parser.parse_args()
+
num_procs = int(os.environ["NUM_PROCS"])
- repo_name = sys.argv[1]
+ repo_name = args.repo_name
os.chdir(repo_name)
# Build the repo using lake.
logger.info(f"Building {repo_name}")
+ if args.no_deps:
+ # The additional *.olean files wouldn't matter.
+ try:
+ run_cmd("lake exe cache get")
+ except subprocess.CalledProcessError:
+ pass
run_cmd("lake build")
# Copy the Lean 4 stdlib into the path of packages.
@@ -162,14 +180,17 @@ def main() -> None:
shutil.copytree(lean_prefix, f"{packages_path}/lean4")
# Run ExtractData.lean to extract ASTs, tactic states, and premise information.
+ dirs_to_monitor = [build_path]
+ if not args.no_deps:
+ dirs_to_monitor.append(packages_path)
logger.info(f"Tracing {repo_name}")
- with launch_progressbar([build_path, packages_path]):
- run_cmd(
- f"lake env lean --threads {num_procs} --run ExtractData.lean",
- capture_output=True,
- )
+ with launch_progressbar(dirs_to_monitor):
+ cmd = f"lake env lean --threads {num_procs} --run ExtractData.lean"
+ if args.no_deps:
+ cmd += " nodeps"
+ run_cmd(cmd, capture_output=True)
- check_files()
+ check_files(packages_path, args.no_deps)
if __name__ == "__main__":
diff --git a/src/lean_dojo/data_extraction/cache.py b/src/lean_dojo/data_extraction/cache.py
index b876e8ef..52afb761 100644
--- a/src/lean_dojo/data_extraction/cache.py
+++ b/src/lean_dojo/data_extraction/cache.py
@@ -87,15 +87,13 @@ def get(self, url: str, commit: str) -> Optional[Path]:
def store(self, src: Path) -> Path:
"""Store a traced repo at path ``src``. Return its path in the cache."""
- dirs = list(src.glob("*"))
- assert len(dirs) == 1, f"Unexpected number of directories in {src}"
- url, commit = get_repo_info(dirs[0])
+ url, commit = get_repo_info(src)
dirpath = self.cache_dir / _format_dirname(url, commit)
+ _, repo_name = _split_git_url(url)
if not dirpath.exists():
with self.lock:
with report_critical_failure(_CACHE_CORRPUTION_MSG):
- shutil.copytree(src, dirpath)
- _, repo_name = _split_git_url(url)
+ shutil.copytree(src, dirpath / repo_name)
return dirpath / repo_name
diff --git a/src/lean_dojo/data_extraction/lean.py b/src/lean_dojo/data_extraction/lean.py
index 075f2d6a..a4379439 100644
--- a/src/lean_dojo/data_extraction/lean.py
+++ b/src/lean_dojo/data_extraction/lean.py
@@ -601,7 +601,7 @@ def _get_lean4_dependencies(
try:
lake_manifest = (
- self.get_config("lake-manifest.json")
+ self.get_config("lake-manifest.json", num_retries=0)
if path is None
else json.load((Path(path) / "lake-manifest.json").open())
)
@@ -638,10 +638,10 @@ def _get_config_url(self, filename: str) -> str:
url = self.url.replace("github.com", "raw.githubusercontent.com")
return f"{url}/{self.commit}/{filename}"
- def get_config(self, filename: str) -> Dict[str, Any]:
+ def get_config(self, filename: str, num_retries: int = 2) -> Dict[str, Any]:
"""Return the repo's files."""
config_url = self._get_config_url(filename)
- content = read_url(config_url)
+ content = read_url(config_url, num_retries)
if filename.endswith(".toml"):
return toml.loads(content)
elif filename.endswith(".json"):
diff --git a/src/lean_dojo/data_extraction/trace.py b/src/lean_dojo/data_extraction/trace.py
index 1cdc2375..a4707293 100644
--- a/src/lean_dojo/data_extraction/trace.py
+++ b/src/lean_dojo/data_extraction/trace.py
@@ -55,15 +55,16 @@ def _modify_lean3(version: str) -> None:
raise ex
-def _trace(repo: LeanGitRepo) -> None:
+def _trace(repo: LeanGitRepo, build_deps: bool) -> None:
assert (
repo.exists()
), f"The {repo} does not exist. Please check the URL `{repo.commit_url}`."
if repo.uses_lean3:
+ assert build_deps
_trace_lean3(repo)
else:
assert repo.uses_lean4
- _trace_lean4(repo)
+ _trace_lean4(repo, build_deps)
def _trace_lean3(repo: LeanGitRepo) -> None:
@@ -101,7 +102,7 @@ def _trace_lean3(repo: LeanGitRepo) -> None:
raise ex
-def _trace_lean4(repo: LeanGitRepo) -> None:
+def _trace_lean4(repo: LeanGitRepo, build_deps: bool) -> None:
# Trace `repo` in the current working directory.
assert not repo.is_lean4, "Cannot trace the Lean 4 repo itself."
repo.clone_and_checkout()
@@ -113,9 +114,13 @@ def _trace_lean4(repo: LeanGitRepo) -> None:
LEAN4_BUILD_SCRIPT_PATH: f"/workspace/{LEAN4_BUILD_SCRIPT_PATH.name}",
LEAN4_DATA_EXTRACTOR_PATH: f"/workspace/{repo.name}/{LEAN4_DATA_EXTRACTOR_PATH.name}",
}
+ cmd = f"python3 build_lean4_repo.py {repo.name}"
+ if not build_deps:
+ cmd += " --no-deps"
+
try:
container.run(
- f"python3 build_lean4_repo.py {repo.name}",
+ cmd,
create_mounts(mts),
{"NUM_PROCS": NUM_PROCS},
as_current_user=True,
@@ -134,13 +139,14 @@ def is_available_in_cache(repo: LeanGitRepo) -> bool:
return cache.get(repo.url, repo.commit) is not None
-def get_traced_repo_path(repo: LeanGitRepo) -> Path:
+def get_traced_repo_path(repo: LeanGitRepo, build_deps: bool = True) -> Path:
"""Return the path of a traced repo in the cache.
The function will trace a repo if it is not available in the cache. See :ref:`caching` for details.
Args:
repo (LeanGitRepo): The Lean repo to trace.
+ build_deps (bool): Whether to build the dependencies of ``repo``. Defaults to True.
Returns:
Path: The path of the traced repo in the cache, e.g. :file:`/home/kaiyu/.cache/lean_dojo/leanprover-community-mathlib-2196ab363eb097c008d4497125e0dde23fb36db2`
@@ -150,16 +156,20 @@ def get_traced_repo_path(repo: LeanGitRepo) -> Path:
logger.info(f"Tracing {repo}")
with working_directory() as tmp_dir:
logger.debug(f"Working in the temporary directory {tmp_dir}")
- _trace(repo)
- traced_repo = TracedRepo.from_traced_files(tmp_dir / repo.name)
+ _trace(repo, build_deps)
+ traced_repo = TracedRepo.from_traced_files(tmp_dir / repo.name, build_deps)
traced_repo.save_to_disk()
- path = cache.store(tmp_dir)
+ path = cache.store(tmp_dir / repo.name)
else:
logger.debug("The traced repo is available in the cache.")
return path
-def trace(repo: LeanGitRepo, dst_dir: Optional[Union[str, Path]] = None) -> TracedRepo:
+def trace(
+ repo: LeanGitRepo,
+ dst_dir: Optional[Union[str, Path]] = None,
+ build_deps: bool = True,
+) -> TracedRepo:
"""Trace a repo (and its dependencies), saving the results to ``dst_dir``.
The function only traces the repo when it's not available in the cache. Otherwise,
@@ -168,6 +178,7 @@ def trace(repo: LeanGitRepo, dst_dir: Optional[Union[str, Path]] = None) -> Trac
Args:
repo (LeanGitRepo): The Lean repo to trace.
dst_dir (Union[str, Path]): The directory for saving the traced repo. If None, the traced repo is only saved in the cahe.
+ build_deps (bool): Whether to build the dependencies of ``repo``. Defaults to True.
Returns:
TracedRepo: A :class:`TracedRepo` object corresponding to the files at ``dst_dir``.
@@ -178,9 +189,9 @@ def trace(repo: LeanGitRepo, dst_dir: Optional[Union[str, Path]] = None) -> Trac
not dst_dir.exists()
), f"The destination directory {dst_dir} already exists."
- cached_path = get_traced_repo_path(repo)
+ cached_path = get_traced_repo_path(repo, build_deps)
logger.info(f"Loading the traced repo from {cached_path}")
- traced_repo = TracedRepo.load_from_disk(cached_path)
+ traced_repo = TracedRepo.load_from_disk(cached_path, build_deps)
traced_repo.check_sanity()
if dst_dir is not None:
diff --git a/src/lean_dojo/data_extraction/traced_data.py b/src/lean_dojo/data_extraction/traced_data.py
index 2c70bdf7..632848ba 100644
--- a/src/lean_dojo/data_extraction/traced_data.py
+++ b/src/lean_dojo/data_extraction/traced_data.py
@@ -1291,7 +1291,10 @@ class TracedRepo:
"""Root directory of the traced repo.
"""
- traced_files_graph: nx.DiGraph = field(repr=False)
+ traced_files: List[TracedFile] = field(repr=False)
+ """List of traced files in the repo."""
+
+ traced_files_graph: Optional[nx.DiGraph] = field(repr=False)
"""Dependency graph between files in the repo.
The graph is a DAG, and there is an edge from file :file:`X` to file :file:`Y`
@@ -1335,7 +1338,9 @@ def check_sanity(self) -> None:
for k, v in self.dependencies.items():
assert isinstance(k, str) and isinstance(v, LeanGitRepo)
assert isinstance(self.root_dir, Path)
- assert isinstance(self.traced_files_graph, nx.DiGraph)
+ assert self.traced_files_graph is None or isinstance(
+ self.traced_files_graph, nx.DiGraph
+ )
assert self.repo not in self.dependencies.values()
assert len(self.dependencies) == 0 or not self.repo.is_lean
@@ -1353,34 +1358,38 @@ def check_sanity(self) -> None:
p.relative_to(self.root_dir) for p in self.root_dir.glob("**/*.dep_paths")
}
- if not LOAD_USED_PACKAGES_ONLY:
- assert len(json_files) == self.traced_files_graph.number_of_nodes()
-
- for path_str, tf_node in self.traced_files_graph.nodes.items():
- tf = tf_node["traced_file"]
- path = Path(path_str)
- tf.check_sanity()
- assert tf.path == path and tf.root_dir == self.root_dir
- assert tf.traced_repo is None or tf.traced_repo is self
- assert path in lean_files
- assert (
- to_dep_path(self.root_dir, path, self.repo) in path_files
- ), to_dep_path(self.root_dir, path, self.repo)
- assert (
- to_json_path(self.root_dir, path, self.repo) in json_files
- ), to_json_path(self.root_dir, path, self.repo)
- if len(xml_files) > 0:
+ if self.traced_files_graph is not None:
+ if not LOAD_USED_PACKAGES_ONLY:
+ assert len(json_files) == self.traced_files_graph.number_of_nodes()
+
+ for path_str, tf_node in self.traced_files_graph.nodes.items():
+ tf = tf_node["traced_file"]
+ path = Path(path_str)
+ tf.check_sanity()
+ assert tf.path == path and tf.root_dir == self.root_dir
+ assert tf.traced_repo is None or tf.traced_repo is self
+ assert path in lean_files
+ assert (
+ to_dep_path(self.root_dir, path, self.repo) in path_files
+ ), to_dep_path(self.root_dir, path, self.repo)
assert (
- to_xml_path(self.root_dir, path, self.repo) in xml_files
- ), to_xml_path(self.root_dir, path, self.repo)
+ to_json_path(self.root_dir, path, self.repo) in json_files
+ ), to_json_path(self.root_dir, path, self.repo)
+ if len(xml_files) > 0:
+ assert (
+ to_xml_path(self.root_dir, path, self.repo) in xml_files
+ ), to_xml_path(self.root_dir, path, self.repo)
@classmethod
- def from_traced_files(cls, root_dir: Union[str, Path]) -> "TracedRepo":
+ def from_traced_files(
+ cls, root_dir: Union[str, Path], build_deps: bool = True
+ ) -> "TracedRepo":
"""Construct a :class:`TracedRepo` object by parsing :file:`*.ast.json` and :file:`*.path` files
produced by :code:`lean --ast --tsast --tspp` (Lean 3) or :file:`ExtractData.lean` (Lean 4).
Args:
root_dir (Union[str, Path]): Root directory of the traced repo.
+ build_deps (bool, optional): Whether to build the dependency graph between files.
"""
root_dir = Path(root_dir).resolve()
if not is_git_repo(root_dir):
@@ -1410,22 +1419,17 @@ def from_traced_files(cls, root_dir: Union[str, Path]) -> "TracedRepo":
)
dependencies = repo.get_dependencies(root_dir)
- traced_files_graph = _build_dependency_graph(traced_files, root_dir, repo)
- traced_repo = cls(repo, dependencies, root_dir, traced_files_graph)
+ if build_deps:
+ traced_files_graph = _build_dependency_graph(traced_files, root_dir, repo)
+ else:
+ traced_files_graph = None
+
+ traced_repo = cls(
+ repo, dependencies, root_dir, traced_files, traced_files_graph
+ )
traced_repo._update_traced_files()
return traced_repo
- @property
- def traced_files(self) -> Generator[TracedFile, None, None]:
- """Return an iterator of traced files from their dependency graph."""
- for _, tf_node in self.traced_files_graph.nodes.items():
- yield tf_node["traced_file"]
-
- @property
- def num_traced_files(self) -> int:
- """Number of traced files in the repo."""
- return len(self.traced_files_graph)
-
def get_traced_file(self, path: Union[str, Path]) -> TracedFile:
"""Return a traced file by its path."""
return self.traced_files_graph.nodes[str(path)]["traced_file"]
@@ -1436,11 +1440,12 @@ def _update_traced_files(self) -> None:
def save_to_disk(self) -> None:
"""Save all traced files in the repo to the disk as :file:`*.trace.xml` files."""
+ num_traced_files = len(self.traced_files)
logger.debug(
- f"Saving {self.num_traced_files} traced XML files to {self.root_dir} with {NUM_WORKERS} workers"
+ f"Saving {num_traced_files} traced XML files to {self.root_dir} with {NUM_WORKERS} workers"
)
if NUM_WORKERS <= 1:
- for tf in tqdm(self.traced_files, total=self.num_traced_files):
+ for tf in tqdm(self.traced_files, total=num_traced_files):
_save_xml_to_disk(tf)
else:
with ray_actor_pool(_TracedRepoHelper, self.root_dir, self.repo) as pool:
@@ -1450,12 +1455,14 @@ def save_to_disk(self) -> None:
lambda a, tf: a.save_xml_to_disk.remote(tf),
self.traced_files,
),
- total=self.num_traced_files,
+ total=num_traced_files,
)
)
@classmethod
- def load_from_disk(cls, root_dir: Union[str, Path]) -> "TracedRepo":
+ def load_from_disk(
+ cls, root_dir: Union[str, Path], build_deps: bool = True
+ ) -> "TracedRepo":
"""Load a traced repo from :file:`*.trace.xml` files."""
root_dir = Path(root_dir).resolve()
if not is_git_repo(root_dir):
@@ -1492,8 +1499,14 @@ def load_from_disk(cls, root_dir: Union[str, Path]) -> "TracedRepo":
)
dependencies = repo.get_dependencies(root_dir)
- traced_files_graph = _build_dependency_graph(traced_files, root_dir, repo)
- traced_repo = cls(repo, dependencies, root_dir, traced_files_graph)
+ if build_deps:
+ traced_files_graph = _build_dependency_graph(traced_files, root_dir, repo)
+ else:
+ traced_files_graph = None
+
+ traced_repo = cls(
+ repo, dependencies, root_dir, traced_files, traced_files_graph
+ )
traced_repo._update_traced_files()
return traced_repo
diff --git a/src/lean_dojo/utils.py b/src/lean_dojo/utils.py
index 1a691762..0f792e60 100644
--- a/src/lean_dojo/utils.py
+++ b/src/lean_dojo/utils.py
@@ -292,7 +292,11 @@ def _from_lean_path(root_dir: Path, path: Path, repo, ext: str) -> Path:
build_dir = repo.get_build_dir()
assert root_dir.name != "lean4"
- if path.is_relative_to(packages_dir / "lean4/src"):
+ if path.is_relative_to(packages_dir / "lean4/src/lean/lake"):
+ # E.g., "lake-packages/lean4/src/lean/lake/Lake/CLI/Error.lean"
+ p = path.relative_to(packages_dir / "lean4/src/lean/lake")
+ return packages_dir / "lean4/lib/lean" / p.with_suffix(ext)
+ elif path.is_relative_to(packages_dir / "lean4/src"):
# E.g., "lake-packages/lean4/src/lean/Init.lean"
p = path.relative_to(packages_dir / "lean4/src").with_suffix(ext)
return packages_dir / "lean4/lib" / p
@@ -335,7 +339,13 @@ def to_lean_path(root_dir: Path, path: Path, repo) -> bool:
build_dir = repo.get_build_dir()
assert root_dir.name != "lean4"
- if path.is_relative_to(packages_dir / "lean4/lib"):
+ if path == packages_dir / "lean4/lib/lean/Lake.lean":
+ return packages_dir / "lean4/src/lean/lake/Lake.lean"
+ elif path.is_relative_to(packages_dir / "lean4/lib/lean/Lake"):
+ # E.g., "lake-packages/lean4/lib/lean/Lake/Util/List.lean"
+ p = path.relative_to(packages_dir / "lean4/lib/lean/Lake")
+ return packages_dir / "lean4/src/lean/lake/Lake" / p
+ elif path.is_relative_to(packages_dir / "lean4/lib"):
# E.g., "lake-packages/lean4/lib/lean/Init.lean"
p = path.relative_to(packages_dir / "lean4/lib")
return packages_dir / "lean4/src" / p