diff --git a/pyproject.toml b/pyproject.toml index 617b06dc..be069149 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ exclude = [ [project] name = "lean-dojo" -version = "1.4.5" +version = "1.5.0rc" authors = [ { name="Kaiyu Yang", email="kaiyuy@caltech.edu" }, ] diff --git a/scripts/generate-benchmark-lean3.ipynb b/scripts/generate-benchmark-lean3.ipynb index b76598d5..a1a3db36 100644 --- a/scripts/generate-benchmark-lean3.ipynb +++ b/scripts/generate-benchmark-lean3.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "id": "cda71d78", "metadata": {}, "outputs": [], @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "id": "acdb4e64", "metadata": {}, "outputs": [], @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "id": "0d7bcf76", "metadata": {}, "outputs": [], @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "id": "a9e1fe70", "metadata": {}, "outputs": [], @@ -140,7 +140,6 @@ " # Figure out the number of theorems in train/val/test.\n", " num_theorems = len(traced_theorems)\n", " num_val_test = NUM_VAL + NUM_TEST\n", - " num_train = num_theorems - num_val_test\n", " theorems_val_test = set()\n", "\n", " # Map each premise to a list of theorems using it.\n", @@ -179,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 12, "id": "8509a475", "metadata": {}, "outputs": [], @@ -206,20 +205,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "id": "ac984a99", "metadata": {}, "outputs": [], "source": [ - "def _get_file_path(traced_repo: TracedRepo, thm: TracedTheorem) -> str:\n", - " if thm.repo == traced_repo.repo:\n", - " # The theorem belongs to the traced repo itself.\n", - " return str(thm.theorem.file_path)\n", - " else:\n", - " # The theorem belongs to one of the dependencies.\n", - " return f\"{LEAN3_PACKAGES_DIR}/{thm.repo.name}/{thm.theorem.file_path}\"\n", - "\n", - "\n", "def export_proofs(\n", " traced_repo: TracedRepo, splits: Dict[SPLIT_STRATEGY, SPLIT], dst_path: Path\n", ") -> None:\n", @@ -249,7 +239,7 @@ " {\n", " \"url\": thm.repo.url,\n", " \"commit\": thm.repo.commit,\n", - " \"file_path\": _get_file_path(traced_repo, thm),\n", + " \"file_path\": str(thm.theorem.file_path),\n", " \"full_name\": thm.theorem.full_name,\n", " \"start\": list(thm.start),\n", " \"end\": list(thm.end),\n", @@ -283,7 +273,7 @@ " + \"\\n\"\n", " )\n", " logger.info(\n", - " f\"{num_premises} theorems/definitions from {traced_repo.num_traced_files} files saved to {oup_path}\"\n", + " f\"{num_premises} theorems/definitions from {len(traced_repo.traced_files)} files saved to {oup_path}\"\n", " )\n", "\n", "\n", @@ -354,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "id": "59eaebb3", "metadata": { "scrolled": true @@ -364,113 +354,114 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-01-05 08:04:17.816\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m182\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/leanprover-community-mathlib-19c869efa56bbb8b500f2724c0b77261edbfa28c/mathlib\u001b[0m\n", - "2024-01-05 08:04:19,851\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n", - "100%|██████████| 3384/3384 [04:14<00:00, 13.30it/s] \n", - "\u001b[32m2024-01-05 08:08:58.885\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_data\u001b[0m:\u001b[36m3\u001b[0m - \u001b[1m98734 theorems in total\u001b[0m\n", - "\u001b[32m2024-01-05 08:08:58.886\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_randomly\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mSplitting the theorems randomly\u001b[0m\n", - "\u001b[32m2024-01-05 08:08:58.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_by_premise\u001b[0m:\u001b[36m8\u001b[0m - \u001b[1mSplitting the theorems by premises\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.056\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.817\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.817\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.818\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.819\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.820\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.820\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.821\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.821\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.821\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.821\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.822\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.822\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.823\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.823\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.823\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.824\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:05.921\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.453\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.454\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.455\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.455\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.456\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.456\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.456\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:06.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:08.334\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:08.334\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:08.661\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:08.662\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:10.228\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:10.509\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:10.510\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:11.668\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:11.668\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:12.193\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:19.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m94734 theorems and 208949 tactics saved to ../leandojo_benchmark/random/train.json\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:20.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m2000 theorems and 4561 tactics saved to ../leandojo_benchmark/random/val.json\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:20.235\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:20.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m2000 theorems and 4266 tactics saved to ../leandojo_benchmark/random/test.json\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:24.907\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:24.908\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:24.974\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:24.975\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:25.415\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.436\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.438\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.456\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.457\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.458\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.459\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.460\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.461\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.461\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.461\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.462\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.462\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.462\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.463\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.463\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.464\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.482\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.483\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.484\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.484\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.485\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.485\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.485\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.486\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.486\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.487\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.487\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.488\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.488\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.488\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.489\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.489\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.489\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.490\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.490\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.490\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.491\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:26.491\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:33.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m94734 theorems and 191184 tactics saved to ../leandojo_benchmark/novel_premises/train.json\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:34.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m2000 theorems and 13321 tactics saved to ../leandojo_benchmark/novel_premises/val.json\u001b[0m\n", - "\u001b[32m2024-01-05 08:09:35.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m2000 theorems and 13271 tactics saved to ../leandojo_benchmark/novel_premises/test.json\u001b[0m\n", - "\u001b[32m2024-01-05 08:10:05.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m72\u001b[0m - \u001b[1m130283 theorems/definitions from 3384 files saved to ../leandojo_benchmark/corpus.jsonl\u001b[0m\n" + "\u001b[32m2024-01-08 12:50:44.659\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m193\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/leanprover-community-mathlib-19c869efa56bbb8b500f2724c0b77261edbfa28c/mathlib\u001b[0m\n", + "2024-01-08 12:50:49,468\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n", + "100%|██████████| 3384/3384 [05:19<00:00, 10.58it/s] \n", + "\u001b[32m2024-01-08 12:56:33.412\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_data\u001b[0m:\u001b[36m3\u001b[0m - \u001b[1m98734 theorems in total\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:33.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_randomly\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mSplitting the theorems randomly\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:33.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_by_premise\u001b[0m:\u001b[36m8\u001b[0m - \u001b[1mSplitting the theorems by premises\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:39.143\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m109\u001b[0m - \u001b[33m\u001b[1m../leandojo_benchmark already exists. Removing it now.\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.115\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.115\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.115\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.116\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.116\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.118\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.118\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.656\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:40.657\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:41.625\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:43.772\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:44.077\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:44.429\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:44.430\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.043\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.044\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.045\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.045\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.046\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.046\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.047\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.047\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.047\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.048\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.048\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.049\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.050\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.050\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:45.749\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:46.377\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:46.379\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:46.649\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:48.358\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:48.359\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:55.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m94734 theorems and 208460 tactics saved to ../leandojo_benchmark/random/train.json\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:55.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m2000 theorems and 4652 tactics saved to ../leandojo_benchmark/random/val.json\u001b[0m\n", + "\u001b[32m2024-01-08 12:56:56.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m2000 theorems and 4664 tactics saved to ../leandojo_benchmark/random/test.json\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:00.367\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:00.369\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:00.442\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:00.443\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:00.710\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:01.998\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:01.999\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.000\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.000\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.000\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.001\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.001\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.002\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.002\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.003\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.003\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.003\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.004\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.004\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.005\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.005\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.006\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.006\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.038\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.040\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.104\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.105\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.ind\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.107\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.107\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.108\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.108\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.108\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.109\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.109\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.110\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.lift\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.111\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.112\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.113\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:02.114\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:09.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m94734 theorems and 191524 tactics saved to ../leandojo_benchmark/novel_premises/train.json\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:10.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m2000 theorems and 12678 tactics saved to ../leandojo_benchmark/novel_premises/val.json\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:11.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m2000 theorems and 13574 tactics saved to ../leandojo_benchmark/novel_premises/test.json\u001b[0m\n", + "\u001b[32m2024-01-08 12:57:44.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m63\u001b[0m - \u001b[1m130283 theorems/definitions from 3384 files saved to ../leandojo_benchmark/corpus.jsonl\u001b[0m\n" ] } ], @@ -524,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 15, "id": "b1b97c12", "metadata": {}, "outputs": [ @@ -550,7 +541,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 16, "id": "0eefe7b8", "metadata": {}, "outputs": [ @@ -560,7 +551,7 @@ "dict_keys(['path', 'imports', 'premises'])" ] }, - "execution_count": 10, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -582,20 +573,21 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 17, "id": "05e1740f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "('src/data/polynomial/induction.lean',\n", - " ['src/ring_theory/ideal/basic.lean',\n", - " '_target/deps/lean/library/init/default.lean',\n", - " 'src/data/polynomial/basic.lean'])" + "('src/linear_algebra/affine_space/affine_equiv.lean',\n", + " ['_target/deps/lean/library/init/default.lean',\n", + " 'src/linear_algebra/general_linear_group.lean',\n", + " 'src/linear_algebra/affine_space/affine_map.lean',\n", + " 'src/algebra/invertible.lean'])" ] }, - "execution_count": 11, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -606,17 +598,17 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 18, "id": "04e6e71f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "5" + "94" ] }, - "execution_count": 12, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -635,21 +627,21 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 19, "id": "c4b447b0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'full_name': 'polynomial.induction_on',\n", - " 'code': '@[elab_as_eliminator] protected lemma induction_on {M : R[X] → Prop} (p : R[X])\\n (h_C : ∀a, M (C a))\\n (h_add : ∀p q, M p → M q → M (p + q))\\n (h_monomial : ∀(n : ℕ) (a : R), M (C a * X^n) → M (C a * X^(n+1))) :\\n M p',\n", - " 'start': [34, 1],\n", - " 'end': [51, 4],\n", - " 'kind': 'theorem'}" + "{'full_name': 'affine_equiv',\n", + " 'code': \"@[nolint has_nonempty_instance]\\nstructure affine_equiv (k P₁ P₂ : Type*) {V₁ V₂ : Type*} [ring k]\\n [add_comm_group V₁] [module k V₁] [add_torsor V₁ P₁]\\n [add_comm_group V₂] [module k V₂] [add_torsor V₂ P₂] extends P₁ ≃ P₂ :=\\n(linear : V₁ ≃ₗ[k] V₂)\\n(map_vadd' : ∀ (p : P₁) (v : V₁), to_equiv (v +ᵥ p) = linear v +ᵥ to_equiv p)\",\n", + " 'start': [46, 1],\n", + " 'end': [51, 78],\n", + " 'kind': 'structure'}" ] }, - "execution_count": 13, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -673,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 20, "id": "a59b38b6", "metadata": {}, "outputs": [ @@ -683,7 +675,7 @@ "94734" ] }, - "execution_count": 14, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -704,7 +696,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 21, "id": "9dc67dd2", "metadata": {}, "outputs": [ @@ -714,13 +706,13 @@ "dict_keys(['url', 'commit', 'file_path', 'full_name', 'start', 'end', 'traced_tactics'])" ] }, - "execution_count": 15, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "for proof in proofs_train[::-1]:\n", + "for proof in proofs_train:\n", " if proof[\"traced_tactics\"] != []:\n", " break\n", "proof.keys()" @@ -728,20 +720,20 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 22, "id": "cc2bc340", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "('https://github.com/leanprover-community/mathlib',\n", - " '19c869efa56bbb8b500f2724c0b77261edbfa28c',\n", - " 'src/data/rat/cast.lean',\n", - " 'rat.cast_mk')" + "('https://github.com/leanprover-community/lean',\n", + " 'cce7990ea86a78bdb383e38ed7f9b5ba93c60ce0',\n", + " 'library/init/data/array/basic.lean',\n", + " 'd_array.of_beq_aux_eq_tt')" ] }, - "execution_count": 16, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -760,17 +752,17 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 23, "id": "00315e27", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1" + "13" ] }, - "execution_count": 17, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -789,29 +781,32 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 24, "id": "52769d80", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'tactic': 'simp only [mk_eq_div, cast_div, cast_coe_int]',\n", - " 'annotated_tactic': ['simp only [mk_eq_div, cast_div, cast_coe_int]',\n", - " [{'full_name': 'rat.mk_eq_div',\n", - " 'def_path': 'src/data/rat/defs.lean',\n", - " 'def_pos': [535, 9]},\n", - " {'full_name': 'rat.cast_div',\n", - " 'def_path': 'src/data/rat/cast.lean',\n", - " 'def_pos': [204, 28]},\n", - " {'full_name': 'rat.cast_coe_int',\n", - " 'def_path': 'src/data/rat/cast.lean',\n", - " 'def_pos': [41, 28]}]],\n", - " 'state_before': 'α : Type u_3,\\n_inst_1 : division_ring α,\\n_inst_2 : char_zero α,\\na b : ℤ\\n⊢ ↑(a /. b) = ↑a / ↑b',\n", - " 'state_after': 'no goals'}" + "{'tactic': \"have h₂' : read a ⟨i, h₁⟩ = read b ⟨i, h₁⟩ ∧ d_array.beq_aux a b i _ = tt\",\n", + " 'annotated_tactic': [\"have h₂' : read a ⟨i, h₁⟩ = read b ⟨i, h₁⟩ ∧ d_array.beq_aux a b i _ = tt\",\n", + " [{'full_name': 'd_array.read',\n", + " 'def_path': '_target/deps/lean/library/init/data/array/basic.lean',\n", + " 'def_pos': [22, 5]},\n", + " {'full_name': 'd_array.read',\n", + " 'def_path': '_target/deps/lean/library/init/data/array/basic.lean',\n", + " 'def_pos': [22, 5]},\n", + " {'full_name': 'd_array.beq_aux',\n", + " 'def_path': '_target/deps/lean/library/init/data/array/basic.lean',\n", + " 'def_pos': [74, 15]},\n", + " {'full_name': 'bool.tt',\n", + " 'def_path': '_target/deps/lean/library/init/core.lean',\n", + " 'def_pos': [242, 11]}]],\n", + " 'state_before': \"n : ℕ,\\nα : fin n → Type u,\\n_inst_1 : Π (i : fin n), decidable_eq (α i),\\na b : d_array n α,\\nof_beq_aux_eq_tt :\\n ∀ (i : ℕ) (h : i ≤ n),\\n a.beq_aux b i h = tt → ∀ (j : ℕ) (h' : j < i), a.read ⟨j, _⟩ = b.read ⟨j, _⟩,\\ni : ℕ,\\nh₁ : i + 1 ≤ n,\\nh₂ : a.beq_aux b (i + 1) h₁ = tt,\\nj : ℕ,\\nh₃ : j < i + 1\\n⊢ a.read ⟨j, _⟩ = b.read ⟨j, _⟩\",\n", + " 'state_after': \"3 goals\\nn : ℕ,\\nα : fin n → Type u,\\n_inst_1 : Π (i : fin n), decidable_eq (α i),\\na b : d_array n α,\\nof_beq_aux_eq_tt :\\n ∀ (i : ℕ) (h : i ≤ n),\\n a.beq_aux b i h = tt → ∀ (j : ℕ) (h' : j < i), a.read ⟨j, _⟩ = b.read ⟨j, _⟩,\\ni : ℕ,\\nh₁ : i + 1 ≤ n,\\nh₂ : a.beq_aux b (i + 1) h₁ = tt,\\nj : ℕ,\\nh₃ : j < i + 1\\n⊢ a.read ⟨i, h₁⟩ = b.read ⟨i, h₁⟩ ∧ a.beq_aux b i ?m_1 = tt\\n\\nn : ℕ,\\nα : fin n → Type u,\\n_inst_1 : Π (i : fin n), decidable_eq (α i),\\na b : d_array n α,\\nof_beq_aux_eq_tt :\\n ∀ (i : ℕ) (h : i ≤ n),\\n a.beq_aux b i h = tt → ∀ (j : ℕ) (h' : j < i), a.read ⟨j, _⟩ = b.read ⟨j, _⟩,\\ni : ℕ,\\nh₁ : i + 1 ≤ n,\\nh₂ : a.beq_aux b (i + 1) h₁ = tt,\\nj : ℕ,\\nh₃ : j < i + 1,\\nh₂' : a.read ⟨i, h₁⟩ = b.read ⟨i, h₁⟩ ∧ a.beq_aux b i ?m_1 = tt\\n⊢ a.read ⟨j, _⟩ = b.read ⟨j, _⟩\\n\\nn : ℕ,\\nα : fin n → Type u,\\n_inst_1 : Π (i : fin n), decidable_eq (α i),\\na b : d_array n α,\\nof_beq_aux_eq_tt :\\n ∀ (i : ℕ) (h : i ≤ n),\\n a.beq_aux b i h = tt → ∀ (j : ℕ) (h' : j < i), a.read ⟨j, _⟩ = b.read ⟨j, _⟩,\\ni : ℕ,\\nh₁ : i + 1 ≤ n,\\nh₂ : a.beq_aux b (i + 1) h₁ = tt,\\nj : ℕ,\\nh₃ : j < i + 1\\n⊢ i ≤ n\"}" ] }, - "execution_count": 19, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -840,7 +835,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 25, "id": "bc457ce9", "metadata": {}, "outputs": [ @@ -848,14 +843,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-01-05 08:18:03.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m182\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/facebookresearch-miniF2F-5271ddec788677c815cf818a06f368ef6498a106/miniF2F\u001b[0m\n", - "2024-01-05 08:18:08,327\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n", - "100%|██████████| 1159/1159 [02:23<00:00, 8.05it/s] \n", - "\u001b[32m2024-01-05 08:20:47.523\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m118\u001b[0m - \u001b[33m\u001b[1m../leandojo_minif2f already exists. Removing it now.\u001b[0m\n", - "\u001b[32m2024-01-05 08:20:47.565\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m244 theorems and 549 tactics saved to ../leandojo_minif2f/default/val.json\u001b[0m\n", - "\u001b[32m2024-01-05 08:20:47.575\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", - "\u001b[32m2024-01-05 08:20:47.599\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m245 theorems and 781 tactics saved to ../leandojo_minif2f/default/test.json\u001b[0m\n", - "\u001b[32m2024-01-05 08:21:02.156\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m72\u001b[0m - \u001b[1m67170 theorems/definitions from 1159 files saved to ../leandojo_minif2f/corpus.jsonl\u001b[0m\n" + "\u001b[32m2024-01-08 13:03:50.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m193\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/facebookresearch-miniF2F-5271ddec788677c815cf818a06f368ef6498a106/miniF2F\u001b[0m\n", + "2024-01-08 13:03:58,678\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n", + "100%|██████████| 1159/1159 [02:29<00:00, 7.77it/s] \n", + "\u001b[32m2024-01-08 13:06:43.405\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m109\u001b[0m - \u001b[33m\u001b[1m../leandojo_minif2f already exists. Removing it now.\u001b[0m\n", + "\u001b[32m2024-01-08 13:06:43.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m244 theorems and 549 tactics saved to ../leandojo_minif2f/default/val.json\u001b[0m\n", + "\u001b[32m2024-01-08 13:06:43.445\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.traced_data\u001b[0m:\u001b[36m_callback3\u001b[0m:\u001b[36m226\u001b[0m - \u001b[33m\u001b[1mUnable to locate quot.mk\u001b[0m\n", + "\u001b[32m2024-01-08 13:06:43.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m245 theorems and 781 tactics saved to ../leandojo_minif2f/default/test.json\u001b[0m\n", + "\u001b[32m2024-01-08 13:06:58.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m63\u001b[0m - \u001b[1m67170 theorems/definitions from 1159 files saved to ../leandojo_minif2f/corpus.jsonl\u001b[0m\n" ] } ], @@ -884,7 +879,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 26, "id": "12f436c7", "metadata": {}, "outputs": [ @@ -892,12 +887,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-01-05 08:21:03.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m182\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/zhangir-azerbayev-ProofNet-e8645aa830ce17c33a8b8482a8195f0f97d6a74a/ProofNet\u001b[0m\n", - "2024-01-05 08:21:09,772\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n", - "100%|██████████| 1539/1539 [01:53<00:00, 13.51it/s] \n", - "\u001b[32m2024-01-05 08:23:21.624\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m118\u001b[0m - \u001b[33m\u001b[1m../leandojo_proofnet already exists. Removing it now.\u001b[0m\n", - "\u001b[32m2024-01-05 08:23:21.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1m374 theorems and 460 tactics saved to ../leandojo_proofnet/default/test.json\u001b[0m\n", - "\u001b[32m2024-01-05 08:23:38.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m72\u001b[0m - \u001b[1m82365 theorems/definitions from 1539 files saved to ../leandojo_proofnet/corpus.jsonl\u001b[0m\n" + "\u001b[32m2024-01-08 13:07:00.088\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m193\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/zhangir-azerbayev-ProofNet-e8645aa830ce17c33a8b8482a8195f0f97d6a74a/ProofNet\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-01-08 13:07:09,769\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n", + "100%|██████████| 1539/1539 [03:06<00:00, 8.25it/s] \n", + "\u001b[32m2024-01-08 13:10:34.216\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_data\u001b[0m:\u001b[36m109\u001b[0m - \u001b[33m\u001b[1m../leandojo_proofnet already exists. Removing it now.\u001b[0m\n", + "\u001b[32m2024-01-08 13:10:34.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m39\u001b[0m - \u001b[1m374 theorems and 460 tactics saved to ../leandojo_proofnet/default/test.json\u001b[0m\n", + "\u001b[32m2024-01-08 13:10:52.466\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m63\u001b[0m - \u001b[1m82365 theorems/definitions from 1539 files saved to ../leandojo_proofnet/corpus.jsonl\u001b[0m\n" ] } ], diff --git a/scripts/generate-benchmark-lean4.ipynb b/scripts/generate-benchmark-lean4.ipynb index 7d14c396..4021a78f 100644 --- a/scripts/generate-benchmark-lean4.ipynb +++ b/scripts/generate-benchmark-lean4.ipynb @@ -275,7 +275,7 @@ " + \"\\n\"\n", " )\n", " logger.info(\n", - " f\"{num_premises} theorems/definitions from {traced_repo.num_traced_files} files saved to {oup_path}\"\n", + " f\"{num_premises} theorems/definitions from {len(traced_repo.traced_files)} files saved to {oup_path}\"\n", " )\n", "\n", "\n", diff --git a/src/lean_dojo/constants.py b/src/lean_dojo/constants.py index 26b64ed9..069ef8e7 100644 --- a/src/lean_dojo/constants.py +++ b/src/lean_dojo/constants.py @@ -15,7 +15,7 @@ load_dotenv() -__version__ = "1.4.5" +__version__ = "1.5.0rc" logger.remove() if "VERBOSE" in os.environ or "DEBUG" in os.environ: diff --git a/src/lean_dojo/container.py b/src/lean_dojo/container.py index f23e5fd7..d3d7c7b7 100644 --- a/src/lean_dojo/container.py +++ b/src/lean_dojo/container.py @@ -103,7 +103,7 @@ def _copy_file_or_dir(src: Path, dst: Path) -> None: assert src.is_dir() and not src.is_relative_to(dst) if dst.exists(): shutil.rmtree(dst) - shutil.copytree(src, dst) + shutil.copytree(src, dst, symlinks=True) class NativeContainer(Container): diff --git a/src/lean_dojo/data_extraction/ExtractData.lean b/src/lean_dojo/data_extraction/ExtractData.lean index ebf66042..03578e98 100644 --- a/src/lean_dojo/data_extraction/ExtractData.lean +++ b/src/lean_dojo/data_extraction/ExtractData.lean @@ -205,7 +205,10 @@ Convert the path of a *.lean file to its corresponding file (e.g., *.olean) in t def toBuildDir (subDir : FilePath) (path : FilePath) (ext : String) : Option FilePath := let path' := (trim path).withExtension ext match relativeTo path' $ packagesDir / "lean4/src" with - | some p => packagesDir / "lean4/lib" / p + | some p => + match relativeTo p "lean/lake" with + | some p' => packagesDir / "lean4/lib/lean" / p' + | none => packagesDir / "lean4/lib" / p | none => match relativeTo path' packagesDir with | some p => match p.components with @@ -448,6 +451,7 @@ unsafe def processFile (path : FilePath) : IO Unit := do continue if found then p := p ++ FilePath.pathSeparator.toString ++ c + p := p.replace "/lean4/src/lean/Lake" "/lean4/src/lean/lake/Lake" assert! ← FilePath.mk p |>.pathExists s := s ++ "\n" ++ p @@ -464,16 +468,17 @@ open LeanDojo /-- Whether a *.lean file should be traced. -/ -def shouldProcess (path : FilePath) (noDep : Bool) : IO Bool := do +def shouldProcess (path : FilePath) (noDeps : Bool) : IO Bool := do if (← path.isDir) ∨ path.extension != "lean" then return false - if noDep ∧ Path.isRelativeTo path Path.packagesDir then - return false - let cwd ← IO.currentDir let some relativePath := Path.relativeTo path cwd | throw $ IO.userError s!"Invalid path: {path}" + + if noDeps ∧ Path.isRelativeTo relativePath Path.packagesDir then + return false + let some oleanPath := Path.toBuildDir "lib" relativePath "olean" | throw $ IO.userError s!"Invalid path: {path}" return ← oleanPath.pathExists @@ -482,14 +487,14 @@ def shouldProcess (path : FilePath) (noDep : Bool) : IO Bool := do /-- Trace all *.lean files in the current directory whose corresponding *.olean file exists. -/ -def processAllFiles (noDep : Bool) : IO Unit := do +def processAllFiles (noDeps : Bool) : IO Unit := do let cwd ← IO.currentDir assert! cwd.fileName != "lean4" println! "Extracting data at {cwd}" let mut tasks := #[] for path in ← System.FilePath.walkDir cwd do - if ← shouldProcess path noDep then + if ← shouldProcess path noDeps then println! path let t ← IO.asTask $ IO.Process.run {cmd := "lake", args := #["env", "lean", "--run", "ExtractData.lean", path.toString]} @@ -506,6 +511,6 @@ def processAllFiles (noDep : Bool) : IO Unit := do unsafe def main (args : List String) : IO Unit := do match args with - | "nodep" :: _ => processAllFiles true + | "nodeps" :: _ => processAllFiles true | path :: _ => processFile (← Path.toAbsolute ⟨path⟩) | [] => processAllFiles false diff --git a/src/lean_dojo/data_extraction/build_lean4_repo.py b/src/lean_dojo/data_extraction/build_lean4_repo.py index eb41126b..1e11a732 100644 --- a/src/lean_dojo/data_extraction/build_lean4_repo.py +++ b/src/lean_dojo/data_extraction/build_lean4_repo.py @@ -4,8 +4,8 @@ """ import os import re -import sys import shutil +import argparse import itertools import subprocess from tqdm import tqdm @@ -104,16 +104,23 @@ def get_lean_version() -> str: return m["version"] -def check_files() -> None: +def check_files(packages_path: str, no_deps: bool) -> None: """Check if all *.lean files have been processed to produce *.ast.json and *.dep_paths files.""" cwd = Path.cwd() jsons = { - p.with_suffix("").with_suffix("") for p in cwd.glob("**/build/ir/**/*.ast.json") + p.with_suffix("").with_suffix("") + for p in cwd.glob("**/build/ir/**/*.ast.json") + if not no_deps or not p.is_relative_to(packages_path) + } + deps = { + p.with_suffix("") + for p in cwd.glob("**/build/ir/**/*.dep_paths") + if not no_deps or not p.is_relative_to(packages_path) } - deps = {p.with_suffix("") for p in cwd.glob("**/build/ir/**/*.dep_paths")} oleans = { Path(str(p.with_suffix("")).replace("/build/lib/", "/build/ir/")) for p in cwd.glob("**/build/lib/**/*.olean") + if not no_deps or not p.is_relative_to(packages_path) } assert len(jsons) <= len(oleans) and len(deps) <= len(oleans) missing_jsons = {p.with_suffix(".ast.json") for p in oleans - jsons} @@ -123,7 +130,7 @@ def check_files() -> None: logger.warning(f"Missing {p}") -def is_new_version(v) -> bool: +def is_new_version(v: str) -> bool: """Check if ``v`` is at least `4.3.0-rc2`.""" major, minor, patch = [int(_) for _ in v.split("-")[0].split(".")] if major < 4 or (major == 4 and minor < 3): @@ -143,12 +150,23 @@ def is_new_version(v) -> bool: def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("repo_name") + parser.add_argument("--no-deps", action="store_true") + args = parser.parse_args() + num_procs = int(os.environ["NUM_PROCS"]) - repo_name = sys.argv[1] + repo_name = args.repo_name os.chdir(repo_name) # Build the repo using lake. logger.info(f"Building {repo_name}") + if args.no_deps: + # The additional *.olean files wouldn't matter. + try: + run_cmd("lake exe cache get") + except subprocess.CalledProcessError: + pass run_cmd("lake build") # Copy the Lean 4 stdlib into the path of packages. @@ -162,14 +180,17 @@ def main() -> None: shutil.copytree(lean_prefix, f"{packages_path}/lean4") # Run ExtractData.lean to extract ASTs, tactic states, and premise information. + dirs_to_monitor = [build_path] + if not args.no_deps: + dirs_to_monitor.append(packages_path) logger.info(f"Tracing {repo_name}") - with launch_progressbar([build_path, packages_path]): - run_cmd( - f"lake env lean --threads {num_procs} --run ExtractData.lean", - capture_output=True, - ) + with launch_progressbar(dirs_to_monitor): + cmd = f"lake env lean --threads {num_procs} --run ExtractData.lean" + if args.no_deps: + cmd += " nodeps" + run_cmd(cmd, capture_output=True) - check_files() + check_files(packages_path, args.no_deps) if __name__ == "__main__": diff --git a/src/lean_dojo/data_extraction/cache.py b/src/lean_dojo/data_extraction/cache.py index b876e8ef..52afb761 100644 --- a/src/lean_dojo/data_extraction/cache.py +++ b/src/lean_dojo/data_extraction/cache.py @@ -87,15 +87,13 @@ def get(self, url: str, commit: str) -> Optional[Path]: def store(self, src: Path) -> Path: """Store a traced repo at path ``src``. Return its path in the cache.""" - dirs = list(src.glob("*")) - assert len(dirs) == 1, f"Unexpected number of directories in {src}" - url, commit = get_repo_info(dirs[0]) + url, commit = get_repo_info(src) dirpath = self.cache_dir / _format_dirname(url, commit) + _, repo_name = _split_git_url(url) if not dirpath.exists(): with self.lock: with report_critical_failure(_CACHE_CORRPUTION_MSG): - shutil.copytree(src, dirpath) - _, repo_name = _split_git_url(url) + shutil.copytree(src, dirpath / repo_name) return dirpath / repo_name diff --git a/src/lean_dojo/data_extraction/lean.py b/src/lean_dojo/data_extraction/lean.py index 075f2d6a..a4379439 100644 --- a/src/lean_dojo/data_extraction/lean.py +++ b/src/lean_dojo/data_extraction/lean.py @@ -601,7 +601,7 @@ def _get_lean4_dependencies( try: lake_manifest = ( - self.get_config("lake-manifest.json") + self.get_config("lake-manifest.json", num_retries=0) if path is None else json.load((Path(path) / "lake-manifest.json").open()) ) @@ -638,10 +638,10 @@ def _get_config_url(self, filename: str) -> str: url = self.url.replace("github.com", "raw.githubusercontent.com") return f"{url}/{self.commit}/{filename}" - def get_config(self, filename: str) -> Dict[str, Any]: + def get_config(self, filename: str, num_retries: int = 2) -> Dict[str, Any]: """Return the repo's files.""" config_url = self._get_config_url(filename) - content = read_url(config_url) + content = read_url(config_url, num_retries) if filename.endswith(".toml"): return toml.loads(content) elif filename.endswith(".json"): diff --git a/src/lean_dojo/data_extraction/trace.py b/src/lean_dojo/data_extraction/trace.py index 1cdc2375..a4707293 100644 --- a/src/lean_dojo/data_extraction/trace.py +++ b/src/lean_dojo/data_extraction/trace.py @@ -55,15 +55,16 @@ def _modify_lean3(version: str) -> None: raise ex -def _trace(repo: LeanGitRepo) -> None: +def _trace(repo: LeanGitRepo, build_deps: bool) -> None: assert ( repo.exists() ), f"The {repo} does not exist. Please check the URL `{repo.commit_url}`." if repo.uses_lean3: + assert build_deps _trace_lean3(repo) else: assert repo.uses_lean4 - _trace_lean4(repo) + _trace_lean4(repo, build_deps) def _trace_lean3(repo: LeanGitRepo) -> None: @@ -101,7 +102,7 @@ def _trace_lean3(repo: LeanGitRepo) -> None: raise ex -def _trace_lean4(repo: LeanGitRepo) -> None: +def _trace_lean4(repo: LeanGitRepo, build_deps: bool) -> None: # Trace `repo` in the current working directory. assert not repo.is_lean4, "Cannot trace the Lean 4 repo itself." repo.clone_and_checkout() @@ -113,9 +114,13 @@ def _trace_lean4(repo: LeanGitRepo) -> None: LEAN4_BUILD_SCRIPT_PATH: f"/workspace/{LEAN4_BUILD_SCRIPT_PATH.name}", LEAN4_DATA_EXTRACTOR_PATH: f"/workspace/{repo.name}/{LEAN4_DATA_EXTRACTOR_PATH.name}", } + cmd = f"python3 build_lean4_repo.py {repo.name}" + if not build_deps: + cmd += " --no-deps" + try: container.run( - f"python3 build_lean4_repo.py {repo.name}", + cmd, create_mounts(mts), {"NUM_PROCS": NUM_PROCS}, as_current_user=True, @@ -134,13 +139,14 @@ def is_available_in_cache(repo: LeanGitRepo) -> bool: return cache.get(repo.url, repo.commit) is not None -def get_traced_repo_path(repo: LeanGitRepo) -> Path: +def get_traced_repo_path(repo: LeanGitRepo, build_deps: bool = True) -> Path: """Return the path of a traced repo in the cache. The function will trace a repo if it is not available in the cache. See :ref:`caching` for details. Args: repo (LeanGitRepo): The Lean repo to trace. + build_deps (bool): Whether to build the dependencies of ``repo``. Defaults to True. Returns: Path: The path of the traced repo in the cache, e.g. :file:`/home/kaiyu/.cache/lean_dojo/leanprover-community-mathlib-2196ab363eb097c008d4497125e0dde23fb36db2` @@ -150,16 +156,20 @@ def get_traced_repo_path(repo: LeanGitRepo) -> Path: logger.info(f"Tracing {repo}") with working_directory() as tmp_dir: logger.debug(f"Working in the temporary directory {tmp_dir}") - _trace(repo) - traced_repo = TracedRepo.from_traced_files(tmp_dir / repo.name) + _trace(repo, build_deps) + traced_repo = TracedRepo.from_traced_files(tmp_dir / repo.name, build_deps) traced_repo.save_to_disk() - path = cache.store(tmp_dir) + path = cache.store(tmp_dir / repo.name) else: logger.debug("The traced repo is available in the cache.") return path -def trace(repo: LeanGitRepo, dst_dir: Optional[Union[str, Path]] = None) -> TracedRepo: +def trace( + repo: LeanGitRepo, + dst_dir: Optional[Union[str, Path]] = None, + build_deps: bool = True, +) -> TracedRepo: """Trace a repo (and its dependencies), saving the results to ``dst_dir``. The function only traces the repo when it's not available in the cache. Otherwise, @@ -168,6 +178,7 @@ def trace(repo: LeanGitRepo, dst_dir: Optional[Union[str, Path]] = None) -> Trac Args: repo (LeanGitRepo): The Lean repo to trace. dst_dir (Union[str, Path]): The directory for saving the traced repo. If None, the traced repo is only saved in the cahe. + build_deps (bool): Whether to build the dependencies of ``repo``. Defaults to True. Returns: TracedRepo: A :class:`TracedRepo` object corresponding to the files at ``dst_dir``. @@ -178,9 +189,9 @@ def trace(repo: LeanGitRepo, dst_dir: Optional[Union[str, Path]] = None) -> Trac not dst_dir.exists() ), f"The destination directory {dst_dir} already exists." - cached_path = get_traced_repo_path(repo) + cached_path = get_traced_repo_path(repo, build_deps) logger.info(f"Loading the traced repo from {cached_path}") - traced_repo = TracedRepo.load_from_disk(cached_path) + traced_repo = TracedRepo.load_from_disk(cached_path, build_deps) traced_repo.check_sanity() if dst_dir is not None: diff --git a/src/lean_dojo/data_extraction/traced_data.py b/src/lean_dojo/data_extraction/traced_data.py index 2c70bdf7..632848ba 100644 --- a/src/lean_dojo/data_extraction/traced_data.py +++ b/src/lean_dojo/data_extraction/traced_data.py @@ -1291,7 +1291,10 @@ class TracedRepo: """Root directory of the traced repo. """ - traced_files_graph: nx.DiGraph = field(repr=False) + traced_files: List[TracedFile] = field(repr=False) + """List of traced files in the repo.""" + + traced_files_graph: Optional[nx.DiGraph] = field(repr=False) """Dependency graph between files in the repo. The graph is a DAG, and there is an edge from file :file:`X` to file :file:`Y` @@ -1335,7 +1338,9 @@ def check_sanity(self) -> None: for k, v in self.dependencies.items(): assert isinstance(k, str) and isinstance(v, LeanGitRepo) assert isinstance(self.root_dir, Path) - assert isinstance(self.traced_files_graph, nx.DiGraph) + assert self.traced_files_graph is None or isinstance( + self.traced_files_graph, nx.DiGraph + ) assert self.repo not in self.dependencies.values() assert len(self.dependencies) == 0 or not self.repo.is_lean @@ -1353,34 +1358,38 @@ def check_sanity(self) -> None: p.relative_to(self.root_dir) for p in self.root_dir.glob("**/*.dep_paths") } - if not LOAD_USED_PACKAGES_ONLY: - assert len(json_files) == self.traced_files_graph.number_of_nodes() - - for path_str, tf_node in self.traced_files_graph.nodes.items(): - tf = tf_node["traced_file"] - path = Path(path_str) - tf.check_sanity() - assert tf.path == path and tf.root_dir == self.root_dir - assert tf.traced_repo is None or tf.traced_repo is self - assert path in lean_files - assert ( - to_dep_path(self.root_dir, path, self.repo) in path_files - ), to_dep_path(self.root_dir, path, self.repo) - assert ( - to_json_path(self.root_dir, path, self.repo) in json_files - ), to_json_path(self.root_dir, path, self.repo) - if len(xml_files) > 0: + if self.traced_files_graph is not None: + if not LOAD_USED_PACKAGES_ONLY: + assert len(json_files) == self.traced_files_graph.number_of_nodes() + + for path_str, tf_node in self.traced_files_graph.nodes.items(): + tf = tf_node["traced_file"] + path = Path(path_str) + tf.check_sanity() + assert tf.path == path and tf.root_dir == self.root_dir + assert tf.traced_repo is None or tf.traced_repo is self + assert path in lean_files + assert ( + to_dep_path(self.root_dir, path, self.repo) in path_files + ), to_dep_path(self.root_dir, path, self.repo) assert ( - to_xml_path(self.root_dir, path, self.repo) in xml_files - ), to_xml_path(self.root_dir, path, self.repo) + to_json_path(self.root_dir, path, self.repo) in json_files + ), to_json_path(self.root_dir, path, self.repo) + if len(xml_files) > 0: + assert ( + to_xml_path(self.root_dir, path, self.repo) in xml_files + ), to_xml_path(self.root_dir, path, self.repo) @classmethod - def from_traced_files(cls, root_dir: Union[str, Path]) -> "TracedRepo": + def from_traced_files( + cls, root_dir: Union[str, Path], build_deps: bool = True + ) -> "TracedRepo": """Construct a :class:`TracedRepo` object by parsing :file:`*.ast.json` and :file:`*.path` files produced by :code:`lean --ast --tsast --tspp` (Lean 3) or :file:`ExtractData.lean` (Lean 4). Args: root_dir (Union[str, Path]): Root directory of the traced repo. + build_deps (bool, optional): Whether to build the dependency graph between files. """ root_dir = Path(root_dir).resolve() if not is_git_repo(root_dir): @@ -1410,22 +1419,17 @@ def from_traced_files(cls, root_dir: Union[str, Path]) -> "TracedRepo": ) dependencies = repo.get_dependencies(root_dir) - traced_files_graph = _build_dependency_graph(traced_files, root_dir, repo) - traced_repo = cls(repo, dependencies, root_dir, traced_files_graph) + if build_deps: + traced_files_graph = _build_dependency_graph(traced_files, root_dir, repo) + else: + traced_files_graph = None + + traced_repo = cls( + repo, dependencies, root_dir, traced_files, traced_files_graph + ) traced_repo._update_traced_files() return traced_repo - @property - def traced_files(self) -> Generator[TracedFile, None, None]: - """Return an iterator of traced files from their dependency graph.""" - for _, tf_node in self.traced_files_graph.nodes.items(): - yield tf_node["traced_file"] - - @property - def num_traced_files(self) -> int: - """Number of traced files in the repo.""" - return len(self.traced_files_graph) - def get_traced_file(self, path: Union[str, Path]) -> TracedFile: """Return a traced file by its path.""" return self.traced_files_graph.nodes[str(path)]["traced_file"] @@ -1436,11 +1440,12 @@ def _update_traced_files(self) -> None: def save_to_disk(self) -> None: """Save all traced files in the repo to the disk as :file:`*.trace.xml` files.""" + num_traced_files = len(self.traced_files) logger.debug( - f"Saving {self.num_traced_files} traced XML files to {self.root_dir} with {NUM_WORKERS} workers" + f"Saving {num_traced_files} traced XML files to {self.root_dir} with {NUM_WORKERS} workers" ) if NUM_WORKERS <= 1: - for tf in tqdm(self.traced_files, total=self.num_traced_files): + for tf in tqdm(self.traced_files, total=num_traced_files): _save_xml_to_disk(tf) else: with ray_actor_pool(_TracedRepoHelper, self.root_dir, self.repo) as pool: @@ -1450,12 +1455,14 @@ def save_to_disk(self) -> None: lambda a, tf: a.save_xml_to_disk.remote(tf), self.traced_files, ), - total=self.num_traced_files, + total=num_traced_files, ) ) @classmethod - def load_from_disk(cls, root_dir: Union[str, Path]) -> "TracedRepo": + def load_from_disk( + cls, root_dir: Union[str, Path], build_deps: bool = True + ) -> "TracedRepo": """Load a traced repo from :file:`*.trace.xml` files.""" root_dir = Path(root_dir).resolve() if not is_git_repo(root_dir): @@ -1492,8 +1499,14 @@ def load_from_disk(cls, root_dir: Union[str, Path]) -> "TracedRepo": ) dependencies = repo.get_dependencies(root_dir) - traced_files_graph = _build_dependency_graph(traced_files, root_dir, repo) - traced_repo = cls(repo, dependencies, root_dir, traced_files_graph) + if build_deps: + traced_files_graph = _build_dependency_graph(traced_files, root_dir, repo) + else: + traced_files_graph = None + + traced_repo = cls( + repo, dependencies, root_dir, traced_files, traced_files_graph + ) traced_repo._update_traced_files() return traced_repo diff --git a/src/lean_dojo/utils.py b/src/lean_dojo/utils.py index 1a691762..0f792e60 100644 --- a/src/lean_dojo/utils.py +++ b/src/lean_dojo/utils.py @@ -292,7 +292,11 @@ def _from_lean_path(root_dir: Path, path: Path, repo, ext: str) -> Path: build_dir = repo.get_build_dir() assert root_dir.name != "lean4" - if path.is_relative_to(packages_dir / "lean4/src"): + if path.is_relative_to(packages_dir / "lean4/src/lean/lake"): + # E.g., "lake-packages/lean4/src/lean/lake/Lake/CLI/Error.lean" + p = path.relative_to(packages_dir / "lean4/src/lean/lake") + return packages_dir / "lean4/lib/lean" / p.with_suffix(ext) + elif path.is_relative_to(packages_dir / "lean4/src"): # E.g., "lake-packages/lean4/src/lean/Init.lean" p = path.relative_to(packages_dir / "lean4/src").with_suffix(ext) return packages_dir / "lean4/lib" / p @@ -335,7 +339,13 @@ def to_lean_path(root_dir: Path, path: Path, repo) -> bool: build_dir = repo.get_build_dir() assert root_dir.name != "lean4" - if path.is_relative_to(packages_dir / "lean4/lib"): + if path == packages_dir / "lean4/lib/lean/Lake.lean": + return packages_dir / "lean4/src/lean/lake/Lake.lean" + elif path.is_relative_to(packages_dir / "lean4/lib/lean/Lake"): + # E.g., "lake-packages/lean4/lib/lean/Lake/Util/List.lean" + p = path.relative_to(packages_dir / "lean4/lib/lean/Lake") + return packages_dir / "lean4/src/lean/lake/Lake" / p + elif path.is_relative_to(packages_dir / "lean4/lib"): # E.g., "lake-packages/lean4/lib/lean/Init.lean" p = path.relative_to(packages_dir / "lean4/lib") return packages_dir / "lean4/src" / p