From b2d7fd819b5eedc92711220376c05d81c4f75471 Mon Sep 17 00:00:00 2001 From: Albert Jiang Date: Fri, 12 Jan 2024 10:48:01 +0000 Subject: [PATCH 1/2] a fix to make sure path is properly parsed --- src/lean_dojo/data_extraction/lean.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/lean_dojo/data_extraction/lean.py b/src/lean_dojo/data_extraction/lean.py index 075f2d6a..8355af6b 100644 --- a/src/lean_dojo/data_extraction/lean.py +++ b/src/lean_dojo/data_extraction/lean.py @@ -37,9 +37,9 @@ ) -def cleanse_string(s: str) -> str: +def cleanse_string(s: str | Path) -> str: """Replace : and / with _ in a string.""" - return s.replace("/", "_").replace(":", "_") + return str(s).replace("/", "_").replace(":", "_") @cache @@ -681,4 +681,9 @@ def __post_init__(self) -> None: @property def uid(self) -> str: """Unique identifier of the theorem.""" - return f"{cleanse_string(self.repo.url)}@{cleanse_string(self.repo.commit)}:{cleanse_string(self.file_path)}:{cleanse_string(self.full_name)}" + return f"{cleanse_string(self.repo.url)}@{cleanse_string(self.repo.commit)}:{cleanse_string(self.file_path.__str__())}:{cleanse_string(self.full_name)}" + + @property + def uhash(self) -> str: + """Unique hash of the theorem.""" + return str(hash(self.uid)**2) \ No newline at end of file From 3c24062e1719403aaf5995448ffc57380d47ee6e Mon Sep 17 00:00:00 2001 From: Albert Jiang Date: Fri, 12 Jan 2024 22:07:15 +0000 Subject: [PATCH 2/2] black --- src/lean_dojo/data_extraction/lean.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lean_dojo/data_extraction/lean.py b/src/lean_dojo/data_extraction/lean.py index 8355af6b..5adc83dc 100644 --- a/src/lean_dojo/data_extraction/lean.py +++ b/src/lean_dojo/data_extraction/lean.py @@ -686,4 +686,4 @@ def uid(self) -> str: @property def uhash(self) -> str: """Unique hash of the theorem.""" - return str(hash(self.uid)**2) \ No newline at end of file + return str(hash(self.uid) ** 2)