Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incorporate Recent Commits #192

Merged
merged 40 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
9c266a9
add repo types: github/local/remote
RexWzh Jul 9, 2024
60907a8
update property for repo types
RexWzh Jul 9, 2024
46f3289
add tests
RexWzh Jul 9, 2024
cdf28c3
update method & test for remote url
RexWzh Jul 10, 2024
7c02c91
update trace method & tests
RexWzh Jul 10, 2024
268fc2d
add tests for interaction
RexWzh Jul 10, 2024
36661d6
use tempdir for local type & resolve cache problem
RexWzh Jul 10, 2024
9571432
update from_path & simplify tests
RexWzh Jul 20, 2024
83bffb8
Merge remote-tracking branch 'origin/main'
RexWzh Jul 20, 2024
6429b41
fix git version for windows
RexWzh Jul 22, 2024
0a8796d
add more tests for repo type
RexWzh Jul 23, 2024
9213d8e
Merge remote-tracking branch 'origin/main' into check-git-version
RexWzh Jul 23, 2024
12d23cb
fix commit url & git clone for windows
RexWzh Jul 23, 2024
9756514
add tests for lean repo & dojo
RexWzh Jul 23, 2024
a8a0977
simplify commit hash & delay initialization of lean4 repo
RexWzh Jul 23, 2024
fab19e2
lean version: use string instead of commit
RexWzh Jul 24, 2024
f58eb51
Merge pull request #188 from Lean-zh/check-git-version
yangky11 Jul 24, 2024
60a500e
format code
yangky11 Jul 24, 2024
570e787
Merge remote-tracking branch 'origin/dev' into simplify-funcs
RexWzh Jul 25, 2024
eabaa22
update cache method
RexWzh Jul 25, 2024
ff0f9f4
update functions for git Repos & add tests
RexWzh Jul 25, 2024
6c7ef63
fix url_exists
RexWzh Jul 25, 2024
e5941ff
allow repo_type of [email protected]
RexWzh Jul 26, 2024
3ff8ccd
Merge branch 'simplify-funcs'
RexWzh Jul 29, 2024
8f1b765
simplify repo cache
RexWzh Jul 29, 2024
ad23694
Merge pull request #179 from Lean-zh/main
yangky11 Jul 30, 2024
fbfa0cd
bump to v2.1.0
yangky11 Jul 30, 2024
7adf033
minor cleanup
yangky11 Aug 5, 2024
43f770b
move _split_git_url
yangky11 Aug 5, 2024
515d361
edit imports
yangky11 Aug 5, 2024
897c6f1
minor edits to get_traced_repo_path
yangky11 Aug 5, 2024
592f9b5
minor edits
yangky11 Aug 5, 2024
0d38707
minor fix
yangky11 Aug 5, 2024
493d890
fix get_cache_dirname
yangky11 Aug 5, 2024
819ed53
format code
yangky11 Aug 5, 2024
44742d9
add @cache
yangky11 Aug 5, 2024
777b3c3
fix some typing errors
yangky11 Aug 5, 2024
e662bc5
minor fix
yangky11 Aug 5, 2024
ddfe9c5
update tests
yangky11 Aug 5, 2024
297b96e
minor edits
yangky11 Aug 6, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,6 @@ dmypy.json

# Pyre type checker
.pyre/

# vscode debug config
.vscode/
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
project = "LeanDojo"
copyright = "2023, LeanDojo Team"
author = "Kaiyu Yang"
release = "2.0.3"
release = "2.1.0"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ exclude = [

[project]
name = "lean-dojo"
version = "2.0.3"
version = "2.1.0"
authors = [
{ name="Kaiyu Yang", email="[email protected]" },
]
Expand All @@ -31,6 +31,7 @@ dependencies = [
"python-dotenv",
"loguru",
"filelock",
"gitpython",
"psutil",
"pexpect",
"types-psutil",
Expand Down
13 changes: 7 additions & 6 deletions src/lean_dojo/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

load_dotenv()

__version__ = "2.0.3"
__version__ = "2.1.0"

logger.remove()
if "VERBOSE" in os.environ or "DEBUG" in os.environ:
Expand Down Expand Up @@ -71,15 +71,16 @@
assert re.fullmatch(r"\d+g", TACTIC_MEMORY_LIMIT)


def check_git_version(min_version: Tuple[int, int, int]) -> Tuple[int, int, int]:
def check_git_version(min_version: Tuple[int, int, int]) -> None:
"""Check the version of Git installed on the system."""
res = subprocess.run("git --version", shell=True, capture_output=True, check=True)
output = res.stdout.decode()
output = res.stdout.decode().strip()
error = res.stderr.decode()
assert error == "", error
m = re.match(r"git version (?P<version>[0-9.]+)", output)
version = tuple(int(_) for _ in m["version"].split("."))

m = re.search(r"git version (\d+\.\d+\.\d+)", output)
assert m, f"Could not parse Git version from: {output}"
# Convert version number string to tuple of integers
version = tuple(int(_) for _ in m.group(1).split("."))
version_str = ".".join(str(_) for _ in version)
min_version_str = ".".join(str(_) for _ in min_version)
assert (
Expand Down
57 changes: 24 additions & 33 deletions src/lean_dojo/data_extraction/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@
from pathlib import Path
from loguru import logger
from filelock import FileLock
from typing import Optional, Generator
from dataclasses import dataclass, field
from typing import Optional, Tuple, Generator

from ..utils import (
execute,
url_exists,
get_repo_info,
report_critical_failure,
)
from ..constants import (
Expand All @@ -23,22 +22,6 @@
)


def _split_git_url(url: str) -> Tuple[str, str]:
"""Split a Git URL into user name and repo name."""
if url.endswith("/"):
url = url[:-1]
assert not url.endswith("/"), f"Unexpected URL: {url}"
fields = url.split("/")
user_name = fields[-2]
repo_name = fields[-1]
return user_name, repo_name


def _format_dirname(url: str, commit: str) -> str:
user_name, repo_name = _split_git_url(url)
return f"{user_name}-{repo_name}-{commit}"


_CACHE_CORRPUTION_MSG = "The cache may have been corrputed!"


Expand All @@ -59,16 +42,20 @@ def __post_init__(self):
lock_path = self.cache_dir.with_suffix(".lock")
object.__setattr__(self, "lock", FileLock(lock_path))

def get(self, url: str, commit: str) -> Optional[Path]:
"""Get the path of a traced repo with URL ``url`` and commit hash ``commit``. Return None if no such repo can be found."""
_, repo_name = _split_git_url(url)
dirname = _format_dirname(url, commit)
def get(self, rel_cache_dir: Path) -> Optional[Path]:
"""Get the cache repo at ``CACHE_DIR / rel_cache_dir`` from the cache.

Args:
rel_cache_dir (Path): The relative path of the stored repo in the cache.
"""
dirname = rel_cache_dir.parent
dirpath = self.cache_dir / dirname
cache_path = self.cache_dir / rel_cache_dir

with self.lock:
if dirpath.exists():
assert (dirpath / repo_name).exists()
return dirpath / repo_name
assert cache_path.exists()
return cache_path

elif not DISABLE_REMOTE_CACHE:
url = os.path.join(REMOTE_CACHE_URL, f"{dirname}.tar.gz")
Expand All @@ -83,23 +70,27 @@ def get(self, url: str, commit: str) -> Optional[Path]:
with tarfile.open(f"{dirpath}.tar.gz") as tar:
tar.extractall(self.cache_dir)
os.remove(f"{dirpath}.tar.gz")
assert (dirpath / repo_name).exists()
assert (cache_path).exists()

return dirpath / repo_name
return cache_path

else:
return None

def store(self, src: Path) -> Path:
"""Store a traced repo at path ``src``. Return its path in the cache."""
url, commit = get_repo_info(src)
dirpath = self.cache_dir / _format_dirname(url, commit)
_, repo_name = _split_git_url(url)
def store(self, src: Path, rel_cache_dir: Path) -> Path:
"""Store a repo at path ``src``. Return its path in the cache.

Args:
src (Path): Path to the repo.
rel_cache_dir (Path): The relative path of the stored repo in the cache.
"""
dirpath = self.cache_dir / rel_cache_dir.parent
cache_path = self.cache_dir / rel_cache_dir
if not dirpath.exists():
with self.lock:
with report_critical_failure(_CACHE_CORRPUTION_MSG):
shutil.copytree(src, dirpath / repo_name)
return dirpath / repo_name
shutil.copytree(src, cache_path)
return cache_path


cache = Cache(CACHE_DIR)
Expand Down
Loading