Skip to content

Commit

Permalink
feat: convert hashes to bytes from hex (#5)
Browse files (browse the repository at this point in the history)
  • Loading branch information
baszalmstra authored Apr 30, 2024
1 parent b8f8ea6 commit c4f4368
Showing 1 changed file with 21 additions and 7 deletions.
28 changes: 21 additions & 7 deletions py/split_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@
BarColumn,
)

fake_token = {
"token": "iamasecrettoken"
}
fake_token = {"token": "iamasecrettoken"}


def download_file(url):
Expand Down Expand Up @@ -52,6 +50,14 @@ def sha256(data):
return hash.digest(), hash.hexdigest()


def pack_package_record(record):
    """Convert the hex-encoded hashes of a package record to raw bytes.

    The ``sha256`` and ``md5`` entries are replaced in place with the
    result of ``bytes.fromhex`` when they hold a non-empty string. Records
    that lack one of the keys, or whose value is empty, are left untouched
    for that key, and a value that is not a string (for example one already
    converted to ``bytes``) is not decoded again, so calling this function
    twice on the same record is safe.

    Args:
        record: Mapping describing a single package; modified in place.

    Returns:
        The same ``record`` object that was passed in.

    Raises:
        ValueError: If a hash string is not valid hexadecimal.
    """
    for key in ("sha256", "md5"):
        value = record.get(key)
        # Only non-empty strings are decoded: a missing or empty hash stays
        # as it is, and bytes from an earlier call are kept unchanged.
        if value and isinstance(value, str):
            record[key] = bytes.fromhex(value)
    return record


def split_repo(repo_url, subdir, folder):
repodata = folder / subdir / "repodata.json"

Expand Down Expand Up @@ -96,16 +102,23 @@ def split_repo(repo_url, subdir, folder):
shards.mkdir(exist_ok=True)
shards_index = {"info": repodata["info"], "shards": {}}
shards_index["info"]["base_url"] = f"{repo_url}/{subdir}/"

compressor = zstd.ZstdCompressor(level=19)

before = 0
after_compression = 0

# create a rich progress bar
for name in track(all_names, description=f"Processing {subdir}"):
d = {"packages": {fn: packages[fn] for fn in package_names.get(name, [])}}
d = {
"packages": {
fn: pack_package_record(packages[fn])
for fn in package_names.get(name, [])
}
}
d["packages.conda"] = {
fn: conda_packages[fn] for fn in conda_package_names.get(name, [])
fn: pack_package_record(conda_packages[fn])
for fn in conda_package_names.get(name, [])
}

encoded = msgpack.dumps(d)
Expand All @@ -132,6 +145,7 @@ def split_repo(repo_url, subdir, folder):
repodata_shards_file = folder / subdir / "repodata_shards.msgpack.zst"
repodata_shards = compressor.compress(msgpack.dumps(shards_index))
repodata_shards_file.write_bytes(repodata_shards)

return package_names


Expand Down Expand Up @@ -286,10 +300,10 @@ def files_to_upload(outpath, timestamp, subdir, channel_name):
)

# Upload the fake token
tempfile = Path(__file__).parent / "token"
tempfile = outpath / subdir / "token"
tempfile.write_text(json.dumps(fake_token))
upload(
tempfile,
"fast-repo",
f"{channel_name}/{subdir}/token",
)
)

0 comments on commit c4f4368

Please sign in to comment.