Skip to content

Commit

Permalink
load_zarr_stats.py removes duplicate. jax_samples.csv links to update…
Browse files Browse the repository at this point in the history
…d samples
  • Loading branch information
will-moore committed Oct 15, 2024
1 parent f9785f6 commit d94e589
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
4 changes: 2 additions & 2 deletions samples/jax_samples.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
-https://raw.githubusercontent.com/TheJacksonLaboratory/jax-ngff-challenge-2024/66a060da191bf80f4e06fdd1f6b4e7196d6c6b45/KOMP_adult_lacZ.csv
-https://raw.githubusercontent.com/TheJacksonLaboratory/jax-ngff-challenge-2024/66a060da191bf80f4e06fdd1f6b4e7196d6c6b45/KOMP_histopathology.csv
+https://raw.githubusercontent.com/will-moore/jax-ngff-challenge-2024/refs/heads/add_shape_and_ontology_columns/KOMP_adult_lacZ.csv
+https://raw.githubusercontent.com/will-moore/jax-ngff-challenge-2024/refs/heads/add_shape_and_ontology_columns/KOMP_histopathology.csv
14 changes: 13 additions & 1 deletion samples/load_zarr_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ def load_zarr(zarr_url, average_count=5):

 column_names = []
 column_data = []
+
+unique_urls = set()
 # open a local csv file and iterate through rows...
 with Path(csv_name).open(newline="") as csvfile:
 csvreader = csv.reader(csvfile, delimiter=",")
Expand All @@ -205,8 +207,18 @@ def load_zarr(zarr_url, average_count=5):
 zarr_url = row[url_col]
 if zarr_url.endswith(".csv"):
 continue
+if zarr_url in unique_urls:
+# print(f"Skipping duplicate url: {zarr_url}")
+continue
+unique_urls.add(zarr_url)
 average_count = 5 if "written" not in column_names else 1
-stats = load_zarr(zarr_url, average_count)
+stats = {}
+if (
+"written" not in column_names
+or "shape" not in column_names
+or "license" not in column_names
+):
+stats = load_zarr(zarr_url, average_count)
 # Add the extra column data here...
 if "written" not in column_names:
 row.append(stats.get("written", 0))
Expand Down

0 comments on commit d94e589

Please sign in to comment.