From c315c4b1ff5b3b83d2fb298e18ae461369082fc2 Mon Sep 17 00:00:00 2001 From: wshahn <162052406+wshahn@users.noreply.github.com> Date: Mon, 9 Sep 2024 16:04:09 -0400 Subject: [PATCH] Update get_collection_storagesize.py Slight alteration of script to ignore processing the root dataverse collection, and only get the size of sub-collections. Additionally, the instance url has been changed to use 'unc' instead of 'root'. --- get_collection_storagesize.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/get_collection_storagesize.py b/get_collection_storagesize.py index cd0b6bc..353a99c 100755 --- a/get_collection_storagesize.py +++ b/get_collection_storagesize.py @@ -30,7 +30,7 @@ def get_size(dataverse,collection,token): # throws I/O errors in TRSA case #dvurl = dataverse + '/api/dataverses/' + collection + '/storagesize?includeCached=true&key=' + token dvurl = dataverse + '/api/dataverses/' + collection + '/storagesize?key=' + token - r = requests.get(dvurl) + r = requests.get(dvurl) j = r.json() # strip out "size of this ... bytes" error = "Couldn't get storagesize for collection: " + collection @@ -77,15 +77,16 @@ def format_size(byte_size): readablesize = format_size(size) dvfilecount = get_filecount(dataverse,collection,token) print(collection + ': ' + str(size) + ' bytes' + ' (' + readablesize + '), ' + str(dvfilecount) + ' files.') - + else: - # start with the root dataverse - collection = 'root' - size = get_size(dataverse,collection,token) - dvfilecount = get_filecount(dataverse,collection,token) - print(collection + ': ' + str(size) + ' bytes, ' + str(dvfilecount) + ' files.') - # now iterate through sub-collections - instanceurl = dataverse + '/api/dataverses/root/contents' + # don't process root for performance reasons + #collection = 'root' + #size = get_size(dataverse,collection,token) + #dvfilecount = get_filecount(dataverse,collection,token) + #print(collection + ': ' + str(size) + ' bytes, ' + str(dvfilecount) + ' files.') + + # iterate through sub-collections + instanceurl = dataverse + '/api/dataverses/unc/contents' r = requests.get(instanceurl) j = r.json() for i in range(len(j["data"])): @@ -97,6 +98,9 @@ def format_size(byte_size): ar = requests.get(aliasurl) aj = ar.json() collection = aj["data"]["alias"] + # get size size = get_size(dataverse,collection,token) + readablesize = format_size(size) dvfilecount = get_filecount(dataverse,collection,token) print(collection + ': ' + str(size) + ' bytes' + ' (' + readablesize + '), ' + str(dvfilecount) + ' files.') +