From 40bd44a813a1f69dc8edd8b6113d6ccea2e28d01 Mon Sep 17 00:00:00 2001 From: PascalIversen Date: Wed, 16 Oct 2024 16:36:32 +0200 Subject: [PATCH] fix redownload --- drevalpy/datasets/utils.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drevalpy/datasets/utils.py b/drevalpy/datasets/utils.py index 6b817b8..09b166f 100644 --- a/drevalpy/datasets/utils.py +++ b/drevalpy/datasets/utils.py @@ -24,16 +24,19 @@ def download_dataset( """ file_name = f"{dataset_name}.zip" file_path = os.path.join(data_path, file_name) - if os.path.exists(file_path) and not redownload: - print(f"{dataset_name} already exists, skipping download.") + extracted_folder_path = os.path.join(data_path, dataset_name) + + # Check if the extracted data exists and skip download if not redownloading + if os.path.exists(extracted_folder_path) and not redownload: + print(f"{dataset_name} is already extracted, skipping download.") else: url = "https://zenodo.org/doi/10.5281/zenodo.12633909" # Fetch the latest record - response = requests.get(url, timeout=10) + response = requests.get(url, timeout=60) if response.status_code != 200: raise requests.exceptions.HTTPError(f"Error fetching record: {response.status_code}") latest_url = response.links["linkset"]["url"] - response = requests.get(latest_url, timeout=10) + response = requests.get(latest_url, timeout=60) if response.status_code != 200: raise requests.exceptions.HTTPError(f"Error fetching record: {response.status_code}") data = response.json() @@ -46,7 +49,7 @@ def download_dataset( file_url = name_to_url[file_name] # Download the file print(f"Downloading {dataset_name} from {file_url}...") - response = requests.get(file_url, timeout=10) + response = requests.get(file_url, timeout=60) if response.status_code != 200: raise requests.exceptions.HTTPError(f"Error downloading file {dataset_name}: " f"{response.status_code}")