Skip to content

Commit

Permalink
feat: move cache-dir resolution out of database.py into determine_data_home(), add config read/write utils, and auto-migrate the legacy cache dir
Browse files Browse the repository at this point in the history
  • Loading branch information
WenjieDu committed Jan 15, 2024
1 parent cdc8410 commit 26ae553
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 118 deletions.
2 changes: 1 addition & 1 deletion tests/test_tsdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ def test_3_dataset_purging(self):
cached_datasets = tsdb.list_cache()
assert isinstance(cached_datasets, list)
tsdb.delete_cache("physionet_2012") # delete single
tsdb.delete_cache() # delete all

def test_4_migrate(self):
    """End-to-end smoke test of tsdb.migrate and tsdb.migrate_cache."""
    # build a dummy directory containing one file to stand in for a cached dataset
    os.makedirs("dir_for_migration")
    with open("dir_for_migration/test.txt", "a") as f:
        f.write("hello world")
    # move the dir to a brand-new location whose parent does not exist yet
    tsdb.migrate("dir_for_migration", "new_dir/put_it_here")
    # repoint TSDB's cache dir (also persists the new path into config.ini)
    tsdb.migrate_cache("new_cache_dir")
    tsdb.delete_cache()  # delete all datasets

def test_5_logging(self):
# different level logging
Expand Down
2 changes: 1 addition & 1 deletion tsdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
__version__ = "0.3.1"

from .data_processing import (
CACHED_DATASET_DIR,
list,
load,
download_and_extract,
list_cache,
delete_cache,
CACHED_DATASET_DIR,
)
from .utils.file import (
purge_path,
Expand Down
43 changes: 27 additions & 16 deletions tsdb/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import shutil
import warnings

from .database import AVAILABLE_DATASETS, CACHED_DATASET_DIR
from .database import AVAILABLE_DATASETS
from .loading_funcs import (
load_physionet2012,
load_physionet2019,
Expand All @@ -20,9 +20,11 @@
load_ais,
)
from .utils.downloading import download_and_extract
from .utils.file import purge_path, pickle_load, pickle_dump
from .utils.file import purge_path, pickle_load, pickle_dump, determine_data_home
from .utils.logging import logger

CACHED_DATASET_DIR = determine_data_home()


def list() -> list:
"""List the database.
Expand Down Expand Up @@ -145,14 +147,28 @@ def list_cache() -> list:
return dir_content


def delete_cache(dataset_name=None) -> None:
"""Delete CACHED_DATASET_DIR if exists."""
def delete_cache(dataset_name: str = None) -> None:
"""Delete CACHED_DATASET_DIR if exists.
Parameters
----------
dataset_name : str, optional
The name of the specific dataset in database.DATABASE.
If dataset is not cached, then abort.
Delete all cached datasets if dataset_name is left as None.
"""
# if CACHED_DATASET_DIR does not exist, abort
if not os.path.exists(CACHED_DATASET_DIR):
logger.error("❌ No cached data. Operation aborted.")
else:
# if CACHED_DATASET_DIR exists, then purge
if dataset_name is not None:
# if CACHED_DATASET_DIR exists, then execute purging procedure
if dataset_name is None: # if dataset_name is not given, then purge all
logger.info(
f"`dataset_name` not given. Purging all cached data under {CACHED_DATASET_DIR}..."
)
purge_path(CACHED_DATASET_DIR)
os.makedirs(CACHED_DATASET_DIR)
else:
assert (
dataset_name in AVAILABLE_DATASETS
), f"{dataset_name} is not available in TSDB, so it has no cache. Please check your dataset name."
Expand All @@ -162,13 +178,8 @@ def delete_cache(dataset_name=None) -> None:
f"❌ Dataset {dataset_name} is not cached. Operation aborted."
)
return
logger.info(
f"Purging cached dataset {dataset_name} under {dir_to_delete}..."
)
else:
logger.info(
f"`dataset_name` not given. Purging all cached data under {CACHED_DATASET_DIR}..."
)
dir_to_delete = CACHED_DATASET_DIR

purge_path(dir_to_delete)
else:
logger.info(
f"Purging cached dataset {dataset_name} under {dir_to_delete}..."
)
purge_path(dir_to_delete)
37 changes: 0 additions & 37 deletions tsdb/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,6 @@
# Created by Wenjie Du <[email protected]>
# License: BSD-3-Clause

import os
from configparser import ConfigParser

from .utils.logging import logger

# Resolve CACHED_DATASET_DIR (the directory TSDB caches downloaded datasets in)
# at import time, from config.ini next to this module.
config = ConfigParser()
tsdb_config_path = os.path.join(os.path.dirname(__file__), "config.ini")
config.read(tsdb_config_path)

# read from the config file
data_home_path = config.get("path", "data_home")
# replace '~' with the absolute path if existing in the path
data_home_path = data_home_path.replace("~", os.path.expanduser("~"))
# pre-v0.3 default cache location, kept for backward compatibility
old_cached_dataset_dir = os.path.join(os.path.expanduser("~"), ".tsdb_cached_datasets")

if os.path.exists(old_cached_dataset_dir):
    # use the old path and warn the user
    logger.warning(
        "‼️ Detected the home dir of the old version TSDB. "
        "Since v0.3, TSDB has changed the default cache dir to '~/.tsdb'. "
        "You can migrate downloaded datasets by invoking the new function "
        "tsdb.migrate(old='~/.tsdb_cached_datasets', new='~/.tsdb')"
    )
    CACHED_DATASET_DIR = old_cached_dataset_dir
elif os.path.exists(data_home_path):
    # use the path directly, may be in a portable disk
    CACHED_DATASET_DIR = data_home_path
else:
    # use the default path
    default_path = os.path.join(os.path.expanduser("~"), ".tsdb")
    CACHED_DATASET_DIR = default_path
    # only warn when the configured path differs from the default we fall back to
    if os.path.abspath(data_home_path) != os.path.abspath(default_path):
        logger.warning(
            f"‼️ The preset data_home path '{data_home_path}' doesn't exist. "
            f"Using the default path '{default_path}'."
        )


_DATABASE = {
# http://www.physionet.org/challenge/2012
Expand Down
31 changes: 31 additions & 0 deletions tsdb/utils/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Config functions for TSDB.
"""

# Created by Wenjie Du <[email protected]>
# License: BSD-3-Clause

import os
from configparser import ConfigParser

from .logging import logger

TSDB_BASE_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
TSDB_CONFIG_FILE = os.path.join(TSDB_BASE_PATH, "config.ini")


def read_configs():
    """Load and return TSDB's settings parsed from the bundled config.ini.

    Returns
    -------
    ConfigParser
        A parser populated from ``TSDB_CONFIG_FILE`` (empty if the file
        is missing — ``ConfigParser.read`` ignores absent files).
    """
    parser = ConfigParser()
    parser.read(TSDB_CONFIG_FILE)
    return parser


def write_configs(config_parser, key_value_set):
    """Apply nested settings to ``config_parser`` and persist them to config.ini.

    Parameters
    ----------
    config_parser : ConfigParser
        The parser to update (typically the one returned by ``read_configs``).
    key_value_set : dict
        Nested mapping ``{section: {option: value}}``, e.g.
        ``{"path": {"data_home": "/tmp/tsdb"}}`` as passed by ``migrate_cache``.
    """
    # BUG FIX: the previous loop unpacked the inner dict itself as `key`
    # (`for section, key in key_value_set.items()`), so the subsequent
    # `key_value_set[section][key]` raised TypeError (unhashable dict).
    # Iterate the nested mapping properly instead.
    for section, options in key_value_set.items():
        for key, value in options.items():
            config_parser.set(section, key, value)

    with open(TSDB_CONFIG_FILE, "w") as f:
        config_parser.write(f)

    logger.info("Wrote new configs to config.ini successfully.")
155 changes: 92 additions & 63 deletions tsdb/utils/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,99 @@
import os
import pickle
import shutil
from configparser import ConfigParser
from typing import Optional

from .config import read_configs, write_configs
from .logging import logger
from ..database import tsdb_config_path, CACHED_DATASET_DIR


def migrate(old_path: str, new_path: str) -> None:
    """Move everything under ``old_path`` into ``new_path``, then drop ``old_path``.

    Parameters
    ----------
    old_path:
        The old path of the dataset.
    new_path:
        The new path of the dataset.

    Raises
    ------
    FileNotFoundError
        If ``old_path`` does not exist.
    """
    if not os.path.exists(old_path):
        raise FileNotFoundError(f"Given old_path {old_path} does not exist.")

    if not os.path.exists(new_path):
        # destination is free: make sure its parent exists, then a single rename does it
        parent_dir = os.path.abspath(os.path.join(new_path, ".."))
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir, exist_ok=True)
        os.rename(old_path, new_path)
    else:
        logger.warning(f"‼️ Please note that new_path {new_path} already exists.")
        # destination already exists: merge every entry of old_path into it
        for entry in os.listdir(old_path):
            src = os.path.join(old_path, entry)
            if os.path.isdir(src):
                shutil.copytree(src, os.path.join(new_path, entry))
            else:
                shutil.move(src, new_path)
        # finally remove the now-drained old directory
        shutil.rmtree(old_path, ignore_errors=True)

    logger.info(
        f"Successfully migrated {old_path} to {new_path}, and deleted {old_path}"
    )


def determine_data_home():
    """Resolve the directory TSDB should use for caching datasets.

    Resolution order: a detected pre-v0.3 legacy cache dir (auto-migrated to
    the configured path), then the ``data_home`` path from config.ini if it
    exists, otherwise the default ``~/.tsdb``.

    Returns
    -------
    str
        The resolved cache directory path.
    """
    # read the user-configured data_home from config.ini and expand '~'
    configured_home = read_configs().get("path", "data_home")
    configured_home = configured_home.replace("~", os.path.expanduser("~"))
    legacy_cache_dir = os.path.join(
        os.path.expanduser("~"), ".tsdb_cached_datasets"
    )

    if os.path.exists(legacy_cache_dir):
        # legacy dir from the old TSDB version found: warn and auto-migrate
        logger.warning(
            "‼️ Detected the home dir of the old version TSDB. "
            "Since v0.3, TSDB has changed the default cache dir to '~/.tsdb'. "
            "Auto migrating downloaded datasets to the new path. "
        )
        migrate(legacy_cache_dir, configured_home)
        return configured_home

    if os.path.exists(configured_home):
        # use the configured path directly, may be in a portable disk
        return configured_home

    # configured path missing: fall back to the default location
    default_home = os.path.join(os.path.expanduser("~"), ".tsdb")
    if os.path.abspath(configured_home) != os.path.abspath(default_home):
        logger.warning(
            f"‼️ The preset data_home path '{configured_home}' doesn't exist. "
            f"Using the default path '{default_home}'."
        )
    return default_home


def migrate_cache(target_path: str) -> None:
    """Relocate TSDB's cached datasets to ``target_path`` and make it the default.

    Parameters
    ----------
    target_path:
        The new path for TSDB to store cached datasets.
    """
    # move everything from the current cache dir into target_path
    current_cache_dir = determine_data_home()
    migrate(current_cache_dir, target_path)
    # persist target_path as data_home so future runs pick it up
    write_configs(read_configs(), {"path": {"data_home": target_path}})
    logger.info(f"Have set {target_path} as the default cache dir.")


def pickle_dump(data: object, path: str) -> Optional[str]:
Expand Down Expand Up @@ -90,68 +178,9 @@ def purge_path(path: str, ignore_errors: bool = True) -> None:
if not os.path.exists(path):
logger.info(f"Successfully deleted {path}.")
else:
cached_dataset_dir = determine_data_home()
raise FileExistsError(
f"Deleting operation failed. {CACHED_DATASET_DIR} still exists."
f"Deleting operation failed. {cached_dataset_dir} still exists."
)
except shutil.Error:
raise shutil.Error("Operation failed.")


def migrate(old_path: str, new_path: str) -> None:
    """Migrate files in a directory from old_path to new_path.

    Parameters
    ----------
    old_path:
        The old path of the dataset.
    new_path:
        The new path of the dataset.

    Raises
    ------
    FileNotFoundError
        If ``old_path`` does not exist.
    """
    if not os.path.exists(old_path):
        raise FileNotFoundError(f"Given old_path {old_path} does not exist.")

    if os.path.exists(new_path):
        logger.warning(f"‼️ Please note that new_path {new_path} already exists.")
        # if new_path exists, we have to move everything from old_path into it
        all_old_files = os.listdir(old_path)
        for f in all_old_files:
            old_f_path = os.path.join(old_path, f)
            if os.path.isdir(old_f_path):
                # directories are copied (copytree), files are moved
                new_f_path = os.path.join(new_path, f)
                shutil.copytree(old_f_path, new_f_path)
            else:
                shutil.move(old_f_path, new_path)
        # drop the drained old directory, ignoring leftover-file errors
        shutil.rmtree(old_path, ignore_errors=True)
    else:
        # if new_path does not exist, just rename the old_path into it
        new_parent_dir = os.path.abspath(os.path.join(new_path, ".."))
        if not os.path.exists(new_parent_dir):
            os.makedirs(new_parent_dir, exist_ok=True)
        os.rename(old_path, new_path)

    logger.info(
        f"Successfully migrated {old_path} to {new_path}, and deleted {old_path}"
    )


def migrate_cache(target_path: str) -> None:
    """Migrate cached datasets to ``target_path`` and record it as the default.

    Parameters
    ----------
    target_path:
        The new path for TSDB to store cached datasets.
    """

    # move the whole current cache dir into target_path
    migrate(CACHED_DATASET_DIR, target_path)

    # persist target_path as data_home in config.ini so future runs use it
    config = ConfigParser()
    config.read(tsdb_config_path)
    config.set("path", "data_home", target_path)
    with open(tsdb_config_path, "w") as f:
        config.write(f)

    logger.info(f"Have set {target_path} as the default cache dir.")

0 comments on commit 26ae553

Please sign in to comment.